aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-11-22 20:20:58 -0800
committerJakub Kicinski <kuba@kernel.org>2022-11-22 20:20:59 -0800
commit178a4ff11903cf19f35d82f3462958ab7f7c76ef (patch)
tree8a2124c07a334afc22e3504119f41defa4b97d81
parentipv4: Fix error return code in fib_table_insert() (diff)
parentnet/mlx5e: Fix possible race condition in macsec extended packet number update routine (diff)
downloadwireguard-linux-178a4ff11903cf19f35d82f3462958ab7f7c76ef.tar.xz
wireguard-linux-178a4ff11903cf19f35d82f3462958ab7f7c76ef.zip
Merge tag 'mlx5-fixes-2022-11-21' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says: ==================== mlx5 fixes 2022-11-21 This series provides bug fixes to mlx5 driver. * tag 'mlx5-fixes-2022-11-21' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux: net/mlx5e: Fix possible race condition in macsec extended packet number update routine net/mlx5e: Fix MACsec update SecY net/mlx5e: Fix MACsec SA initialization routine net/mlx5e: Remove leftovers from old XSK queues enumeration net/mlx5e: Offload rule only when all encaps are valid net/mlx5e: Fix missing alignment in size of MTT/KLM entries net/mlx5: Fix sync reset event handler error flow net/mlx5: E-Switch, Set correctly vport destination net/mlx5: Lag, avoid lockdep warnings net/mlx5: Fix handling of entry refcount when command is not issued to FW net/mlx5: cmdif, Print info on any firmware cmd failure to tracepoint net/mlx5: SF: Fix probing active SFs during driver probe phase net/mlx5: Fix FW tracer timestamp calculation net/mlx5: Do not query pci info while pci disabled ==================== Link: https://lore.kernel.org/r/20221122022559.89459-1-saeed@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c47
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c100
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c88
-rw-r--r--include/linux/mlx5/driver.h1
18 files changed, 281 insertions, 118 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 2e0d59ca62b5..74bd05e5dda2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -45,6 +45,8 @@
#include "mlx5_core.h"
#include "lib/eq.h"
#include "lib/tout.h"
+#define CREATE_TRACE_POINTS
+#include "diag/cmd_tracepoint.h"
enum {
CMD_IF_REV = 5,
@@ -785,27 +787,14 @@ EXPORT_SYMBOL(mlx5_cmd_out_err);
static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out)
{
u16 opcode, op_mod;
- u32 syndrome;
- u8 status;
u16 uid;
- int err;
-
- syndrome = MLX5_GET(mbox_out, out, syndrome);
- status = MLX5_GET(mbox_out, out, status);
opcode = MLX5_GET(mbox_in, in, opcode);
op_mod = MLX5_GET(mbox_in, in, op_mod);
uid = MLX5_GET(mbox_in, in, uid);
- err = cmd_status_to_err(status);
-
if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY)
mlx5_cmd_out_err(dev, opcode, op_mod, out);
- else
- mlx5_core_dbg(dev,
- "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n",
- mlx5_command_str(opcode), opcode, op_mod, uid,
- cmd_status_str(status), status, syndrome, err);
}
int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out)
@@ -1016,6 +1005,7 @@ static void cmd_work_handler(struct work_struct *work)
cmd_ent_get(ent);
set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
+ cmd_ent_get(ent); /* for the _real_ FW event on completion */
/* Skip sending command to fw if internal error */
if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
ent->ret = -ENXIO;
@@ -1023,7 +1013,6 @@ static void cmd_work_handler(struct work_struct *work)
return;
}
- cmd_ent_get(ent); /* for the _real_ FW event on completion */
/* ring doorbell after the descriptor is valid */
mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
wmb();
@@ -1672,8 +1661,8 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
cmd_ent_put(ent); /* timeout work was canceled */
if (!forced || /* Real FW completion */
- pci_channel_offline(dev->pdev) || /* FW is inaccessible */
- dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ mlx5_cmd_is_down(dev) || /* No real FW completion is expected */
+ !opcode_allowed(cmd, ent->op))
cmd_ent_put(ent);
ent->ts2 = ktime_get_ns();
@@ -1892,6 +1881,16 @@ out_in:
return err;
}
+static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out)
+{
+ u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
+ u8 status = MLX5_GET(mbox_out, out, status);
+
+ trace_mlx5_cmd(mlx5_command_str(opcode), opcode, op_mod,
+ cmd_status_str(status), status, syndrome,
+ cmd_status_to_err(status));
+}
+
static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
u32 syndrome, int err)
{
@@ -1914,7 +1913,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
}
/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
-static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out)
+static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out)
{
u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
u8 status = MLX5_GET(mbox_out, out, status);
@@ -1922,8 +1921,10 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *
if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */
err = -EIO;
- if (!err && status != MLX5_CMD_STAT_OK)
+ if (!err && status != MLX5_CMD_STAT_OK) {
err = -EREMOTEIO;
+ mlx5_cmd_err_trace(dev, opcode, op_mod, out);
+ }
cmd_status_log(dev, opcode, status, syndrome, err);
return err;
@@ -1951,9 +1952,9 @@ int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int
{
int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
u16 opcode = MLX5_GET(mbox_in, in, opcode);
+ u16 op_mod = MLX5_GET(mbox_in, in, op_mod);
- err = cmd_status_err(dev, err, opcode, out);
- return err;
+ return cmd_status_err(dev, err, opcode, op_mod, out);
}
EXPORT_SYMBOL(mlx5_cmd_do);
@@ -1997,8 +1998,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
{
int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
u16 opcode = MLX5_GET(mbox_in, in, opcode);
+ u16 op_mod = MLX5_GET(mbox_in, in, op_mod);
- err = cmd_status_err(dev, err, opcode, out);
+ err = cmd_status_err(dev, err, opcode, op_mod, out);
return mlx5_cmd_check(dev, err, in, out);
}
EXPORT_SYMBOL(mlx5_cmd_exec_polling);
@@ -2034,7 +2036,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work)
struct mlx5_async_ctx *ctx;
ctx = work->ctx;
- status = cmd_status_err(ctx->dev, status, work->opcode, work->out);
+ status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out);
work->user_callback(status, work);
if (atomic_dec_and_test(&ctx->num_inflight))
complete(&ctx->inflight_done);
@@ -2049,6 +2051,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
work->ctx = ctx;
work->user_callback = callback;
work->opcode = MLX5_GET(mbox_in, in, opcode);
+ work->op_mod = MLX5_GET(mbox_in, in, op_mod);
work->out = out;
if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
return -EIO;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h
new file mode 100644
index 000000000000..406ebe17405f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_CMD_TP_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_CMD_TP_H_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+TRACE_EVENT(mlx5_cmd,
+ TP_PROTO(const char *command_str, u16 opcode, u16 op_mod,
+ const char *status_str, u8 status, u32 syndrome, int err),
+ TP_ARGS(command_str, opcode, op_mod, status_str, status, syndrome, err),
+ TP_STRUCT__entry(__string(command_str, command_str)
+ __field(u16, opcode)
+ __field(u16, op_mod)
+ __string(status_str, status_str)
+ __field(u8, status)
+ __field(u32, syndrome)
+ __field(int, err)
+ ),
+ TP_fast_assign(__assign_str(command_str, command_str);
+ __entry->opcode = opcode;
+ __entry->op_mod = op_mod;
+ __assign_str(status_str, status_str);
+ __entry->status = status;
+ __entry->syndrome = syndrome;
+ __entry->err = err;
+ ),
+ TP_printk("%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)",
+ __get_str(command_str), __entry->opcode, __entry->op_mod,
+ __get_str(status_str), __entry->status, __entry->syndrome,
+ __entry->err)
+);
+
+#endif /* _MLX5_CMD_TP_H_ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE cmd_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 978a2bb8e122..21831386b26e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -638,7 +638,7 @@ static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer,
trace_timestamp = (timestamp_event.timestamp & MASK_52_7) |
(str_frmt->timestamp & MASK_6_0);
else
- trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) |
+ trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) |
(str_frmt->timestamp & MASK_6_0);
mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 5aff97914367..ff73d25bc6eb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -224,15 +224,16 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
list_for_each_entry(flow, flow_list, tmp_list) {
if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
continue;
- spec = &flow->attr->parse_attr->spec;
-
- /* update from encap rule to slow path rule */
- rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
attr = mlx5e_tc_get_encap_attr(flow);
esw_attr = attr->esw_attr;
/* mark the flow's encap dest as non-valid */
esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
+ esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
+
+ /* update from encap rule to slow path rule */
+ spec = &flow->attr->parse_attr->spec;
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -251,6 +252,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
/* we know that the encap is valid */
e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ e->pkt_reformat = NULL;
}
static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
@@ -762,8 +764,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
int out_index,
struct netlink_ext_ack *extack,
- struct net_device **encap_dev,
- bool *encap_valid)
+ struct net_device **encap_dev)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
@@ -878,9 +879,8 @@ attach_flow:
if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
- *encap_valid = true;
} else {
- *encap_valid = false;
+ flow_flag_set(flow, SLOW);
}
mutex_unlock(&esw->offloads.encap_tbl_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
index d542b8476491..8ad273dde40e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
@@ -17,8 +17,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
int out_index,
struct netlink_ext_ack *extack,
- struct net_device **encap_dev,
- bool *encap_valid);
+ struct net_device **encap_dev);
int mlx5e_attach_decap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
index 2ef36cb9555a..3dc6c987b8da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -368,15 +368,15 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
obj_attrs.aso_pdn = macsec->aso.pdn;
obj_attrs.epn_state = sa->epn_state;
- if (is_tx) {
- obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci);
- key = &ctx->sa.tx_sa->key;
- } else {
- obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci);
- key = &ctx->sa.rx_sa->key;
+ key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key;
+
+ if (sa->epn_state.epn_enabled) {
+ obj_attrs.ssci = (is_tx) ? cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) :
+ cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci);
+
+ memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt));
}
- memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt));
obj_attrs.replay_window = ctx->secy->replay_window;
obj_attrs.replay_protect = ctx->secy->replay_protect;
@@ -1155,7 +1155,7 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx,
continue;
if (rx_sa->active) {
- err = mlx5e_macsec_init_sa(ctx, rx_sa, false, false);
+ err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false);
if (err)
goto out;
}
@@ -1536,6 +1536,8 @@ static void macsec_async_event(struct work_struct *work)
async_work = container_of(work, struct mlx5e_macsec_async_work, work);
macsec = async_work->macsec;
+ mutex_lock(&macsec->lock);
+
mdev = async_work->mdev;
obj_id = async_work->obj_id;
macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id);
@@ -1557,6 +1559,7 @@ static void macsec_async_event(struct work_struct *work)
out_async_work:
kfree(async_work);
+ mutex_unlock(&macsec->lock);
}
static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 24aa25da482b..1728e197558d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -35,7 +35,6 @@
#include "en.h"
#include "en/port.h"
#include "en/params.h"
-#include "en/xsk/pool.h"
#include "en/ptp.h"
#include "lib/clock.h"
#include "en/fs_ethtool.h"
@@ -412,15 +411,8 @@ void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
struct ethtool_channels *ch)
{
mutex_lock(&priv->state_lock);
-
ch->max_combined = priv->max_nch;
ch->combined_count = priv->channels.params.num_channels;
- if (priv->xsk.refcnt) {
- /* The upper half are XSK queues. */
- ch->max_combined *= 2;
- ch->combined_count *= 2;
- }
-
mutex_unlock(&priv->state_lock);
}
@@ -454,16 +446,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
mutex_lock(&priv->state_lock);
- /* Don't allow changing the number of channels if there is an active
- * XSK, because the numeration of the XSK and regular RQs will change.
- */
- if (priv->xsk.refcnt) {
- err = -EINVAL;
- netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n",
- __func__);
- goto out;
- }
-
/* Don't allow changing the number of channels if HTB offload is active,
* because the numeration of the QoS SQs will change, while per-queue
* qdiscs are attached.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e3a4f01bcceb..5e41dfdf79c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -206,10 +206,11 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv)
static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode)
{
u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u32 sz;
- WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD);
+ sz = ALIGN(entries * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT);
- return entries * umr_entry_size / MLX5_OCTWORD;
+ return sz / MLX5_OCTWORD;
}
static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 5a6aa61ec82a..bd9936af4582 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1634,7 +1634,6 @@ set_encap_dests(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr,
struct netlink_ext_ack *extack,
- bool *encap_valid,
bool *vf_tun)
{
struct mlx5e_tc_flow_parse_attr *parse_attr;
@@ -1651,7 +1650,6 @@ set_encap_dests(struct mlx5e_priv *priv,
parse_attr = attr->parse_attr;
esw_attr = attr->esw_attr;
*vf_tun = false;
- *encap_valid = true;
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
struct net_device *out_dev;
@@ -1668,7 +1666,7 @@ set_encap_dests(struct mlx5e_priv *priv,
goto out;
}
err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
- extack, &encap_dev, encap_valid);
+ extack, &encap_dev);
dev_put(out_dev);
if (err)
goto out;
@@ -1732,8 +1730,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
- bool vf_tun, encap_valid;
u32 max_prio, max_chain;
+ bool vf_tun;
int err = 0;
parse_attr = attr->parse_attr;
@@ -1823,7 +1821,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
esw_attr->int_port = int_port;
}
- err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun);
+ err = set_encap_dests(priv, flow, attr, extack, &vf_tun);
if (err)
goto err_out;
@@ -1853,7 +1851,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
* (1) there's no error
* (2) there's an encap action and we don't have valid neigh
*/
- if (!encap_valid || flow_flag_test(flow, SLOW))
+ if (flow_flag_test(flow, SLOW))
flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
else
flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
@@ -3759,7 +3757,7 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
struct mlx5e_post_act *post_act = get_post_action(flow->priv);
struct mlx5_flow_attr *attr, *next_attr = NULL;
struct mlx5e_post_act_handle *handle;
- bool vf_tun, encap_valid = true;
+ bool vf_tun;
int err;
/* This is going in reverse order as needed.
@@ -3781,13 +3779,10 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
if (list_is_last(&attr->list, &flow->attrs))
break;
- err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun);
+ err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun);
if (err)
goto out_free;
- if (!encap_valid)
- flow_flag_set(flow, SLOW);
-
err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
if (err)
goto out_free;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 728ca9f2bb9d..3fda75fe168c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -433,7 +433,7 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
mlx5_lag_mpesw_is_activated(esw->dev))
dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
}
- if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
+ if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) {
if (pkt_reformat) {
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 9d908a0ccfef..1e46f9afa40e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -9,7 +9,8 @@ enum {
MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
MLX5_FW_RESET_FLAGS_PENDING_COMP,
- MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
+ MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
+ MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED
};
struct mlx5_fw_reset {
@@ -406,7 +407,7 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
err = mlx5_pci_link_toggle(dev);
if (err) {
mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
- goto done;
+ set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags);
}
mlx5_enter_error_state(dev, true);
@@ -482,6 +483,10 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
goto out;
}
err = fw_reset->ret;
+ if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) {
+ mlx5_unload_one_devl_locked(dev);
+ mlx5_load_one_devl_locked(dev, false);
+ }
out:
clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index a9f4ede4a9bf..be1307a63e6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -228,9 +228,8 @@ static void mlx5_ldev_free(struct kref *ref)
if (ldev->nb.notifier_call)
unregister_netdevice_notifier_net(&init_net, &ldev->nb);
mlx5_lag_mp_cleanup(ldev);
- mlx5_lag_mpesw_cleanup(ldev);
- cancel_work_sync(&ldev->mpesw_work);
destroy_workqueue(ldev->wq);
+ mlx5_lag_mpesw_cleanup(ldev);
mutex_destroy(&ldev->lock);
kfree(ldev);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index ce2ce8ccbd70..f30ac2de639f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -50,6 +50,19 @@ struct lag_tracker {
enum netdev_lag_hash hash_type;
};
+enum mpesw_op {
+ MLX5_MPESW_OP_ENABLE,
+ MLX5_MPESW_OP_DISABLE,
+};
+
+struct mlx5_mpesw_work_st {
+ struct work_struct work;
+ struct mlx5_lag *lag;
+ enum mpesw_op op;
+ struct completion comp;
+ int result;
+};
+
/* LAG data of a ConnectX card.
* It serves both its phys functions.
*/
@@ -66,7 +79,6 @@ struct mlx5_lag {
struct lag_tracker tracker;
struct workqueue_struct *wq;
struct delayed_work bond_work;
- struct work_struct mpesw_work;
struct notifier_block nb;
struct lag_mp lag_mp;
struct mlx5_lag_port_sel port_sel;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index f643202b29c6..c17e8f1ec914 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -7,63 +7,95 @@
#include "eswitch.h"
#include "lib/mlx5.h"
-void mlx5_mpesw_work(struct work_struct *work)
+static int add_mpesw_rule(struct mlx5_lag *ldev)
{
- struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, mpesw_work);
+ struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+ int err;
- mutex_lock(&ldev->lock);
- mlx5_disable_lag(ldev);
- mutex_unlock(&ldev->lock);
-}
+ if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1)
+ return 0;
-static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev)
-{
- struct mlx5_lag *ldev = dev->priv.lag;
+ if (ldev->mode != MLX5_LAG_MODE_NONE) {
+ err = -EINVAL;
+ goto out_err;
+ }
- if (!queue_work(ldev->wq, &ldev->mpesw_work))
- mlx5_core_warn(dev, "failed to queue work\n");
+ err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
+ if (err) {
+ mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
+ goto out_err;
+ }
+
+ return 0;
+
+out_err:
+ atomic_dec(&ldev->lag_mpesw.mpesw_rule_count);
+ return err;
}
-void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
+static void del_mpesw_rule(struct mlx5_lag *ldev)
{
- struct mlx5_lag *ldev = dev->priv.lag;
+ if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) &&
+ ldev->mode == MLX5_LAG_MODE_MPESW)
+ mlx5_disable_lag(ldev);
+}
- if (!ldev)
- return;
+static void mlx5_mpesw_work(struct work_struct *work)
+{
+ struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work);
+ struct mlx5_lag *ldev = mpesww->lag;
mutex_lock(&ldev->lock);
- if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) &&
- ldev->mode == MLX5_LAG_MODE_MPESW)
- mlx5_lag_disable_mpesw(dev);
+ if (mpesww->op == MLX5_MPESW_OP_ENABLE)
+ mpesww->result = add_mpesw_rule(ldev);
+ else if (mpesww->op == MLX5_MPESW_OP_DISABLE)
+ del_mpesw_rule(ldev);
mutex_unlock(&ldev->lock);
+
+ complete(&mpesww->comp);
}
-int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
+static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev,
+ enum mpesw_op op)
{
struct mlx5_lag *ldev = dev->priv.lag;
+ struct mlx5_mpesw_work_st *work;
int err = 0;
if (!ldev)
return 0;
- mutex_lock(&ldev->lock);
- if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1)
- goto out;
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
- if (ldev->mode != MLX5_LAG_MODE_NONE) {
+ INIT_WORK(&work->work, mlx5_mpesw_work);
+ init_completion(&work->comp);
+ work->op = op;
+ work->lag = ldev;
+
+ if (!queue_work(ldev->wq, &work->work)) {
+ mlx5_core_warn(dev, "failed to queue mpesw work\n");
err = -EINVAL;
goto out;
}
-
- err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
- if (err)
- mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
-
+ wait_for_completion(&work->comp);
+ err = work->result;
out:
- mutex_unlock(&ldev->lock);
+ kfree(work);
return err;
}
+void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
+{
+ mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE);
+}
+
+int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
+{
+ return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE);
+}
+
int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev)
{
struct mlx5_lag *ldev = mdev->priv.lag;
@@ -71,12 +103,9 @@ int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev)
if (!netif_is_bond_master(out_dev) || !ldev)
return 0;
- mutex_lock(&ldev->lock);
- if (ldev->mode == MLX5_LAG_MODE_MPESW) {
- mutex_unlock(&ldev->lock);
+ if (ldev->mode == MLX5_LAG_MODE_MPESW)
return -EOPNOTSUPP;
- }
- mutex_unlock(&ldev->lock);
+
return 0;
}
@@ -90,11 +119,10 @@ bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev)
void mlx5_lag_mpesw_init(struct mlx5_lag *ldev)
{
- INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work);
atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0);
}
void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev)
{
- cancel_delayed_work_sync(&ldev->bond_work);
+ WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count));
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
index be4abcb8fcd5..88e8daffcf92 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
@@ -12,7 +12,6 @@ struct lag_mpesw {
atomic_t mpesw_rule_count;
};
-void mlx5_mpesw_work(struct work_struct *work);
int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev);
bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev);
#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 283c4cc28944..e58775a7d955 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1798,7 +1798,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
res = state == pci_channel_io_perm_failure ?
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
- mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res));
+ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n",
+ __func__, dev->state, dev->pci_status, res, result2str(res));
return res;
}
@@ -1837,7 +1838,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
- mlx5_pci_trace(dev, "Enter\n");
+ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n",
+ __func__, dev->state, dev->pci_status);
err = mlx5_pci_enable_device(dev);
if (err) {
@@ -1859,7 +1861,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
res = PCI_ERS_RESULT_RECOVERED;
out:
- mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res));
+ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n",
+ __func__, dev->state, dev->pci_status, err, res, result2str(res));
return res;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
index 7da012ff0d41..8e2abbab05f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
@@ -18,6 +18,10 @@ struct mlx5_sf_dev_table {
phys_addr_t base_address;
u64 sf_bar_length;
struct notifier_block nb;
+ struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */
+ struct workqueue_struct *active_wq;
+ struct work_struct work;
+ u8 stop_active_wq:1;
struct mlx5_core_dev *dev;
};
@@ -168,6 +172,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
return 0;
sf_index = event->function_id - base_id;
+ mutex_lock(&table->table_lock);
sf_dev = xa_load(&table->devices, sf_index);
switch (event->new_vhca_state) {
case MLX5_VHCA_STATE_INVALID:
@@ -191,6 +196,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
default:
break;
}
+ mutex_unlock(&table->table_lock);
return 0;
}
@@ -215,6 +221,78 @@ static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table)
return 0;
}
+static void mlx5_sf_dev_add_active_work(struct work_struct *work)
+{
+ struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work);
+ u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
+ struct mlx5_core_dev *dev = table->dev;
+ u16 max_functions;
+ u16 function_id;
+ u16 sw_func_id;
+ int err = 0;
+ u8 state;
+ int i;
+
+ max_functions = mlx5_sf_max_functions(dev);
+ function_id = MLX5_CAP_GEN(dev, sf_base_id);
+ for (i = 0; i < max_functions; i++, function_id++) {
+ if (table->stop_active_wq)
+ return;
+ err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out));
+ if (err)
+ /* A failure of specific vhca doesn't mean others will
+ * fail as well.
+ */
+ continue;
+ state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
+ if (state != MLX5_VHCA_STATE_ACTIVE)
+ continue;
+
+ sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id);
+ mutex_lock(&table->table_lock);
+ /* Don't probe device which is already probe */
+ if (!xa_load(&table->devices, i))
+ mlx5_sf_dev_add(dev, i, function_id, sw_func_id);
+ /* There is a race where SF got inactive after the query
+ * above. e.g.: the query returns that the state of the
+ * SF is active, and after that the eswitch manager set it to
+ * inactive.
+ * This case cannot be managed in SW, since the probing of the
+ * SF is on one system, and the inactivation is on a different
+ * system.
+ * If the inactive is done after the SF perform init_hca(),
+ * the SF will fully probe and then removed. If it was
+ * done before init_hca(), the SF probe will fail.
+ */
+ mutex_unlock(&table->table_lock);
+ }
+}
+
+/* In case SFs are generated externally, probe active SFs */
+static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table)
+{
+ if (MLX5_CAP_GEN(table->dev, eswitch_manager))
+ return 0; /* the table is local */
+
+ /* Use a workqueue to probe active SFs, which are in large
+ * quantity and may take up to minutes to probe.
+ */
+ table->active_wq = create_singlethread_workqueue("mlx5_active_sf");
+ if (!table->active_wq)
+ return -ENOMEM;
+ INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work);
+ queue_work(table->active_wq, &table->work);
+ return 0;
+}
+
+static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table)
+{
+ if (table->active_wq) {
+ table->stop_active_wq = true;
+ destroy_workqueue(table->active_wq);
+ }
+}
+
void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
{
struct mlx5_sf_dev_table *table;
@@ -240,11 +318,17 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
table->base_address = pci_resource_start(dev->pdev, 2);
table->max_sfs = max_sfs;
xa_init(&table->devices);
+ mutex_init(&table->table_lock);
dev->priv.sf_dev_table = table;
err = mlx5_vhca_event_notifier_register(dev, &table->nb);
if (err)
goto vhca_err;
+
+ err = mlx5_sf_dev_queue_active_work(table);
+ if (err)
+ goto add_active_err;
+
err = mlx5_sf_dev_vhca_arm_all(table);
if (err)
goto arm_err;
@@ -252,6 +336,8 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
return;
arm_err:
+ mlx5_sf_dev_destroy_active_work(table);
+add_active_err:
mlx5_vhca_event_notifier_unregister(dev, &table->nb);
vhca_err:
table->max_sfs = 0;
@@ -279,7 +365,9 @@ void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev)
if (!table)
return;
+ mlx5_sf_dev_destroy_active_work(table);
mlx5_vhca_event_notifier_unregister(dev, &table->nb);
+ mutex_destroy(&table->table_lock);
/* Now that event handler is not running, it is safe to destroy
* the sf device without race.
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index af2ceb4160bc..06cbad166225 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -981,6 +981,7 @@ struct mlx5_async_work {
struct mlx5_async_ctx *ctx;
mlx5_async_cbk_t user_callback;
u16 opcode; /* cmd opcode */
+ u16 op_mod; /* cmd op_mod */
void *out; /* pointer to the cmd output buffer */
};