aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c256
1 files changed, 126 insertions, 130 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 6e54fefea410..bfed558637c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -1,15 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019 Mellanox Technologies. */
-#include "reporter.h"
-#include "lib/eq.h"
-
-#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256
-
-struct mlx5e_tx_err_ctx {
- int (*recover)(struct mlx5e_txqsq *sq);
- struct mlx5e_txqsq *sq;
-};
+#include "health.h"
static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
{
@@ -39,42 +31,18 @@ static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
sq->pc = 0;
}
-static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
-{
- struct mlx5_core_dev *mdev = sq->channel->mdev;
- struct net_device *dev = sq->channel->netdev;
- struct mlx5e_modify_sq_param msp = {0};
- int err;
-
- msp.curr_state = curr_state;
- msp.next_state = MLX5_SQC_STATE_RST;
-
- err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
- if (err) {
- netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
- return err;
- }
-
- memset(&msp, 0, sizeof(msp));
- msp.curr_state = MLX5_SQC_STATE_RST;
- msp.next_state = MLX5_SQC_STATE_RDY;
-
- err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
- if (err) {
- netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
- return err;
- }
-
- return 0;
-}
-
-static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq)
+static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
{
- struct mlx5_core_dev *mdev = sq->channel->mdev;
- struct net_device *dev = sq->channel->netdev;
+ struct mlx5_core_dev *mdev;
+ struct net_device *dev;
+ struct mlx5e_txqsq *sq;
u8 state;
int err;
+ sq = ctx;
+ mdev = sq->channel->mdev;
+ dev = sq->channel->netdev;
+
if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
return 0;
@@ -82,145 +50,130 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq)
if (err) {
netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
sq->sqn, err);
- return err;
+ goto out;
}
- if (state != MLX5_SQC_STATE_ERR) {
- netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
- return -EINVAL;
- }
+ if (state != MLX5_SQC_STATE_ERR)
+ goto out;
mlx5e_tx_disable_queue(sq->txq);
err = mlx5e_wait_for_sq_flush(sq);
if (err)
- return err;
+ goto out;
/* At this point, no new packets will arrive from the stack as TXQ is
* marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
* pending WQEs. SQ can safely reset the SQ.
*/
- err = mlx5e_sq_to_ready(sq, state);
+ err = mlx5e_health_sq_to_ready(sq->channel, sq->sqn);
if (err)
- return err;
+ goto out;
mlx5e_reset_txqsq_cc_pc(sq);
sq->stats->recover++;
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
mlx5e_activate_txqsq(sq);
return 0;
+out:
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ return err;
}
-static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter,
- char *err_str,
- struct mlx5e_tx_err_ctx *err_ctx)
-{
- if (!tx_reporter) {
- netdev_err(err_ctx->sq->channel->netdev, err_str);
- return err_ctx->recover(err_ctx->sq);
- }
-
- return devlink_health_report(tx_reporter, err_str, err_ctx);
-}
-
-void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq)
+void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
{
- char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
- struct mlx5e_tx_err_ctx err_ctx = {0};
+ struct mlx5e_priv *priv = sq->channel->priv;
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_err_ctx err_ctx = {0};
- err_ctx.sq = sq;
- err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
+ err_ctx.ctx = sq;
+ err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
- mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str,
- &err_ctx);
+ mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
}
-static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq)
+static int mlx5e_tx_reporter_timeout_recover(void *ctx)
{
- struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
- u32 eqe_count;
-
- netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
- eq->core.eqn, eq->core.cons_index, eq->core.irqn);
+ struct mlx5_eq_comp *eq;
+ struct mlx5e_txqsq *sq;
+ int err;
- eqe_count = mlx5_eq_poll_irq_disabled(eq);
- if (!eqe_count) {
+ sq = ctx;
+ eq = sq->cq.mcq.eq;
+ err = mlx5e_health_channel_eq_recover(eq, sq->channel);
+ if (err)
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- return -EIO;
- }
- netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n",
- eqe_count, eq->core.eqn);
- sq->channel->stats->eq_rearm++;
- return 0;
+ return err;
}
-int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq)
+int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
{
- char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
- struct mlx5e_tx_err_ctx err_ctx;
+ struct mlx5e_priv *priv = sq->channel->priv;
+ char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+ struct mlx5e_err_ctx err_ctx;
- err_ctx.sq = sq;
- err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
+ err_ctx.ctx = sq;
+ err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
sprintf(err_str,
"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
jiffies_to_usecs(jiffies - sq->txq->trans_start));
- return mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str,
- &err_ctx);
+ return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
}
/* state lock cannot be grabbed within this function.
* It can cause a dead lock or a read-after-free.
*/
-static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx)
+static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
{
- return err_ctx->recover(err_ctx->sq);
-}
-
-static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv)
-{
- int err = 0;
-
- rtnl_lock();
- mutex_lock(&priv->state_lock);
-
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
- goto out;
-
- err = mlx5e_safe_reopen_channels(priv);
-
-out:
- mutex_unlock(&priv->state_lock);
- rtnl_unlock();
-
- return err;
+ return err_ctx->recover(err_ctx->ctx);
}
static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
void *context)
{
struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
- struct mlx5e_tx_err_ctx *err_ctx = context;
+ struct mlx5e_err_ctx *err_ctx = context;
return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
- mlx5e_tx_reporter_recover_all(priv);
+ mlx5e_health_recover_channels(priv);
}
static int
mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
- u32 sqn, u8 state, bool stopped)
+ struct mlx5e_txqsq *sq, int tc)
{
+ struct mlx5e_priv *priv = sq->channel->priv;
+ bool stopped = netif_xmit_stopped(sq->txq);
+ u8 state;
int err;
+ err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
+ if (err)
+ return err;
+
err = devlink_fmsg_obj_nest_start(fmsg);
if (err)
return err;
- err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sqn);
+ err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
if (err)
return err;
@@ -232,6 +185,18 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
if (err)
return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
+ if (err)
+ return err;
+
+ err = mlx5e_reporter_cq_diagnose(&sq->cq, fmsg);
+ if (err)
+ return err;
+
err = devlink_fmsg_obj_nest_end(fmsg);
if (err)
return err;
@@ -243,31 +208,61 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
struct devlink_fmsg *fmsg)
{
struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
- int i, err = 0;
+ struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
+ u32 sq_stride, sq_sz;
+
+ int i, tc, err = 0;
mutex_lock(&priv->state_lock);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
goto unlock;
+ sq_sz = mlx5_wq_cyc_get_size(&generic_sq->wq);
+ sq_stride = MLX5_SEND_WQE_BB;
+
+ err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common Config");
+ if (err)
+ goto unlock;
+
+ err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ");
+ if (err)
+ goto unlock;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
+ if (err)
+ goto unlock;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
+ if (err)
+ goto unlock;
+
+ err = mlx5e_reporter_cq_common_diagnose(&generic_sq->cq, fmsg);
+ if (err)
+ goto unlock;
+
+ err = mlx5e_reporter_named_obj_nest_end(fmsg);
+ if (err)
+ goto unlock;
+
+ err = mlx5e_reporter_named_obj_nest_end(fmsg);
+ if (err)
+ goto unlock;
+
err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
if (err)
goto unlock;
- for (i = 0; i < priv->channels.num * priv->channels.params.num_tc;
- i++) {
- struct mlx5e_txqsq *sq = priv->txq2sq[i];
- u8 state;
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
- err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
- if (err)
- goto unlock;
+ for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+ struct mlx5e_txqsq *sq = &c->sq[tc];
- err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn,
- state,
- netif_xmit_stopped(sq->txq));
- if (err)
- goto unlock;
+ err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
+ if (err)
+ goto unlock;
+ }
}
err = devlink_fmsg_arr_pair_nest_end(fmsg);
if (err)
@@ -286,12 +281,13 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
-int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
+int mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
{
struct devlink_health_reporter *reporter;
struct mlx5_core_dev *mdev = priv->mdev;
- struct devlink *devlink = priv_to_devlink(mdev);
+ struct devlink *devlink;
+ devlink = priv_to_devlink(mdev);
reporter =
devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
MLX5_REPORTER_TX_GRACEFUL_PERIOD,
@@ -306,7 +302,7 @@ int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
return 0;
}
-void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
+void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
{
if (!priv->tx_reporter)
return;