aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c164
1 files changed, 125 insertions, 39 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 0b0234f9d694..9d908a0ccfef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -8,7 +8,8 @@
enum {
MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
- MLX5_FW_RESET_FLAGS_PENDING_COMP
+ MLX5_FW_RESET_FLAGS_PENDING_COMP,
+ MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
};
struct mlx5_fw_reset {
@@ -57,7 +58,8 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1);
}
-static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
+static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
+ u8 *reset_type, u8 *reset_state)
{
u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
@@ -71,25 +73,67 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *r
*reset_level = MLX5_GET(mfrl_reg, out, reset_level);
if (reset_type)
*reset_type = MLX5_GET(mfrl_reg, out, reset_type);
+ if (reset_state)
+ *reset_state = MLX5_GET(mfrl_reg, out, reset_state);
return 0;
}
int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
{
- return mlx5_reg_mfrl_query(dev, reset_level, reset_type);
+ return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL);
}
-int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel)
+static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
+ struct netlink_ext_ack *extack)
+{
+ u8 reset_state;
+
+ if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
+ goto out;
+
+ switch (reset_state) {
+ case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION:
+ case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered");
+ return -EBUSY;
+ case MLX5_MFRL_REG_RESET_STATE_TIMEOUT:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout");
+ return -ETIMEDOUT;
+ case MLX5_MFRL_REG_RESET_STATE_NACK:
+ NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset");
+ return -EPERM;
+ }
+
+out:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset failed");
+ return -EIO;
+}
+
+int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
+ struct netlink_ext_ack *extack)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
int err;
set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
- err = mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true);
- if (err)
- clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
- return err;
+
+ MLX5_SET(mfrl_reg, in, reset_level, MLX5_MFRL_REG_RESET_LEVEL3);
+ MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel);
+ MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, 1);
+ err = mlx5_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MFRL, 0, 1, false);
+ if (!err)
+ return 0;
+
+ clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
+ if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state))
+ return mlx5_fw_reset_get_reset_state_err(dev, extack);
+
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed");
+ return mlx5_cmd_check(dev, err, in, out);
}
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
@@ -105,44 +149,48 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
complete(&fw_reset->done);
} else {
- mlx5_load_one(dev);
+ mlx5_unload_one(dev);
+ if (mlx5_health_wait_pci_up(dev))
+ mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
+ else
+ mlx5_load_one(dev, false);
devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0,
BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
}
}
-static void mlx5_sync_reset_reload_work(struct work_struct *work)
-{
- struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
- reset_reload_work);
- struct mlx5_core_dev *dev = fw_reset->dev;
- int err;
-
- mlx5_enter_error_state(dev, true);
- mlx5_unload_one(dev);
- err = mlx5_health_wait_pci_up(dev);
- if (err)
- mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
- fw_reset->ret = err;
- mlx5_fw_reset_complete_reload(dev);
-}
-
static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
- del_timer(&fw_reset->timer);
+ del_timer_sync(&fw_reset->timer);
}
-static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already cleared\n");
+ return -EALREADY;
+ }
+
mlx5_stop_sync_reset_poll(dev);
- clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
if (poll_health)
mlx5_start_health_poll(dev);
+ return 0;
+}
+
+static void mlx5_sync_reset_reload_work(struct work_struct *work)
+{
+ struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+ reset_reload_work);
+ struct mlx5_core_dev *dev = fw_reset->dev;
+
+ mlx5_sync_reset_clear_reset_requested(dev, false);
+ mlx5_enter_error_state(dev, true);
+ mlx5_fw_reset_complete_reload(dev);
}
#define MLX5_RESET_POLL_INTERVAL (HZ / 10)
@@ -159,8 +207,10 @@ static void poll_sync_reset(struct timer_list *t)
if (fatal_error) {
mlx5_core_warn(dev, "Got Device Reset\n");
- mlx5_sync_reset_clear_reset_requested(dev, false);
- queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+ if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+ queue_work(fw_reset->wq, &fw_reset->reset_reload_work);
+ else
+ mlx5_core_err(dev, "Device is being removed, Drop new reset work\n");
return;
}
@@ -186,13 +236,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
}
-static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
+static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already set\n");
+ return -EALREADY;
+ }
mlx5_stop_health_poll(dev, true);
- set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
mlx5_start_sync_reset_poll(dev);
+ return 0;
}
static void mlx5_fw_live_patch_event(struct work_struct *work)
@@ -221,7 +275,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
err ? "Failed" : "Sent");
return;
}
- mlx5_sync_reset_set_reset_requested(dev);
+ if (mlx5_sync_reset_set_reset_requested(dev))
+ return;
+
err = mlx5_fw_reset_set_reset_sync_ack(dev);
if (err)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
@@ -303,6 +359,23 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
err = -ETIMEDOUT;
}
+ do {
+ err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &reg16);
+ if (err)
+ return err;
+ if (reg16 == dev_id)
+ break;
+ msleep(20);
+ } while (!time_after(jiffies, timeout));
+
+ if (reg16 == dev_id) {
+ mlx5_core_info(dev, "Firmware responds to PCI config cycles again\n");
+ } else {
+ mlx5_core_err(dev, "Firmware is not responsive (0x%04x) after %llu ms\n",
+ reg16, mlx5_tout_ms(dev, PCI_TOGGLE));
+ err = -ETIMEDOUT;
+ }
+
restore:
list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
pci_cfg_access_unlock(sdev);
@@ -319,7 +392,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
- mlx5_sync_reset_clear_reset_requested(dev, false);
+ if (mlx5_sync_reset_clear_reset_requested(dev, false))
+ return;
mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
@@ -336,7 +410,6 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
}
mlx5_enter_error_state(dev, true);
- mlx5_unload_one(dev);
done:
fw_reset->ret = err;
mlx5_fw_reset_complete_reload(dev);
@@ -348,10 +421,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
reset_abort_work);
struct mlx5_core_dev *dev = fw_reset->dev;
- if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+ if (mlx5_sync_reset_clear_reset_requested(dev, true))
return;
-
- mlx5_sync_reset_clear_reset_requested(dev, true);
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
}
@@ -380,9 +451,12 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti
struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
struct mlx5_eqe *eqe = data;
+ if (test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags))
+ return NOTIFY_DONE;
+
switch (eqe->sub_type) {
case MLX5_GENERAL_SUBTYPE_FW_LIVE_PATCH_EVENT:
- queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
+ queue_work(fw_reset->wq, &fw_reset->fw_live_patch_work);
break;
case MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT:
mlx5_sync_reset_events_handle(fw_reset, eqe);
@@ -426,6 +500,18 @@ void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev)
mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb);
}
+void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+
+ set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags);
+ cancel_work_sync(&fw_reset->fw_live_patch_work);
+ cancel_work_sync(&fw_reset->reset_request_work);
+ cancel_work_sync(&fw_reset->reset_reload_work);
+ cancel_work_sync(&fw_reset->reset_now_work);
+ cancel_work_sync(&fw_reset->reset_abort_work);
+}
+
int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL);