aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net
diff options
context:
space:
mode:
author Huy Nguyen <huyn@mellanox.com> 2017-10-04 17:58:21 -0500
committer Saeed Mahameed <saeedm@mellanox.com> 2017-10-26 00:47:27 -0700
commit 4ca637a20a524cd8ddbca696f12bfa92111c96e3 (patch)
tree 15cc4d6c5d5a9b2bf5e66ec9cc6fb1415f5ad403 /drivers/net
parent net/mlx5: Fix health work queue spin lock to IRQ safe (diff)
downloadlinux-dev-4ca637a20a524cd8ddbca696f12bfa92111c96e3.tar.xz
linux-dev-4ca637a20a524cd8ddbca696f12bfa92111c96e3.zip
net/mlx5: Delay events till mlx5 interface's add complete for pci resume
mlx5_ib_add is called during mlx5_pci_resume after a pci error. Before mlx5_ib_add completes, multiple events arrive that trigger mlx5_ib_event. This causes a kernel panic because mlx5_ib_event accesses uninitialized resources. The fix is to extend Erez Shitrit's patch <97834eba7c19> ("net/mlx5: Delay events till ib registration ends") to cover the pci resume code path. Trace: mlx5_core 0001:01:00.6: mlx5_pci_resume was called mlx5_core 0001:01:00.6: firmware version: 16.20.1011 mlx5_core 0001:01:00.6: mlx5_attach_interface:164:(pid 779): mlx5_ib_event:2996:(pid 34777): warning: event on port 1 mlx5_ib_event:2996:(pid 34782): warning: event on port 1 Unable to handle kernel paging request for data at address 0x0001c104 Faulting instruction address: 0xd000000008f411fc Oops: Kernel access of bad area, sig: 11 [#1] ... ... Call Trace: [c000000fff77bb70] [d000000008f4119c] mlx5_ib_event+0x64/0x470 [mlx5_ib] (unreliable) [c000000fff77bc60] [d000000008e67130] mlx5_core_event+0xb8/0x210 [mlx5_core] [c000000fff77bd10] [d000000008e4bd00] mlx5_eq_int+0x528/0x860[mlx5_core] Fixes: 97834eba7c19 ("net/mlx5: Delay events till ib registration ends") Signed-off-by: Huy Nguyen <huyn@mellanox.com> Reviewed-by: Saeed Mahameed <saeedm@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Diffstat (limited to 'drivers/net')
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/dev.c 70
1 files changed, 41 insertions, 29 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index ff60cf7342ca..fc281712869b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -77,35 +77,41 @@ static void add_delayed_event(struct mlx5_priv *priv,
list_add_tail(&delayed_event->list, &priv->waiting_events_list);
}
-static void fire_delayed_event_locked(struct mlx5_device_context *dev_ctx,
- struct mlx5_core_dev *dev,
- struct mlx5_priv *priv)
+static void delayed_event_release(struct mlx5_device_context *dev_ctx,
+ struct mlx5_priv *priv)
{
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
struct mlx5_delayed_event *de;
struct mlx5_delayed_event *n;
+ struct list_head temp;
- /* stop delaying events */
- priv->is_accum_events = false;
+ INIT_LIST_HEAD(&temp);
+
+ spin_lock_irq(&priv->ctx_lock);
- /* fire all accumulated events before new event comes */
- list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) {
+ priv->is_accum_events = false;
+ list_splice_init(&priv->waiting_events_list, &temp);
+ if (!dev_ctx->context)
+ goto out;
+ list_for_each_entry_safe(de, n, &priv->waiting_events_list, list)
dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
+
+out:
+ spin_unlock_irq(&priv->ctx_lock);
+
+ list_for_each_entry_safe(de, n, &temp, list) {
list_del(&de->list);
kfree(de);
}
}
-static void cleanup_delayed_evets(struct mlx5_priv *priv)
+/* accumulating events that can come after mlx5_ib calls to
+ * ib_register_device, till adding that interface to the events list.
+ */
+static void delayed_event_start(struct mlx5_priv *priv)
{
- struct mlx5_delayed_event *de;
- struct mlx5_delayed_event *n;
-
spin_lock_irq(&priv->ctx_lock);
- priv->is_accum_events = false;
- list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) {
- list_del(&de->list);
- kfree(de);
- }
+ priv->is_accum_events = true;
spin_unlock_irq(&priv->ctx_lock);
}
@@ -122,11 +128,8 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
return;
dev_ctx->intf = intf;
- /* accumulating events that can come after mlx5_ib calls to
- * ib_register_device, till adding that interface to the events list.
- */
- priv->is_accum_events = true;
+ delayed_event_start(priv);
dev_ctx->context = intf->add(dev);
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
@@ -137,8 +140,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
- fire_delayed_event_locked(dev_ctx, dev, priv);
-
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (dev_ctx->intf->pfault) {
if (priv->pfault) {
@@ -150,11 +151,12 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
}
#endif
spin_unlock_irq(&priv->ctx_lock);
- } else {
- kfree(dev_ctx);
- /* delete all accumulated events */
- cleanup_delayed_evets(priv);
}
+
+ delayed_event_release(dev_ctx, priv);
+
+ if (!dev_ctx->context)
+ kfree(dev_ctx);
}
static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
@@ -205,17 +207,21 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv
if (!dev_ctx)
return;
+ delayed_event_start(priv);
if (intf->attach) {
if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
- return;
+ goto out;
intf->attach(dev, dev_ctx->context);
set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
} else {
if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
- return;
+ goto out;
dev_ctx->context = intf->add(dev);
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
}
+
+out:
+ delayed_event_release(dev_ctx, priv);
}
void mlx5_attach_device(struct mlx5_core_dev *dev)
@@ -414,8 +420,14 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
if (priv->is_accum_events)
add_delayed_event(priv, dev, event, param);
+ /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
+ * still in priv->ctx_list. In this case, only notify the dev_ctx if its
+ * ADDED or ATTACHED bit are set.
+ */
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
- if (dev_ctx->intf->event)
+ if (dev_ctx->intf->event &&
+ (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
+ test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
dev_ctx->intf->event(dev, dev_ctx->context, event, param);
spin_unlock_irqrestore(&priv->ctx_lock, flags);