about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/drivers/infiniband/core/cq.c
diff options
context:
space:
mode:
author: Jack Morgenstein <jackm@dev.mellanox.co.il> 2020-12-08 09:35:43 +0200
committer: Jason Gunthorpe <jgg@nvidia.com> 2020-12-10 15:05:17 -0400
commit: 286e1d3f9ba89c7db5eecd30f47f9e333843ea13 (patch)
tree: 8430c166f482bf64563261831d344db7b6ed9a5b /drivers/infiniband/core/cq.c
parent: RDMA/core: Update kernel documentation for ib_create_named_qp() (diff)
download: wireguard-linux-286e1d3f9ba89c7db5eecd30f47f9e333843ea13.tar.xz
wireguard-linux-286e1d3f9ba89c7db5eecd30f47f9e333843ea13.zip
RDMA/core: Clean up cq pool mechanism
The CQ pool mechanism had two problems: 1. The CQ pool lists were uninitialized in the device registration error flow. As a result, all the list pointers remained NULL. This caused the kernel to crash (in procedure ib_cq_pool_destroy) when that error flow was taken (and unregister called). The stack trace snippet: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI . . . RIP: 0010:ib_cq_pool_destroy+0x1b/0x70 [ib_core] . . . Call Trace: disable_device+0x9f/0x130 [ib_core] __ib_unregister_device+0x35/0x90 [ib_core] ib_register_device+0x529/0x610 [ib_core] __mlx5_ib_add+0x3a/0x70 [mlx5_ib] mlx5_add_device+0x87/0x1c0 [mlx5_core] mlx5_register_interface+0x74/0xc0 [mlx5_core] do_one_initcall+0x4b/0x1f4 do_init_module+0x5a/0x223 load_module+0x1938/0x1d40 2. At device unregister, when cleaning up the cq pool, the cq's in the pool lists were freed, but the cq entries were left in the list. The fix for the first issue is to initialize the cq pool lists when the ib_device structure is allocated for a new device (in procedure _ib_alloc_device). The fix for the second problem is to delete cq entries from the pool lists when cleaning up the cq pool. In addition, procedure ib_cq_pool_destroy() is renamed to the more appropriate name ib_cq_pool_cleanup(). Fixes: 4aa1615268a8 ("RDMA/core: Fix ordering of CQ pool destruction") Link: https://lore.kernel.org/r/20201208073545.9723-2-leon@kernel.org Suggested-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband/core/cq.c')
-rw-r--r-- drivers/infiniband/core/cq.c | 12
1 files changed, 2 insertions, 10 deletions
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index d4248bbe74da..433b426729d4 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -349,16 +349,7 @@ void ib_free_cq(struct ib_cq *cq)
}
EXPORT_SYMBOL(ib_free_cq);
-void ib_cq_pool_init(struct ib_device *dev)
-{
- unsigned int i;
-
- spin_lock_init(&dev->cq_pools_lock);
- for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++)
- INIT_LIST_HEAD(&dev->cq_pools[i]);
-}
-
-void ib_cq_pool_destroy(struct ib_device *dev)
+void ib_cq_pool_cleanup(struct ib_device *dev)
{
struct ib_cq *cq, *n;
unsigned int i;
@@ -367,6 +358,7 @@ void ib_cq_pool_destroy(struct ib_device *dev)
list_for_each_entry_safe(cq, n, &dev->cq_pools[i],
pool_entry) {
WARN_ON(cq->cqe_used);
+ list_del(&cq->pool_entry);
cq->shared = false;
ib_free_cq(cq);
}