From 621e55ff5b8e0ab5d1063f0eae0ef3960bef8f6e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 31 Jul 2019 11:18:40 +0300 Subject: RDMA/devices: Do not deadlock during client removal lockdep reports: WARNING: possible circular locking dependency detected modprobe/302 is trying to acquire lock: 0000000007c8919c ((wq_completion)ib_cm){+.+.}, at: flush_workqueue+0xdf/0x990 but task is already holding lock: 000000002d3d2ca9 (&device->client_data_rwsem){++++}, at: remove_client_context+0x79/0xd0 [ib_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&device->client_data_rwsem){++++}: down_read+0x3f/0x160 ib_get_net_dev_by_params+0xd5/0x200 [ib_core] cma_ib_req_handler+0x5f6/0x2090 [rdma_cm] cm_process_work+0x29/0x110 [ib_cm] cm_req_handler+0x10f5/0x1c00 [ib_cm] cm_work_handler+0x54c/0x311d [ib_cm] process_one_work+0x4aa/0xa30 worker_thread+0x62/0x5b0 kthread+0x1ca/0x1f0 ret_from_fork+0x24/0x30 -> #1 ((work_completion)(&(&work->work)->work)){+.+.}: process_one_work+0x45f/0xa30 worker_thread+0x62/0x5b0 kthread+0x1ca/0x1f0 ret_from_fork+0x24/0x30 -> #0 ((wq_completion)ib_cm){+.+.}: lock_acquire+0xc8/0x1d0 flush_workqueue+0x102/0x990 cm_remove_one+0x30e/0x3c0 [ib_cm] remove_client_context+0x94/0xd0 [ib_core] disable_device+0x10a/0x1f0 [ib_core] __ib_unregister_device+0x5a/0xe0 [ib_core] ib_unregister_device+0x21/0x30 [ib_core] mlx5_ib_stage_ib_reg_cleanup+0x9/0x10 [mlx5_ib] __mlx5_ib_remove+0x3d/0x70 [mlx5_ib] mlx5_ib_remove+0x12e/0x140 [mlx5_ib] mlx5_remove_device+0x144/0x150 [mlx5_core] mlx5_unregister_interface+0x3f/0xf0 [mlx5_core] mlx5_ib_cleanup+0x10/0x3a [mlx5_ib] __x64_sys_delete_module+0x227/0x350 do_syscall_64+0xc3/0x6a4 entry_SYSCALL_64_after_hwframe+0x49/0xbe Which is due to the read side of the client_data_rwsem being obtained recursively through a work queue flush during cm client removal. The lock is being held across the remove in remove_client_context() so that the function is a fence, once it returns the client is removed. This is required so that the two callers do not proceed with destruction until the client completes removal. Instead of using client_data_rwsem use the existing device unregistration refcount and add a similar client unregistration (client->uses) refcount. This will fence the two unregistration paths without holding any locks. Cc: Fixes: 921eab1143aa ("RDMA/devices: Re-organize device.c locking") Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190731081841.32345-2-leon@kernel.org Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/rdma/ib_verbs.h') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c5f8a9f17063..7b80ec822043 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2647,6 +2647,9 @@ struct ib_client { const union ib_gid *gid, const struct sockaddr *addr, void *client_data); + + refcount_t uses; + struct completion uses_zero; struct list_head list; u32 client_id; -- cgit v1.2.3-59-g8ed1b From 9cd5881719e9555cae300ec8b389eda3c8101339 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 31 Jul 2019 11:18:41 +0300 Subject: RDMA/devices: Remove the lock around remove_client_context Due to the complexity of client->remove() callbacks it is desirable to not hold any locks while calling them. Remove the last one by tracking only the highest client ID and running backwards from there over the xarray. Since the only purpose of that lock was to protect the linked list, we can drop the lock. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190731081841.32345-3-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 48 ++++++++++++++++++++++------------------ include/rdma/ib_verbs.h | 1 - 2 files changed, 27 insertions(+), 22 deletions(-) (limited to 'include/rdma/ib_verbs.h') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d86fbabe48d6..ea8661a00651 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -94,7 +94,7 @@ static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC); static DECLARE_RWSEM(devices_rwsem); #define DEVICE_REGISTERED XA_MARK_1 -static LIST_HEAD(client_list); +static u32 highest_client_id; #define CLIENT_REGISTERED XA_MARK_1 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC); static DECLARE_RWSEM(clients_rwsem); @@ -1237,7 +1237,7 @@ static int setup_device(struct ib_device *device) static void disable_device(struct ib_device *device) { - struct ib_client *client; + u32 cid; WARN_ON(!refcount_read(&device->refcount)); @@ -1245,10 +1245,19 @@ static void disable_device(struct ib_device *device) xa_clear_mark(&devices, device->index, DEVICE_REGISTERED); up_write(&devices_rwsem); + /* + * Remove clients in LIFO order, see assign_client_id. This could be + * more efficient if xarray learns to reverse iterate. Since no new + * clients can be added to this ib_device past this point we only need + * the maximum possible client_id value here. + */ down_read(&clients_rwsem); - list_for_each_entry_reverse(client, &client_list, list) - remove_client_context(device, client->client_id); + cid = highest_client_id; up_read(&clients_rwsem); + while (cid) { + cid--; + remove_client_context(device, cid); + } /* Pairs with refcount_set in enable_device */ ib_device_put(device); @@ -1675,30 +1684,31 @@ static int assign_client_id(struct ib_client *client) /* * The add/remove callbacks must be called in FIFO/LIFO order. To * achieve this we assign client_ids so they are sorted in - * registration order, and retain a linked list we can reverse iterate - * to get the LIFO order. The extra linked list can go away if xarray - * learns to reverse iterate. + * registration order. */ - if (list_empty(&client_list)) { - client->client_id = 0; - } else { - struct ib_client *last; - - last = list_last_entry(&client_list, struct ib_client, list); - client->client_id = last->client_id + 1; - } + client->client_id = highest_client_id; ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); if (ret) goto out; + highest_client_id++; xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); - list_add_tail(&client->list, &client_list); out: up_write(&clients_rwsem); return ret; } +static void remove_client_id(struct ib_client *client) +{ + down_write(&clients_rwsem); + xa_erase(&clients, client->client_id); + for (; highest_client_id; highest_client_id--) + if (xa_load(&clients, highest_client_id - 1)) + break; + up_write(&clients_rwsem); +} + /** * ib_register_client - Register an IB client * @client:Client to register @@ -1778,11 +1788,7 @@ void ib_unregister_client(struct ib_client *client) * removal is ongoing. Wait until all removals are completed. */ wait_for_completion(&client->uses_zero); - - down_write(&clients_rwsem); - list_del(&client->list); - xa_erase(&clients, client->client_id); - up_write(&clients_rwsem); + remove_client_id(client); } EXPORT_SYMBOL(ib_unregister_client); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7b80ec822043..4f225175cb91 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2650,7 +2650,6 @@ struct ib_client { refcount_t uses; struct completion uses_zero; - struct list_head list; u32 client_id; /* kverbs are not required by the client */ -- cgit v1.2.3-59-g8ed1b From ce51346feede2ea41de0ad58af2b514223e11dad Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Mon, 19 Aug 2019 14:17:08 +0300 Subject: RDMA/core: Make invalidate_range a device operation The callback function 'invalidate_range' is implemented in a driver so the place for it is in the ib_device_ops structure and not in ib_ucontext. Signed-off-by: Moni Shoua Reviewed-by: Guy Levi Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20190819111710.18440-11-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/umem_odp.c | 10 +++++----- drivers/infiniband/core/uverbs_cmd.c | 2 -- drivers/infiniband/hw/mlx5/main.c | 4 ---- drivers/infiniband/hw/mlx5/odp.c | 1 + include/rdma/ib_verbs.h | 4 ++-- 6 files changed, 9 insertions(+), 13 deletions(-) (limited to 'include/rdma/ib_verbs.h') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ea8661a00651..b5631b8a0397 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2562,6 +2562,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_vf_config); SET_DEVICE_OP(dev_ops, get_vf_stats); SET_DEVICE_OP(dev_ops, init_port); + SET_DEVICE_OP(dev_ops, invalidate_range); SET_DEVICE_OP(dev_ops, iw_accept); SET_DEVICE_OP(dev_ops, iw_add_ref); SET_DEVICE_OP(dev_ops, iw_connect); diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 72765b110f4b..32fb0b579dec 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -96,7 +96,7 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn, */ ib_umem_notifier_start_account(umem_odp); complete_all(&umem_odp->notifier_completion); - umem_odp->umem.context->invalidate_range( + umem_odp->umem.context->device->ops.invalidate_range( umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); } @@ -109,7 +109,7 @@ static int invalidate_range_start_trampoline(struct ib_umem_odp *item, u64 start, u64 end, void *cookie) { ib_umem_notifier_start_account(item); - item->umem.context->invalidate_range(item, start, end); + item->umem.context->device->ops.invalidate_range(item, start, end); return 0; } @@ -385,7 +385,7 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, if (!context) return ERR_PTR(-EIO); - if (WARN_ON_ONCE(!context->invalidate_range)) + if (WARN_ON_ONCE(!context->device->ops.invalidate_range)) return ERR_PTR(-EINVAL); umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL); @@ -479,7 +479,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, return ERR_PTR(-EIO); if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)) || - WARN_ON_ONCE(!context->invalidate_range)) + WARN_ON_ONCE(!context->device->ops.invalidate_range)) return ERR_PTR(-EINVAL); umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL); @@ -607,7 +607,7 @@ out: if (remove_existing_mapping) { ib_umem_notifier_start_account(umem_odp); - context->invalidate_range( + dev->ops.invalidate_range( umem_odp, ib_umem_start(umem_odp) + (page_index << umem_odp->page_shift), diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 7ddd0e5bc6b3..8f4fd4fac159 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -275,8 +275,6 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata); if (ret) goto err_file; - if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) - ucontext->invalidate_range = NULL; rdma_restrack_uadd(&ucontext->res); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e12a4404096b..7d5c2763b691 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1867,10 +1867,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, if (err) goto out_sys_pages; - if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING) - context->ibucontext.invalidate_range = - &mlx5_ib_invalidate_range; - if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { err = mlx5_ib_devx_create(dev, true); if (err < 0) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index a660dc2b21f4..6d940d8f5247 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1612,6 +1612,7 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) static const struct ib_device_ops mlx5_ib_dev_odp_ops = { .advise_mr = mlx5_ib_advise_mr, + .invalidate_range = mlx5_ib_invalidate_range, }; int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4f225175cb91..c2b39dda44cc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1417,8 +1417,6 @@ struct ib_ucontext { bool cleanup_retryable; - void (*invalidate_range)(struct ib_umem_odp *umem_odp, - unsigned long start, unsigned long end); struct mutex per_mm_list_lock; struct list_head per_mm_list; @@ -2378,6 +2376,8 @@ struct ib_device_ops { u64 iova); int (*unmap_fmr)(struct list_head *fmr_list); int (*dealloc_fmr)(struct ib_fmr *fmr); + void (*invalidate_range)(struct ib_umem_odp *umem_odp, + unsigned long start, unsigned long end); int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, -- cgit v1.2.3-59-g8ed1b