From 954a8e3aea87e896e320cf648c1a5bbe47de443e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 30 Aug 2018 08:35:19 +0300 Subject: RDMA/cma: Protect cma dev list with lock When AF_IB addresses are used during rdma_resolve_addr() a lock is not held. A cma device can get removed while list traversal is in progress which may lead to crash. ie CPU0 CPU1 ==== ==== rdma_resolve_addr() cma_resolve_ib_dev() list_for_each() cma_remove_one() cur_dev->device mutex_lock(&lock) list_del(); mutex_unlock(&lock); cma_process_remove(); Therefore, hold a lock while traversing the list which avoids such situation. Cc: # 3.10 Fixes: f17df3b0dede ("RDMA/cma: Add support for AF_IB to rdma_resolve_addr()") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f72677291b69..a36c94930c31 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -724,6 +724,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) dgid = (union ib_gid *) &addr->sib_addr; pkey = ntohs(addr->sib_pkey); + mutex_lock(&lock); list_for_each_entry(cur_dev, &dev_list, list) { for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { if (!rdma_cap_af_ib(cur_dev->device, p)) @@ -750,18 +751,19 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) cma_dev = cur_dev; sgid = gid; id_priv->id.port_num = p; + goto found; } } } } - - if (!cma_dev) - return -ENODEV; + mutex_unlock(&lock); + return -ENODEV; found: cma_attach_to_dev(id_priv, cma_dev); - addr = (struct sockaddr_ib *) cma_src_addr(id_priv); - memcpy(&addr->sib_addr, &sgid, sizeof sgid); + mutex_unlock(&lock); + addr = (struct sockaddr_ib *)cma_src_addr(id_priv); + memcpy(&addr->sib_addr, &sgid, sizeof(sgid)); cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); return 0; } -- cgit v1.2.3-59-g8ed1b From 67e3816842fe6414d629c7515b955952ec40c7d7 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 31 Aug 2018 07:16:03 -0700 Subject: RDMA/uverbs: Atomically flush and mark closed the comp event queue Currently a uverbs completion event queue is flushed of events in ib_uverbs_comp_event_close() with the queue spinlock held and then released. Yet setting ev_queue->is_closed is not set until later in uverbs_hot_unplug_completion_event_file(). In between the time ib_uverbs_comp_event_close() releases the lock and uverbs_hot_unplug_completion_event_file() acquires the lock, a completion event can arrive and be inserted into the event queue by ib_uverbs_comp_handler(). This can cause a "double add" list_add warning or crash depending on the kernel configuration, or a memory leak because the event is never dequeued since the queue is already closed down. So add setting ev_queue->is_closed = 1 to ib_uverbs_comp_event_close(). Cc: stable@vger.kernel.org Fixes: 1e7710f3f656 ("IB/core: Change completion channel to use the reworked objects schema") Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 6d974e2363df..50152c1b1004 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -440,6 +440,7 @@ static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) list_del(&entry->obj_list); kfree(entry); } + file->ev_queue.is_closed = 1; spin_unlock_irq(&file->ev_queue.lock); uverbs_close_fd(filp); -- cgit v1.2.3-59-g8ed1b From 5fe23f262e0548ca7f19fb79f89059a60d087d22 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 12 Sep 2018 16:27:44 -0700 Subject: ucma: fix a use-after-free in ucma_resolve_ip() There is a race condition between ucma_close() and ucma_resolve_ip(): CPU0 CPU1 ucma_resolve_ip(): ucma_close(): ctx = ucma_get_ctx(file, cmd.id); list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) { mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); ... mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); rdma_destroy_id(ctx->cm_id); ... ucma_free_ctx(ctx); ret = rdma_resolve_addr(); ucma_put_ctx(ctx); Before idr_remove(), ucma_get_ctx() could still find the ctx and after rdma_destroy_id(), rdma_resolve_addr() may still access id_priv pointer. Also, ucma_put_ctx() may use ctx after ucma_free_ctx() too. ucma_close() should call ucma_put_ctx() too which tests the refcnt and waits for the last one releasing it. The similar pattern is already used by ucma_destroy_id(). Reported-and-tested-by: syzbot+da2591e115d57a9cbb8b@syzkaller.appspotmail.com Reported-by: syzbot+cfe3c1e8ef634ba8964b@syzkaller.appspotmail.com Cc: Jason Gunthorpe Cc: Doug Ledford Cc: Leon Romanovsky Signed-off-by: Cong Wang Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucma.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 5f437d1570fb..21863ddde63e 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1759,6 +1759,8 @@ static int ucma_close(struct inode *inode, struct file *filp) mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); /* rdma_destroy_id ensures that no event handlers are * inflight for that id before releasing it. */ -- cgit v1.2.3-59-g8ed1b From 4eeed3686981ff887bbdd7254139e2eca276534c Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Tue, 18 Sep 2018 10:51:37 +0300 Subject: RDMA/uverbs: Fix validity check for modify QP Uverbs shouldn't enforce QP state in the command unless the user set the QP state bit in the attribute mask. In addition, only copy qp attr fields which have the corresponding bit set in the attribute mask over to the internal attr structure. Fixes: 88de869bbe4f ("RDMA/uverbs: Ensure validity of current QP state value") Fixes: bc38a6abdd5a ("[PATCH] IB uverbs: core implementation") Signed-off-by: Majd Dibbiny Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 68 ++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a21d5214afc3..e012ca80f9d1 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2027,33 +2027,55 @@ static int modify_qp(struct ib_uverbs_file *file, if ((cmd->base.attr_mask & IB_QP_CUR_STATE && cmd->base.cur_qp_state > IB_QPS_ERR) || - cmd->base.qp_state > IB_QPS_ERR) { + (cmd->base.attr_mask & IB_QP_STATE && + cmd->base.qp_state > IB_QPS_ERR)) { ret = -EINVAL; goto release_qp; } - attr->qp_state = cmd->base.qp_state; - attr->cur_qp_state = cmd->base.cur_qp_state; - attr->path_mtu = cmd->base.path_mtu; - attr->path_mig_state = cmd->base.path_mig_state; - attr->qkey = cmd->base.qkey; - attr->rq_psn = cmd->base.rq_psn; - attr->sq_psn = cmd->base.sq_psn; - attr->dest_qp_num = cmd->base.dest_qp_num; - attr->qp_access_flags = cmd->base.qp_access_flags; - attr->pkey_index = cmd->base.pkey_index; - attr->alt_pkey_index = cmd->base.alt_pkey_index; - attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify; - attr->max_rd_atomic = cmd->base.max_rd_atomic; - attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic; - attr->min_rnr_timer = cmd->base.min_rnr_timer; - attr->port_num = cmd->base.port_num; - attr->timeout = cmd->base.timeout; - attr->retry_cnt = cmd->base.retry_cnt; - attr->rnr_retry = cmd->base.rnr_retry; - attr->alt_port_num = cmd->base.alt_port_num; - attr->alt_timeout = cmd->base.alt_timeout; - attr->rate_limit = cmd->rate_limit; + if (cmd->base.attr_mask & IB_QP_STATE) + attr->qp_state = cmd->base.qp_state; + if (cmd->base.attr_mask & IB_QP_CUR_STATE) + attr->cur_qp_state = cmd->base.cur_qp_state; + if (cmd->base.attr_mask & IB_QP_PATH_MTU) + attr->path_mtu = cmd->base.path_mtu; + if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE) + attr->path_mig_state = cmd->base.path_mig_state; + if (cmd->base.attr_mask & IB_QP_QKEY) + attr->qkey = cmd->base.qkey; + if (cmd->base.attr_mask & IB_QP_RQ_PSN) + attr->rq_psn = cmd->base.rq_psn; + if (cmd->base.attr_mask & IB_QP_SQ_PSN) + attr->sq_psn = cmd->base.sq_psn; + if (cmd->base.attr_mask & IB_QP_DEST_QPN) + attr->dest_qp_num = cmd->base.dest_qp_num; + if (cmd->base.attr_mask & IB_QP_ACCESS_FLAGS) + attr->qp_access_flags = cmd->base.qp_access_flags; + if (cmd->base.attr_mask & IB_QP_PKEY_INDEX) + attr->pkey_index = cmd->base.pkey_index; + if (cmd->base.attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) + attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify; + if (cmd->base.attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + attr->max_rd_atomic = cmd->base.max_rd_atomic; + if (cmd->base.attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic; + if (cmd->base.attr_mask & IB_QP_MIN_RNR_TIMER) + attr->min_rnr_timer = cmd->base.min_rnr_timer; + if (cmd->base.attr_mask & IB_QP_PORT) + attr->port_num = cmd->base.port_num; + if (cmd->base.attr_mask & IB_QP_TIMEOUT) + attr->timeout = cmd->base.timeout; + if (cmd->base.attr_mask & IB_QP_RETRY_CNT) + attr->retry_cnt = cmd->base.retry_cnt; + if (cmd->base.attr_mask & IB_QP_RNR_RETRY) + attr->rnr_retry = cmd->base.rnr_retry; + if (cmd->base.attr_mask & IB_QP_ALT_PATH) { + attr->alt_port_num = cmd->base.alt_port_num; + attr->alt_timeout = cmd->base.alt_timeout; + attr->alt_pkey_index = cmd->base.alt_pkey_index; + } + if (cmd->base.attr_mask & IB_QP_RATE_LIMIT) + attr->rate_limit = cmd->rate_limit; if (cmd->base.attr_mask & IB_QP_AV) copy_ah_attr_from_uverbs(qp->device, &attr->ah_attr, -- cgit v1.2.3-59-g8ed1b From a9360abd3de0aad745d25d003923d56afb28a04b Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 25 Sep 2018 11:23:55 +0300 Subject: IB/uverbs: Free uapi on destroy Make sure we free struct uverbs_api once we clean the radix tree. It was allocated by uverbs_alloc_api(). Fixes: 9ed3e5f44772 ("IB/uverbs: Build the specs into a radix tree at runtime") Reported-by: Bart Van Assche Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_uapi.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 73ea6f0db88f..be854628a7c6 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -248,6 +248,7 @@ void uverbs_destroy_api(struct uverbs_api *uapi) kfree(rcu_dereference_protected(*slot, true)); radix_tree_iter_delete(&uapi->radix, &iter, slot); } + kfree(uapi); } struct uverbs_api *uverbs_alloc_api( -- cgit v1.2.3-59-g8ed1b From 5c5702e259dc66e6fceed5117effab79c186e87a Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 25 Sep 2018 12:10:40 +0300 Subject: RDMA/core: Set right entry state before releasing reference Currently add_modify_gid() for IB link layer has followong issue in cache update path. When GID update event occurs, core releases reference to the GID table without updating its state and/or entry pointer. CPU-0 CPU-1 ------ ----- ib_cache_update() IPoIB ULP add_modify_gid() [..] put_gid_entry() refcnt = 0, but state = valid, entry is valid. (work item is not yet executed). ipoib_create_ah() rdma_create_ah() rdma_get_gid_attr() <-- Tries to acquire gid_attr which has refcnt = 0. This is incorrect. GID entry state and entry pointer is provides the accurate GID enty state. Such fields must be updated with rwlock to protect against readers and, such fields must be in sane state before refcount can drop to zero. Otherwise above race condition can happen leading to use-after-free situation. Following backtrace has been observed when cache update for an IB port is triggered while IPoIB ULP is creating an AH. Therefore, when updating GID entry, first mark a valid entry as invalid through state and set the barrier so that no callers can acquired the GID entry, followed by release reference to it. refcount_t: increment on 0; use-after-free. WARNING: CPU: 4 PID: 29106 at lib/refcount.c:153 refcount_inc_checked+0x30/0x50 Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core] RIP: 0010:refcount_inc_checked+0x30/0x50 RSP: 0018:ffff8802ad36f600 EFLAGS: 00010082 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000008 RDI: ffffffff86710100 RBP: ffff8802d6e60a30 R08: ffffed005d67bf8b R09: ffffed005d67bf8b R10: 0000000000000001 R11: ffffed005d67bf8a R12: ffff88027620cee8 R13: ffff8802d6e60988 R14: ffff8802d6e60a78 R15: 0000000000000202 FS: 0000000000000000(0000) GS:ffff8802eb200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3ab35e5c88 CR3: 00000002ce84a000 CR4: 00000000000006e0 IPv6: ADDRCONF(NETDEV_CHANGE): ib1: link becomes ready Call Trace: rdma_get_gid_attr+0x220/0x310 [ib_core] ? lock_acquire+0x145/0x3a0 rdma_fill_sgid_attr+0x32c/0x470 [ib_core] rdma_create_ah+0x89/0x160 [ib_core] ? rdma_fill_sgid_attr+0x470/0x470 [ib_core] ? ipoib_create_ah+0x52/0x260 [ib_ipoib] ipoib_create_ah+0xf5/0x260 [ib_ipoib] ipoib_mcast_join_complete+0xbbe/0x2540 [ib_ipoib] Fixes: b150c3862d21 ("IB/core: Introduce GID entry reference counts") Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 68 ++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 34 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 0bee1f4b914e..3208ad6ad540 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -337,6 +337,39 @@ static int add_roce_gid(struct ib_gid_table_entry *entry) return 0; } +/** + * del_gid - Delete GID table entry + * + * @ib_dev: IB device whose GID entry to be deleted + * @port: Port number of the IB device + * @table: GID table of the IB device for a port + * @ix: GID entry index to delete + * + */ +static void del_gid(struct ib_device *ib_dev, u8 port, + struct ib_gid_table *table, int ix) +{ + struct ib_gid_table_entry *entry; + + lockdep_assert_held(&table->lock); + + pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, + ib_dev->name, port, ix, + table->data_vec[ix]->attr.gid.raw); + + write_lock_irq(&table->rwlock); + entry = table->data_vec[ix]; + entry->state = GID_TABLE_ENTRY_PENDING_DEL; + /* + * For non RoCE protocol, GID entry slot is ready to use. + */ + if (!rdma_protocol_roce(ib_dev, port)) + table->data_vec[ix] = NULL; + write_unlock_irq(&table->rwlock); + + put_gid_entry_locked(entry); +} + /** * add_modify_gid - Add or modify GID table entry * @@ -358,7 +391,7 @@ static int add_modify_gid(struct ib_gid_table *table, * this index. */ if (is_gid_entry_valid(table->data_vec[attr->index])) - put_gid_entry(table->data_vec[attr->index]); + del_gid(attr->device, attr->port_num, table, attr->index); /* * Some HCA's report multiple GID entries with only one valid GID, and @@ -386,39 +419,6 @@ done: return ret; } -/** - * del_gid - Delete GID table entry - * - * @ib_dev: IB device whose GID entry to be deleted - * @port: Port number of the IB device - * @table: GID table of the IB device for a port - * @ix: GID entry index to delete - * - */ -static void del_gid(struct ib_device *ib_dev, u8 port, - struct ib_gid_table *table, int ix) -{ - struct ib_gid_table_entry *entry; - - lockdep_assert_held(&table->lock); - - pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, - ib_dev->name, port, ix, - table->data_vec[ix]->attr.gid.raw); - - write_lock_irq(&table->rwlock); - entry = table->data_vec[ix]; - entry->state = GID_TABLE_ENTRY_PENDING_DEL; - /* - * For non RoCE protocol, GID entry slot is ready to use. - */ - if (!rdma_protocol_roce(ib_dev, port)) - table->data_vec[ix] = NULL; - write_unlock_irq(&table->rwlock); - - put_gid_entry_locked(entry); -} - /* rwlock should be read locked, or lock should be held */ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, const struct ib_gid_attr *val, bool default_gid, -- cgit v1.2.3-59-g8ed1b