From d114c6feedfe0600c19b9f9479a4026354d1f7fd Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 18 Aug 2020 15:05:18 +0300 Subject: RDMA/cma: Add missing locking to rdma_accept() In almost all cases rdma_accept() is called under the handler_mutex by ULPs from their handler callbacks. The one exception was ucma which did not get the handler_mutex. To improve the understandability of the locking scheme, obtain the mutex for ucma as well. This improves how ucma works by allowing it to directly use handler_mutex for some of its internal locking against the handler callbacks instead of the global file->mut lock. There does not seem to be a serious bug here, other than that a DISCONNECT event can be delivered concurrently with accept succeeding. Link: https://lore.kernel.org/r/20200818120526.702120-7-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 26de0dab60bb..78641858abe2 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4154,14 +4154,15 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, const char *caller) { - struct rdma_id_private *id_priv; + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); int ret; - id_priv = container_of(id, struct rdma_id_private, id); + lockdep_assert_held(&id_priv->handler_mutex); rdma_restrack_set_task(&id_priv->res, caller); - if (!cma_comp(id_priv, RDMA_CM_CONNECT)) + if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT) return -EINVAL; if (!id->qp && conn_param) { @@ -4214,6 +4215,24 @@ int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, } EXPORT_SYMBOL(__rdma_accept_ece); +void rdma_lock_handler(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + mutex_lock(&id_priv->handler_mutex); +} +EXPORT_SYMBOL(rdma_lock_handler); + +void rdma_unlock_handler(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + mutex_unlock(&id_priv->handler_mutex); +} +EXPORT_SYMBOL(rdma_unlock_handler); + int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) { struct rdma_id_private *id_priv; -- cgit v1.2.3-59-g8ed1b From 2a7cec538169ada48a7810e77abff2ca99dbb098 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 2 Sep 2020 11:11:15 +0300 Subject: RDMA/cma: Fix locking for the RDMA_CM_CONNECT state It is currently a bit confusing, but the design is that if the handler_mutex is held, and the state is in RDMA_CM_CONNECT, then the state cannot leave RDMA_CM_CONNECT without also serializing with the handler_mutex. Make this clearer by adding a direct assertion, fixing the usage in rdma_connect and generally using READ_ONCE to read the state value.
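As a usage sketch (the helper name example_accept() is hypothetical and error handling is abbreviated), this is the contract the new exports establish for a caller such as ucma that is not already inside a handler callback:

/*
 * Sketch only: rdma_accept() now asserts that handler_mutex is held.
 * ULPs calling from inside their event handler callback already hold
 * it; a process-context caller like ucma must take it explicitly.
 */
static int example_accept(struct rdma_cm_id *id,
			  struct rdma_conn_param *conn_param)
{
	int ret;

	rdma_lock_handler(id);
	/* RDMA_CM_CONNECT cannot be left while handler_mutex is held */
	ret = rdma_accept(id, conn_param);
	rdma_unlock_handler(id);
	return ret;
}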
Link: https://lore.kernel.org/r/20200902081122.745412-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 44 +++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f9ff8b7f05e7..6f492906939b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -421,6 +421,15 @@ static int cma_comp_exch(struct rdma_id_private *id_priv, unsigned long flags; int ret; + /* + * The FSM uses a funny double locking where state is protected by both + * the handler_mutex and the spinlock. State is not allowed to change + * away from a handler_mutex protected value without also holding + * handler_mutex. + */ + if (comp == RDMA_CM_CONNECT) + lockdep_assert_held(&id_priv->handler_mutex); + spin_lock_irqsave(&id_priv->lock, flags); if ((ret = (id_priv->state == comp))) id_priv->state = exch; @@ -1949,13 +1958,15 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event = {}; + enum rdma_cm_state state; int ret; mutex_lock(&id_priv->handler_mutex); + state = READ_ONCE(id_priv->state); if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && - id_priv->state != RDMA_CM_CONNECT) || + state != RDMA_CM_CONNECT) || (ib_event->event == IB_CM_TIMEWAIT_EXIT && - id_priv->state != RDMA_CM_DISCONNECT)) + state != RDMA_CM_DISCONNECT)) goto out; switch (ib_event->event) { @@ -1965,7 +1976,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, event.status = -ETIMEDOUT; break; case IB_CM_REP_RECEIVED: - if (cma_comp(id_priv, RDMA_CM_CONNECT) && + if (state == RDMA_CM_CONNECT && (id_priv->id.qp_type != IB_QPT_UD)) { trace_cm_send_mra(id_priv); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); @@ -2226,8 +2237,8 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, goto net_dev_put; } - if (cma_comp(conn_id, RDMA_CM_CONNECT) && - (conn_id->id.qp_type != IB_QPT_UD)) { + if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT && + conn_id->id.qp_type != IB_QPT_UD) { trace_cm_send_mra(cm_id->context); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); } @@ -2288,7 +2299,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != RDMA_CM_CONNECT) + if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT) goto out; switch (iw_event->event) { @@ -3778,7 +3789,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, int ret; mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != RDMA_CM_CONNECT) + if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT) goto out; switch (ib_event->event) { @@ -4014,12 +4025,15 @@ out: int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { - struct rdma_id_private *id_priv; + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); int ret; - id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) - return -EINVAL; + mutex_lock(&id_priv->handler_mutex); + if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) { + ret = -EINVAL; + goto err_unlock; + } if (!id->qp) { id_priv->qp_num = conn_param->qp_num; @@ -4036,11 +4050,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) else ret = 
-ENOSYS; if (ret) - goto err; - + goto err_state; + mutex_unlock(&id_priv->handler_mutex); return 0; -err: +err_state: cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); +err_unlock: + mutex_unlock(&id_priv->handler_mutex); return ret; } EXPORT_SYMBOL(rdma_connect); -- cgit v1.2.3-59-g8ed1b From 732d41c545bb359cbb8c94698bdc1f8bcf82279c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 2 Sep 2020 11:11:16 +0300 Subject: RDMA/cma: Make the locking for automatic state transition more clear Re-organize things so the state variable is not read unlocked. The first attempt to go directly from ADDR_BOUND immediately tells us if the ID is already bound, if we can't do that then the attempt inside rdma_bind_addr() to go from IDLE to ADDR_BOUND confirms the ID needs binding. Link: https://lore.kernel.org/r/20200902081122.745412-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 67 +++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 6f492906939b..11d369b7faca 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3248,32 +3248,54 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, return rdma_bind_addr(id, src_addr); } -int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr, unsigned long timeout_ms) +/* + * If required, resolve the source address for bind and leave the id_priv in + * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior + * calls made by ULP, a previously bound ID will not be re-bound and src_addr is + * ignored. 
+ */ +static int resolve_prepare_src(struct rdma_id_private *id_priv, + struct sockaddr *src_addr, + const struct sockaddr *dst_addr) { - struct rdma_id_private *id_priv; int ret; - id_priv = container_of(id, struct rdma_id_private, id); memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); - if (id_priv->state == RDMA_CM_IDLE) { - ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) { - memset(cma_dst_addr(id_priv), 0, - rdma_addr_size(dst_addr)); - return ret; + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + /* For a well behaved ULP state will be RDMA_CM_IDLE */ + ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); + if (ret) + goto err_dst; + if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, + RDMA_CM_ADDR_QUERY))) { + ret = -EINVAL; + goto err_dst; } } if (cma_family(id_priv) != dst_addr->sa_family) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); - return -EINVAL; + ret = -EINVAL; + goto err_state; } + return 0; - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); - return -EINVAL; - } +err_state: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); +err_dst: + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); + return ret; +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr, unsigned long timeout_ms) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + int ret; + + ret = resolve_prepare_src(id_priv, src_addr, dst_addr); + if (ret) + return ret; if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); @@ -3646,20 +3668,21 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, int rdma_listen(struct rdma_cm_id *id, int backlog) { - struct rdma_id_private *id_priv; + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); int ret; - id_priv = container_of(id, struct rdma_id_private, id); - if (id_priv->state == RDMA_CM_IDLE) { + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) { + /* For a well behaved ULP state will be RDMA_CM_IDLE */ id->route.addr.src_addr.ss_family = AF_INET; ret = rdma_bind_addr(id, cma_src_addr(id_priv)); if (ret) return ret; + if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, + RDMA_CM_LISTEN))) + return -EINVAL; } - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) - return -EINVAL; - if (id_priv->reuseaddr) { ret = cma_bind_listen(id_priv); if (ret) -- cgit v1.2.3-59-g8ed1b From d490ee52f0a5dbd9d1bccabd6bf00c40c1234a79 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 2 Sep 2020 11:11:17 +0300 Subject: RDMA/cma: Fix locking for the RDMA_CM_LISTEN state There is a strange unlocked read of the ID state when checking for reuseaddr. This is because an ID cannot be reusable once it becomes a listening ID. Instead of using the state to exclude reuse, just clear it as part of rdma_listen()'s flow to convert reusable into not reusable. Once an ID goes to listen there is no way back out, and the only use of reusable is on the bind_list check. Finally, update the checks under handler_mutex to use READ_ONCE and audit that once RDMA_CM_LISTEN is observed in a req callback it is stable under the handler_mutex.
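The stability rule being audited reduces to one idiom, restated here as a short sketch (the error label is illustrative): once RDMA_CM_LISTEN is observed under handler_mutex it cannot change, because leaving the state also requires taking handler_mutex.

mutex_lock(&listen_id->handler_mutex);
if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) {
	/* raced with destroy; the ID is no longer listening */
	ret = -ECONNABORTED;
	goto err_unlock;
}
/* ... the ID is guaranteed to stay in LISTEN until the unlock ... */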
Link: https://lore.kernel.org/r/20200902081122.745412-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 11d369b7faca..95beaebab4bb 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2195,7 +2195,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, } mutex_lock(&listen_id->handler_mutex); - if (listen_id->state != RDMA_CM_LISTEN) { + if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) { ret = -ECONNABORTED; goto err_unlock; } @@ -2373,7 +2373,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, listen_id = cm_id->context; mutex_lock(&listen_id->handler_mutex); - if (listen_id->state != RDMA_CM_LISTEN) + if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) goto out; /* Create a new RDMA id for the new IW CM ID */ @@ -3327,7 +3327,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); - if (reuse || id_priv->state == RDMA_CM_IDLE) { + if ((reuse && id_priv->state != RDMA_CM_LISTEN) || + id_priv->state == RDMA_CM_IDLE) { id_priv->reuseaddr = reuse; ret = 0; } else { @@ -3521,8 +3522,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list, if (id_priv == cur_id) continue; - if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && - cur_id->reuseaddr) + if (reuseaddr && cur_id->reuseaddr) continue; cur_addr = cma_src_addr(cur_id); @@ -3563,18 +3563,6 @@ static int cma_use_port(enum rdma_ucm_port_space ps, return ret; } -static int cma_bind_listen(struct rdma_id_private *id_priv) -{ - struct rdma_bind_list *bind_list = id_priv->bind_list; - int ret = 0; - - mutex_lock(&lock); - if (bind_list->owners.first->next) - ret = cma_check_port(bind_list, id_priv, 0); - mutex_unlock(&lock); - return ret; -} - static enum rdma_ucm_port_space cma_select_inet_ps(struct rdma_id_private *id_priv) { @@ -3683,8 +3671,16 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) return -EINVAL; } + /* + * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable + * any more, and has to be unique in the bind list. + */ if (id_priv->reuseaddr) { - ret = cma_bind_listen(id_priv); + mutex_lock(&lock); + ret = cma_check_port(id_priv->bind_list, id_priv, 0); + if (!ret) + id_priv->reuseaddr = 0; + mutex_unlock(&lock); if (ret) goto err; } @@ -3709,6 +3705,10 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) return 0; err: id_priv->backlog = 0; + /* + * All the failure paths that lead here will not allow the req_handler's + * to have run. + */ cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); return ret; } -- cgit v1.2.3-59-g8ed1b From 5cfbf9291e1d56b28df30929a5b4bb2d70193855 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 2 Sep 2020 11:11:18 +0300 Subject: RDMA/cma: Remove cma_comp() The only place that still uses it is rdma_join_multicast() which is only doing a sanity check that the caller hasn't done something wrong and doesn't need the spinlock. At least in the case of rdma_join_multicast() the information it needs will remain until the ID is destroyed once it enters these states. Similarly there is no reason to check for these specific states in the handler callback, instead use the usual check for a destroyed id under the handler_mutex. 
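For comparison, a sketch of the check before and after this patch; both are racy in the same way, but the spinlock version paid for a lock that provided no additional guarantee:

/* Before: cma_comp() - the result can go stale right after unlock */
spin_lock_irqsave(&id_priv->lock, flags);
ret = (id_priv->state == comp);
spin_unlock_irqrestore(&id_priv->lock, flags);

/*
 * After: an unlocked sanity check. The data rdma_join_multicast()
 * relies on stays valid until the ID is destroyed once these states
 * are entered, so a plain READ_ONCE() is sufficient.
 */
if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND &&
		    READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
	return -EINVAL;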
Link: https://lore.kernel.org/r/20200902081122.745412-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 95beaebab4bb..bd3621654366 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -404,17 +404,6 @@ struct cma_req_info { u16 pkey; }; -static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&id_priv->lock, flags); - ret = (id_priv->state == comp); - spin_unlock_irqrestore(&id_priv->lock, flags); - return ret; -} - static int cma_comp_exch(struct rdma_id_private *id_priv, enum rdma_cm_state comp, enum rdma_cm_state exch) { @@ -4363,8 +4352,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) id_priv = mc->id_priv; mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != RDMA_CM_ADDR_BOUND && - id_priv->state != RDMA_CM_ADDR_RESOLVED) + if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL || + READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING) goto out; if (!status) @@ -4630,16 +4619,14 @@ out1: int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, u8 join_state, void *context) { - struct rdma_id_private *id_priv; + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); struct cma_multicast *mc; int ret; - if (!id->device) - return -EINVAL; - - id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && - !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) + /* ULP is calling this wrong. */ + if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND && + READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED)) return -EINVAL; mc = kmalloc(sizeof *mc, GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From 7e85bcda8bfe883f4244672ed79f81b7762a1a7e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 2 Sep 2020 11:11:19 +0300 Subject: RDMA/cma: Combine cma_ndev_work with cma_work These are the same thing, except that cma_ndev_work doesn't have a state transition. Signal no state transition by setting old_state and new_state == 0. In all cases the handler function should not be called once rdma_destroy_id() has progressed past setting the state.
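A sketch of queueing a transition-free work item under the merged scheme, mirroring what cma_netdev_change() does in the diff below (allocation failure handling elided):

struct cma_work *work = kzalloc(sizeof(*work), GFP_KERNEL);

INIT_WORK(&work->work, cma_work_handler);
work->id = id_priv;
work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
/* old_state and new_state remain 0: deliver the event, no FSM change */
cma_id_get(id_priv);	/* put back by cma_work_handler() */
queue_work(cma_wq, &work->work);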
Link: https://lore.kernel.org/r/20200902081122.745412-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 38 +++++++------------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index bd3621654366..df0a5bc4d6b7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -363,12 +363,6 @@ struct cma_work { struct rdma_cm_event event; }; -struct cma_ndev_work { - struct work_struct work; - struct rdma_id_private *id; - struct rdma_cm_event event; -}; - struct iboe_mcast_work { struct work_struct work; struct rdma_id_private *id; @@ -2647,32 +2641,14 @@ static void cma_work_handler(struct work_struct *_work) struct rdma_id_private *id_priv = work->id; mutex_lock(&id_priv->handler_mutex); - if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) + if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING || + READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL) goto out_unlock; - - if (cma_cm_event_handler(id_priv, &work->event)) { - cma_id_put(id_priv); - destroy_id_handler_unlock(id_priv); - goto out_free; + if (work->old_state != 0 || work->new_state != 0) { + if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) + goto out_unlock; } -out_unlock: - mutex_unlock(&id_priv->handler_mutex); - cma_id_put(id_priv); -out_free: - kfree(work); -} - -static void cma_ndev_work_handler(struct work_struct *_work) -{ - struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); - struct rdma_id_private *id_priv = work->id; - - mutex_lock(&id_priv->handler_mutex); - if (id_priv->state == RDMA_CM_DESTROYING || - id_priv->state == RDMA_CM_DEVICE_REMOVAL) - goto out_unlock; - if (cma_cm_event_handler(id_priv, &work->event)) { cma_id_put(id_priv); destroy_id_handler_unlock(id_priv); @@ -4698,7 +4674,7 @@ EXPORT_SYMBOL(rdma_leave_multicast); static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr; - struct cma_ndev_work *work; + struct cma_work *work; dev_addr = &id_priv->id.route.addr.dev_addr; @@ -4711,7 +4687,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id if (!work) return -ENOMEM; - INIT_WORK(&work->work, cma_ndev_work_handler); + INIT_WORK(&work->work, cma_work_handler); work->id = id_priv; work->event.event = RDMA_CM_EVENT_ADDR_CHANGE; cma_id_get(id_priv); -- cgit v1.2.3-59-g8ed1b From 1bb5091def706732c749df9aae45fbca003696f2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 2 Sep 2020 11:11:20 +0300 Subject: RDMA/cma: Remove dead code for kernel rdmacm multicast There is no kernel user of RDMA CM multicast so this code managing the multicast subscription of the kernel-only internal QP is dead. Remove it. This makes the bug fixes in the next patches much simpler. 
Link: https://lore.kernel.org/r/20200902081122.745412-7-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index df0a5bc4d6b7..79d14fb629bd 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4337,16 +4337,6 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) else pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n", status); - mutex_lock(&id_priv->qp_mutex); - if (!status && id_priv->id.qp) { - status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, - be16_to_cpu(multicast->rec.mlid)); - if (status) - pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n", - status); - } - mutex_unlock(&id_priv->qp_mutex); - event.status = status; event.param.ud.private_data = mc->context; if (!status) { @@ -4600,6 +4590,10 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, struct cma_multicast *mc; int ret; + /* Not supported for kernel QPs */ + if (WARN_ON(id->qp)) + return -EINVAL; + /* ULP is calling this wrong. */ if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND && READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED)) @@ -4651,11 +4645,6 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) list_del(&mc->list); spin_unlock_irq(&id_priv->lock); - if (id->qp) - ib_detach_mcast(id->qp, - &mc->multicast.ib->rec.mgid, - be16_to_cpu(mc->multicast.ib->rec.mlid)); - BUG_ON(id_priv->cma_dev->device != id->device); if (rdma_cap_ib_mcast(id->device, id->port_num)) { -- cgit v1.2.3-59-g8ed1b From 3788d2997bc0150ea911a964d5b5a2e11808a936 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 2 Sep 2020 11:11:21 +0300 Subject: RDMA/cma: Consolidate the destruction of a cma_multicast in one place Two places were open coding this sequence, and also pull in cma_leave_roce_mc_group() which was called only once. 
Link: https://lore.kernel.org/r/20200902081122.745412-8-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 63 +++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 32 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 79d14fb629bd..906717ca1a46 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1775,19 +1775,30 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_unlock(&lock); } -static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv, - struct cma_multicast *mc) +static void destroy_mc(struct rdma_id_private *id_priv, + struct cma_multicast *mc) { - struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; - struct net_device *ndev = NULL; + if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num)) { + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + return; + } - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); - dev_put(ndev); + if (rdma_protocol_roce(id_priv->id.device, + id_priv->id.port_num)) { + struct rdma_dev_addr *dev_addr = + &id_priv->id.route.addr.dev_addr; + struct net_device *ndev = NULL; + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(dev_addr->net, + dev_addr->bound_dev_if); + if (ndev) { + cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); + dev_put(ndev); + } + kref_put(&mc->mcref, release_mc); } - kref_put(&mc->mcref, release_mc); } static void cma_leave_mc_groups(struct rdma_id_private *id_priv) @@ -1795,16 +1806,10 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) struct cma_multicast *mc; while (!list_empty(&id_priv->mc_list)) { - mc = container_of(id_priv->mc_list.next, - struct cma_multicast, list); + mc = list_first_entry(&id_priv->mc_list, struct cma_multicast, + list); list_del(&mc->list); - if (rdma_cap_ib_mcast(id_priv->cma_dev->device, - id_priv->id.port_num)) { - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - } else { - cma_leave_roce_mc_group(id_priv, mc); - } + destroy_mc(id_priv, mc); } } @@ -4641,20 +4646,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irq(&id_priv->lock); list_for_each_entry(mc, &id_priv->mc_list, list) { - if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) { - list_del(&mc->list); - spin_unlock_irq(&id_priv->lock); - - BUG_ON(id_priv->cma_dev->device != id->device); + if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0) + continue; + list_del(&mc->list); + spin_unlock_irq(&id_priv->lock); - if (rdma_cap_ib_mcast(id->device, id->port_num)) { - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - } else if (rdma_protocol_roce(id->device, id->port_num)) { - cma_leave_roce_mc_group(id_priv, mc); - } - return; - } + WARN_ON(id_priv->cma_dev->device != id->device); + destroy_mc(id_priv, mc); + return; } spin_unlock_irq(&id_priv->lock); } -- cgit v1.2.3-59-g8ed1b From b5de0c60cc30c2a3513c7188c73f3f29acc29234 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 2 Sep 2020 11:11:22 +0300 Subject: RDMA/cma: Fix use after free race in roce multicast join The roce path triggers a work queue that continues to touch the id_priv but doesn't hold any reference on it. 
Further, unlike in the IB case, the work queue is not fenced during rdma_destroy_id(). This can trigger a use after free if a destroy is triggered in the incredibly narrow window between the queue_work and the work starting and obtaining the handler_mutex. The only purpose of this work queue is to run the ULP event callback from the standard context, so switch the design to use the existing cma_work_handler() scheme. This simplifies quite a lot of the flow: - Use the cma_work_handler() callback to launch the work for roce. This requires generating the event synchronously inside the rdma_join_multicast(), which in turn means the dummy struct ib_sa_multicast can become a simple stack variable. - cma_work_handler() used the id_priv kref, so we can entirely eliminate the kref inside struct cma_multicast. Since the cma_multicast never leaks into an unprotected work queue the kfree can be done at the same time as for IB. - Eliminating the general multicast.ib requires using cma_set_mgid() in a few places to recompute the mgid. Fixes: 3c86aa70bf67 ("RDMA/cm: Add RDMA CM support for IBoE devices") Link: https://lore.kernel.org/r/20200902081122.745412-9-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 196 +++++++++++++++++++----------------- 1 file changed, 88 insertions(+), 108 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 906717ca1a46..22bd892c4aa8 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -68,6 +68,9 @@ static const char * const cma_events[] = { [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", }; +static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, + union ib_gid *mgid); + const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { size_t index = event; @@ -345,13 +348,10 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) struct cma_multicast { struct rdma_id_private *id_priv; - union { - struct ib_sa_multicast *ib; - } multicast; + struct ib_sa_multicast *sa_mc; struct list_head list; void *context; struct sockaddr_storage addr; - struct kref mcref; u8 join_state; }; @@ -363,12 +363,6 @@ struct cma_work { struct rdma_cm_event event; }; -struct iboe_mcast_work { - struct work_struct work; - struct rdma_id_private *id; - struct cma_multicast *mc; -}; - union cma_ip_addr { struct in6_addr ip6; struct { @@ -475,14 +469,6 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv, rdma_start_port(cma_dev->device)]; } -static inline void release_mc(struct kref *kref) -{ - struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); - - kfree(mc->multicast.ib); - kfree(mc); -} - static void cma_release_dev(struct rdma_id_private *id_priv) { mutex_lock(&lock); @@ -1778,14 +1764,10 @@ static void cma_release_port(struct rdma_id_private *id_priv) static void destroy_mc(struct rdma_id_private *id_priv, struct cma_multicast *mc) { - if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num)) { - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - return; - } + if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num)) + ib_sa_free_multicast(mc->sa_mc); - if (rdma_protocol_roce(id_priv->id.device, - id_priv->id.port_num)) { + if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct net_device *ndev = NULL; @@ -1794,11 +1776,15 @@
static void destroy_mc(struct rdma_id_private *id_priv, ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (ndev) { - cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); + union ib_gid mgid; + + cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr, + &mgid); + cma_igmp_send(ndev, &mgid, false); dev_put(ndev); } - kref_put(&mc->mcref, release_mc); } + kfree(mc); } static void cma_leave_mc_groups(struct rdma_id_private *id_priv) @@ -2664,6 +2650,8 @@ out_unlock: mutex_unlock(&id_priv->handler_mutex); cma_id_put(id_priv); out_free: + if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN) + rdma_destroy_ah_attr(&work->event.param.ud.ah_attr); kfree(work); } @@ -4324,53 +4312,66 @@ out: } EXPORT_SYMBOL(rdma_disconnect); +static void cma_make_mc_event(int status, struct rdma_id_private *id_priv, + struct ib_sa_multicast *multicast, + struct rdma_cm_event *event, + struct cma_multicast *mc) +{ + struct rdma_dev_addr *dev_addr; + enum ib_gid_type gid_type; + struct net_device *ndev; + + if (!status) + status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); + else + pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n", + status); + + event->status = status; + event->param.ud.private_data = mc->context; + if (status) { + event->event = RDMA_CM_EVENT_MULTICAST_ERROR; + return; + } + + dev_addr = &id_priv->id.route.addr.dev_addr; + ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); + gid_type = + id_priv->cma_dev + ->default_gid_type[id_priv->id.port_num - + rdma_start_port( + id_priv->cma_dev->device)]; + + event->event = RDMA_CM_EVENT_MULTICAST_JOIN; + if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num, + &multicast->rec, ndev, gid_type, + &event->param.ud.ah_attr)) { + event->event = RDMA_CM_EVENT_MULTICAST_ERROR; + goto out; + } + + event->param.ud.qp_num = 0xFFFFFF; + event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey); + +out: + if (ndev) + dev_put(ndev); +} + static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) { - struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; + struct rdma_id_private *id_priv = mc->id_priv; struct rdma_cm_event event = {}; int ret = 0; - id_priv = mc->id_priv; mutex_lock(&id_priv->handler_mutex); if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL || READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING) goto out; - if (!status) - status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); - else - pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. 
status %d\n", - status); - event.status = status; - event.param.ud.private_data = mc->context; - if (!status) { - struct rdma_dev_addr *dev_addr = - &id_priv->id.route.addr.dev_addr; - struct net_device *ndev = - dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - enum ib_gid_type gid_type = - id_priv->cma_dev->default_gid_type[id_priv->id.port_num - - rdma_start_port(id_priv->cma_dev->device)]; - - event.event = RDMA_CM_EVENT_MULTICAST_JOIN; - ret = ib_init_ah_from_mcmember(id_priv->id.device, - id_priv->id.port_num, - &multicast->rec, - ndev, gid_type, - &event.param.ud.ah_attr); - if (ret) - event.event = RDMA_CM_EVENT_MULTICAST_ERROR; - - event.param.ud.qp_num = 0xFFFFFF; - event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); - if (ndev) - dev_put(ndev); - } else - event.event = RDMA_CM_EVENT_MULTICAST_ERROR; - + cma_make_mc_event(status, id_priv, multicast, &event, mc); ret = cma_cm_event_handler(id_priv, &event); - rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { destroy_id_handler_unlock(id_priv); @@ -4460,23 +4461,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, IB_SA_MCMEMBER_REC_MTU | IB_SA_MCMEMBER_REC_HOP_LIMIT; - mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, - id_priv->id.port_num, &rec, - comp_mask, GFP_KERNEL, - cma_ib_mc_handler, mc); - return PTR_ERR_OR_ZERO(mc->multicast.ib); -} - -static void iboe_mcast_work_handler(struct work_struct *work) -{ - struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); - struct cma_multicast *mc = mw->mc; - struct ib_sa_multicast *m = mc->multicast.ib; - - mc->multicast.ib->context = mc; - cma_ib_mc_handler(0, m); - kref_put(&mc->mcref, release_mc); - kfree(mw); + mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device, + id_priv->id.port_num, &rec, comp_mask, + GFP_KERNEL, cma_ib_mc_handler, mc); + return PTR_ERR_OR_ZERO(mc->sa_mc); } static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, @@ -4511,52 +4499,47 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { - struct iboe_mcast_work *work; + struct cma_work *work; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; + struct ib_sa_multicast ib; enum ib_gid_type gid_type; bool send_only; send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); - if (cma_zero_addr((struct sockaddr *)&mc->addr)) + if (cma_zero_addr(addr)) return -EINVAL; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; - mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); - if (!mc->multicast.ib) { - err = -ENOMEM; - goto out1; - } - gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; - cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type); + cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); - mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); + ib.rec.pkey = cpu_to_be16(0xffff); if (id_priv->id.ps == RDMA_PS_UDP) - mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!ndev) { err = -ENODEV; - goto out2; + goto err_free; } - mc->multicast.ib->rec.rate = iboe_get_rate(ndev); - mc->multicast.ib->rec.hop_limit = 1; - 
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu); + ib.rec.rate = iboe_get_rate(ndev); + ib.rec.hop_limit = 1; + ib.rec.mtu = iboe_get_mtu(ndev->mtu); if (addr->sa_family == AF_INET) { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { - mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; + ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; if (!send_only) { - err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, + err = cma_igmp_send(ndev, &ib.rec.mgid, true); } } @@ -4565,24 +4548,22 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, err = -ENOTSUPP; } dev_put(ndev); - if (err || !mc->multicast.ib->rec.mtu) { + if (err || !ib.rec.mtu) { if (!err) err = -EINVAL; - goto out2; + goto err_free; } rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, - &mc->multicast.ib->rec.port_gid); + &ib.rec.port_gid); work->id = id_priv; - work->mc = mc; - INIT_WORK(&work->work, iboe_mcast_work_handler); - kref_get(&mc->mcref); + INIT_WORK(&work->work, cma_work_handler); + cma_make_mc_event(0, id_priv, &ib, &work->event, mc); + /* Balances with cma_id_put() in cma_work_handler */ + cma_id_get(id_priv); queue_work(cma_wq, &work->work); - return 0; -out2: - kfree(mc->multicast.ib); -out1: +err_free: kfree(work); return err; } @@ -4604,7 +4585,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED)) return -EINVAL; - mc = kmalloc(sizeof *mc, GFP_KERNEL); + mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return -ENOMEM; @@ -4614,7 +4595,6 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, mc->join_state = join_state; if (rdma_protocol_roce(id->device, id->port_num)) { - kref_init(&mc->mcref); ret = cma_iboe_join_multicast(id_priv, mc); if (ret) goto out_err; -- cgit v1.2.3-59-g8ed1b From 60aaeffa367c2ac72cac96edfd10452c613882f5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 22 Sep 2020 12:11:02 +0300 Subject: RDMA/cma: Delete from restrack DB after successful destroy Update the code to have a destroy pattern similar to other IB objects. This change creates asymmetry in the rdma_id_private create flow to make sure that memory is managed by restrack.
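In sketch form, the resulting teardown order in _destroy_id(), with the intermediate steps abbreviated (compare the hunks below):

cma_cancel_operation(id_priv, state);
/* ... destroy CM IDs, leave multicast groups, release port/device ... */
rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
put_net(id_priv->id.route.addr.dev_addr.net);
rdma_restrack_del(&id_priv->res);	/* moved from first to last */
kfree(id_priv);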
Link: https://lore.kernel.org/r/20200922091106.2152715-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 22bd892c4aa8..22f0b7ccdd6c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1804,7 +1804,6 @@ static void _destroy_id(struct rdma_id_private *id_priv, { cma_cancel_operation(id_priv, state); - rdma_restrack_del(&id_priv->res); if (id_priv->cma_dev) { if (rdma_cap_ib_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.ib) @@ -1830,6 +1829,7 @@ static void _destroy_id(struct rdma_id_private *id_priv, rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); put_net(id_priv->id.route.addr.dev_addr.net); + rdma_restrack_del(&id_priv->res); kfree(id_priv); } @@ -3721,7 +3721,6 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) return 0; err2: - rdma_restrack_del(&id_priv->res); if (id_priv->cma_dev) cma_release_dev(id_priv); err1: -- cgit v1.2.3-59-g8ed1b From 13ef5539def732dc7b9c58c320d97a0a95b52634 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 22 Sep 2020 12:11:04 +0300 Subject: RDMA/restrack: Count references to the verbs objects Refactor the restrack code to make sure the kref inside the restrack entry properly kref's the object in which it is embedded. This slight change is needed for future conversions of MR and QP which are refcounted before the release and kfree. The ideal flow from the ib_core perspective is as follows: * Allocate ib_* structure with rdma_zalloc_*. * Set everything that is known to ib_core to that newly created object. * Initialize kref with restrack help * Call to driver specific allocation functions. * Insert into restrack DB .... * Return and release restrack with restrack_put. Largely this means a rdma_restrack_new() should be called near allocating the containing structure.
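A sketch of that flow using a PD as the example object, matching the __ib_alloc_pd() hunk below (rdma_restrack_kadd() is renamed to rdma_restrack_add() later in the series):

pd = rdma_zalloc_drv_obj(device, ib_pd);	/* allocate the ib_* structure */
if (!pd)
	return ERR_PTR(-ENOMEM);

pd->device = device;				/* everything ib_core knows */
rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);	/* kref_init + type */
rdma_restrack_set_task(&pd->res, caller);

ret = device->ops.alloc_pd(pd, NULL);		/* driver specific allocation */
if (ret) {
	rdma_restrack_put(&pd->res);		/* release through restrack */
	kfree(pd);
	return ERR_PTR(ret);
}
rdma_restrack_kadd(&pd->res);			/* insert into the restrack DB */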
Link: https://lore.kernel.org/r/20200922091106.2152715-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 5 ++-- drivers/infiniband/core/core_priv.h | 3 +- drivers/infiniband/core/counters.c | 6 ++-- drivers/infiniband/core/cq.c | 7 ++--- drivers/infiniband/core/restrack.c | 42 ++++++++++++++++++++------- drivers/infiniband/core/restrack.h | 3 ++ drivers/infiniband/core/uverbs_cmd.c | 13 ++++++--- drivers/infiniband/core/uverbs_std_types_cq.c | 4 ++- drivers/infiniband/core/verbs.c | 18 +++++++----- include/rdma/restrack.h | 7 ----- 10 files changed, 70 insertions(+), 38 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 22f0b7ccdd6c..568ad223d40f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -833,8 +833,6 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, if (!id_priv) return ERR_PTR(-ENOMEM); - rdma_restrack_set_task(&id_priv->res, caller); - id_priv->res.type = RDMA_RESTRACK_CM_ID; id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; @@ -854,6 +852,9 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->id.route.addr.dev_addr.net = get_net(net); id_priv->seq_num &= 0x00ffffff; + rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID); + rdma_restrack_set_task(&id_priv->res, caller); + return &id_priv->id; } EXPORT_SYMBOL(__rdma_create_id); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index a1e6a67b2c4a..2ad276cd9781 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -44,6 +44,7 @@ #include #include #include "mad_priv.h" +#include "restrack.h" /* Total number of ports combined across all struct ib_devices's */ #define RDMA_MAX_PORTS 8192 @@ -352,6 +353,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, INIT_LIST_HEAD(&qp->rdma_mrs); INIT_LIST_HEAD(&qp->sig_mrs); + rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP); /* * We don't track XRC QPs for now, because they don't have PD * and more importantly they are created internaly by driver, @@ -359,7 +361,6 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, */ is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT; if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { - qp->res.type = RDMA_RESTRACK_QP; if (uobj) rdma_restrack_uadd(&qp->res); else diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 636166880442..c059de99d19c 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -80,8 +80,9 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, counter->device = dev; counter->port = port; - counter->res.type = RDMA_RESTRACK_COUNTER; - counter->stats = dev->ops.counter_alloc_stats(counter); + + rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER); + counter->stats = dev->ops.counter_alloc_stats(counter); if (!counter->stats) goto err_stats; @@ -107,6 +108,7 @@ err_mode: mutex_unlock(&port_counter->lock); kfree(counter->stats); err_stats: + rdma_restrack_put(&counter->res); kfree(counter); return NULL; } diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 19e36e52181b..2c3ff8dda717 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -235,15 +235,13 @@ struct ib_cq *__ib_alloc_cq(struct ib_device 
*dev, void *private, int nr_cqe, if (!cq->wc) goto out_free_cq; - cq->res.type = RDMA_RESTRACK_CQ; + rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); rdma_restrack_set_task(&cq->res, caller); ret = dev->ops.create_cq(cq, &cq_attr, NULL); if (ret) goto out_free_wc; - rdma_restrack_kadd(&cq->res); - rdma_dim_init(cq); switch (cq->poll_ctx) { @@ -269,14 +267,15 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe, goto out_destroy_cq; } + rdma_restrack_kadd(&cq->res); trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx); return cq; out_destroy_cq: rdma_dim_destroy(cq); - rdma_restrack_del(&cq->res); cq->device->ops.destroy_cq(cq, NULL); out_free_wc: + rdma_restrack_put(&cq->res); kfree(cq->wc); out_free_cq: kfree(cq); diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 62fbb0ae9cb4..22c658e8c157 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -202,6 +202,21 @@ void rdma_restrack_attach_task(struct rdma_restrack_entry *res, res->task = task; } +/** + * rdma_restrack_new() - Initializes new restrack entry to allow _put() interface + * to release memory in fully automatic way. + * @res - Entry to initialize + * @type - REstrack type + */ +void rdma_restrack_new(struct rdma_restrack_entry *res, + enum rdma_restrack_type type) +{ + kref_init(&res->kref); + init_completion(&res->comp); + res->type = type; +} +EXPORT_SYMBOL(rdma_restrack_new); + static void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); @@ -213,8 +228,6 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res) rt = &dev->res[res->type]; - kref_init(&res->kref); - init_completion(&res->comp); if (res->type == RDMA_RESTRACK_QP) { /* Special case to ensure that LQPN points to right QP */ struct ib_qp *qp = container_of(res, struct ib_qp, res); @@ -305,6 +318,10 @@ static void restrack_release(struct kref *kref) struct rdma_restrack_entry *res; res = container_of(kref, struct rdma_restrack_entry, kref); + if (res->task) { + put_task_struct(res->task); + res->task = NULL; + } complete(&res->comp); } @@ -314,14 +331,23 @@ int rdma_restrack_put(struct rdma_restrack_entry *res) } EXPORT_SYMBOL(rdma_restrack_put); +/** + * rdma_restrack_del() - delete object from the reource tracking database + * @res: resource entry + */ void rdma_restrack_del(struct rdma_restrack_entry *res) { struct rdma_restrack_entry *old; struct rdma_restrack_root *rt; struct ib_device *dev; - if (!res->valid) - goto out; + if (!res->valid) { + if (res->task) { + put_task_struct(res->task); + res->task = NULL; + } + return; + } dev = res_to_dev(res); if (WARN_ON(!dev)) @@ -330,16 +356,12 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) rt = &dev->res[res->type]; old = xa_erase(&rt->xa, res->id); + if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP) + return; WARN_ON(old != res); res->valid = false; rdma_restrack_put(res); wait_for_completion(&res->comp); - -out: - if (res->task) { - put_task_struct(res->task); - res->task = NULL; - } } EXPORT_SYMBOL(rdma_restrack_del); diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h index d084e5f89849..16006a5e7408 100644 --- a/drivers/infiniband/core/restrack.h +++ b/drivers/infiniband/core/restrack.h @@ -25,6 +25,9 @@ struct rdma_restrack_root { int rdma_restrack_init(struct ib_device *dev); void rdma_restrack_clean(struct ib_device *dev); +void rdma_restrack_del(struct rdma_restrack_entry *res); +void 
rdma_restrack_new(struct rdma_restrack_entry *res, + enum rdma_restrack_type type); void rdma_restrack_attach_task(struct rdma_restrack_entry *res, struct task_struct *task); #endif /* _RDMA_CORE_RESTRACK_H_ */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e5c0784e151d..6ddd26548da8 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -218,10 +218,11 @@ int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs) if (!ucontext) return -ENOMEM; - ucontext->res.type = RDMA_RESTRACK_CTX; ucontext->device = ib_dev; ucontext->ufile = ufile; xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC); + + rdma_restrack_new(&ucontext->res, RDMA_RESTRACK_CTX); attrs->context = ucontext; return 0; } @@ -313,6 +314,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) err_uobj: rdma_alloc_abort_uobject(uobj, attrs, false); err_ucontext: + rdma_restrack_put(&attrs->context->res); kfree(attrs->context); attrs->context = NULL; return ret; @@ -439,8 +441,8 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) pd->device = ib_dev; pd->uobject = uobj; atomic_set(&pd->usecnt, 0); - pd->res.type = RDMA_RESTRACK_PD; + rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata); if (ret) goto err_alloc; @@ -453,6 +455,7 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) return uverbs_response(attrs, &resp, sizeof(resp)); err_alloc: + rdma_restrack_put(&pd->res); kfree(pd); err: uobj_alloc_abort(uobj, attrs); @@ -742,8 +745,9 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->sig_attrs = NULL; mr->uobject = uobj; atomic_inc(&pd->usecnt); - mr->res.type = RDMA_RESTRACK_MR; mr->iova = cmd.hca_va; + + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_uadd(&mr->res); uobj->object = mr; @@ -1002,8 +1006,8 @@ static int create_cq(struct uverbs_attr_bundle *attrs, cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; atomic_set(&cq->usecnt, 0); - cq->res.type = RDMA_RESTRACK_CQ; + rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); if (ret) goto err_free; @@ -1021,6 +1025,7 @@ static int create_cq(struct uverbs_attr_bundle *attrs, return uverbs_response(attrs, &resp, sizeof(resp)); err_free: + rdma_restrack_put(&cq->res); kfree(cq); err_file: if (ev_file) diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index b1c7dacc02de..3a5fd6c9ba72 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -33,6 +33,7 @@ #include #include "rdma_core.h" #include "uverbs.h" +#include "restrack.h" static int uverbs_free_cq(struct ib_uobject *uobject, enum rdma_remove_reason why, @@ -123,8 +124,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? 
&ev_file->ev_queue : NULL; atomic_set(&cq->usecnt, 0); - cq->res.type = RDMA_RESTRACK_CQ; + rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); if (ret) goto err_free; @@ -139,6 +140,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( return ret; err_free: + rdma_restrack_put(&cq->res); kfree(cq); err_event_file: if (obj->uevent.event_file) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index d0c3767b0f9f..2607c410b09f 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -272,11 +272,12 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, atomic_set(&pd->usecnt, 0); pd->flags = flags; - pd->res.type = RDMA_RESTRACK_PD; + rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); rdma_restrack_set_task(&pd->res, caller); ret = device->ops.alloc_pd(pd, NULL); if (ret) { + rdma_restrack_put(&pd->res); kfree(pd); return ERR_PTR(ret); } @@ -1996,11 +1997,13 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, cq->event_handler = event_handler; cq->cq_context = cq_context; atomic_set(&cq->usecnt, 0); - cq->res.type = RDMA_RESTRACK_CQ; + + rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); rdma_restrack_set_task(&cq->res, caller); ret = device->ops.create_cq(cq, cq_attr, NULL); if (ret) { + rdma_restrack_put(&cq->res); kfree(cq); return ERR_PTR(ret); } @@ -2076,7 +2079,8 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->pd = pd; mr->dm = NULL; atomic_inc(&pd->usecnt); - mr->res.type = RDMA_RESTRACK_MR; + + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_kadd(&mr->res); return mr; @@ -2156,11 +2160,11 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, mr->uobject = NULL; atomic_inc(&pd->usecnt); mr->need_inval = false; - mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_kadd(&mr->res); mr->type = mr_type; mr->sig_attrs = NULL; + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_kadd(&mr->res); out: trace_mr_alloc(pd, mr_type, max_num_sg, mr); return mr; @@ -2216,11 +2220,11 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, mr->uobject = NULL; atomic_inc(&pd->usecnt); mr->need_inval = false; - mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_kadd(&mr->res); mr->type = IB_MR_TYPE_INTEGRITY; mr->sig_attrs = sig_attrs; + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_kadd(&mr->res); out: trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr); return mr; diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 7682d1bcf789..d7c166237939 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -110,13 +110,6 @@ int rdma_restrack_count(struct ib_device *dev, void rdma_restrack_kadd(struct rdma_restrack_entry *res); void rdma_restrack_uadd(struct rdma_restrack_entry *res); -/** - * rdma_restrack_del() - delete object from the reource tracking database - * @res: resource entry - * @type: actual type of object to operate - */ -void rdma_restrack_del(struct rdma_restrack_entry *res); - /** * rdma_is_kernel_res() - check the owner of resource * @res: resource entry -- cgit v1.2.3-59-g8ed1b From c34a23c28c6b0045b1f21649de30f68da72547af Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 22 Sep 2020 12:11:05 +0300 Subject: RDMA/restrack: Simplify restrack tracking in kernel flows Have a single rdma_restrack_add() that adds an entry, there is no reason to split the user/kernel here, the rdma_restrack_set_task() is responsible for this 
difference. This patch prepares the code for the future requirement of making restrack mandatory for managing ib objects. Link: https://lore.kernel.org/r/20200922091106.2152715-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 2 +- drivers/infiniband/core/core_priv.h | 6 +++-- drivers/infiniband/core/counters.c | 2 +- drivers/infiniband/core/cq.c | 2 +- drivers/infiniband/core/restrack.c | 46 +++++-------------------------------- drivers/infiniband/core/restrack.h | 1 + drivers/infiniband/core/verbs.c | 13 +++++++---- include/rdma/restrack.h | 1 - 8 files changed, 22 insertions(+), 51 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 568ad223d40f..99a8d61bcbb2 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -454,7 +454,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); if (id_priv->res.kern_name) - rdma_restrack_kadd(&id_priv->res); + rdma_restrack_add(&id_priv->res); else rdma_restrack_uadd(&id_priv->res); trace_cm_id_attach(id_priv, cma_dev->device); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 2ad276cd9781..cf5a50cefa39 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -363,8 +363,10 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { if (uobj) rdma_restrack_uadd(&qp->res); - else - rdma_restrack_kadd(&qp->res); + else { + rdma_restrack_set_task(&qp->res, pd->res.kern_name); + rdma_restrack_add(&qp->res); + } } else qp->res.valid = false; diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index c059de99d19c..e13c500a9ec0 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -252,7 +252,7 @@ static void rdma_counter_res_add(struct rdma_counter *counter, { if (rdma_is_kernel_res(&qp->res)) { rdma_restrack_set_task(&counter->res, qp->res.kern_name); - rdma_restrack_kadd(&counter->res); + rdma_restrack_add(&counter->res); } else { rdma_restrack_attach_task(&counter->res, qp->res.task); rdma_restrack_uadd(&counter->res); diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 2c3ff8dda717..620cf7c2696d 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -267,7 +267,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe, goto out_destroy_cq; } - rdma_restrack_kadd(&cq->res); + rdma_restrack_add(&cq->res); trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx); return cq; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 22c658e8c157..88d3852676a9 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -123,32 +123,6 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) } EXPORT_SYMBOL(rdma_restrack_count); -static void set_kern_name(struct rdma_restrack_entry *res) -{ - struct ib_pd *pd; - - switch (res->type) { - case RDMA_RESTRACK_QP: - pd = container_of(res, struct ib_qp, res)->pd; - if (!pd) { - WARN_ONCE(true, "XRC QPs are not supported\n"); - /* Survive, despite the programmer's error */ - res->kern_name = " "; - } - break; - case RDMA_RESTRACK_MR: - pd =
container_of(res, struct ib_mr, res)->pd; - break; - default: - /* Other types set kern_name directly */ - pd = NULL; - break; - } - - if (pd) - res->kern_name = pd->res.kern_name; -} - static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) { switch (res->type) { @@ -217,7 +191,11 @@ void rdma_restrack_new(struct rdma_restrack_entry *res, } EXPORT_SYMBOL(rdma_restrack_new); -static void rdma_restrack_add(struct rdma_restrack_entry *res) +/** + * rdma_restrack_add() - add object to the reource tracking database + * @res: resource entry + */ +void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); struct rdma_restrack_root *rt; @@ -249,19 +227,7 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res) if (!ret) res->valid = true; } - -/** - * rdma_restrack_kadd() - add kernel object to the reource tracking database - * @res: resource entry - */ -void rdma_restrack_kadd(struct rdma_restrack_entry *res) -{ - res->task = NULL; - set_kern_name(res); - res->user = false; - rdma_restrack_add(res); -} -EXPORT_SYMBOL(rdma_restrack_kadd); +EXPORT_SYMBOL(rdma_restrack_add); /** * rdma_restrack_uadd() - add user object to the reource tracking database diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h index 16006a5e7408..d35c4c41d2ff 100644 --- a/drivers/infiniband/core/restrack.h +++ b/drivers/infiniband/core/restrack.h @@ -25,6 +25,7 @@ struct rdma_restrack_root { int rdma_restrack_init(struct ib_device *dev); void rdma_restrack_clean(struct ib_device *dev); +void rdma_restrack_add(struct rdma_restrack_entry *res); void rdma_restrack_del(struct rdma_restrack_entry *res); void rdma_restrack_new(struct rdma_restrack_entry *res, enum rdma_restrack_type type); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 2607c410b09f..8117e551a866 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -281,7 +281,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, kfree(pd); return ERR_PTR(ret); } - rdma_restrack_kadd(&pd->res); + rdma_restrack_add(&pd->res); if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; @@ -2008,7 +2008,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, return ERR_PTR(ret); } - rdma_restrack_kadd(&cq->res); + rdma_restrack_add(&cq->res); return cq; } EXPORT_SYMBOL(__ib_create_cq); @@ -2081,7 +2081,8 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, atomic_inc(&pd->usecnt); rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); - rdma_restrack_kadd(&mr->res); + rdma_restrack_set_task(&mr->res, pd->res.kern_name); + rdma_restrack_add(&mr->res); return mr; } @@ -2164,7 +2165,8 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, mr->sig_attrs = NULL; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); - rdma_restrack_kadd(&mr->res); + rdma_restrack_set_task(&mr->res, pd->res.kern_name); + rdma_restrack_add(&mr->res); out: trace_mr_alloc(pd, mr_type, max_num_sg, mr); return mr; @@ -2224,7 +2226,8 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, mr->sig_attrs = sig_attrs; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); - rdma_restrack_kadd(&mr->res); + rdma_restrack_set_task(&mr->res, pd->res.kern_name); + rdma_restrack_add(&mr->res); out: trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr); return mr; diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index d7c166237939..db59e208f5e8 
100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -107,7 +107,6 @@ struct rdma_restrack_entry { int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type); -void rdma_restrack_kadd(struct rdma_restrack_entry *res); void rdma_restrack_uadd(struct rdma_restrack_entry *res); /** -- cgit v1.2.3-59-g8ed1b From b09c4d70122091c1865cb63a9c4dad1a94a8e339 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 22 Sep 2020 12:11:06 +0300 Subject: RDMA/restrack: Improve readability in task name management Use rdma_restrack_set_name() and rdma_restrack_parent_name() instead of tricky uses of rdma_restrack_attach_task()/rdma_restrack_uadd(). This uniformly makes all restracks add'd using rdma_restrack_add(). Link: https://lore.kernel.org/r/20200922091106.2152715-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 134 +++++++++++++++++--------- drivers/infiniband/core/core_priv.h | 12 +-- drivers/infiniband/core/counters.c | 9 +- drivers/infiniband/core/cq.c | 2 +- drivers/infiniband/core/restrack.c | 73 +++++++------- drivers/infiniband/core/restrack.h | 6 +- drivers/infiniband/core/ucma.c | 7 +- drivers/infiniband/core/uverbs_cmd.c | 14 ++- drivers/infiniband/core/uverbs_std_types_cq.c | 4 +- drivers/infiniband/core/verbs.c | 10 +- include/rdma/rdma_cm.h | 47 +++------ include/rdma/restrack.h | 13 +-- 12 files changed, 172 insertions(+), 159 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 99a8d61bcbb2..6419b798cd2e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -453,10 +453,8 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); - if (id_priv->res.kern_name) - rdma_restrack_add(&id_priv->res); - else - rdma_restrack_uadd(&id_priv->res); + rdma_restrack_add(&id_priv->res); + trace_cm_id_attach(id_priv, cma_dev->device); } @@ -822,10 +820,10 @@ static void cma_id_put(struct rdma_id_private *id_priv) complete(&id_priv->comp); } -struct rdma_cm_id *__rdma_create_id(struct net *net, - rdma_cm_event_handler event_handler, - void *context, enum rdma_ucm_port_space ps, - enum ib_qp_type qp_type, const char *caller) +static struct rdma_id_private * +__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, + void *context, enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type, const struct rdma_id_private *parent) { struct rdma_id_private *id_priv; @@ -853,11 +851,44 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->seq_num &= 0x00ffffff; rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID); - rdma_restrack_set_task(&id_priv->res, caller); + if (parent) + rdma_restrack_parent_name(&id_priv->res, &parent->res); - return &id_priv->id; + return id_priv; +} + +struct rdma_cm_id * +__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler, + void *context, enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type, const char *caller) +{ + struct rdma_id_private *ret; + + ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL); + if (IS_ERR(ret)) + return ERR_CAST(ret); + + rdma_restrack_set_name(&ret->res, caller); + return &ret->id; +} +EXPORT_SYMBOL(__rdma_create_kernel_id); + +struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler, + void *context, + 
enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type) +{ + struct rdma_id_private *ret; + + ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context, + ps, qp_type, NULL); + if (IS_ERR(ret)) + return ERR_CAST(ret); + + rdma_restrack_set_name(&ret->res, NULL); + return &ret->id; } -EXPORT_SYMBOL(__rdma_create_id); +EXPORT_SYMBOL(rdma_create_user_id); static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { @@ -2029,14 +2060,15 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id, int ret; listen_id_priv = container_of(listen_id, struct rdma_id_private, id); - id = __rdma_create_id(listen_id->route.addr.dev_addr.net, - listen_id->event_handler, listen_id->context, - listen_id->ps, ib_event->param.req_rcvd.qp_type, - listen_id_priv->res.kern_name); - if (IS_ERR(id)) + id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net, + listen_id->event_handler, listen_id->context, + listen_id->ps, + ib_event->param.req_rcvd.qp_type, + listen_id_priv); + if (IS_ERR(id_priv)) return NULL; - id_priv = container_of(id, struct rdma_id_private, id); + id = &id_priv->id; if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, (struct sockaddr *)&id->route.addr.dst_addr, listen_id, ib_event, ss_family, service_id)) @@ -2090,13 +2122,13 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id, int ret; listen_id_priv = container_of(listen_id, struct rdma_id_private, id); - id = __rdma_create_id(net, listen_id->event_handler, listen_id->context, - listen_id->ps, IB_QPT_UD, - listen_id_priv->res.kern_name); - if (IS_ERR(id)) + id_priv = __rdma_create_id(net, listen_id->event_handler, + listen_id->context, listen_id->ps, IB_QPT_UD, + listen_id_priv); + if (IS_ERR(id_priv)) return NULL; - id_priv = container_of(id, struct rdma_id_private, id); + id = &id_priv->id; if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, (struct sockaddr *)&id->route.addr.dst_addr, listen_id, ib_event, ss_family, @@ -2332,7 +2364,6 @@ out: static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct iw_cm_event *iw_event) { - struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event = {}; int ret = -ECONNABORTED; @@ -2352,16 +2383,14 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, goto out; /* Create a new RDMA id for the new IW CM ID */ - new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net, - listen_id->id.event_handler, - listen_id->id.context, - RDMA_PS_TCP, IB_QPT_RC, - listen_id->res.kern_name); - if (IS_ERR(new_cm_id)) { + conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net, + listen_id->id.event_handler, + listen_id->id.context, RDMA_PS_TCP, + IB_QPT_RC, listen_id); + if (IS_ERR(conn_id)) { ret = -ENOMEM; goto out; } - conn_id = container_of(new_cm_id, struct rdma_id_private, id); mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); conn_id->state = RDMA_CM_CONNECT; @@ -2466,7 +2495,6 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { struct rdma_id_private *dev_id_priv; - struct rdma_cm_id *id; struct net *net = id_priv->id.route.addr.dev_addr.net; int ret; @@ -2475,13 +2503,12 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; - id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, - id_priv->id.qp_type, id_priv->res.kern_name); - if (IS_ERR(id)) + dev_id_priv = + __rdma_create_id(net, cma_listen_handler, 
id_priv, + id_priv->id.ps, id_priv->id.qp_type, id_priv); + if (IS_ERR(dev_id_priv)) return; - dev_id_priv = container_of(id, struct rdma_id_private, id); - dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); @@ -2494,7 +2521,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, dev_id_priv->tos_set = id_priv->tos_set; dev_id_priv->tos = id_priv->tos; - ret = rdma_listen(id, id_priv->backlog); + ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) dev_warn(&cma_dev->device->dev, "RDMA CMA: cma_listen_on_dev, error %d\n", ret); @@ -4149,8 +4176,25 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } -int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, - const char *caller) +/** + * rdma_accept - Called to accept a connection request or response. + * @id: Connection identifier associated with the request. + * @conn_param: Information needed to establish the connection. This must be + * provided if accepting a connection request. If accepting a connection + * response, this parameter must be NULL. + * + * Typically, this routine is only called by the listener to accept a connection + * request. It must also be called on the active side of a connection if the + * user is performing their own QP transitions. + * + * In the case of error, a reject message is sent to the remote side and the + * state of the qp associated with the id is modified to error, such that any + * previously posted receive buffers would be flushed. + * + * This function is for use by kernel ULPs and must be called from under the + * handler callback. + */ +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv = container_of(id, struct rdma_id_private, id); @@ -4158,8 +4202,6 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, lockdep_assert_held(&id_priv->handler_mutex); - rdma_restrack_set_task(&id_priv->res, caller); - if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT) return -EINVAL; @@ -4198,10 +4240,10 @@ reject: rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED); return ret; } -EXPORT_SYMBOL(__rdma_accept); +EXPORT_SYMBOL(rdma_accept); -int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, - const char *caller, struct rdma_ucm_ece *ece) +int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + struct rdma_ucm_ece *ece) { struct rdma_id_private *id_priv = container_of(id, struct rdma_id_private, id); @@ -4209,9 +4251,9 @@ int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, id_priv->ece.vendor_id = ece->vendor_id; id_priv->ece.attr_mod = ece->attr_mod; - return __rdma_accept(id, conn_param, caller); + return rdma_accept(id, conn_param); } -EXPORT_SYMBOL(__rdma_accept_ece); +EXPORT_SYMBOL(rdma_accept_ece); void rdma_lock_handler(struct rdma_cm_id *id) { diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index cf5a50cefa39..e84b0fedaacb 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -361,15 +361,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, */ is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT; if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { - if (uobj) - rdma_restrack_uadd(&qp->res); - else { - 
rdma_restrack_set_task(&qp->res, pd->res.kern_name); - rdma_restrack_add(&qp->res); - } - } else - qp->res.valid = false; - + rdma_restrack_parent_name(&qp->res, &pd->res); + rdma_restrack_add(&qp->res); + } return qp; } diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index e13c500a9ec0..e4ff0d3328b6 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -250,13 +250,8 @@ next: static void rdma_counter_res_add(struct rdma_counter *counter, struct ib_qp *qp) { - if (rdma_is_kernel_res(&qp->res)) { - rdma_restrack_set_task(&counter->res, qp->res.kern_name); - rdma_restrack_add(&counter->res); - } else { - rdma_restrack_attach_task(&counter->res, qp->res.task); - rdma_restrack_uadd(&counter->res); - } + rdma_restrack_parent_name(&counter->res, &qp->res); + rdma_restrack_add(&counter->res); } static void counter_release(struct kref *kref) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 620cf7c2696d..12ebacf52958 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -236,7 +236,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe, goto out_free_cq; rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); - rdma_restrack_set_task(&cq->res, caller); + rdma_restrack_set_name(&cq->res, caller); ret = dev->ops.create_cq(cq, &cq_attr, NULL); if (ret) diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 88d3852676a9..4aeeaaed0f17 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -147,34 +147,56 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) } } -void rdma_restrack_set_task(struct rdma_restrack_entry *res, - const char *caller) +/** + * rdma_restrack_attach_task() - attach the task onto this resource, + * valid for user space restrack entries. + * @res: resource entry + * @task: the task to attach + */ +static void rdma_restrack_attach_task(struct rdma_restrack_entry *res, + struct task_struct *task) { - if (caller) { - res->kern_name = caller; + if (WARN_ON_ONCE(!task)) return; - } if (res->task) put_task_struct(res->task); - get_task_struct(current); - res->task = current; + get_task_struct(task); + res->task = task; + res->user = true; } -EXPORT_SYMBOL(rdma_restrack_set_task); /** - * rdma_restrack_attach_task() - attach the task onto this resource + * rdma_restrack_set_name() - set the task for this resource * @res: resource entry - * @task: the task to attach, the current task will be used if it is NULL. + * @caller: kernel name, the current task will be used if the caller is NULL. 
*/ -void rdma_restrack_attach_task(struct rdma_restrack_entry *res, - struct task_struct *task) +void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller) { - if (res->task) - put_task_struct(res->task); - get_task_struct(task); - res->task = task; + if (caller) { + res->kern_name = caller; + return; + } + + rdma_restrack_attach_task(res, current); +} +EXPORT_SYMBOL(rdma_restrack_set_name); + +/** + * rdma_restrack_parent_name() - set the restrack name properties based + * on parent restrack + * @dst: destination resource entry + * @parent: parent resource entry + */ +void rdma_restrack_parent_name(struct rdma_restrack_entry *dst, + const struct rdma_restrack_entry *parent) +{ + if (rdma_is_kernel_res(parent)) + dst->kern_name = parent->kern_name; + else + rdma_restrack_attach_task(dst, parent->task); } +EXPORT_SYMBOL(rdma_restrack_parent_name); /** * rdma_restrack_new() - Initializes new restrack entry to allow _put() interface @@ -229,25 +251,6 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) } EXPORT_SYMBOL(rdma_restrack_add); -/** - * rdma_restrack_uadd() - add user object to the reource tracking database - * @res: resource entry - */ -void rdma_restrack_uadd(struct rdma_restrack_entry *res) -{ - if ((res->type != RDMA_RESTRACK_CM_ID) && - (res->type != RDMA_RESTRACK_COUNTER)) - res->task = NULL; - - if (!res->task) - rdma_restrack_set_task(res, NULL); - res->kern_name = NULL; - - res->user = true; - rdma_restrack_add(res); -} -EXPORT_SYMBOL(rdma_restrack_uadd); - int __must_check rdma_restrack_get(struct rdma_restrack_entry *res) { return kref_get_unless_zero(&res->kref); diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h index d35c4c41d2ff..6a04fc41f738 100644 --- a/drivers/infiniband/core/restrack.h +++ b/drivers/infiniband/core/restrack.h @@ -29,6 +29,8 @@ void rdma_restrack_add(struct rdma_restrack_entry *res); void rdma_restrack_del(struct rdma_restrack_entry *res); void rdma_restrack_new(struct rdma_restrack_entry *res, enum rdma_restrack_type type); -void rdma_restrack_attach_task(struct rdma_restrack_entry *res, - struct task_struct *task); +void rdma_restrack_set_name(struct rdma_restrack_entry *res, + const char *caller); +void rdma_restrack_parent_name(struct rdma_restrack_entry *dst, + const struct rdma_restrack_entry *parent); #endif /* _RDMA_CORE_RESTRACK_H_ */ diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index b3a7dbb12f25..08a628246bd6 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -456,8 +456,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return -ENOMEM; ctx->uid = cmd.uid; - cm_id = __rdma_create_id(current->nsproxy->net_ns, - ucma_event_handler, ctx, cmd.ps, qp_type, NULL); + cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); goto err1; @@ -1126,7 +1125,7 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&ctx->mutex); rdma_lock_handler(ctx->cm_id); - ret = __rdma_accept_ece(ctx->cm_id, &conn_param, NULL, &ece); + ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece); if (!ret) { /* The uid must be set atomically with the handler */ ctx->uid = cmd.uid; @@ -1136,7 +1135,7 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, } else { mutex_lock(&ctx->mutex); rdma_lock_handler(ctx->cm_id); - ret = 
__rdma_accept_ece(ctx->cm_id, NULL, NULL, &ece); + ret = rdma_accept_ece(ctx->cm_id, NULL, &ece); rdma_unlock_handler(ctx->cm_id); mutex_unlock(&ctx->mutex); } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6ddd26548da8..7b8d6b3409d5 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -223,6 +223,7 @@ int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs) xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC); rdma_restrack_new(&ucontext->res, RDMA_RESTRACK_CTX); + rdma_restrack_set_name(&ucontext->res, NULL); attrs->context = ucontext; return 0; } @@ -251,7 +252,7 @@ int ib_init_ucontext(struct uverbs_attr_bundle *attrs) if (ret) goto err_uncharge; - rdma_restrack_uadd(&ucontext->res); + rdma_restrack_add(&ucontext->res); /* * Make sure that ib_uverbs_get_ucontext() sees the pointer update @@ -443,10 +444,12 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) atomic_set(&pd->usecnt, 0); rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); + rdma_restrack_set_name(&pd->res, NULL); + ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata); if (ret) goto err_alloc; - rdma_restrack_uadd(&pd->res); + rdma_restrack_add(&pd->res); uobj->object = pd; uobj_finalize_uobj_create(uobj, attrs); @@ -748,7 +751,8 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->iova = cmd.hca_va; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); - rdma_restrack_uadd(&mr->res); + rdma_restrack_set_name(&mr->res, NULL); + rdma_restrack_add(&mr->res); uobj->object = mr; uobj_put_obj_read(pd); @@ -1008,10 +1012,12 @@ static int create_cq(struct uverbs_attr_bundle *attrs, atomic_set(&cq->usecnt, 0); rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); + rdma_restrack_set_name(&cq->res, NULL); + ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); if (ret) goto err_free; - rdma_restrack_uadd(&cq->res); + rdma_restrack_add(&cq->res); obj->uevent.uobject.object = cq; obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file); diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 3a5fd6c9ba72..8dabd05988b2 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -126,13 +126,15 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( atomic_set(&cq->usecnt, 0); rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); + rdma_restrack_set_name(&cq->res, NULL); + ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); if (ret) goto err_free; obj->uevent.uobject.object = cq; obj->uevent.uobject.user_handle = user_handle; - rdma_restrack_uadd(&cq->res); + rdma_restrack_add(&cq->res); uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE); ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 8117e551a866..53dd8284260a 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -273,7 +273,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, pd->flags = flags; rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); - rdma_restrack_set_task(&pd->res, caller); + rdma_restrack_set_name(&pd->res, caller); ret = device->ops.alloc_pd(pd, NULL); if (ret) { @@ -1999,7 +1999,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, atomic_set(&cq->usecnt, 0); rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); - rdma_restrack_set_task(&cq->res, caller); + 
rdma_restrack_set_name(&cq->res, caller); ret = device->ops.create_cq(cq, cq_attr, NULL); if (ret) { @@ -2081,7 +2081,7 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, atomic_inc(&pd->usecnt); rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); - rdma_restrack_set_task(&mr->res, pd->res.kern_name); + rdma_restrack_parent_name(&mr->res, &pd->res); rdma_restrack_add(&mr->res); return mr; @@ -2165,7 +2165,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, mr->sig_attrs = NULL; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); - rdma_restrack_set_task(&mr->res, pd->res.kern_name); + rdma_restrack_parent_name(&mr->res, &pd->res); rdma_restrack_add(&mr->res); out: trace_mr_alloc(pd, mr_type, max_num_sg, mr); @@ -2226,7 +2226,7 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, mr->sig_attrs = sig_attrs; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); - rdma_restrack_set_task(&mr->res, pd->res.kern_name); + rdma_restrack_parent_name(&mr->res, &pd->res); rdma_restrack_add(&mr->res); out: trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr); diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index c1334c9a7aa8..c672ae1da26b 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -110,11 +110,14 @@ struct rdma_cm_id { u8 port_num; }; -struct rdma_cm_id *__rdma_create_id(struct net *net, - rdma_cm_event_handler event_handler, - void *context, enum rdma_ucm_port_space ps, - enum ib_qp_type qp_type, - const char *caller); +struct rdma_cm_id * +__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler, + void *context, enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type, const char *caller); +struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler, + void *context, + enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type); /** * rdma_create_id - Create an RDMA identifier. @@ -132,9 +135,9 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, * The event handler callback serializes on the id's mutex and is * allowed to sleep. */ -#define rdma_create_id(net, event_handler, context, ps, qp_type) \ - __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \ - KBUILD_MODNAME) +#define rdma_create_id(net, event_handler, context, ps, qp_type) \ + __rdma_create_kernel_id(net, event_handler, context, ps, qp_type, \ + KBUILD_MODNAME) /** * rdma_destroy_id - Destroys an RDMA identifier. @@ -250,34 +253,12 @@ int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, */ int rdma_listen(struct rdma_cm_id *id, int backlog); -int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, - const char *caller); +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); void rdma_lock_handler(struct rdma_cm_id *id); void rdma_unlock_handler(struct rdma_cm_id *id); -int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, - const char *caller, struct rdma_ucm_ece *ece); - -/** - * rdma_accept - Called to accept a connection request or response. - * @id: Connection identifier associated with the request. - * @conn_param: Information needed to establish the connection. This must be - * provided if accepting a connection request. If accepting a connection - * response, this parameter must be NULL. - * - * Typically, this routine is only called by the listener to accept a connection - * request. It must also be called on the active side of a connection if the - * user is performing their own QP transitions. 
- * - * In the case of error, a reject message is sent to the remote side and the - * state of the qp associated with the id is modified to error, such that any - * previously posted receive buffers would be flushed. - * - * This function is for use by kernel ULPs and must be called from under the - * handler callback. - */ -#define rdma_accept(id, conn_param) \ - __rdma_accept((id), (conn_param), KBUILD_MODNAME) +int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + struct rdma_ucm_ece *ece); /** * rdma_notify - Notifies the RDMA CM of an asynchronous event that has diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index db59e208f5e8..d3a1cc5be7bc 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -106,14 +106,11 @@ struct rdma_restrack_entry { int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type); - -void rdma_restrack_uadd(struct rdma_restrack_entry *res); - /** * rdma_is_kernel_res() - check the owner of resource * @res: resource entry */ -static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res) +static inline bool rdma_is_kernel_res(const struct rdma_restrack_entry *res) { return !res->user; } @@ -130,14 +127,6 @@ int __must_check rdma_restrack_get(struct rdma_restrack_entry *res); */ int rdma_restrack_put(struct rdma_restrack_entry *res); -/** - * rdma_restrack_set_task() - set the task for this resource - * @res: resource entry - * @caller: kernel name, the current task will be used if the caller is NULL. - */ -void rdma_restrack_set_task(struct rdma_restrack_entry *res, - const char *caller); - /* * Helper functions for rdma drivers when filling out * nldev driver attributes. -- cgit v1.2.3-59-g8ed1b From 1c15b4f2a42ff6697767c22c8ff5f9bcc22fdbe5 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Wed, 23 Sep 2020 19:50:13 +0300 Subject: RDMA/core: Modify enum ib_gid_type and enum rdma_network_type Separate IB_GID_TYPE_IB and IB_GID_TYPE_ROCE into two different values, so enum ib_gid_type will match the gid types of the new query GID table API which will be introduced in the following patches. This change in enum ib_gid_type also requires changing enum rdma_network_type, by separating the RDMA_NETWORK_IB and RDMA_NETWORK_ROCE_V1 values. Link: https://lore.kernel.org/r/20200923165015.2491894-3-leon@kernel.org Signed-off-by: Avihai Horon Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 4 ++++ drivers/infiniband/core/cma.c | 4 ++++ drivers/infiniband/core/cma_configfs.c | 9 +++++---- drivers/infiniband/core/verbs.c | 2 +- drivers/infiniband/hw/mlx5/cq.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 4 ++-- drivers/infiniband/hw/qedr/verbs.c | 4 +++- include/rdma/ib_verbs.h | 17 ++++++++++------- 8 files changed, 30 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband/core/cma.c') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 6079f1f7e678..cf49ac0b0aa6 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -133,7 +133,11 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) } static const char * const gid_type_str[] = { + /* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for + * user space compatibility reasons. 
+ */ [IB_GID_TYPE_IB] = "IB/RoCE v1", + [IB_GID_TYPE_ROCE] = "IB/RoCE v1", [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2", }; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 6419b798cd2e..09a844755882 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -304,6 +304,10 @@ int cma_set_default_gid_type(struct cma_device *cma_dev, if (!rdma_is_port_valid(cma_dev->device, port)) return -EINVAL; + if (default_gid_type == IB_GID_TYPE_IB && + rdma_protocol_roce_eth_encap(cma_dev->device, port)) + default_gid_type = IB_GID_TYPE_ROCE; + supported_gids = roce_gid_type_mask_support(cma_dev->device, port); if (!(supported_gids & 1 << default_gid_type)) diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 3c1e2ca564fe..7ec4af2ed87a 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -123,16 +123,17 @@ static ssize_t default_roce_mode_store(struct config_item *item, { struct cma_device *cma_dev; struct cma_dev_port_group *group; - int gid_type = ib_cache_gid_parse_type_str(buf); + int gid_type; ssize_t ret; - if (gid_type < 0) - return -EINVAL; - ret = cma_configfs_params_get(item, &cma_dev, &group); if (ret) return ret; + gid_type = ib_cache_gid_parse_type_str(buf); + if (gid_type < 0) + return -EINVAL; + ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type); cma_configfs_params_put(cma_dev); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 53dd8284260a..740f8454b6b4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -733,7 +733,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, (struct in6_addr *)dgid); return 0; } else if (net_type == RDMA_NETWORK_IPV6 || - net_type == RDMA_NETWORK_IB) { + net_type == RDMA_NETWORK_IB || RDMA_NETWORK_ROCE_V1) { *dgid = hdr->ibgrh.dgid; *sgid = hdr->ibgrh.sgid; return 0; diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 35e5bbb44d3d..fb62f1d04afa 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -255,7 +255,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, switch (roce_packet_type) { case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH: - wc->network_hdr_type = RDMA_NETWORK_IB; + wc->network_hdr_type = RDMA_NETWORK_ROCE_V1; break; case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6: wc->network_hdr_type = RDMA_NETWORK_IPV6; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index db602ee8f730..7082172b5b61 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -546,7 +546,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { - enum ib_gid_type gid_type = IB_GID_TYPE_IB; + enum ib_gid_type gid_type = IB_GID_TYPE_ROCE; u16 vlan_id = 0xffff; u8 roce_version = 0; u8 roce_l3_type = 0; @@ -561,7 +561,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, } switch (gid_type) { - case IB_GID_TYPE_IB: + case IB_GID_TYPE_ROCE: roce_version = MLX5_ROCE_VERSION_1; break; case IB_GID_TYPE_ROCE_UDP_ENCAP: diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index b5603b3ed6a4..019642ff24a7 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1157,7 +1157,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, 
SET_FIELD(qp_params->modify_flags, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); break; - case RDMA_NETWORK_IB: + case RDMA_NETWORK_ROCE_V1: memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], @@ -1177,6 +1177,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); qp_params->roce_mode = ROCE_V2_IPV4; break; + default: + return -EINVAL; } for (i = 0; i < 4; i++) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 5ad997346f7f..3b61fba531d0 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -138,10 +138,9 @@ union ib_gid { extern union ib_gid zgid; enum ib_gid_type { - /* If link layer is Ethernet, this is RoCE V1 */ IB_GID_TYPE_IB = 0, - IB_GID_TYPE_ROCE = 0, - IB_GID_TYPE_ROCE_UDP_ENCAP = 1, + IB_GID_TYPE_ROCE = 1, + IB_GID_TYPE_ROCE_UDP_ENCAP = 2, IB_GID_TYPE_SIZE }; @@ -180,7 +179,7 @@ rdma_node_get_transport(unsigned int node_type); enum rdma_network_type { RDMA_NETWORK_IB, - RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB, + RDMA_NETWORK_ROCE_V1, RDMA_NETWORK_IPV4, RDMA_NETWORK_IPV6 }; @@ -190,9 +189,10 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net if (network_type == RDMA_NETWORK_IPV4 || network_type == RDMA_NETWORK_IPV6) return IB_GID_TYPE_ROCE_UDP_ENCAP; - - /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */ - return IB_GID_TYPE_IB; + else if (network_type == RDMA_NETWORK_ROCE_V1) + return IB_GID_TYPE_ROCE; + else + return IB_GID_TYPE_IB; } static inline enum rdma_network_type @@ -201,6 +201,9 @@ rdma_gid_attr_network_type(const struct ib_gid_attr *attr) if (attr->gid_type == IB_GID_TYPE_IB) return RDMA_NETWORK_IB; + if (attr->gid_type == IB_GID_TYPE_ROCE) + return RDMA_NETWORK_ROCE_V1; + if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid)) return RDMA_NETWORK_IPV4; else -- cgit v1.2.3-59-g8ed1b
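
A note on the resulting restrack API, with a sketch. The restrack patches above converge on a single allocation pattern for every tracked object: rdma_restrack_new() initializes the entry's kref and type, rdma_restrack_set_name() or rdma_restrack_parent_name() (both private to drivers/infiniband/core) records the owner, rdma_restrack_add() publishes the entry, and rdma_restrack_put() unwinds a never-published entry on the error path. Condensed from the __ib_alloc_pd() hunks above, with the allocation boilerplate trimmed ('device' and 'caller' are the verbs.c parameters):

	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);
	pd->device = device;

	/* Stage 1: init kref/type; the entry is not yet visible anywhere */
	rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
	/* Stage 2: name the owner; a NULL caller means "the current task" */
	rdma_restrack_set_name(&pd->res, caller);

	ret = device->ops.alloc_pd(pd, NULL);
	if (ret) {
		/* Undo stage 1 only; nothing was added to the tracking table */
		rdma_restrack_put(&pd->res);
		kfree(pd);
		return ERR_PTR(ret);
	}
	/* Stage 3: publish the entry so nldev/rdmatool can report it */
	rdma_restrack_add(&pd->res);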
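
The __rdma_create_id() split fixes ownership the same way, at creation time. Kernel ULPs keep using the rdma_create_id() macro, which now expands to __rdma_create_kernel_id(..., KBUILD_MODNAME), while ucma moves to rdma_create_user_id(); a child id created from a listener inherits its owner via rdma_restrack_parent_name(). A minimal kernel-side sketch (my_handler and my_create_id are placeholder names, not taken from the patches):

	#include <net/net_namespace.h>	/* init_net */
	#include <rdma/rdma_cm.h>

	static int my_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
	{
		return 0;	/* placeholder ULP event handler */
	}

	static struct rdma_cm_id *my_create_id(void *context)
	{
		/* The restrack entry is named after this module from the start */
		return rdma_create_id(&init_net, my_handler, context,
				      RDMA_PS_TCP, IB_QPT_RC);
	}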
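
The kernel-doc moved onto rdma_accept() also encodes a locking contract: inside an event handler the RDMA CM core already holds the id's handler_mutex, so a ULP may call rdma_accept() directly, while any caller outside a handler must bracket the call with rdma_lock_handler()/rdma_unlock_handler() to satisfy the lockdep assertion. The out-of-handler path, condensed from the ucma_accept() hunk above (ctx, cmd, conn_param and ece are ucma's own locals):

	rdma_lock_handler(ctx->cm_id);
	ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
	if (!ret) {
		/* The uid must be set atomically with the handler */
		ctx->uid = cmd.uid;
	}
	rdma_unlock_handler(ctx->cm_id);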
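
Finally, after the enum split the gid-type/network-type correspondence stops relying on aliased enum values and becomes explicit. Summarizing the ib_verbs.h helpers above:

	network type           gid type                      wire protocol
	RDMA_NETWORK_IB        IB_GID_TYPE_IB                native IB (GRH)
	RDMA_NETWORK_ROCE_V1   IB_GID_TYPE_ROCE              RoCE v1 (GRH over Ethernet)
	RDMA_NETWORK_IPV4      IB_GID_TYPE_ROCE_UDP_ENCAP    RoCE v2 over IPv4
	RDMA_NETWORK_IPV6      IB_GID_TYPE_ROCE_UDP_ENCAP    RoCE v2 over IPv6

The reverse direction is the only many-to-one case: rdma_gid_attr_network_type() disambiguates IB_GID_TYPE_ROCE_UDP_ENCAP into IPv4 or IPv6 using ipv6_addr_v4mapped() on the gid. One oddity worth flagging in the verbs.c hunk: the new condition reads "net_type == RDMA_NETWORK_IB || RDMA_NETWORK_ROCE_V1", where the second operand is a bare nonzero enum constant rather than a comparison, so the branch is effectively always taken; presumably "net_type == RDMA_NETWORK_ROCE_V1" was intended.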