aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/qedr/verbs.c
diff options
context:
space:
mode:
authorMichal Kalderon <michal.kalderon@marvell.com>2019-10-27 22:04:50 +0200
committerJason Gunthorpe <jgg@mellanox.com>2019-10-28 14:01:35 -0300
commit82af6d19d8d9227c22a53ff00b40fb2a4f9fce69 (patch)
tree35750c8f4b4ed206967b9f19edc51a71e4172226 /drivers/infiniband/hw/qedr/verbs.c
parentRDMA/qedr: Fix qpids xarray api used (diff)
downloadlinux-dev-82af6d19d8d9227c22a53ff00b40fb2a4f9fce69.tar.xz
linux-dev-82af6d19d8d9227c22a53ff00b40fb2a4f9fce69.zip
RDMA/qedr: Fix synchronization methods and memory leaks in qedr
Re-design of the iWARP CM related objects reference counting and synchronization methods, to ensure operations are synchronized correctly and that memory allocated for "ep" is properly released. Also makes sure QP memory is not released before ep is finished accessing it. Where as the QP object is created/destroyed by external operations, the ep is created/destroyed by internal operations and represents the tcp connection associated with the QP. QP destruction flow: - needs to wait for ep establishment to complete (either successfully or with error) - needs to wait for ep disconnect to be fully posted to avoid a race condition of disconnect being called after reset. - both the operations above don't always happen, so we use atomic flags to indicate whether the qp destruction flow needs to wait for these completions or not, if the destroy is called before these operations began, the flows will check the flags and not execute them ( connect / disconnect). We use completion structure for waiting for the completions mentioned above. The QP refcnt was modified to kref object. The EP has a kref added to it to handle additional worker thread accessing it. Memory Leaks - https://www.spinics.net/lists/linux-rdma/msg83762.html Concurrency not managed correctly - https://www.spinics.net/lists/linux-rdma/msg67949.html Fixes: de0089e692a9 ("RDMA/qedr: Add iWARP connection management qp related callbacks") Link: https://lore.kernel.org/r/20191027200451.28187-4-michal.kalderon@marvell.com Reported-by: Chuck Lever <chuck.lever@oracle.com> Reported-by: Jason Gunthorpe <jgg@mellanox.com> Signed-off-by: Ariel Elior <ariel.elior@marvell.com> Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/infiniband/hw/qedr/verbs.c')
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c62
1 files changed, 39 insertions, 23 deletions
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 84b666c67cff..a17b388ee3b3 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -51,6 +51,7 @@
#include "verbs.h"
#include <rdma/qedr-abi.h>
#include "qedr_roce_cm.h"
+#include "qedr_iw_cm.h"
#define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm)
#define RDMA_MAX_SGE_PER_SRQ (4)
@@ -1193,7 +1194,10 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
struct ib_qp_init_attr *attrs)
{
spin_lock_init(&qp->q_lock);
- atomic_set(&qp->refcnt, 1);
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ kref_init(&qp->refcnt);
+ init_completion(&qp->iwarp_cm_comp);
+ }
qp->pd = pd;
qp->qp_type = attrs->qp_type;
qp->max_inline_data = attrs->cap.max_inline_data;
@@ -1592,6 +1596,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
int rc = -EINVAL;
+ qp->create_type = QEDR_QP_CREATE_USER;
memset(&ureq, 0, sizeof(ureq));
rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
if (rc) {
@@ -1805,6 +1810,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev,
u32 n_sq_entries;
memset(&in_params, 0, sizeof(in_params));
+ qp->create_type = QEDR_QP_CREATE_KERNEL;
/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
* the ring. The ring should allow at least a single WR, even if the
@@ -2437,7 +2443,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
return rc;
}
- if (udata)
+ if (qp->create_type == QEDR_QP_CREATE_USER)
qedr_cleanup_user(dev, qp);
else
qedr_cleanup_kernel(dev, qp);
@@ -2467,34 +2473,44 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
}
} else {
- /* Wait for the connect/accept to complete */
- if (qp->ep) {
- int wait_count = 1;
-
- while (qp->ep->during_connect) {
- DP_DEBUG(dev, QEDR_MSG_QP,
- "Still in during connect/accept\n");
-
- msleep(100);
- if (wait_count++ > 200) {
- DP_NOTICE(dev,
- "during connect timeout\n");
- break;
- }
- }
- }
+ /* If connection establishment started the WAIT_FOR_CONNECT
+ * bit will be on and we need to Wait for the establishment
+ * to complete before destroying the qp.
+ */
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+ &qp->iwarp_cm_flags))
+ wait_for_completion(&qp->iwarp_cm_comp);
+
+ /* If graceful disconnect started, the WAIT_FOR_DISCONNECT
+ * bit will be on, and we need to wait for the disconnect to
+ * complete before continuing. We can use the same completion,
+ * iwarp_cm_comp, since this is the only place that waits for
+ * this completion and it is sequential. In addition,
+ * disconnect can't occur before the connection is fully
+ * established, therefore if WAIT_FOR_DISCONNECT is on it
+ * means WAIT_FOR_CONNECT is also on and the completion for
+ * CONNECT already occurred.
+ */
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
+ &qp->iwarp_cm_flags))
+ wait_for_completion(&qp->iwarp_cm_comp);
}
if (qp->qp_type == IB_QPT_GSI)
qedr_destroy_gsi_qp(dev);
+ /* We need to remove the entry from the xarray before we release the
+ * qp_id to avoid a race of the qp_id being reallocated and failing
+ * on xa_insert
+ */
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ xa_erase(&dev->qps, qp->qp_id);
+
qedr_free_qp_resources(dev, qp, udata);
- if (atomic_dec_and_test(&qp->refcnt) &&
- rdma_protocol_iwarp(&dev->ibdev, 1)) {
- xa_erase(&dev->qps, qp->qp_id);
- kfree(qp);
- }
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ qedr_iw_qp_rem_ref(&qp->ibqp);
+
return 0;
}