From 747f4d7a9d1bc07e3f9f22c84201ffb0abee1634 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 12 Apr 2016 10:46:10 -0700 Subject: IB/qib, IB/hfi1: Fix up UD loopback use of irq flags The dual lock patch moved locking around and missed an issue with handling irq flags when processing UD loopback packets. This issue was revealed by smatch. Fix for both qib and hfi1 to pass the saved flags to the UD request builder and handle the changes correctly. Fixes: 46a80d62e6e0 ("IB/qib, staging/rdma/hfi1: add s_hlock for use in post send") Reported-by: Dan Carpenter Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_rc.c | 2 +- drivers/infiniband/hw/qib/qib_ruc.c | 4 ++-- drivers/infiniband/hw/qib/qib_uc.c | 2 +- drivers/infiniband/hw/qib/qib_ud.c | 10 +++++----- drivers/infiniband/hw/qib/qib_verbs.h | 6 +++--- 5 files changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 9088e26d3ac8..444028a3582a 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -230,7 +230,7 @@ bail: * * Return 1 if constructed; otherwise, return 0. */ -int qib_make_rc_req(struct rvt_qp *qp) +int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index a5f07a64b228..b67779256297 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -739,7 +739,7 @@ void qib_do_send(struct rvt_qp *qp) struct qib_qp_priv *priv = qp->priv; struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - int (*make_req)(struct rvt_qp *qp); + int (*make_req)(struct rvt_qp *qp, unsigned long *flags); unsigned long flags; if ((qp->ibqp.qp_type == IB_QPT_RC || @@ -781,7 +781,7 @@ void qib_do_send(struct rvt_qp *qp) qp->s_hdrwords = 0; spin_lock_irqsave(&qp->s_lock, flags); } - } while (make_req(qp)); + } while (make_req(qp, &flags)); spin_unlock_irqrestore(&qp->s_lock, flags); } diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 7bdbc79ceaa3..1d61bd04f449 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -45,7 +45,7 @@ * * Return 1 if constructed; otherwise, return 0. */ -int qib_make_uc_req(struct rvt_qp *qp) +int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; struct qib_other_headers *ohdr; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index d9502137de62..846e6c726df7 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -238,7 +238,7 @@ drop: * * Return 1 if constructed; otherwise, return 0. */ -int qib_make_ud_req(struct rvt_qp *qp) +int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; struct qib_other_headers *ohdr; @@ -294,7 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp) this_cpu_inc(ibp->pmastats->n_unicast_xmit); lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); if (unlikely(lid == ppd->lid)) { - unsigned long flags; + unsigned long tflags = *flags; /* * If DMAs are in progress, we can't generate * a completion for the loopback packet since @@ -307,10 +307,10 @@ int qib_make_ud_req(struct rvt_qp *qp) goto bail; } qp->s_cur = next_cur; - local_irq_save(flags); - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irqrestore(&qp->s_lock, tflags); qib_ud_loopback(qp, wqe); - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock_irqsave(&qp->s_lock, tflags); + *flags = tflags; qib_send_complete(qp, wqe, IB_WC_SUCCESS); goto done; } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 4b76a8d59337..6888f03c6d61 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -430,11 +430,11 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, void qib_send_rc_ack(struct rvt_qp *qp); -int qib_make_rc_req(struct rvt_qp *qp); +int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags); -int qib_make_uc_req(struct rvt_qp *qp); +int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags); -int qib_make_ud_req(struct rvt_qp *qp); +int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags); int qib_register_ib_device(struct qib_devdata *); -- cgit v1.2.3-59-g8ed1b From ea0e4ce3bcccef360e1aa69d17a210d1221ab80c Mon Sep 17 00:00:00 2001 From: Jubin John Date: Wed, 20 Apr 2016 06:05:24 -0700 Subject: IB/rdmavt,hfi1,qib: Fix memory leak rdi->ports has memory allocated in rvt_alloc_device(), but does not get freed because the hfi1 and qib drivers drivers call ib_dealloc_device() directly instead of going through rdmavt. Add a rvt_dealloc_device() that frees rdi->ports and then calls ib_dealloc_device(). Switch hfi1 and qib drivers to calling rvt_dealloc_device() instead of ib_dealloc_device() directly. Reviewed-by: Dennis Dalessandro Reviewed-by: Brian Welty Signed-off-by: Jubin John Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_init.c | 4 ++-- drivers/infiniband/sw/rdmavt/vt.c | 13 +++++++++++++ drivers/staging/rdma/hfi1/init.c | 4 ++-- include/rdma/rdma_vt.h | 1 + 4 files changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 3f062f0dd9d8..f253111e682e 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1090,7 +1090,7 @@ void qib_free_devdata(struct qib_devdata *dd) qib_dbg_ibdev_exit(&dd->verbs_dev); #endif free_percpu(dd->int_counter); - ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); + rvt_dealloc_device(&dd->verbs_dev.rdi); } u64 qib_int_counter(struct qib_devdata *dd) @@ -1183,7 +1183,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) bail: if (!list_empty(&dd->list)) list_del_init(&dd->list); - ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); + rvt_dealloc_device(&dd->verbs_dev.rdi); return ERR_PTR(ret); } diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 6caf5272ba1f..e1cc2cc42f25 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -106,6 +106,19 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) } EXPORT_SYMBOL(rvt_alloc_device); +/** + * rvt_dealloc_device - deallocate rdi + * @rdi: structure to free + * + * Free a structure allocated with rvt_alloc_device() + */ +void rvt_dealloc_device(struct rvt_dev_info *rdi) +{ + kfree(rdi->ports); + ib_dealloc_device(&rdi->ibdev); +} +EXPORT_SYMBOL(rvt_dealloc_device); + static int rvt_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index cfcdc16b41c3..00edd500a69a 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -1007,7 +1007,7 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) free_percpu(dd->rcv_limit); hfi1_dev_affinity_free(dd); free_percpu(dd->send_schedule); - ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); + rvt_dealloc_device(&dd->verbs_dev.rdi); } /* @@ -1110,7 +1110,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) bail: if (!list_empty(&dd->list)) list_del_init(&dd->list); - ib_dealloc_device(&dd->verbs_dev.rdi.ibdev); + rvt_dealloc_device(&dd->verbs_dev.rdi); return ERR_PTR(ret); } diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index a8696551abb1..d57ceee90d26 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -467,6 +467,7 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, } struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); +void rvt_dealloc_device(struct rvt_dev_info *rdi); int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); -- cgit v1.2.3-59-g8ed1b From 6b90036587508675b9ef73181c9f0f02894d1588 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:32:54 -0500 Subject: RDMA/i40iw: Fix overflow of region length Change region_length to u64 as a region can be > 4GB. Signed-off-by: Mustafa Ismail Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_user.h | 2 ++ drivers/infiniband/hw/i40iw/i40iw_verbs.c | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index 5cd971bb8cc7..eac95240fbdc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -102,6 +102,8 @@ enum i40iw_device_capabilities_const { #define I40IW_STAG_INDEX_FROM_STAG(stag) (((stag) && 0xFFFFFF00) >> 8) +#define I40IW_MAX_MR_SIZE 0x10000000000L + struct i40iw_qp_uk; struct i40iw_cq_uk; struct i40iw_srq_uk; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 1fe3b84a06e4..d7c4dd15f1c0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1526,14 +1526,16 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, struct i40iw_mr *iwmr; struct ib_umem *region; struct i40iw_mem_reg_req req; - u32 pbl_depth = 0; + u64 pbl_depth = 0; u32 stag = 0; u16 access; - u32 region_length; + u64 region_length; bool use_pbles = false; unsigned long flags; int err = -ENOSYS; + if (length > I40IW_MAX_MR_SIZE) + return ERR_PTR(-EINVAL); region = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(region)) return (struct ib_mr *)region; @@ -1564,7 +1566,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, palloc = &iwpbl->pble_alloc; iwmr->type = req.reg_type; - iwmr->page_cnt = pbl_depth; + iwmr->page_cnt = (u32)pbl_depth; switch (req.reg_type) { case IW_MEMREG_TYPE_QP: -- cgit v1.2.3-59-g8ed1b From 23ef48ad6cfb981f6a1a605306d87c5ad0d1e1ac Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:32:55 -0500 Subject: RDMA/i40iw: Correct QP size calculation Include inline data size as part of SQ size calculation. RQ size calculation uses only number of SGEs and does not support 96 byte WQE size. Signed-off-by: Mustafa Ismail Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_d.h | 2 +- drivers/infiniband/hw/i40iw/i40iw_uk.c | 80 ++++++++++++------------------- drivers/infiniband/hw/i40iw/i40iw_user.h | 34 +++++++------ drivers/infiniband/hw/i40iw/i40iw_verbs.c | 8 +++- 4 files changed, 58 insertions(+), 66 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index aab88d65f805..e8951a71cc13 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -1290,7 +1290,7 @@ /* wqe size considering 32 bytes per wqe*/ #define I40IWQP_SW_MIN_WQSIZE 4 /* 128 bytes */ -#define I40IWQP_SW_MAX_WQSIZE 16384 /* 524288 bytes */ +#define I40IWQP_SW_MAX_WQSIZE 2048 /* 2048 bytes */ #define I40IWQP_OP_RDMA_WRITE 0 #define I40IWQP_OP_RDMA_READ 1 diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index f78c3dc8bdb2..9e3a700d5a2d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -130,7 +130,10 @@ static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx) */ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx, - u8 wqe_size) + u8 wqe_size, + u32 total_size, + u64 wr_id + ) { u64 *wqe = NULL; u64 wqe_ptr; @@ -171,6 +174,10 @@ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, wqe_0 = qp->sq_base[peek_head].elem; if (peek_head & 0x3) wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID); + + qp->sq_wrtrk_array[*wqe_idx].wrid = wr_id; + qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; + qp->sq_wrtrk_array[*wqe_idx].wqe_size = wqe_size; return wqe; } @@ -249,12 +256,9 @@ static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp, if (ret_code) return ret_code; - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id); if (!wqe) return I40IW_ERR_QP_TOOMANY_WRS_POSTED; - - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; - qp->sq_wrtrk_array[wqe_idx].wr_len = total_size; set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO)); if (!op_info->rem_addr.stag) @@ -309,12 +313,9 @@ static enum i40iw_status_code i40iw_rdma_read(struct i40iw_qp_uk *qp, ret_code = i40iw_fragcnt_to_wqesize_sq(1, &wqe_size); if (ret_code) return ret_code; - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->lo_addr.len, info->wr_id); if (!wqe) return I40IW_ERR_QP_TOOMANY_WRS_POSTED; - - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; - qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->lo_addr.len; local_fence |= info->local_fence; set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO)); @@ -366,13 +367,11 @@ static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp, if (ret_code) return ret_code; - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id); if (!wqe) return I40IW_ERR_QP_TOOMANY_WRS_POSTED; read_fence |= info->read_fence; - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; - qp->sq_wrtrk_array[wqe_idx].wr_len = total_size; set_64bit_val(wqe, 16, 0); header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) | LS_64(info->op_type, I40IWQPSQ_OPCODE) | @@ -427,13 +426,11 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, if (ret_code) return ret_code; - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id); if (!wqe) return I40IW_ERR_QP_TOOMANY_WRS_POSTED; read_fence |= info->read_fence; - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; - qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len; set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO)); @@ -507,14 +504,11 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, if (ret_code) return ret_code; - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size); + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id); if (!wqe) return I40IW_ERR_QP_TOOMANY_WRS_POSTED; read_fence |= info->read_fence; - - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; - qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len; header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) | LS_64(info->op_type, I40IWQPSQ_OPCODE) | LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) | @@ -574,12 +568,9 @@ static enum i40iw_status_code i40iw_stag_local_invalidate(struct i40iw_qp_uk *qp op_info = &info->op.inv_local_stag; local_fence = info->local_fence; - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE); + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id); if (!wqe) return I40IW_ERR_QP_TOOMANY_WRS_POSTED; - - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; - qp->sq_wrtrk_array[wqe_idx].wr_len = 0; set_64bit_val(wqe, 0, 0); set_64bit_val(wqe, 8, LS_64(op_info->target_stag, I40IWQPSQ_LOCSTAG)); @@ -619,12 +610,9 @@ static enum i40iw_status_code i40iw_mw_bind(struct i40iw_qp_uk *qp, op_info = &info->op.bind_window; local_fence |= info->local_fence; - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE); + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id); if (!wqe) return I40IW_ERR_QP_TOOMANY_WRS_POSTED; - - qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id; - qp->sq_wrtrk_array[wqe_idx].wr_len = 0; set_64bit_val(wqe, 0, (uintptr_t)op_info->va); set_64bit_val(wqe, 8, LS_64(op_info->mr_stag, I40IWQPSQ_PARENTMRSTAG) | @@ -760,7 +748,7 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, enum i40iw_status_code ret_code2 = 0; bool move_cq_head = true; u8 polarity; - u8 addl_frag_cnt, addl_wqes = 0; + u8 addl_wqes = 0; if (cq->avoid_mem_cflct) cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(cq); @@ -827,11 +815,8 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, info->op_type = (u8)RS_64(qword3, I40IWCQ_OP); sw_wqe = qp->sq_base[wqe_idx].elem; get_64bit_val(sw_wqe, 24, &wqe_qword); - addl_frag_cnt = - (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT); - i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes); - addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE); + addl_wqes = qp->sq_wrtrk_array[wqe_idx].wqe_size / I40IW_QP_WQE_MIN_SIZE; I40IW_RING_SET_TAIL(qp->sq_ring, (wqe_idx + addl_wqes)); } else { do { @@ -843,9 +828,7 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, get_64bit_val(sw_wqe, 24, &wqe_qword); op_type = (u8)RS_64(wqe_qword, I40IWQPSQ_OPCODE); info->op_type = op_type; - addl_frag_cnt = (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT); - i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes); - addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE); + addl_wqes = qp->sq_wrtrk_array[tail].wqe_size / I40IW_QP_WQE_MIN_SIZE; I40IW_RING_SET_TAIL(qp->sq_ring, (tail + addl_wqes)); if (op_type != I40IWQP_OP_NOP) { info->wr_id = qp->sq_wrtrk_array[tail].wrid; @@ -893,19 +876,21 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, * i40iw_get_wqe_shift - get shift count for maximum wqe size * @wqdepth: depth of wq required. * @sge: Maximum Scatter Gather Elements wqe + * @inline_data: Maximum inline data size * @shift: Returns the shift needed based on sge * - * Shift can be used to left shift the wqe size based on sge. - * If sge, == 1, shift =0 (wqe_size of 32 bytes), for sge=2 and 3, shift =1 - * (64 bytes wqes) and 2 otherwise (128 bytes wqe). + * Shift can be used to left shift the wqe size based on number of SGEs and inlind data size. + * For 1 SGE or inline data <= 16, shift = 0 (wqe size of 32 bytes). + * For 2 or 3 SGEs or inline data <= 48, shift = 1 (wqe size of 64 bytes). + * Shift of 2 otherwise (wqe size of 128 bytes). */ -enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift) +enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift) { u32 size; *shift = 0; - if (sge > 1) - *shift = (sge < 4) ? 1 : 2; + if (sge > 1 || inline_data > 16) + *shift = (sge < 4 && inline_data <= 48) ? 1 : 2; /* check if wqdepth is multiple of 2 or not */ @@ -968,11 +953,11 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp, if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT) return I40IW_ERR_INVALID_FRAG_COUNT; - ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, &sqshift); + ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, info->max_inline_data, &sqshift); if (ret_code) return ret_code; - ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, &rqshift); + ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, 0, &rqshift); if (ret_code) return ret_code; @@ -1097,12 +1082,9 @@ enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 header, *wqe; u32 wqe_idx; - wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE); + wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, wr_id); if (!wqe) return I40IW_ERR_QP_TOOMANY_WRS_POSTED; - - qp->sq_wrtrk_array[wqe_idx].wrid = wr_id; - qp->sq_wrtrk_array[wqe_idx].wr_len = 0; set_64bit_val(wqe, 0, 0); set_64bit_val(wqe, 8, 0); set_64bit_val(wqe, 16, 0); @@ -1125,7 +1107,7 @@ enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, * @frag_cnt: number of fragments * @wqe_size: size of sq wqe returned */ -enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size) +enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size) { switch (frag_cnt) { case 0: @@ -1156,7 +1138,7 @@ enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size) * @frag_cnt: number of fragments * @wqe_size: size of rq wqe returned */ -enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size) +enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size) { switch (frag_cnt) { case 0: diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h index eac95240fbdc..4627646fe8cd 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_user.h +++ b/drivers/infiniband/hw/i40iw/i40iw_user.h @@ -61,7 +61,7 @@ enum i40iw_device_capabilities_const { I40IW_MAX_CQ_SIZE = 1048575, I40IW_MAX_AEQ_ALLOCATE_COUNT = 255, I40IW_DB_ID_ZERO = 0, - I40IW_MAX_WQ_FRAGMENT_COUNT = 6, + I40IW_MAX_WQ_FRAGMENT_COUNT = 3, I40IW_MAX_SGE_RD = 1, I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647, I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647, @@ -70,8 +70,8 @@ enum i40iw_device_capabilities_const { I40IW_MAX_VF_FPM_ID = 47, I40IW_MAX_VF_PER_PF = 127, I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496, - I40IW_MAX_INLINE_DATA_SIZE = 112, - I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 112, + I40IW_MAX_INLINE_DATA_SIZE = 48, + I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48, I40IW_MAX_IRD_SIZE = 32, I40IW_QPCTX_ENCD_MAXIRD = 3, I40IW_MAX_WQ_ENTRIES = 2048, @@ -200,7 +200,7 @@ enum i40iw_completion_notify { struct i40iw_post_send { i40iw_sgl sg_list; - u8 num_sges; + u32 num_sges; }; struct i40iw_post_inline_send { @@ -222,7 +222,7 @@ struct i40iw_post_inline_send_w_inv { struct i40iw_rdma_write { i40iw_sgl lo_sg_list; - u8 num_lo_sges; + u32 num_lo_sges; struct i40iw_sge rem_addr; }; @@ -347,7 +347,9 @@ struct i40iw_dev_uk { struct i40iw_sq_uk_wr_trk_info { u64 wrid; - u64 wr_len; + u32 wr_len; + u8 wqe_size; + u8 reserved[3]; }; struct i40iw_qp_quanta { @@ -369,6 +371,8 @@ struct i40iw_qp_uk { u32 qp_id; u32 sq_size; u32 rq_size; + u32 max_sq_frag_cnt; + u32 max_rq_frag_cnt; struct i40iw_qp_uk_ops ops; bool use_srq; u8 swqe_polarity; @@ -376,8 +380,6 @@ struct i40iw_qp_uk { u8 rwqe_polarity; u8 rq_wqe_size; u8 rq_wqe_size_multiplier; - u8 max_sq_frag_cnt; - u8 max_rq_frag_cnt; bool deferred_flag; }; @@ -406,8 +408,9 @@ struct i40iw_qp_uk_init_info { u32 qp_id; u32 sq_size; u32 rq_size; - u8 max_sq_frag_cnt; - u8 max_rq_frag_cnt; + u32 max_sq_frag_cnt; + u32 max_rq_frag_cnt; + u32 max_inline_data; }; @@ -424,7 +427,10 @@ void i40iw_device_init_uk(struct i40iw_dev_uk *dev); void i40iw_qp_post_wr(struct i40iw_qp_uk *qp); u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx, - u8 wqe_size); + u8 wqe_size, + u32 total_size, + u64 wr_id + ); u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx); u64 *i40iw_qp_get_next_srq_wqe(struct i40iw_srq_uk *srq, u32 *wqe_idx); @@ -436,9 +442,9 @@ enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp, void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq); enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq); -enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size); -enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size); +enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size); +enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size); enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size, u8 *wqe_size); -enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift); +enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift); #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index d7c4dd15f1c0..329f59a9f18a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -526,9 +526,9 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev, sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1); rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1); - status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, &sqshift); + status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift); if (!status) - status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, &rqshift); + status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, 0, &rqshift); if (status) return -ENOSYS; @@ -609,6 +609,9 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE) init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; + if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT) + init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + memset(&init_info, 0, sizeof(init_info)); sq_size = init_attr->cap.max_send_wr; @@ -618,6 +621,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, init_info.qp_uk_init_info.rq_size = rq_size; init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; + init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; mem = kzalloc(sizeof(*iwqp), GFP_KERNEL); if (!mem) -- cgit v1.2.3-59-g8ed1b From b3437e0d5ab56d0439ff0ac50e190cfbb6711096 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:32:56 -0500 Subject: RDMA/i40iw: Fix refused connections Make sure cm_node is setup before sending SYN packet and ORD/IRD negotiation. Signed-off-by: Mustafa Ismail Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 85 ++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 41 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 38f917a6c778..bdd4104db40d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2852,7 +2852,6 @@ static struct i40iw_cm_node *i40iw_create_cm_node( void *private_data, struct i40iw_cm_info *cm_info) { - int ret; struct i40iw_cm_node *cm_node; struct i40iw_cm_listener *loopback_remotelistener; struct i40iw_cm_node *loopback_remotenode; @@ -2922,29 +2921,6 @@ static struct i40iw_cm_node *i40iw_create_cm_node( memcpy(cm_node->pdata_buf, private_data, private_data_len); cm_node->state = I40IW_CM_STATE_SYN_SENT; - ret = i40iw_send_syn(cm_node, 0); - - if (ret) { - if (cm_node->ipv4) - i40iw_debug(cm_node->dev, - I40IW_DEBUG_CM, - "Api - connect() FAILED: dest addr=%pI4", - cm_node->rem_addr); - else - i40iw_debug(cm_node->dev, I40IW_DEBUG_CM, - "Api - connect() FAILED: dest addr=%pI6", - cm_node->rem_addr); - i40iw_rem_ref_cm_node(cm_node); - cm_node = NULL; - } - - if (cm_node) - i40iw_debug(cm_node->dev, - I40IW_DEBUG_CM, - "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n", - cm_node->rem_port, - cm_node, - cm_node->cm_id); return cm_node; } @@ -3828,23 +3804,8 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) conn_param->private_data_len, (void *)conn_param->private_data, &cm_info); - if (!cm_node) { - i40iw_manage_qhash(iwdev, - &cm_info, - I40IW_QHASH_TYPE_TCP_ESTABLISHED, - I40IW_QHASH_MANAGE_TYPE_DELETE, - NULL, - false); - - if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core, - cm_info.loc_port)) - i40iw_manage_apbvt(iwdev, - cm_info.loc_port, - I40IW_MANAGE_APBVT_DEL); - cm_id->rem_ref(cm_id); - iwdev->cm_core.stats_connect_errs++; - return -ENOMEM; - } + if (!cm_node) + goto err; i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && @@ -3857,7 +3818,49 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->iwqp = iwqp; iwqp->cm_id = cm_id; i40iw_add_ref(&iwqp->ibqp); + + if (cm_node->state == I40IW_CM_STATE_SYN_SENT) { + if (i40iw_send_syn(cm_node, 0)) { + i40iw_rem_ref_cm_node(cm_node); + goto err; + } + } + + i40iw_debug(cm_node->dev, + I40IW_DEBUG_CM, + "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_port, + cm_node, + cm_node->cm_id); return 0; + +err: + if (cm_node) { + if (cm_node->ipv4) + i40iw_debug(cm_node->dev, + I40IW_DEBUG_CM, + "Api - connect() FAILED: dest addr=%pI4", + cm_node->rem_addr); + else + i40iw_debug(cm_node->dev, I40IW_DEBUG_CM, + "Api - connect() FAILED: dest addr=%pI6", + cm_node->rem_addr); + } + i40iw_manage_qhash(iwdev, + &cm_info, + I40IW_QHASH_TYPE_TCP_ESTABLISHED, + I40IW_QHASH_MANAGE_TYPE_DELETE, + NULL, + false); + + if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core, + cm_info.loc_port)) + i40iw_manage_apbvt(iwdev, + cm_info.loc_port, + I40IW_MANAGE_APBVT_DEL); + cm_id->rem_ref(cm_id); + iwdev->cm_core.stats_connect_errs++; + return -ENOMEM; } /** -- cgit v1.2.3-59-g8ed1b From bd57aeae563c8f032d6eee1c151f12b03191f053 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:32:57 -0500 Subject: RDMA/i40iw: Correct max message size in query port Fix to correct max reported message size in query port. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 329f59a9f18a..2534c5d1d7ec 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -120,7 +120,7 @@ static int i40iw_query_port(struct ib_device *ibdev, props->pkey_tbl_len = 1; props->active_width = IB_WIDTH_4X; props->active_speed = 1; - props->max_msg_sz = 0x80000000; + props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; return 0; } -- cgit v1.2.3-59-g8ed1b From 36a479335051ea5ad552f8234722a908179fc8f0 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:32:58 -0500 Subject: RDMA/i40iw: Do not set self-referencing pointer to NULL after free iwqp->allocated_buffer is a self-referencing pointer to iwqp. Do not set iwqp->allocated_buffer to NULL after freeing it. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 2534c5d1d7ec..aa297365cdde 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -437,7 +437,6 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev, kfree(iwqp->kqp.wrid_mem); iwqp->kqp.wrid_mem = NULL; kfree(iwqp->allocated_buffer); - iwqp->allocated_buffer = NULL; } /** -- cgit v1.2.3-59-g8ed1b From 996abf0a52e62e844b50344157060bb6ec609bc7 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:32:59 -0500 Subject: RDMA/i40iw: Add qp table lock around AE processing QP may be freed during Async Event processing. Add a lock around QP table to prevent it. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw.h | 1 + drivers/infiniband/hw/i40iw/i40iw_hw.c | 7 +++++++ drivers/infiniband/hw/i40iw/i40iw_utils.c | 9 +++++++-- 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 819767681445..bf3627fea222 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -254,6 +254,7 @@ struct i40iw_device { u32 arp_table_size; u32 next_arp_index; spinlock_t resource_lock; /* hw resource access */ + spinlock_t qptable_lock; u32 vendor_id; u32 vendor_part_id; u32 of_device_registered; diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 9fd302425563..27cfdd854754 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -106,6 +106,7 @@ u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev) set_bit(2, iwdev->allocated_pds); spin_lock_init(&iwdev->resource_lock); + spin_lock_init(&iwdev->qptable_lock); mrdrvbits = 24 - get_count_order(iwdev->max_mr); iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits)); return 0; @@ -301,11 +302,15 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) "%s ae_id = 0x%x bool qp=%d qp_id = %d\n", __func__, info->ae_id, info->qp, info->qp_cq_id); if (info->qp) { + spin_lock_irqsave(&iwdev->qptable_lock, flags); iwqp = iwdev->qp_table[info->qp_cq_id]; if (!iwqp) { + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id); continue; } + i40iw_add_ref(&iwqp->ibqp); + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); qp = &iwqp->sc_qp; spin_lock_irqsave(&iwqp->lock, flags); iwqp->hw_tcp_state = info->tcp_state; @@ -411,6 +416,8 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) i40iw_terminate_connection(qp, info); break; } + if (info->qp) + i40iw_rem_ref(&iwqp->ibqp); } while (1); if (aeqcnt) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 1ceec81bd8eb..7ed998c573a4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -506,14 +506,19 @@ void i40iw_rem_ref(struct ib_qp *ibqp) struct cqp_commands_info *cqp_info; struct i40iw_device *iwdev; u32 qp_num; + unsigned long flags; iwqp = to_iwqp(ibqp); - if (!atomic_dec_and_test(&iwqp->refcount)) + iwdev = iwqp->iwdev; + spin_lock_irqsave(&iwdev->qptable_lock, flags); + if (!atomic_dec_and_test(&iwqp->refcount)) { + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); return; + } - iwdev = iwqp->iwdev; qp_num = iwqp->ibqp.qp_num; iwdev->qp_table[qp_num] = NULL; + spin_unlock_irqrestore(&iwdev->qptable_lock, flags); cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); if (!cqp_request) return; -- cgit v1.2.3-59-g8ed1b From df35630af33fb8f470b6739eced5a2ad3a7cb55d Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:33:00 -0500 Subject: RDMA/i40iw: Set vendor_err only if there is an actual error Add a check for cq_poll_info.error before setting vendor_err instead of always setting it. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index aa297365cdde..d3b4b58147d2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2049,10 +2049,12 @@ static int i40iw_poll_cq(struct ib_cq *ibcq, } entry->wc_flags = 0; entry->wr_id = cq_poll_info.wr_id; - if (!cq_poll_info.error) - entry->status = IB_WC_SUCCESS; - else + if (cq_poll_info.error) { entry->status = IB_WC_WR_FLUSH_ERR; + entry->vendor_err = cq_poll_info.major_err << 16 | cq_poll_info.minor_err; + } else { + entry->status = IB_WC_SUCCESS; + } switch (cq_poll_info.op_type) { case I40IW_OP_TYPE_RDMA_WRITE: @@ -2076,8 +2078,6 @@ static int i40iw_poll_cq(struct ib_cq *ibcq, break; } - entry->vendor_err = - cq_poll_info.major_err << 16 | cq_poll_info.minor_err; entry->ex.imm_data = 0; qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle; entry->qp = (struct ib_qp *)qp->back_qp; -- cgit v1.2.3-59-g8ed1b From 4920dc311c77779fbbd71621ecbb9f03f296d72d Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:33:01 -0500 Subject: RDMA/i40iw: Populate vendor_id and vendor_part_id fields Populate PCI info fields from PCI device structure. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index d3b4b58147d2..40444c0557e4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -63,8 +63,8 @@ static int i40iw_query_device(struct ib_device *ibdev, ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr); props->fw_ver = I40IW_FW_VERSION; props->device_cap_flags = iwdev->device_cap_flags; - props->vendor_id = iwdev->vendor_id; - props->vendor_part_id = iwdev->vendor_part_id; + props->vendor_id = iwdev->ldev->pcidev->vendor; + props->vendor_part_id = iwdev->ldev->pcidev->device; props->hw_ver = (u32)iwdev->sc_dev.hw_rev; props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; props->max_qp = iwdev->max_qp; -- cgit v1.2.3-59-g8ed1b From f606d8933004716877eedd73ab609fb92deef84d Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:33:02 -0500 Subject: RDMA/i40iw: Remove unused code and fix warning Remove unused code and fix warning. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw.h | 2 -- drivers/infiniband/hw/i40iw/i40iw_cm.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_cm.h | 10 +--------- drivers/infiniband/hw/i40iw/i40iw_main.c | 5 +---- 4 files changed, 3 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index bf3627fea222..f299001edbc2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -50,8 +50,6 @@ #include #include #include -#include -#include #include #include "i40iw_status.h" diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index bdd4104db40d..ab6eb0bc020e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2107,7 +2107,7 @@ static bool i40iw_ipv6_is_loopback(u32 *loc_addr, u32 *rem_addr) struct in6_addr raddr6; i40iw_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr); - return (!memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6)); + return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6); } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h index 5f8ceb4a8e84..e9046d9f9645 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.h +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h @@ -1,6 +1,6 @@ /******************************************************************************* * -* Copyright (c) 2015 Intel Corporation. All rights reserved. +* Copyright (c) 2015-2016 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -291,8 +291,6 @@ struct i40iw_cm_listener { u8 loc_mac[ETH_ALEN]; u32 loc_addr[4]; u16 loc_port; - u32 map_loc_addr[4]; - u16 map_loc_port; struct iw_cm_id *cm_id; atomic_t ref_count; struct i40iw_device *iwdev; @@ -317,8 +315,6 @@ struct i40iw_kmem_info { struct i40iw_cm_node { u32 loc_addr[4], rem_addr[4]; u16 loc_port, rem_port; - u32 map_loc_addr[4], map_rem_addr[4]; - u16 map_loc_port, map_rem_port; u16 vlan_id; enum i40iw_cm_node_state state; u8 loc_mac[ETH_ALEN]; @@ -370,10 +366,6 @@ struct i40iw_cm_info { u16 rem_port; u32 loc_addr[4]; u32 rem_addr[4]; - u16 map_loc_port; - u16 map_rem_port; - u32 map_loc_addr[4]; - u32 map_rem_addr[4]; u16 vlan_id; int backlog; u16 user_pri; diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 90e5af21737e..f49aea1cedff 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1147,10 +1147,7 @@ static enum i40iw_status_code i40iw_alloc_set_mac_ipaddr(struct i40iw_device *iw if (!status) { status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr, (u8)iwdev->mac_ip_table_idx); - if (!status) - status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr, - (u8)iwdev->mac_ip_table_idx); - else + if (status) i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx); } return status; -- cgit v1.2.3-59-g8ed1b From f69c3331624438321877083e27f5aa09eab3b863 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:33:03 -0500 Subject: RDMA/i40iw: Add virtual channel message queue Queue users of virtual channel on a waitqueue until the channel is clear instead of failing the call when the channel is occupied. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 47 +++++++++++--- drivers/infiniband/hw/i40iw/i40iw_osdep.h | 1 + drivers/infiniband/hw/i40iw/i40iw_type.h | 3 +- drivers/infiniband/hw/i40iw/i40iw_utils.c | 11 ++-- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 8 +-- drivers/infiniband/hw/i40iw/i40iw_virtchnl.c | 96 +++++++++++++++------------- 6 files changed, 103 insertions(+), 63 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index f49aea1cedff..9cf5b3e743ee 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1528,7 +1528,10 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, goto exit; iwdev->obj_next = iwdev->obj_mem; iwdev->push_mode = push_mode; + init_waitqueue_head(&iwdev->vchnl_waitq); + init_waitqueue_head(&dev->vf_reqs); + status = i40iw_initialize_dev(iwdev, ldev); exit: if (status) { @@ -1707,7 +1710,6 @@ static void i40iw_vf_reset(struct i40e_info *ldev, struct i40e_client *client, u for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) { if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id)) continue; - /* free all resources allocated on behalf of vf */ tmp_vfdev = dev->vf_dev[i]; spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags); @@ -1816,8 +1818,6 @@ static int i40iw_virtchnl_receive(struct i40e_info *ldev, dev = &hdl->device.sc_dev; iwdev = dev->back_dev; - i40iw_debug(dev, I40IW_DEBUG_VIRT, "msg %p, message length %u\n", msg, len); - if (dev->vchnl_if.vchnl_recv) { ret_code = dev->vchnl_if.vchnl_recv(dev, vf_id, msg, len); if (!dev->is_pf) { @@ -1828,6 +1828,39 @@ static int i40iw_virtchnl_receive(struct i40e_info *ldev, return ret_code; } +/** + * i40iw_vf_clear_to_send - wait to send virtual channel message + * @dev: iwarp device * + * Wait for until virtual channel is clear + * before sending the next message + * + * Returns false if error + * Returns true if clear to send + */ +bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev) +{ + struct i40iw_device *iwdev; + wait_queue_t wait; + + iwdev = dev->back_dev; + + if (!wq_has_sleeper(&dev->vf_reqs) && + (atomic_read(&iwdev->vchnl_msgs) == 0)) + return true; /* virtual channel is clear */ + + init_wait(&wait); + add_wait_queue_exclusive(&dev->vf_reqs, &wait); + + if (!wait_event_timeout(dev->vf_reqs, + (atomic_read(&iwdev->vchnl_msgs) == 0), + I40IW_VCHNL_EVENT_TIMEOUT)) + dev->vchnl_up = false; + + remove_wait_queue(&dev->vf_reqs, &wait); + + return dev->vchnl_up; +} + /** * i40iw_virtchnl_send - send a message through the virtual channel * @dev: iwarp device @@ -1845,18 +1878,16 @@ static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev, { struct i40iw_device *iwdev; struct i40e_info *ldev; - enum i40iw_status_code ret_code = I40IW_ERR_BAD_PTR; if (!dev || !dev->back_dev) - return ret_code; + return I40IW_ERR_BAD_PTR; iwdev = dev->back_dev; ldev = iwdev->ldev; if (ldev && ldev->ops && ldev->ops->virtchnl_send) - ret_code = ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len); - - return ret_code; + return ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len); + return I40IW_ERR_BAD_PTR; } /* client interface functions */ diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h index 7e20493510e8..80f422bf3967 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h +++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h @@ -172,6 +172,7 @@ struct i40iw_hw; u8 __iomem *i40iw_get_hw_addr(void *dev); void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev); +bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev); enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 length, u32 value); struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf); diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index edb3a8c8267a..5b6a49143a92 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -483,12 +483,13 @@ struct i40iw_sc_dev { struct i40iw_hmc_fpm_misc hmc_fpm_misc; u16 qs_handle; - u32 debug_mask; + u32 debug_mask; u16 exception_lan_queue; u8 hmc_fn_id; bool is_pf; bool vchnl_up; u8 vf_id; + wait_queue_head_t vf_reqs; u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY]; struct i40iw_vchnl_vf_msg_buffer vchnl_vf_msg_buf; u8 hw_rev; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 7ed998c573a4..cddd63942031 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -990,21 +990,24 @@ enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev, enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev) { struct i40iw_device *iwdev = dev->back_dev; - enum i40iw_status_code err_code = 0; int timeout_ret; i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s[%u] dev %p, iwdev %p\n", __func__, __LINE__, dev, iwdev); - atomic_add(2, &iwdev->vchnl_msgs); + + atomic_set(&iwdev->vchnl_msgs, 2); timeout_ret = wait_event_timeout(iwdev->vchnl_waitq, (atomic_read(&iwdev->vchnl_msgs) == 1), I40IW_VCHNL_EVENT_TIMEOUT); atomic_dec(&iwdev->vchnl_msgs); if (!timeout_ret) { i40iw_pr_err("virt channel completion timeout = 0x%x\n", timeout_ret); - err_code = I40IW_ERR_TIMEOUT; + atomic_set(&iwdev->vchnl_msgs, 0); + dev->vchnl_up = false; + return I40IW_ERR_TIMEOUT; } - return err_code; + wake_up(&dev->vf_reqs); + return 0; } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 40444c0557e4..6359a3ebb05a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2148,7 +2148,6 @@ static int i40iw_get_protocol_stats(struct ib_device *ibdev, struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats; struct timespec curr_time; static struct timespec last_rd_time = {0, 0}; - enum i40iw_status_code status = 0; unsigned long flags; curr_time = current_kernel_time(); @@ -2161,11 +2160,8 @@ static int i40iw_get_protocol_stats(struct ib_device *ibdev, spin_unlock_irqrestore(&devstat->stats_lock, flags); } else { if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1) - status = i40iw_vchnl_vf_get_pe_stats(dev, - &devstat->hw_stats); - - if (status) - return -ENOSYS; + if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats)) + return -ENOSYS; } stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] + diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index 6b68f7890b76..4e1d7c665dc5 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -437,11 +437,9 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg); return I40IW_SUCCESS; } - for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; - iw_vf_idx++) { + for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) { if (!dev->vf_dev[iw_vf_idx]) { - if (first_avail_iw_vf == - I40IW_MAX_PE_ENABLED_VF_COUNT) + if (first_avail_iw_vf == I40IW_MAX_PE_ENABLED_VF_COUNT) first_avail_iw_vf = iw_vf_idx; continue; } @@ -596,23 +594,25 @@ enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_req vchnl_req; enum i40iw_status_code ret_code; + if (!i40iw_vf_clear_to_send(dev)) + return I40IW_ERR_TIMEOUT; memset(&vchnl_req, 0, sizeof(vchnl_req)); vchnl_req.dev = dev; vchnl_req.parm = vchnl_ver; vchnl_req.parm_len = sizeof(*vchnl_ver); vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; + ret_code = vchnl_vf_send_get_ver_req(dev, &vchnl_req); - if (!ret_code) { - ret_code = i40iw_vf_wait_vchnl_resp(dev); - if (!ret_code) - ret_code = vchnl_req.ret_code; - else - dev->vchnl_up = false; - } else { + if (ret_code) { i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s Send message failed 0x%0x\n", __func__, ret_code); + return ret_code; } - return ret_code; + ret_code = i40iw_vf_wait_vchnl_resp(dev); + if (ret_code) + return ret_code; + else + return vchnl_req.ret_code; } /** @@ -626,23 +626,25 @@ enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_req vchnl_req; enum i40iw_status_code ret_code; + if (!i40iw_vf_clear_to_send(dev)) + return I40IW_ERR_TIMEOUT; memset(&vchnl_req, 0, sizeof(vchnl_req)); vchnl_req.dev = dev; vchnl_req.parm = hmc_fcn; vchnl_req.parm_len = sizeof(*hmc_fcn); vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; + ret_code = vchnl_vf_send_get_hmc_fcn_req(dev, &vchnl_req); - if (!ret_code) { - ret_code = i40iw_vf_wait_vchnl_resp(dev); - if (!ret_code) - ret_code = vchnl_req.ret_code; - else - dev->vchnl_up = false; - } else { + if (ret_code) { i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s Send message failed 0x%0x\n", __func__, ret_code); + return ret_code; } - return ret_code; + ret_code = i40iw_vf_wait_vchnl_resp(dev); + if (ret_code) + return ret_code; + else + return vchnl_req.ret_code; } /** @@ -660,25 +662,27 @@ enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_req vchnl_req; enum i40iw_status_code ret_code; + if (!i40iw_vf_clear_to_send(dev)) + return I40IW_ERR_TIMEOUT; memset(&vchnl_req, 0, sizeof(vchnl_req)); vchnl_req.dev = dev; vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; + ret_code = vchnl_vf_send_add_hmc_objs_req(dev, &vchnl_req, rsrc_type, start_index, rsrc_count); - if (!ret_code) { - ret_code = i40iw_vf_wait_vchnl_resp(dev); - if (!ret_code) - ret_code = vchnl_req.ret_code; - else - dev->vchnl_up = false; - } else { + if (ret_code) { i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s Send message failed 0x%0x\n", __func__, ret_code); + return ret_code; } - return ret_code; + ret_code = i40iw_vf_wait_vchnl_resp(dev); + if (ret_code) + return ret_code; + else + return vchnl_req.ret_code; } /** @@ -696,25 +700,27 @@ enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_req vchnl_req; enum i40iw_status_code ret_code; + if (!i40iw_vf_clear_to_send(dev)) + return I40IW_ERR_TIMEOUT; memset(&vchnl_req, 0, sizeof(vchnl_req)); vchnl_req.dev = dev; vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; + ret_code = vchnl_vf_send_del_hmc_objs_req(dev, &vchnl_req, rsrc_type, start_index, rsrc_count); - if (!ret_code) { - ret_code = i40iw_vf_wait_vchnl_resp(dev); - if (!ret_code) - ret_code = vchnl_req.ret_code; - else - dev->vchnl_up = false; - } else { + if (ret_code) { i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s Send message failed 0x%0x\n", __func__, ret_code); + return ret_code; } - return ret_code; + ret_code = i40iw_vf_wait_vchnl_resp(dev); + if (ret_code) + return ret_code; + else + return vchnl_req.ret_code; } /** @@ -728,21 +734,23 @@ enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_req vchnl_req; enum i40iw_status_code ret_code; + if (!i40iw_vf_clear_to_send(dev)) + return I40IW_ERR_TIMEOUT; memset(&vchnl_req, 0, sizeof(vchnl_req)); vchnl_req.dev = dev; vchnl_req.parm = hw_stats; vchnl_req.parm_len = sizeof(*hw_stats); vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg; + ret_code = vchnl_vf_send_get_pe_stats_req(dev, &vchnl_req); - if (!ret_code) { - ret_code = i40iw_vf_wait_vchnl_resp(dev); - if (!ret_code) - ret_code = vchnl_req.ret_code; - else - dev->vchnl_up = false; - } else { + if (ret_code) { i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s Send message failed 0x%0x\n", __func__, ret_code); + return ret_code; } - return ret_code; + ret_code = i40iw_vf_wait_vchnl_resp(dev); + if (ret_code) + return ret_code; + else + return vchnl_req.ret_code; } -- cgit v1.2.3-59-g8ed1b From 5c1c1908c124b0be65432177cfcba99a5044fe79 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:33:04 -0500 Subject: RDMA/i40iw: Correct return code check in add_pble_pool Move return code check to immediately after i40iw_hmc_sd_one call where it is set instead of outside the then statement. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_pble.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index ded853d2fad8..85993dc44f6e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -404,13 +404,14 @@ static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev, sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa; if (sd_entry->valid) return 0; - if (dev->is_pf) + if (dev->is_pf) { ret_code = i40iw_hmc_sd_one(dev, hmc_info->hmc_fn_id, sd_reg_val, idx->sd_idx, sd_entry->entry_type, true); - if (ret_code) { - i40iw_pr_err("cqp cmd failed for sd (pbles)\n"); - goto error; + if (ret_code) { + i40iw_pr_err("cqp cmd failed for sd (pbles)\n"); + goto error; + } } sd_entry->valid = true; -- cgit v1.2.3-59-g8ed1b From eb9b0379f8215345c78a5e105af2f029b3a84095 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:33:05 -0500 Subject: RDMA/i40iw: Initialize max enabled vfs variable Initialize max enabled vfs to max rdma vfs instead of 0. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 9cf5b3e743ee..e7b7724b4040 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1509,6 +1509,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, I40IW_HMC_PROFILE_DEFAULT; iwdev->max_rdma_vfs = (iwdev->resource_profile != I40IW_HMC_PROFILE_DEFAULT) ? max_rdma_vfs : 0; + iwdev->max_enabled_vfs = iwdev->max_rdma_vfs; iwdev->netdev = ldev->netdev; hdl->client = client; iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS; -- cgit v1.2.3-59-g8ed1b From b7aee855d3b93f31ea692ea5c7565318372d1042 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:33:06 -0500 Subject: RDMA/i40iw: Add base memory management extensions Implement fast register mr, Local invalidate, send with invalidate and RDMA read with invalidate. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 82 ++++++++++-- drivers/infiniband/hw/i40iw/i40iw_type.h | 3 + drivers/infiniband/hw/i40iw/i40iw_verbs.c | 204 ++++++++++++++++++++++++++++-- drivers/infiniband/hw/i40iw/i40iw_verbs.h | 1 + 4 files changed, 271 insertions(+), 19 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index f05802bf6ca0..437cb8603540 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -2908,6 +2908,65 @@ static enum i40iw_status_code i40iw_sc_mw_alloc( return 0; } +/** + * i40iw_sc_mr_fast_register - Posts RDMA fast register mr WR to iwarp qp + * @qp: sc qp struct + * @info: fast mr info + * @post_sq: flag for cqp db to ring + */ +enum i40iw_status_code i40iw_sc_mr_fast_register( + struct i40iw_sc_qp *qp, + struct i40iw_fast_reg_stag_info *info, + bool post_sq) +{ + u64 temp, header; + u64 *wqe; + u32 wqe_idx; + + wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, + 0, info->wr_id); + if (!wqe) + return I40IW_ERR_QP_TOOMANY_WRS_POSTED; + + i40iw_debug(qp->dev, I40IW_DEBUG_MR, "%s: wr_id[%llxh] wqe_idx[%04d] location[%p]\n", + __func__, info->wr_id, wqe_idx, + &qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid); + temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo; + set_64bit_val(wqe, 0, temp); + + temp = RS_64(info->first_pm_pbl_index >> 16, I40IWQPSQ_FIRSTPMPBLIDXHI); + set_64bit_val(wqe, + 8, + LS_64(temp, I40IWQPSQ_FIRSTPMPBLIDXHI) | + LS_64(info->reg_addr_pa >> I40IWQPSQ_PBLADDR_SHIFT, I40IWQPSQ_PBLADDR)); + + set_64bit_val(wqe, + 16, + info->total_len | + LS_64(info->first_pm_pbl_index, I40IWQPSQ_FIRSTPMPBLIDXLO)); + + header = LS_64(info->stag_key, I40IWQPSQ_STAGKEY) | + LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) | + LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) | + LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) | + LS_64(info->page_size, I40IWQPSQ_HPAGESIZE) | + LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) | + LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) | + LS_64(info->read_fence, I40IWQPSQ_READFENCE) | + LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) | + LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) | + LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID); + + i40iw_insert_wqe_hdr(wqe, header); + + i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "FAST_REG WQE", + wqe, I40IW_QP_WQE_MIN_SIZE); + + if (post_sq) + i40iw_qp_post_wr(&qp->qp_uk); + return 0; +} + /** * i40iw_sc_send_lsmm - send last streaming mode message * @qp: sc qp struct @@ -4559,17 +4618,18 @@ static struct i40iw_pd_ops iw_pd_ops = { }; static struct i40iw_priv_qp_ops iw_priv_qp_ops = { - i40iw_sc_qp_init, - i40iw_sc_qp_create, - i40iw_sc_qp_modify, - i40iw_sc_qp_destroy, - i40iw_sc_qp_flush_wqes, - i40iw_sc_qp_upload_context, - i40iw_sc_qp_setctx, - i40iw_sc_send_lsmm, - i40iw_sc_send_lsmm_nostag, - i40iw_sc_send_rtt, - i40iw_sc_post_wqe0, + .qp_init = i40iw_sc_qp_init, + .qp_create = i40iw_sc_qp_create, + .qp_modify = i40iw_sc_qp_modify, + .qp_destroy = i40iw_sc_qp_destroy, + .qp_flush_wqes = i40iw_sc_qp_flush_wqes, + .qp_upload_context = i40iw_sc_qp_upload_context, + .qp_setctx = i40iw_sc_qp_setctx, + .qp_send_lsmm = i40iw_sc_send_lsmm, + .qp_send_lsmm_nostag = i40iw_sc_send_lsmm_nostag, + .qp_send_rtt = i40iw_sc_send_rtt, + .qp_post_wqe0 = i40iw_sc_post_wqe0, + .iw_mr_fast_register = i40iw_sc_mr_fast_register }; static struct i40iw_priv_cq_ops iw_priv_cq_ops = { diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 5b6a49143a92..c9261981da0b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -1041,6 +1041,9 @@ struct i40iw_priv_qp_ops { void (*qp_send_lsmm_nostag)(struct i40iw_sc_qp *, void *, u32); void (*qp_send_rtt)(struct i40iw_sc_qp *, bool); enum i40iw_status_code (*qp_post_wqe0)(struct i40iw_sc_qp *, u8); + enum i40iw_status_code (*iw_mr_fast_register)(struct i40iw_sc_qp *, + struct i40iw_fast_reg_stag_info *, + bool); }; struct i40iw_priv_cq_ops { diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 6359a3ebb05a..196d6f006392 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -725,8 +725,10 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, iwarp_info = &iwqp->iwarp_info; iwarp_info->rd_enable = true; iwarp_info->wr_rdresp_en = true; - if (!iwqp->user_mode) + if (!iwqp->user_mode) { + iwarp_info->fast_reg_en = true; iwarp_info->priv_mode_en = true; + } iwarp_info->ddp_ver = 1; iwarp_info->rdmap_ver = 1; @@ -1446,6 +1448,139 @@ static int i40iw_handle_q_mem(struct i40iw_device *iwdev, return err; } +/** + * i40iw_hw_alloc_stag - cqp command to allocate stag + * @iwdev: iwarp device + * @iwmr: iwarp mr pointer + */ +static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr) +{ + struct i40iw_allocate_stag_info *info; + struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd); + enum i40iw_status_code status; + int err = 0; + struct i40iw_cqp_request *cqp_request; + struct cqp_commands_info *cqp_info; + + cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true); + if (!cqp_request) + return -ENOMEM; + + cqp_info = &cqp_request->info; + info = &cqp_info->in.u.alloc_stag.info; + memset(info, 0, sizeof(*info)); + info->page_size = PAGE_SIZE; + info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT; + info->pd_id = iwpd->sc_pd.pd_id; + info->total_len = iwmr->length; + cqp_info->cqp_cmd = OP_ALLOC_STAG; + cqp_info->post_sq = 1; + cqp_info->in.u.alloc_stag.dev = &iwdev->sc_dev; + cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request; + + status = i40iw_handle_cqp_op(iwdev, cqp_request); + if (status) { + err = -ENOMEM; + i40iw_pr_err("CQP-OP MR Reg fail"); + } + return err; +} + +/** + * i40iw_alloc_mr - register stag for fast memory registration + * @pd: ibpd pointer + * @mr_type: memory for stag registrion + * @max_num_sg: man number of pages + */ +static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, + u32 max_num_sg) +{ + struct i40iw_pd *iwpd = to_iwpd(pd); + struct i40iw_device *iwdev = to_iwdev(pd->device); + struct i40iw_pble_alloc *palloc; + struct i40iw_pbl *iwpbl; + struct i40iw_mr *iwmr; + enum i40iw_status_code status; + u32 stag; + int err_code = -ENOMEM; + + iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); + if (!iwmr) + return ERR_PTR(-ENOMEM); + + stag = i40iw_create_stag(iwdev); + if (!stag) { + err_code = -EOVERFLOW; + goto err; + } + iwmr->stag = stag; + iwmr->ibmr.rkey = stag; + iwmr->ibmr.lkey = stag; + iwmr->ibmr.pd = pd; + iwmr->ibmr.device = pd->device; + iwpbl = &iwmr->iwpbl; + iwpbl->iwmr = iwmr; + iwmr->type = IW_MEMREG_TYPE_MEM; + palloc = &iwpbl->pble_alloc; + iwmr->page_cnt = max_num_sg; + mutex_lock(&iwdev->pbl_mutex); + status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt); + mutex_unlock(&iwdev->pbl_mutex); + if (!status) + goto err1; + + if (palloc->level != I40IW_LEVEL_1) + goto err2; + err_code = i40iw_hw_alloc_stag(iwdev, iwmr); + if (err_code) + goto err2; + iwpbl->pbl_allocated = true; + i40iw_add_pdusecount(iwpd); + return &iwmr->ibmr; +err2: + i40iw_free_pble(iwdev->pble_rsrc, palloc); +err1: + i40iw_free_stag(iwdev, stag); +err: + kfree(iwmr); + return ERR_PTR(err_code); +} + +/** + * i40iw_set_page - populate pbl list for fmr + * @ibmr: ib mem to access iwarp mr pointer + * @addr: page dma address fro pbl list + */ +static int i40iw_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct i40iw_mr *iwmr = to_iwmr(ibmr); + struct i40iw_pbl *iwpbl = &iwmr->iwpbl; + struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc; + u64 *pbl; + + if (unlikely(iwmr->npages == iwmr->page_cnt)) + return -ENOMEM; + + pbl = (u64 *)palloc->level1.addr; + pbl[iwmr->npages++] = cpu_to_le64(addr); + return 0; +} + +/** + * i40iw_map_mr_sg - map of sg list for fmr + * @ibmr: ib mem to access iwarp mr pointer + * @sg: scatter gather list for fmr + * @sg_nents: number of sg pages + */ +static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents) +{ + struct i40iw_mr *iwmr = to_iwmr(ibmr); + + iwmr->npages = 0; + return ib_sg_to_pages(ibmr, sg, sg_nents, i40iw_set_page); +} + /** * i40iw_hwreg_mr - send cqp command for memory registration * @iwdev: iwarp device @@ -1886,12 +2021,14 @@ static int i40iw_post_send(struct ib_qp *ibqp, enum i40iw_status_code ret; int err = 0; unsigned long flags; + bool inv_stag; iwqp = (struct i40iw_qp *)ibqp; ukqp = &iwqp->sc_qp.qp_uk; spin_lock_irqsave(&iwqp->lock, flags); while (ib_wr) { + inv_stag = false; memset(&info, 0, sizeof(info)); info.wr_id = (u64)(ib_wr->wr_id); if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all) @@ -1901,19 +2038,28 @@ static int i40iw_post_send(struct ib_qp *ibqp, switch (ib_wr->opcode) { case IB_WR_SEND: - if (ib_wr->send_flags & IB_SEND_SOLICITED) - info.op_type = I40IW_OP_TYPE_SEND_SOL; - else - info.op_type = I40IW_OP_TYPE_SEND; + /* fall-through */ + case IB_WR_SEND_WITH_INV: + if (ib_wr->opcode == IB_WR_SEND) { + if (ib_wr->send_flags & IB_SEND_SOLICITED) + info.op_type = I40IW_OP_TYPE_SEND_SOL; + else + info.op_type = I40IW_OP_TYPE_SEND; + } else { + if (ib_wr->send_flags & IB_SEND_SOLICITED) + info.op_type = I40IW_OP_TYPE_SEND_SOL_INV; + else + info.op_type = I40IW_OP_TYPE_SEND_INV; + } if (ib_wr->send_flags & IB_SEND_INLINE) { info.op.inline_send.data = (void *)(unsigned long)ib_wr->sg_list[0].addr; info.op.inline_send.len = ib_wr->sg_list[0].length; - ret = ukqp->ops.iw_inline_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false); + ret = ukqp->ops.iw_inline_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false); } else { info.op.send.num_sges = ib_wr->num_sge; info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list; - ret = ukqp->ops.iw_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false); + ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false); } if (ret) @@ -1941,6 +2087,9 @@ static int i40iw_post_send(struct ib_qp *ibqp, if (ret) err = -EIO; break; + case IB_WR_RDMA_READ_WITH_INV: + inv_stag = true; + /* fall-through*/ case IB_WR_RDMA_READ: info.op_type = I40IW_OP_TYPE_RDMA_READ; info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; @@ -1949,10 +2098,47 @@ static int i40iw_post_send(struct ib_qp *ibqp, info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr; info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey; info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length; - ret = ukqp->ops.iw_rdma_read(ukqp, &info, false, false); + ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false); if (ret) err = -EIO; break; + case IB_WR_LOCAL_INV: + info.op_type = I40IW_OP_TYPE_INV_STAG; + info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; + ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true); + if (ret) + err = -EIO; + break; + case IB_WR_REG_MR: + { + struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr); + int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size); + int flags = reg_wr(ib_wr)->access; + struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc; + struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev; + struct i40iw_fast_reg_stag_info info; + + info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD; + info.access_rights |= i40iw_get_user_access(flags); + info.stag_key = reg_wr(ib_wr)->key & 0xff; + info.stag_idx = reg_wr(ib_wr)->key >> 8; + info.wr_id = ib_wr->wr_id; + + info.addr_type = I40IW_ADDR_TYPE_VA_BASED; + info.va = (void *)(uintptr_t)iwmr->ibmr.iova; + info.total_len = iwmr->ibmr.length; + info.first_pm_pbl_index = palloc->level1.idx; + info.local_fence = ib_wr->send_flags & IB_SEND_FENCE; + info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED; + + if (page_shift == 21) + info.page_size = 1; /* 2M page */ + + ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true); + if (ret) + err = -EIO; + break; + } default: err = -EINVAL; i40iw_pr_err(" upost_send bad opcode = 0x%x\n", @@ -2328,6 +2514,8 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.query_device = i40iw_query_device; iwibdev->ibdev.create_ah = i40iw_create_ah; iwibdev->ibdev.destroy_ah = i40iw_destroy_ah; + iwibdev->ibdev.alloc_mr = i40iw_alloc_mr; + iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg; iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); if (!iwibdev->ibdev.iwcm) { ib_dealloc_device(&iwibdev->ibdev); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 1101f77080e6..0acb6c8fe0f0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -92,6 +92,7 @@ struct i40iw_mr { struct ib_umem *region; u16 type; u32 page_cnt; + u32 npages; u32 stag; u64 length; u64 pgaddrmem[MAX_SAVE_PAGE_ADDRS]; -- cgit v1.2.3-59-g8ed1b From 20c61f7e88a02366dc94d77179cf005eec6162e6 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:33:07 -0500 Subject: RDMA/i40iw: Fix endian issues and warnings Fix endian warnings and errors due to u32 stored to u16. Reported-by: Dan Carpenter Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw.h | 4 +-- drivers/infiniband/hw/i40iw/i40iw_cm.c | 57 ++++++++++++++++--------------- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 10 +++--- drivers/infiniband/hw/i40iw/i40iw_hw.c | 4 +-- drivers/infiniband/hw/i40iw/i40iw_main.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_puda.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_type.h | 4 +-- drivers/infiniband/hw/i40iw/i40iw_utils.c | 27 +++++++-------- 8 files changed, 54 insertions(+), 56 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index f299001edbc2..8b9532034558 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -391,7 +391,7 @@ void i40iw_flush_wqes(struct i40iw_device *iwdev, void i40iw_manage_arp_cache(struct i40iw_device *iwdev, unsigned char *mac_addr, - __be32 *ip_addr, + u32 *ip_addr, bool ipv4, u32 action); @@ -549,7 +549,7 @@ enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev, struct i40iw_qp_flush_info *info, bool wait); -void i40iw_copy_ip_ntohl(u32 *dst, u32 *src); +void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src); struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size, diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index ab6eb0bc020e..8cb4b874ccd8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -771,6 +771,7 @@ static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node, { struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr; struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg; + u16 ctrl_ird, ctrl_ord; /* initialize the upper 5 bytes of the frame */ i40iw_build_mpa_v1(cm_node, start_addr, mpa_key); @@ -779,38 +780,38 @@ static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node, /* initialize RTR msg */ if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { - rtr_msg->ctrl_ird = IETF_NO_IRD_ORD; - rtr_msg->ctrl_ord = IETF_NO_IRD_ORD; + ctrl_ird = IETF_NO_IRD_ORD; + ctrl_ord = IETF_NO_IRD_ORD; } else { - rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? + ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? IETF_NO_IRD_ORD : cm_node->ird_size; - rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? + ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? IETF_NO_IRD_ORD : cm_node->ord_size; } - rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER; - rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN; + ctrl_ird |= IETF_PEER_TO_PEER; + ctrl_ird |= IETF_FLPDU_ZERO_LEN; switch (mpa_key) { case MPA_KEY_REQUEST: - rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE; - rtr_msg->ctrl_ord |= IETF_RDMA0_READ; + ctrl_ord |= IETF_RDMA0_WRITE; + ctrl_ord |= IETF_RDMA0_READ; break; case MPA_KEY_REPLY: switch (cm_node->send_rdma0_op) { case SEND_RDMA_WRITE_ZERO: - rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE; + ctrl_ord |= IETF_RDMA0_WRITE; break; case SEND_RDMA_READ_ZERO: - rtr_msg->ctrl_ord |= IETF_RDMA0_READ; + ctrl_ord |= IETF_RDMA0_READ; break; } break; default: break; } - rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird); - rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord); + rtr_msg->ctrl_ird = htons(ctrl_ird); + rtr_msg->ctrl_ord = htons(ctrl_ord); } /** @@ -2160,7 +2161,7 @@ static struct i40iw_cm_node *i40iw_make_cm_node( cm_node->tcp_cntxt.rcv_wnd = I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE; ts = current_kernel_time(); - cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); + cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; cm_node->tcp_cntxt.mss = iwdev->mss; cm_node->iwdev = iwdev; @@ -2234,7 +2235,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node) if (cm_node->listener) { i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true); } else { - if (!i40iw_listen_port_in_use(cm_core, htons(cm_node->loc_port)) && + if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) && cm_node->apbvt_set && cm_node->iwdev) { i40iw_manage_apbvt(cm_node->iwdev, cm_node->loc_port, @@ -2921,7 +2922,6 @@ static struct i40iw_cm_node *i40iw_create_cm_node( memcpy(cm_node->pdata_buf, private_data, private_data_len); cm_node->state = I40IW_CM_STATE_SYN_SENT; - return cm_node; } @@ -3242,11 +3242,13 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node, tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[0]); tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[0]); - tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(iwqp->iwdev, - &tcp_info->dest_ip_addr3, - true, - NULL, - I40IW_ARP_RESOLVE)); + tcp_info->arp_idx = + cpu_to_le16((u16)i40iw_arp_table( + iwqp->iwdev, + &tcp_info->dest_ip_addr3, + true, + NULL, + I40IW_ARP_RESOLVE)); } else { tcp_info->src_port = cpu_to_le16(cm_node->loc_port); tcp_info->dst_port = cpu_to_le16(cm_node->rem_port); @@ -3258,12 +3260,13 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node, tcp_info->local_ipaddr1 = cpu_to_le32(cm_node->loc_addr[1]); tcp_info->local_ipaddr2 = cpu_to_le32(cm_node->loc_addr[2]); tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[3]); - tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table( - iwqp->iwdev, - &tcp_info->dest_ip_addr0, - false, - NULL, - I40IW_ARP_RESOLVE)); + tcp_info->arp_idx = + cpu_to_le16((u16)i40iw_arp_table( + iwqp->iwdev, + &tcp_info->dest_ip_addr0, + false, + NULL, + I40IW_ARP_RESOLVE)); } } @@ -3540,7 +3543,6 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct i40iw_cm_node *cm_node; struct ib_qp_attr attr; int passive_state; - struct i40iw_ib_device *iwibdev; struct ib_mr *ibmr; struct i40iw_pd *iwpd; u16 buf_len = 0; @@ -3603,7 +3605,6 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) !i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) || (!cm_node->ipv4 && !i40iw_ipv6_is_loopback(cm_node->loc_addr, cm_node->rem_addr))) { - iwibdev = iwdev->iwibdev; iwpd = iwqp->iwpd; tagged_offset = (uintptr_t)iwqp->ietf_mem.va; ibmr = i40iw_reg_phys_mr(&iwpd->ibpd, diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 437cb8603540..023a7ae3a330 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -3970,11 +3970,11 @@ enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev) */ static u32 i40iw_iwarp_opcode(struct i40iw_aeqe_info *info, u8 *pkt) { - u16 *mpa; + __be16 *mpa; u32 opcode = 0xffffffff; if (info->q2_data_written) { - mpa = (u16 *)pkt; + mpa = (__be16 *)pkt; opcode = ntohs(mpa[1]) & 0xf; } return opcode; @@ -4036,7 +4036,7 @@ static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp, if (info->q2_data_written) { /* Use data from offending packet to fill in ddp & rdma hdrs */ pkt = i40iw_locate_mpa(pkt); - ddp_seg_len = ntohs(*(u16 *)pkt); + ddp_seg_len = ntohs(*(__be16 *)pkt); if (ddp_seg_len) { copy_len = 2; termhdr->hdrct = DDP_LEN_FLAG; @@ -4247,13 +4247,13 @@ void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info * void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info) { u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET; - u32 *mpa; + __be32 *mpa; u8 ddp_ctl; u8 rdma_ctl; u16 aeq_id = 0; struct i40iw_terminate_hdr *termhdr; - mpa = (u32 *)i40iw_locate_mpa(pkt); + mpa = (__be32 *)i40iw_locate_mpa(pkt); if (info->q2_data_written) { /* did not validate the frame - do it now */ ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff; diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 27cfdd854754..615e115247b0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -467,7 +467,7 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad */ void i40iw_manage_arp_cache(struct i40iw_device *iwdev, unsigned char *mac_addr, - __be32 *ip_addr, + u32 *ip_addr, bool ipv4, u32 action) { @@ -488,7 +488,7 @@ void i40iw_manage_arp_cache(struct i40iw_device *iwdev, cqp_info->cqp_cmd = OP_ADD_ARP_CACHE_ENTRY; info = &cqp_info->in.u.add_arp_cache_entry.info; memset(info, 0, sizeof(*info)); - info->arp_index = cpu_to_le32(arp_index); + info->arp_index = cpu_to_le16((u16)arp_index); info->permanent = true; ether_addr_copy(info->mac_addr, mac_addr); cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request; diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index e7b7724b4040..72a10a19880a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1162,7 +1162,7 @@ static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev) struct net_device *ip_dev; struct inet6_dev *idev; struct inet6_ifaddr *ifp; - __be32 local_ipaddr6[4]; + u32 local_ipaddr6[4]; rcu_read_lock(); for_each_netdev_rcu(&init_net, ip_dev) { diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 8eb400d8a7a0..e9c6e82af9c7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -1194,7 +1194,7 @@ static enum i40iw_status_code i40iw_ieq_process_buf(struct i40iw_puda_rsrc *ieq, ioffset = (u16)(buf->data - (u8 *)buf->mem.va); while (datalen) { - fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(u16 *)datap)); + fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(__be16 *)datap)); if (fpdu_len > pfpdu->max_fpdu_data) { i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ, "%s: error bad fpdu_len\n", __func__); diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index c9261981da0b..e8c9491349e4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -890,8 +890,8 @@ struct i40iw_qhash_table_info { u32 qp_num; u32 dest_ip[4]; u32 src_ip[4]; - u32 dest_port; - u32 src_port; + u16 dest_port; + u16 src_port; }; struct i40iw_local_mac_ipaddr_entry_info { diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index cddd63942031..0e8db0a35141 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -59,7 +59,7 @@ * @action: modify, delete or add */ int i40iw_arp_table(struct i40iw_device *iwdev, - __be32 *ip_addr, + u32 *ip_addr, bool ipv4, u8 *mac_addr, u32 action) @@ -152,7 +152,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, struct net_device *upper_dev; struct i40iw_device *iwdev; struct i40iw_handler *hdl; - __be32 local_ipaddr; + u32 local_ipaddr; hdl = i40iw_find_netdev(event_netdev); if (!hdl) @@ -167,11 +167,10 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, switch (event) { case NETDEV_DOWN: if (upper_dev) - local_ipaddr = - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address; + local_ipaddr = ntohl( + ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); else - local_ipaddr = ifa->ifa_address; - local_ipaddr = ntohl(local_ipaddr); + local_ipaddr = ntohl(ifa->ifa_address); i40iw_manage_arp_cache(iwdev, netdev->dev_addr, &local_ipaddr, @@ -180,11 +179,10 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, return NOTIFY_OK; case NETDEV_UP: if (upper_dev) - local_ipaddr = - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address; + local_ipaddr = ntohl( + ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); else - local_ipaddr = ifa->ifa_address; - local_ipaddr = ntohl(local_ipaddr); + local_ipaddr = ntohl(ifa->ifa_address); i40iw_manage_arp_cache(iwdev, netdev->dev_addr, &local_ipaddr, @@ -194,12 +192,11 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, case NETDEV_CHANGEADDR: /* Add the address to the IP table */ if (upper_dev) - local_ipaddr = - ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address; + local_ipaddr = ntohl( + ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address); else - local_ipaddr = ifa->ifa_address; + local_ipaddr = ntohl(ifa->ifa_address); - local_ipaddr = ntohl(local_ipaddr); i40iw_manage_arp_cache(iwdev, netdev->dev_addr, &local_ipaddr, @@ -227,7 +224,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, struct net_device *netdev; struct i40iw_device *iwdev; struct i40iw_handler *hdl; - __be32 local_ipaddr6[4]; + u32 local_ipaddr6[4]; hdl = i40iw_find_netdev(event_netdev); if (!hdl) -- cgit v1.2.3-59-g8ed1b From fa4153796121e573be7f9b8f59ac7eb0694d1ac0 Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:33:08 -0500 Subject: RDMA/i40iw: Fix SD calculation for initial HMC creation Correct SD calculation by using base address returned from commit FPM. This alleviates any assumptions on resource ordering and alignment requirement. Also consolidate SD estimation code into i40iw_est_sd(). Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 93 +++++++++++++++++++++----------- drivers/infiniband/hw/i40iw/i40iw_type.h | 2 +- 2 files changed, 62 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 023a7ae3a330..2c4b4d072d6a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -114,16 +114,21 @@ static enum i40iw_status_code i40iw_cqp_poll_registers( * i40iw_sc_parse_fpm_commit_buf - parse fpm commit buffer * @buf: ptr to fpm commit buffer * @info: ptr to i40iw_hmc_obj_info struct + * @sd: number of SDs for HMC objects * * parses fpm commit info and copy base value * of hmc objects in hmc_info */ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( u64 *buf, - struct i40iw_hmc_obj_info *info) + struct i40iw_hmc_obj_info *info, + u32 *sd) { u64 temp; + u64 size; + u64 base = 0; u32 i, j; + u32 k = 0; u32 low; /* copy base values in obj_info */ @@ -131,10 +136,20 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( i <= I40IW_HMC_IW_PBLE; i++, j += 8) { get_64bit_val(buf, j, &temp); info[i].base = RS_64_1(temp, 32) * 512; + if (info[i].base > base) { + base = info[i].base; + k = i; + } low = (u32)(temp); if (low) info[i].cnt = low; } + size = info[k].cnt * info[k].size + info[k].base; + if (size & 0x1FFFFF) + *sd = (u32)((size >> 21) + 1); /* add 1 for remainder */ + else + *sd = (u32)(size >> 21); + return 0; } @@ -3206,7 +3221,7 @@ enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_ i40iw_cqp_commit_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id); /* parse the fpm_commit_buf and fill hmc obj info */ - i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj); + i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj, &hmc_info->sd_table.sd_cnt); mem_size = sizeof(struct i40iw_hmc_sd_entry) * (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index); ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size); @@ -3280,7 +3295,9 @@ static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev /* parse the fpm_commit_buf and fill hmc obj info */ if (!ret_code) - ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf, hmc_info->hmc_obj); + ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf, + hmc_info->hmc_obj, + &hmc_info->sd_table.sd_cnt); i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "COMMIT FPM BUFFER", commit_fpm_mem.va, I40IW_COMMIT_FPM_BUF_SIZE); @@ -3527,6 +3544,40 @@ static bool i40iw_ring_full(struct i40iw_sc_cqp *cqp) return I40IW_RING_FULL_ERR(cqp->sq_ring); } +/** + * i40iw_est_sd - returns approximate number of SDs for HMC + * @dev: sc device struct + * @hmc_info: hmc structure, size and count for HMC objects + */ +static u64 i40iw_est_sd(struct i40iw_sc_dev *dev, struct i40iw_hmc_info *hmc_info) +{ + int i; + u64 size = 0; + u64 sd; + + for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_PBLE; i++) + size += hmc_info->hmc_obj[i].cnt * hmc_info->hmc_obj[i].size; + + if (dev->is_pf) + size += hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size; + + if (size & 0x1FFFFF) + sd = (size >> 21) + 1; /* add 1 for remainder */ + else + sd = size >> 21; + + if (!dev->is_pf) { + /* 2MB alignment for VF PBLE HMC */ + size = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size; + if (size & 0x1FFFFF) + sd += (size >> 21) + 1; /* add 1 for remainder */ + else + sd += size >> 21; + } + + return sd; +} + /** * i40iw_config_fpm_values - configure HMC objects * @dev: sc device struct @@ -3538,7 +3589,7 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_ u32 i, mem_size; u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted; u32 powerof2; - u64 sd_needed, bytes_needed; + u64 sd_needed; u32 loop_count = 0; struct i40iw_hmc_info *hmc_info; @@ -3556,23 +3607,15 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_ return ret_code; } - bytes_needed = 0; - for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) { + for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt; - bytes_needed += - (hmc_info->hmc_obj[i].max_cnt) * (hmc_info->hmc_obj[i].size); - i40iw_debug(dev, I40IW_DEBUG_HMC, - "%s i[%04d] max_cnt[0x%04X] size[0x%04llx]\n", - __func__, i, hmc_info->hmc_obj[i].max_cnt, - hmc_info->hmc_obj[i].size); - } - sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up */ + sd_needed = i40iw_est_sd(dev, hmc_info); i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: FW initial max sd_count[%08lld] first_sd_index[%04d]\n", __func__, sd_needed, hmc_info->first_sd_index); i40iw_debug(dev, I40IW_DEBUG_HMC, - "%s: bytes_needed=0x%llx sd count %d where max sd is %d\n", - __func__, bytes_needed, hmc_info->sd_table.sd_cnt, + "%s: sd count %d where max sd is %d\n", + __func__, hmc_info->sd_table.sd_cnt, hmc_fpm_misc->max_sds); qpwanted = min(qp_count, hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt); @@ -3614,11 +3657,7 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_ hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt = pblewanted; /* How much memory is needed for all the objects. */ - bytes_needed = 0; - for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) - bytes_needed += - (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size); - sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; + sd_needed = i40iw_est_sd(dev, hmc_info); if ((loop_count > 1000) || ((!(loop_count % 10)) && (qpwanted > qpwantedoriginal * 2 / 3))) { @@ -3639,15 +3678,7 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_ pblewanted -= FPM_MULTIPLIER * 1000; } while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000); - bytes_needed = 0; - for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) { - bytes_needed += (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size); - i40iw_debug(dev, I40IW_DEBUG_HMC, - "%s i[%04d] cnt[0x%04x] size[0x%04llx]\n", - __func__, i, hmc_info->hmc_obj[i].cnt, - hmc_info->hmc_obj[i].size); - } - sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up not truncate. */ + sd_needed = i40iw_est_sd(dev, hmc_info); i40iw_debug(dev, I40IW_DEBUG_HMC, "loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n", @@ -3665,8 +3696,6 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_ return ret_code; } - hmc_info->sd_table.sd_cnt = (u32)sd_needed; - mem_size = sizeof(struct i40iw_hmc_sd_entry) * (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1); ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size); diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index e8c9491349e4..937b7ee7d69b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -1112,7 +1112,7 @@ struct i40iw_hmc_ops { enum i40iw_status_code (*parse_fpm_query_buf)(u64 *, struct i40iw_hmc_info *, struct i40iw_hmc_fpm_misc *); enum i40iw_status_code (*configure_iw_fpm)(struct i40iw_sc_dev *, u8); - enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *); + enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *, u32 *sd); enum i40iw_status_code (*create_hmc_object)(struct i40iw_sc_dev *dev, struct i40iw_hmc_create_obj_info *); enum i40iw_status_code (*del_hmc_object)(struct i40iw_sc_dev *dev, -- cgit v1.2.3-59-g8ed1b From c2b75ef7dcb9cf5e237955b0d0fa48918978493d Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Mon, 18 Apr 2016 10:33:09 -0500 Subject: RDMA/i40iw: Adding queue drain functions Adding sq and rq drain functions, which block until all previously posted wr-s in the specified queue have completed. A completion object is signaled to unblock the thread, when the last cqe for the corresponding queue is processed. Signed-off-by: Mustafa Ismail Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 38 +++++++++++++++++++++++++++++++ drivers/infiniband/hw/i40iw/i40iw_verbs.h | 2 ++ 2 files changed, 40 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 196d6f006392..bf4e1e39dedf 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -789,6 +789,8 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(err_code); } } + init_completion(&iwqp->sq_drained); + init_completion(&iwqp->rq_drained); return &iwqp->ibqp; error: @@ -1581,6 +1583,32 @@ static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_ne return ib_sg_to_pages(ibmr, sg, sg_nents, i40iw_set_page); } +/** + * i40iw_drain_sq - drain the send queue + * @ibqp: ib qp pointer + */ +static void i40iw_drain_sq(struct ib_qp *ibqp) +{ + struct i40iw_qp *iwqp = to_iwqp(ibqp); + struct i40iw_sc_qp *qp = &iwqp->sc_qp; + + if (I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring)) + wait_for_completion(&iwqp->sq_drained); +} + +/** + * i40iw_drain_rq - drain the receive queue + * @ibqp: ib qp pointer + */ +static void i40iw_drain_rq(struct ib_qp *ibqp) +{ + struct i40iw_qp *iwqp = to_iwqp(ibqp); + struct i40iw_sc_qp *qp = &iwqp->sc_qp; + + if (I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring)) + wait_for_completion(&iwqp->rq_drained); +} + /** * i40iw_hwreg_mr - send cqp command for memory registration * @iwdev: iwarp device @@ -2218,6 +2246,7 @@ static int i40iw_poll_cq(struct ib_cq *ibcq, enum i40iw_status_code ret; struct i40iw_cq_uk *ukcq; struct i40iw_sc_qp *qp; + struct i40iw_qp *iwqp; unsigned long flags; iwcq = (struct i40iw_cq *)ibcq; @@ -2268,6 +2297,13 @@ static int i40iw_poll_cq(struct ib_cq *ibcq, qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle; entry->qp = (struct ib_qp *)qp->back_qp; entry->src_qp = cq_poll_info.qp_id; + iwqp = (struct i40iw_qp *)qp->back_qp; + if (iwqp->iwarp_state > I40IW_QP_STATE_RTS) { + if (!I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring)) + complete(&iwqp->sq_drained); + if (!I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring)) + complete(&iwqp->rq_drained); + } entry->byte_len = cq_poll_info.bytes_xfered; entry++; cqe_count++; @@ -2514,6 +2550,8 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.query_device = i40iw_query_device; iwibdev->ibdev.create_ah = i40iw_create_ah; iwibdev->ibdev.destroy_ah = i40iw_destroy_ah; + iwibdev->ibdev.drain_sq = i40iw_drain_sq; + iwibdev->ibdev.drain_rq = i40iw_drain_rq; iwibdev->ibdev.alloc_mr = i40iw_alloc_mr; iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg; iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 0acb6c8fe0f0..0069be8a5a38 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -170,5 +170,7 @@ struct i40iw_qp { struct i40iw_pbl *iwpbl; struct i40iw_dma_mem q2_ctx_mem; struct i40iw_dma_mem ietf_mem; + struct completion sq_drained; + struct completion rq_drained; }; #endif -- cgit v1.2.3-59-g8ed1b From 9510b0666eaeda032de89a54f112cb3e9f41b2c5 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 22 Apr 2016 14:14:23 -0500 Subject: RDMA/i40iw: Fixes for WQE alignment Invalidation after every WQE write is changed to invalidate only if required. NOPs are padded so that WQE writes are aligned to 64B boundary. Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_d.h | 2 ++ drivers/infiniband/hw/i40iw/i40iw_uk.c | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index e8951a71cc13..bd942da91a27 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -1512,6 +1512,8 @@ enum i40iw_alignment { I40IW_SD_BUF_ALIGNMENT = 0x100 }; +#define I40IW_WQE_SIZE_64 64 + #define I40IW_QP_WQE_MIN_SIZE 32 #define I40IW_QP_WQE_MAX_SIZE 128 diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 9e3a700d5a2d..6e0e3272e57f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -162,6 +162,17 @@ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, if (!*wqe_idx) qp->swqe_polarity = !qp->swqe_polarity; } + + if (((*wqe_idx & 3) == 1) && (wqe_size == I40IW_WQE_SIZE_64)) { + i40iw_nop_1(qp); + I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code); + if (ret_code) + return NULL; + *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); + if (!*wqe_idx) + qp->swqe_polarity = !qp->swqe_polarity; + } + for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) { I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code); if (ret_code) @@ -172,8 +183,11 @@ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, peek_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); wqe_0 = qp->sq_base[peek_head].elem; - if (peek_head & 0x3) - wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID); + + if (((peek_head & 3) == 1) || ((peek_head & 3) == 3)) { + if (RS_64(wqe_0[3], I40IWQPSQ_VALID) != !qp->swqe_polarity) + wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID); + } qp->sq_wrtrk_array[*wqe_idx].wrid = wr_id; qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size; -- cgit v1.2.3-59-g8ed1b From 8e9f04a7c744bb18c193779d04cc5d8d4c21dd11 Mon Sep 17 00:00:00 2001 From: Chien Tin Tung Date: Fri, 22 Apr 2016 14:14:24 -0500 Subject: RDMA/i40iw: Correct STag mask to min of 14 bits STag index mask is calculated incorrectly, missing the 14 bits minimum requirement. Add max macro to use either # of MRs or 14 bits in the mask size calculation. Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 615e115247b0..3ee0cad96bc6 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -107,7 +107,8 @@ u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev) spin_lock_init(&iwdev->resource_lock); spin_lock_init(&iwdev->qptable_lock); - mrdrvbits = 24 - get_count_order(iwdev->max_mr); + /* stag index mask has a minimum of 14 bits */ + mrdrvbits = 24 - max(get_count_order(iwdev->max_mr), 14); iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits)); return 0; } -- cgit v1.2.3-59-g8ed1b From 84a4c246639aab8cb4c28b4313a3c676fe5ea263 Mon Sep 17 00:00:00 2001 From: Mohammad Khan Date: Fri, 22 Apr 2016 14:14:25 -0500 Subject: RDMA/i40iw: Fix for a NOP WQE size Fix for filling in the WQE size for NOP Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_uk.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 6e0e3272e57f..2cd9091abd3e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -56,6 +56,9 @@ static enum i40iw_status_code i40iw_nop_1(struct i40iw_qp_uk *qp) wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring); wqe = qp->sq_base[wqe_idx].elem; + + qp->sq_wrtrk_array[wqe_idx].wqe_size = I40IW_QP_WQE_MIN_SIZE; + peek_head = (qp->sq_ring.head + 1) % qp->sq_ring.size; wqe_0 = qp->sq_base[peek_head].elem; if (peek_head) -- cgit v1.2.3-59-g8ed1b From df2d96c3d00413cbdd0d5e391aeba6eef806b88d Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 22 Apr 2016 14:14:26 -0500 Subject: RDMA/i40iw: Fix for the size of kernel mode SQ Fix to calculate the SQ size based on the max frag_count, requested by the application instead of overwriting it with the max supported frag_count Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index bf4e1e39dedf..2d832c758c66 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -520,8 +520,6 @@ static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev, enum i40iw_status_code status; struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; - ukinfo->max_sq_frag_cnt = I40IW_MAX_WQ_FRAGMENT_COUNT; - sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1); rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1); -- cgit v1.2.3-59-g8ed1b From 6c2f76197db63e337fb60b16800f234f6428c32d Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 22 Apr 2016 14:14:27 -0500 Subject: RDMA/i40iw: Fix for using one sge for RDMA READ A check is added to validate the requested sge number. iWARP doesn't support multiple sg elements for RDMA READ work requests. Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 2d832c758c66..45f70f5e14a7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -74,7 +74,7 @@ static int i40iw_query_device(struct ib_device *ibdev, props->max_cqe = iwdev->max_cqe; props->max_mr = iwdev->max_mr; props->max_pd = iwdev->max_pd; - props->max_sge_rd = 1; + props->max_sge_rd = I40IW_MAX_SGE_RD; props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE; props->max_qp_init_rd_atom = props->max_qp_rd_atom; props->atomic_cap = IB_ATOMIC_NONE; @@ -2117,6 +2117,10 @@ static int i40iw_post_send(struct ib_qp *ibqp, inv_stag = true; /* fall-through*/ case IB_WR_RDMA_READ: + if (ib_wr->num_sge > I40IW_MAX_SGE_RD) { + err = -EINVAL; + break; + } info.op_type = I40IW_OP_TYPE_RDMA_READ; info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey; -- cgit v1.2.3-59-g8ed1b From f8a4e76c75e572a8503410b8f863e7fa420236ba Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 22 Apr 2016 14:14:28 -0500 Subject: RDMA/i40iw: Fix for checking if the QP is destroyed Fix for checking if the QP associated with a completion has been destroyed while processing CQ elements. If that is the case, move the CQ head to the next element and continue completion processing. Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_status.h | 1 + drivers/infiniband/hw/i40iw/i40iw_uk.c | 5 +++++ drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 ++ 3 files changed, 8 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h index b0110c15e044..91c421762f06 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_status.h +++ b/drivers/infiniband/hw/i40iw/i40iw_status.h @@ -95,6 +95,7 @@ enum i40iw_status_code { I40IW_ERR_INVALID_MAC_ADDR = -65, I40IW_ERR_BAD_STAG = -66, I40IW_ERR_CQ_COMPL_ERROR = -67, + I40IW_ERR_QUEUE_DESTROYED = -68 }; #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 2cd9091abd3e..e35faea88c13 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -802,6 +802,10 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, info->is_srq = (bool)RS_64(qword3, I40IWCQ_SRQ); qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx; + if (!qp) { + ret_code = I40IW_ERR_QUEUE_DESTROYED; + goto exit; + } wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX); info->qp_handle = (i40iw_qp_handle)(unsigned long)qp; @@ -859,6 +863,7 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, ret_code = 0; +exit: if (!ret_code && (info->comp_status == I40IW_COMPL_STATUS_FLUSHED)) if (pring && (I40IW_RING_MORE_WORK(*pring))) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 45f70f5e14a7..eaa79c9fc821 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2259,6 +2259,8 @@ static int i40iw_poll_cq(struct ib_cq *ibcq, ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true); if (ret == I40IW_ERR_QUEUE_EMPTY) { break; + } else if (ret == I40IW_ERR_QUEUE_DESTROYED) { + continue; } else if (ret) { if (!cqe_count) cqe_count = -1; -- cgit v1.2.3-59-g8ed1b From ccea5f0f01797a0c0b6cba8176ecda3b10ca8534 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 22 Apr 2016 14:14:29 -0500 Subject: RDMA/i40iw: Fix for removing quad hash entries Fix for removing a quad hash entry when the corresponding quad hash entry hasn't been added, which is the case in loopback connections Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 8cb4b874ccd8..d2fa72516960 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3729,6 +3729,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct sockaddr_in *raddr; struct sockaddr_in6 *laddr6; struct sockaddr_in6 *raddr6; + bool qhash_set = false; int apbvt_set = 0; enum i40iw_status_code status; @@ -3787,6 +3788,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) true); if (status) return -EINVAL; + qhash_set = true; } status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD); if (status) { @@ -3814,7 +3816,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->ord_size = 1; cm_node->apbvt_set = apbvt_set; - cm_node->qhash_set = true; + cm_node->qhash_set = qhash_set; iwqp->cm_node = cm_node; cm_node->iwqp = iwqp; iwqp->cm_id = cm_id; -- cgit v1.2.3-59-g8ed1b From 9dec900c20d95ef1f3c40bc5d5901499f5d63381 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Thu, 5 May 2016 01:27:29 +0530 Subject: RDMA/iw_cxgb4: release ep resources on accept arp failure If ARP fails before the CPL_PASS_ACCEPT_RPL is seen by hardware, the tid will be stuck in SYN_PEND and never released. So create an arp failure handler specifically for this message to release the endpoint resources. In pass_accept_rpl_arp_failure(), put the parent endpoint so it will be freed when destroyed. Also we don't need to call release_tid() here because _c4iw_free_ep() calls cxgb4_remove_tid() which releases the hwtid. If we get an ABORT_REQ_RSS instead of a PASS_ESTABLISH (because the peer's ACK to our SYN is never received), then put the parent as well in peer_abort(). Treat accept_cr() failures just like arp failures: put the parent ep and release the ep resources destroying the tid The ARP failure handlers are called in an atomic context, so we need to schedule some of the processing which might block. Namely _c4iw_free_ep() which needs a mutex. So create a "special" CPL opcode and handler and schedule it via sched() to be run by process_work() in a blockable context. Also rework the active open arp failure handler to make use of release_ep_resources(). This allows both the active and passive arp failure handlers to use the same deferred cleanup function. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 82 ++++++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 651711370d55..49784a40d1d1 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -145,6 +145,7 @@ static struct sk_buff_head rxq; static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); static void ep_timeout(unsigned long arg); static void connect_reply_upcall(struct c4iw_ep *ep, int status); +static int sched(struct c4iw_dev *dev, struct sk_buff *skb); static LIST_HEAD(timeout_list); static spinlock_t timeout_lock; @@ -295,7 +296,7 @@ void _c4iw_free_ep(struct kref *kref) struct c4iw_ep *ep; ep = container_of(kref, struct c4iw_ep, com.kref); - PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); + PDBG("%s ep %p state %s\n", __func__, ep, states[ep->com.state]); if (test_bit(QP_REFERENCED, &ep->com.flags)) deref_qp(ep); if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { @@ -432,10 +433,57 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, static void arp_failure_discard(void *handle, struct sk_buff *skb) { - PDBG("%s c4iw_dev %p\n", __func__, handle); + pr_err(MOD "ARP failure\n"); kfree_skb(skb); } +enum { + NUM_FAKE_CPLS = 1, + FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0, +}; + +static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + + ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); + release_ep_resources(ep); + return 0; +} + +/* + * Fake up a special CPL opcode and call sched() so process_work() will call + * _put_ep_safe() in a safe context to free the ep resources. This is needed + * because ARP error handlers are called in an ATOMIC context, and + * _c4iw_free_ep() needs to block. + */ +static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb) +{ + struct cpl_act_establish *rpl = cplhdr(skb); + + /* Set our special ARP_FAILURE opcode */ + rpl->ot.opcode = FAKE_CPL_PUT_EP_SAFE; + + /* + * Save ep in the skb->cb area, after where sched() will save the dev + * ptr. + */ + *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep; + sched(ep->com.dev, skb); +} + +/* Handle an ARP failure for an accept */ +static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb) +{ + struct c4iw_ep *ep = handle; + + pr_err(MOD "ARP failure during accept - tid %u -dropping connection\n", + ep->hwtid); + + __state_set(&ep->com, DEAD); + queue_arp_failure_cpl(ep, skb); +} + /* * Handle an ARP failure for an active open. */ @@ -444,9 +492,8 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) struct c4iw_ep *ep = handle; printk(KERN_ERR MOD "ARP failure during connect\n"); - kfree_skb(skb); connect_reply_upcall(ep, -EHOSTUNREACH); - state_set(&ep->com, DEAD); + __state_set(&ep->com, DEAD); if (ep->com.remote_addr.ss_family == AF_INET6) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ep->com.local_addr; @@ -455,9 +502,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) } remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); - dst_release(ep->dst); - cxgb4_l2t_release(ep->l2t); - c4iw_put_ep(&ep->com); + queue_arp_failure_cpl(ep, skb); } /* @@ -2198,8 +2243,8 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } -static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, - struct cpl_pass_accept_req *req) +static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, + struct cpl_pass_accept_req *req) { struct cpl_pass_accept_rpl *rpl; unsigned int mtu_idx; @@ -2287,10 +2332,9 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, rpl->opt0 = cpu_to_be64(opt0); rpl->opt2 = cpu_to_be32(opt2); set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); - c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure); - return; + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) @@ -2469,8 +2513,12 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) init_timer(&child_ep->timer); cxgb4_insert_tid(t, child_ep, hwtid); insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid); - accept_cr(child_ep, skb, req); - set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); + if (accept_cr(child_ep, skb, req)) { + c4iw_put_ep(&parent_ep->com); + release_ep_resources(child_ep); + } else { + set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); + } if (iptype == 6) { sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0], @@ -2633,6 +2681,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) mutex_lock(&ep->com.mutex); switch (ep->com.state) { case CONNECTING: + c4iw_put_ep(&ep->parent_ep->com); break; case MPA_REQ_WAIT: (void)stop_ep_timer(ep); @@ -3809,7 +3858,7 @@ reject: * These are the real handlers that are called from a * work queue. */ -static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { +static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = { [CPL_ACT_ESTABLISH] = act_establish, [CPL_ACT_OPEN_RPL] = act_open_rpl, [CPL_RX_DATA] = rx_data, @@ -3825,7 +3874,8 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_RDMA_TERMINATE] = terminate, [CPL_FW4_ACK] = fw4_ack, [CPL_FW6_MSG] = deferred_fw6_msg, - [CPL_RX_PKT] = rx_pkt + [CPL_RX_PKT] = rx_pkt, + [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe }; static void process_timeout(struct c4iw_ep *ep) -- cgit v1.2.3-59-g8ed1b From 88bc230dc614b8e19000022d0ae2c1dfd578a0b0 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Thu, 5 May 2016 01:27:30 +0530 Subject: RDMA/iw_cxgb4: stop ep timer on close failure In c4iw_ep_disconnect(), if we start the ep timer to begin a close, but send_halfclose() fails, we need to stop the timer and send a CLOSE event up to the IWCM before releasing the resources. Otherwise, we can crash when the ep timer fires if the ep is referencing a previous instance of the device. This can happen as part of adapter reset/recovery, for instance. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 49784a40d1d1..cc9836e46800 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3467,8 +3467,13 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) set_bit(EP_DISC_CLOSE, &ep->com.history); ret = send_halfclose(ep, gfp); } - if (ret) + if (ret) { + if (!abrupt) { + stop_ep_timer(ep); + close_complete_upcall(ep, -EIO); + } fatal = 1; + } } mutex_unlock(&ep->com.mutex); if (fatal) -- cgit v1.2.3-59-g8ed1b From 6e410d8f7175caf2316c515f1ea0bf80d33b3158 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Thu, 5 May 2016 01:27:31 +0530 Subject: RDMA/iw_cxgb4: ensure eps don't get freed while the mutex is held In rx_data(), with the ep in FPDU_MODE, refcnt=2, if we get unexpected streaming data, we call c4iw_modify_rc_qp() and move the qp from RTS -> TERMINATE. In c4iw_modify_rc_qp(), if rdma_fini() returns an error, the ep will be dereferenced (refcnt=1). Then rx_data() calls c4iw_ep_disconnect() which starts the close operation. But if send_halfclose() fails in c4iw_ep_disconnect(), we will call release_ep_resources() derefing the ep which reduces the refcnt to 0 and and frees the ep. However we still has the ep mutex at that point, so we have a touch-after-free bug. There is a similar issue where peer_close() calls c4iw_ep_disconnect(). The solution is to add a reference to the ep in c4iw_ep_disconnect() after acquiring the mutex, and release it after releasing the mutex. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index cc9836e46800..12eac98661c1 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3416,6 +3416,12 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, states[ep->com.state], abrupt); + /* + * Ref the ep here in case we have fatal errors causing the + * ep to be released and freed. + */ + c4iw_get_ep(&ep->com); + rdev = &ep->com.dev->rdev; if (c4iw_fatal_error(rdev)) { fatal = 1; @@ -3476,6 +3482,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) } } mutex_unlock(&ep->com.mutex); + c4iw_put_ep(&ep->com); if (fatal) release_ep_resources(ep); return ret; -- cgit v1.2.3-59-g8ed1b From f8e1e1d13773e1bcad127cbb5be964d00ee1f682 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Thu, 5 May 2016 01:27:32 +0530 Subject: RDMA/iw_cxgb4: remove connection abort from process_mpa_reply Instead, have the caller, rx_data() handle the close/abort like it does for process_mpa_request(). This is part of getting rid of abort_connection() altogether so we properly clean up on send_abort() failures. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 12eac98661c1..c4ce707d210b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1346,6 +1346,18 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) #define RELAXED_IRD_NEGOTIATION 1 +/* + * process_mpa_reply - process streaming mode MPA reply + * + * Returns: + * + * 0 upon success indicating a connect request was delivered to the ULP + * or the mpa request is incomplete but valid so far. + * + * 1 if a failure requires the caller to close the connection. + * + * 2 if a failure requires the caller to abort the connection. + */ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) { struct mpa_message *mpa; @@ -1575,8 +1587,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) } goto out; err: - __state_set(&ep->com, ABORTING); - send_abort(ep, skb, GFP_KERNEL); + disconnect = 2; out: connect_reply_upcall(ep, err); return disconnect; -- cgit v1.2.3-59-g8ed1b From fef4422d00c135da4300d7d58e62cd0afe2af730 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Thu, 5 May 2016 01:27:33 +0530 Subject: RDMA/iw_cxgb4: free resources when send_flowc() fails Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c4ce707d210b..864da9dec9f6 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -519,7 +519,7 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb) c4iw_ofld_send(rdev, skb); } -static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) +static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) { unsigned int flowclen = 80; struct fw_flowc_wr *flowc; @@ -575,7 +575,7 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) } set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - c4iw_ofld_send(&ep->com.dev->rdev, skb); + return c4iw_ofld_send(&ep->com.dev->rdev, skb); } static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) @@ -1119,6 +1119,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int tid = GET_TID(req); unsigned int atid = TID_TID_G(ntohl(req->tos_atid)); struct tid_info *t = dev->rdev.lldi.tids; + int ret; ep = lookup_atid(t, atid); @@ -1144,13 +1145,20 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_bit(ACT_ESTAB, &ep->com.history); /* start MPA negotiation */ - send_flowc(ep, NULL); + ret = send_flowc(ep, NULL); + if (ret) + goto err; if (ep->retry_with_mpa_v1) send_mpa_req(ep, skb, 1); else send_mpa_req(ep, skb, mpa_rev); mutex_unlock(&ep->com.mutex); return 0; +err: + mutex_unlock(&ep->com.mutex); + connect_reply_upcall(ep, -ENOMEM); + c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + return 0; } static void close_complete_upcall(struct c4iw_ep *ep, int status) @@ -2548,6 +2556,7 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_pass_establish *req = cplhdr(skb); struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(req); + int ret; ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); @@ -2560,10 +2569,14 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_emss(ep, ntohs(req->tcp_opt)); dst_confirm(ep->dst); - state_set(&ep->com, MPA_REQ_WAIT); + mutex_lock(&ep->com.mutex); + ep->com.state = MPA_REQ_WAIT; start_ep_timer(ep); - send_flowc(ep, skb); set_bit(PASS_ESTAB, &ep->com.history); + ret = send_flowc(ep, skb); + mutex_unlock(&ep->com.mutex); + if (ret) + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); return 0; } -- cgit v1.2.3-59-g8ed1b From eaf4c6d46a6948302b64be2b7149cce22131ee0d Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Thu, 5 May 2016 01:27:34 +0530 Subject: RDMA/iw_cxgb4: remove abort_connection() usage from accept/reject Use c4iw_ep_disconnect() instead. This is part of getting rid of abort_connection() altogether so we properly clean up on send_abort() failures. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 864da9dec9f6..d862369b5dd7 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2922,14 +2922,14 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) set_bit(ULP_REJECT, &ep->com.history); BUG_ON(ep->com.state != MPA_REQ_RCVD); if (mpa_rev == 0) - abort_connection(ep, NULL, GFP_KERNEL); + disconnect = 2; else { err = send_mpa_reject(ep, pdata, pdata_len); disconnect = 1; } mutex_unlock(&ep->com.mutex); if (disconnect) - err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); c4iw_put_ep(&ep->com); return 0; } @@ -2942,13 +2942,14 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct c4iw_ep *ep = to_ep(cm_id); struct c4iw_dev *h = to_c4iw_dev(cm_id->device); struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); + int abort = 0; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); mutex_lock(&ep->com.mutex); if (ep->com.state == DEAD) { err = -ECONNRESET; - goto err; + goto err_out; } BUG_ON(ep->com.state != MPA_REQ_RCVD); @@ -2957,9 +2958,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) set_bit(ULP_ACCEPT, &ep->com.history); if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) || (conn_param->ird > cur_max_read_depth(ep->com.dev))) { - abort_connection(ep, NULL, GFP_KERNEL); err = -EINVAL; - goto err; + goto err_abort; } if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { @@ -2971,9 +2971,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->ord = conn_param->ord; send_mpa_reject(ep, conn_param->private_data, conn_param->private_data_len); - abort_connection(ep, NULL, GFP_KERNEL); err = -ENOMEM; - goto err; + goto err_abort; } } if (conn_param->ird < ep->ord) { @@ -2981,9 +2980,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->ord <= h->rdev.lldi.max_ordird_qp) { conn_param->ird = ep->ord; } else { - abort_connection(ep, NULL, GFP_KERNEL); err = -ENOMEM; - goto err; + goto err_abort; } } } @@ -3024,23 +3022,26 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); if (err) - goto err1; + goto err_deref_cm_id; err = send_mpa_reply(ep, conn_param->private_data, conn_param->private_data_len); if (err) - goto err1; + goto err_deref_cm_id; __state_set(&ep->com, FPDU_MODE); established_upcall(ep); mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return 0; -err1: +err_deref_cm_id: ep->com.cm_id = NULL; - abort_connection(ep, NULL, GFP_KERNEL); cm_id->rem_ref(cm_id); -err: +err_abort: + abort = 1; +err_out: mutex_unlock(&ep->com.mutex); + if (abort) + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); c4iw_put_ep(&ep->com); return err; } -- cgit v1.2.3-59-g8ed1b From fd6aabe48c8f76d31aacb55fc6c90af770632ae2 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Thu, 5 May 2016 01:27:35 +0530 Subject: RDMA/iw_cxgb4: don't use abort_connection in process_mpa_request() Instead return whether the caller needs to disconnect. This is part of getting rid of abort_connection() altogether so we properly clean up on send_abort() failures. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 64 ++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 29 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d862369b5dd7..44e0bc409d59 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1601,7 +1601,19 @@ out: return disconnect; } -static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) +/* + * process_mpa_request - process streaming mode MPA request + * + * Returns: + * + * 0 upon success indicating a connect request was delivered to the ULP + * or the mpa request is incomplete but valid so far. + * + * 1 if a failure requires the caller to close the connection. + * + * 2 if a failure requires the caller to abort the connection. + */ +static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) { struct mpa_message *mpa; struct mpa_v2_conn_params *mpa_v2_params; @@ -1613,11 +1625,8 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * If we get more than the supported amount of private data * then we must fail this connection. */ - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - (void)stop_ep_timer(ep); - abort_connection(ep, skb, GFP_KERNEL); - return; - } + if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) + goto err_stop_timer; PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); @@ -1633,7 +1642,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * We'll continue process when more data arrives. */ if (ep->mpa_pkt_len < sizeof(*mpa)) - return; + return 0; PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); mpa = (struct mpa_message *) ep->mpa_pkt; @@ -1644,43 +1653,32 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) if (mpa->revision > mpa_rev) { printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," " Received = %d\n", __func__, mpa_rev, mpa->revision); - (void)stop_ep_timer(ep); - abort_connection(ep, skb, GFP_KERNEL); - return; + goto err_stop_timer; } - if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { - (void)stop_ep_timer(ep); - abort_connection(ep, skb, GFP_KERNEL); - return; - } + if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) + goto err_stop_timer; plen = ntohs(mpa->private_data_size); /* * Fail if there's too much private data. */ - if (plen > MPA_MAX_PRIVATE_DATA) { - (void)stop_ep_timer(ep); - abort_connection(ep, skb, GFP_KERNEL); - return; - } + if (plen > MPA_MAX_PRIVATE_DATA) + goto err_stop_timer; /* * If plen does not account for pkt size */ - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - (void)stop_ep_timer(ep); - abort_connection(ep, skb, GFP_KERNEL); - return; - } + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) + goto err_stop_timer; ep->plen = (u8) plen; /* * If we don't have all the pdata yet, then bail. */ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; + return 0; /* * If we get here we have accumulated the entire mpa @@ -1742,13 +1740,21 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) SINGLE_DEPTH_NESTING); if (ep->parent_ep->com.state != DEAD) { if (connect_request_upcall(ep)) - abort_connection(ep, skb, GFP_KERNEL); + goto err_unlock_parent; } else { - abort_connection(ep, skb, GFP_KERNEL); + goto err_unlock_parent; } mutex_unlock(&ep->parent_ep->com.mutex); } - return; + return 0; + +err_unlock_parent: + mutex_unlock(&ep->parent_ep->com.mutex); + goto err_out; +err_stop_timer: + (void)stop_ep_timer(ep); +err_out: + return 2; } static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) -- cgit v1.2.3-59-g8ed1b From c00dcbafac39760f567350ce0c1cef1e4bb28a64 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Thu, 5 May 2016 01:27:36 +0530 Subject: RDMA/iw_cxgb4: move QP -> ERROR on fatal disconnect errors In c4iw_ep_disconnect(), if we fail to initiate a close operation, then move the qp to ERROR to disassociate the ep from the qp. Failure to do this will leak the ep resources. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 44e0bc409d59..aea69ca495f3 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3509,6 +3509,19 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) stop_ep_timer(ep); close_complete_upcall(ep, -EIO); } + if (ep->com.qp) { + struct c4iw_qp_attributes attrs; + + attrs.next_state = C4IW_QP_STATE_ERROR; + ret = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (ret) + pr_err(MOD + "%s - qp <- error failed!\n", + __func__); + } fatal = 1; } } -- cgit v1.2.3-59-g8ed1b From 6973627968acbdf7d6f45a4c4813d46bf8e2a66a Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Thu, 5 May 2016 01:27:37 +0530 Subject: RDMA/iw_cxgb4: remove abort_connection() usage from ep_timeout() Use c4iw_ep_disconnect() instead. This is part of getting rid of abort_connection() altogether so we properly clean up on send_abort() failures. This is the last user of abort_connection(), so remove it too. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index aea69ca495f3..d7f7ab34eeba 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1179,14 +1179,6 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status) } } -static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) -{ - PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - __state_set(&ep->com, ABORTING); - set_bit(ABORT_CONN, &ep->com.history); - return send_abort(ep, skb, gfp); -} - static void peer_close_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; @@ -3977,9 +3969,9 @@ static void process_timeout(struct c4iw_ep *ep) __func__, ep, ep->hwtid, ep->com.state); abort = 0; } - if (abort) - abort_connection(ep, NULL, GFP_KERNEL); mutex_unlock(&ep->com.mutex); + if (abort) + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); c4iw_put_ep(&ep->com); } -- cgit v1.2.3-59-g8ed1b From ff2ba9936591a1364ae21adf18366dca7608395a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 May 2016 18:01:04 +0200 Subject: IB/core: Add passing an offset into the SG to ib_map_mr_sg Signed-off-by: Christoph Hellwig Tested-by: Steve Wise Reviewed-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 24 ++++++++++++------------ drivers/infiniband/hw/cxgb3/iwch_provider.c | 7 +++---- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 5 ++--- drivers/infiniband/hw/cxgb4/mem.c | 7 +++---- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 5 +++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 5 ++--- drivers/infiniband/hw/mlx4/mr.c | 7 +++---- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 ++--- drivers/infiniband/hw/mlx5/mr.c | 21 ++++++++++++--------- drivers/infiniband/hw/nes/nes_verbs.c | 7 +++---- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 7 +++---- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 5 ++--- drivers/infiniband/ulp/iser/iser_memory.c | 4 ++-- drivers/infiniband/ulp/isert/ib_isert.c | 2 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- include/rdma/ib_verbs.h | 23 +++++++++-------------- net/rds/ib_frmr.c | 2 +- net/sunrpc/xprtrdma/frwr_ops.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +- 19 files changed, 66 insertions(+), 76 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b65b3541e732..73fa9da1d084 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1597,6 +1597,7 @@ EXPORT_SYMBOL(ib_set_vf_guid); * @mr: memory region * @sg: dma mapped scatterlist * @sg_nents: number of entries in sg + * @sg_offset: offset in bytes into sg * @page_size: page vector desired page size * * Constraints: @@ -1615,17 +1616,15 @@ EXPORT_SYMBOL(ib_set_vf_guid); * After this completes successfully, the memory region * is ready for registration. */ -int ib_map_mr_sg(struct ib_mr *mr, - struct scatterlist *sg, - int sg_nents, - unsigned int page_size) +int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, + unsigned int sg_offset, unsigned int page_size) { if (unlikely(!mr->device->map_mr_sg)) return -ENOSYS; mr->page_size = page_size; - return mr->device->map_mr_sg(mr, sg, sg_nents); + return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset); } EXPORT_SYMBOL(ib_map_mr_sg); @@ -1635,6 +1634,7 @@ EXPORT_SYMBOL(ib_map_mr_sg); * @mr: memory region * @sgl: dma mapped scatterlist * @sg_nents: number of entries in sg + * @sg_offset: offset in bytes into sg * @set_page: driver page assignment function pointer * * Core service helper for drivers to convert the largest @@ -1645,10 +1645,8 @@ EXPORT_SYMBOL(ib_map_mr_sg); * Returns the number of sg elements that were assigned to * a page vector. */ -int ib_sg_to_pages(struct ib_mr *mr, - struct scatterlist *sgl, - int sg_nents, - int (*set_page)(struct ib_mr *, u64)) +int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, + unsigned int sg_offset, int (*set_page)(struct ib_mr *, u64)) { struct scatterlist *sg; u64 last_end_dma_addr = 0; @@ -1656,12 +1654,12 @@ int ib_sg_to_pages(struct ib_mr *mr, u64 page_mask = ~((u64)mr->page_size - 1); int i, ret; - mr->iova = sg_dma_address(&sgl[0]); + mr->iova = sg_dma_address(&sgl[0]) + sg_offset; mr->length = 0; for_each_sg(sgl, sg, sg_nents, i) { - u64 dma_addr = sg_dma_address(sg); - unsigned int dma_len = sg_dma_len(sg); + u64 dma_addr = sg_dma_address(sg) + sg_offset; + unsigned int dma_len = sg_dma_len(sg) - sg_offset; u64 end_dma_addr = dma_addr + dma_len; u64 page_addr = dma_addr & page_mask; @@ -1694,6 +1692,8 @@ next_page: mr->length += dma_len; last_end_dma_addr = end_dma_addr; last_page_off = end_dma_addr & ~page_mask; + + sg_offset = 0; } return i; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 3234a8be16f6..608aa0c16dc3 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -783,15 +783,14 @@ static int iwch_set_page(struct ib_mr *ibmr, u64 addr) return 0; } -static int iwch_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents) +static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned sg_offset) { struct iwch_mr *mhp = to_iwch_mr(ibmr); mhp->npages = 0; - return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page); + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page); } static int iwch_destroy_qp(struct ib_qp *ib_qp) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index df43f871ab61..067cb3f909c1 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -917,9 +917,8 @@ void c4iw_qp_rem_ref(struct ib_qp *qp); struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); -int c4iw_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents); +int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 008be07d5604..38afb3d2dd92 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -690,15 +690,14 @@ static int c4iw_set_page(struct ib_mr *ibmr, u64 addr) return 0; } -int c4iw_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents) +int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int sg_offset) { struct c4iw_mr *mhp = to_c4iw_mr(ibmr); mhp->mpl_len = 0; - return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page); + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, c4iw_set_page); } int c4iw_dereg_mr(struct ib_mr *ib_mr) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index eaa79c9fc821..825430e376fc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1573,12 +1573,13 @@ static int i40iw_set_page(struct ib_mr *ibmr, u64 addr) * @sg: scatter gather list for fmr * @sg_nents: number of sg pages */ -static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents) +static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int sg_offset) { struct i40iw_mr *iwmr = to_iwmr(ibmr); iwmr->npages = 0; - return ib_sg_to_pages(ibmr, sg, sg_nents, i40iw_set_page); + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, i40iw_set_page); } /** diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 1eca01cebe51..ba328177eae9 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -717,9 +717,8 @@ int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); -int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents); +int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int sg_offset); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index ce0b5aa8eb9b..b04f6238e7e2 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -528,9 +528,8 @@ static int mlx4_set_page(struct ib_mr *ibmr, u64 addr) return 0; } -int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents) +int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int sg_offset) { struct mlx4_ib_mr *mr = to_mmr(ibmr); int rc; @@ -541,7 +540,7 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, sizeof(u64) * mr->max_pages, DMA_TO_DEVICE); - rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page); + rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page); ib_dma_sync_single_for_device(ibmr->device, mr->page_map, sizeof(u64) * mr->max_pages, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b46c25542a7c..8c835b2be39e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -712,9 +712,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); -int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents); +int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4d5bff151cdf..b678eac0f8b3 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1751,24 +1751,27 @@ done: static int mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, struct scatterlist *sgl, - unsigned short sg_nents) + unsigned short sg_nents, + unsigned int sg_offset) { struct scatterlist *sg = sgl; struct mlx5_klm *klms = mr->descs; u32 lkey = mr->ibmr.pd->local_dma_lkey; int i; - mr->ibmr.iova = sg_dma_address(sg); + mr->ibmr.iova = sg_dma_address(sg) + sg_offset; mr->ibmr.length = 0; mr->ndescs = sg_nents; for_each_sg(sgl, sg, sg_nents, i) { if (unlikely(i > mr->max_descs)) break; - klms[i].va = cpu_to_be64(sg_dma_address(sg)); - klms[i].bcount = cpu_to_be32(sg_dma_len(sg)); + klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); + klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); klms[i].key = cpu_to_be32(lkey); mr->ibmr.length += sg_dma_len(sg); + + sg_offset = 0; } return i; @@ -1788,9 +1791,8 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) return 0; } -int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents) +int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int sg_offset) { struct mlx5_ib_mr *mr = to_mmr(ibmr); int n; @@ -1802,9 +1804,10 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, DMA_TO_DEVICE); if (mr->access_mode == MLX5_ACCESS_MODE_KLM) - n = mlx5_ib_sg_to_klms(mr, sg, sg_nents); + n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset); else - n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); + n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, + mlx5_set_page); ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, mr->desc_size * mr->max_descs, diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index fba69a39a7eb..698aab65a286 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -402,15 +402,14 @@ static int nes_set_page(struct ib_mr *ibmr, u64 addr) return 0; } -static int nes_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents) +static int nes_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int sg_offset) { struct nes_mr *nesmr = to_nesmr(ibmr); nesmr->npages = 0; - return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page); + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, nes_set_page); } /** diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index a8496a18e20d..9ddd55022baf 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -3081,13 +3081,12 @@ static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr) return 0; } -int ocrdma_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents) +int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int sg_offset) { struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); mr->npages = 0; - return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page); + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ocrdma_set_page); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 8b517fd36779..b290e5dfc5f1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -122,8 +122,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); -int ocrdma_map_mr_sg(struct ib_mr *ibmr, - struct scatterlist *sg, - int sg_nents); +int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned sg_offset); #endif /* __OCRDMA_VERBS_H__ */ diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 9a391cc5b9b3..44cc85f206f3 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -236,7 +236,7 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, page_vec->npages = 0; page_vec->fake_mr.page_size = SIZE_4K; plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, - mem->size, iser_set_page); + mem->size, 0, iser_set_page); if (unlikely(plen < mem->size)) { iser_err("page vec too short to hold this SG\n"); iser_data_buf_dump(mem, device->ib_device); @@ -446,7 +446,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K); + n = ib_map_mr_sg(mr, mem->sg, mem->size, 0, SIZE_4K); if (unlikely(n != mem->size)) { iser_err("failed to map sg (%d/%d)\n", n, mem->size); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 411e4464ca23..a44a73639cba 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2461,7 +2461,7 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, wr = &inv_wr; } - n = ib_map_mr_sg(mr, mem->sg, mem->nents, PAGE_SIZE); + n = ib_map_mr_sg(mr, mem->sg, mem->nents, 0, PAGE_SIZE); if (unlikely(n != mem->nents)) { isert_err("failed to map mr sg (%d/%d)\n", n, mem->nents); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 427dee3a3ab0..412df56bd84f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1329,7 +1329,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, rkey = ib_inc_rkey(desc->mr->rkey); ib_update_fast_reg_key(desc->mr, rkey); - n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size); + n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, 0, dev->mr_page_size); if (unlikely(n < 0)) return n; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index fb2cef4e9747..24d0d82a07b4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1827,7 +1827,8 @@ struct ib_device { u32 max_num_sg); int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, - int sg_nents); + int sg_nents, + unsigned sg_offset); struct ib_mw * (*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); @@ -3111,29 +3112,23 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); -int ib_map_mr_sg(struct ib_mr *mr, - struct scatterlist *sg, - int sg_nents, - unsigned int page_size); +int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, + unsigned int sg_offset, unsigned int page_size); static inline int -ib_map_mr_sg_zbva(struct ib_mr *mr, - struct scatterlist *sg, - int sg_nents, - unsigned int page_size) +ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, + unsigned int sg_offset, unsigned int page_size) { int n; - n = ib_map_mr_sg(mr, sg, sg_nents, page_size); + n = ib_map_mr_sg(mr, sg, sg_nents, sg_offset, page_size); mr->iova = 0; return n; } -int ib_sg_to_pages(struct ib_mr *mr, - struct scatterlist *sgl, - int sg_nents, - int (*set_page)(struct ib_mr *, u64)); +int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, + unsigned int sg_offset, int (*set_page)(struct ib_mr *, u64)); void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 93ff038ea9d1..d921adc62765 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -111,7 +111,7 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) cpu_relax(); } - ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, PAGE_SIZE); + ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, 0, PAGE_SIZE); if (unlikely(ret != ibmr->sg_len)) return ret < 0 ? ret : -EINVAL; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index c250924a9fd3..3274a4a33231 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -421,7 +421,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, return -ENOMEM; } - n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, 0, PAGE_SIZE); if (unlikely(n != frmr->sg_nents)) { pr_err("RPC: %s: failed to map mr %p (%u/%u)\n", __func__, frmr->fr_mr, n, frmr->sg_nents); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 3b24a646eb46..19a74e95cd38 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -281,7 +281,7 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, } atomic_inc(&xprt->sc_dma_used); - n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, 0, PAGE_SIZE); if (unlikely(n != frmr->sg_nents)) { pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", frmr->mr, n, frmr->sg_nents); -- cgit v1.2.3-59-g8ed1b From 9aa8b3217ed3c13d4e3496020b140da0e6f49a08 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 12 May 2016 10:49:15 -0700 Subject: IB/core: Enhance ib_map_mr_sg() The SRP initiator allows to set max_sectors to a value that exceeds the largest amount of data that can be mapped at once with an mlx4 HCA using fast registration and a page size of 4 KB. Hence modify ib_map_mr_sg() such that it can map partial sg-elements. If an sg-element has been mapped partially, let the caller know which fraction has been mapped by adjusting *sg_offset. Signed-off-by: Bart Van Assche Tested-by: Laurence Oberman Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/rw.c | 2 +- drivers/infiniband/core/verbs.c | 26 +++++++++++++++++++++----- drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- drivers/infiniband/hw/cxgb4/mem.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx4/mr.c | 2 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/mr.c | 8 ++++++-- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 2 +- drivers/infiniband/ulp/iser/iser_memory.c | 4 ++-- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- include/rdma/ib_verbs.h | 8 ++++---- net/sunrpc/xprtrdma/frwr_ops.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +- 18 files changed, 47 insertions(+), 27 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 6fc50bf79afe..1eb9b1294a63 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -92,7 +92,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, reg->inv_wr.next = NULL; } - ret = ib_map_mr_sg(reg->mr, sg, nents, offset, PAGE_SIZE); + ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); if (ret < nents) { ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr); return -EINVAL; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3d7b266a2dcb..1d7d4cf442e3 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1655,7 +1655,7 @@ EXPORT_SYMBOL(ib_set_vf_guid); * is ready for registration. */ int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, - unsigned int sg_offset, unsigned int page_size) + unsigned int *sg_offset, unsigned int page_size) { if (unlikely(!mr->device->map_mr_sg)) return -ENOSYS; @@ -1672,7 +1672,10 @@ EXPORT_SYMBOL(ib_map_mr_sg); * @mr: memory region * @sgl: dma mapped scatterlist * @sg_nents: number of entries in sg - * @sg_offset: offset in bytes into sg + * @sg_offset_p: IN: start offset in bytes into sg + * OUT: offset in bytes for element n of the sg of the first + * byte that has not been processed where n is the return + * value of this function. * @set_page: driver page assignment function pointer * * Core service helper for drivers to convert the largest @@ -1684,19 +1687,24 @@ EXPORT_SYMBOL(ib_map_mr_sg); * a page vector. */ int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, - unsigned int sg_offset, int (*set_page)(struct ib_mr *, u64)) + unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64)) { struct scatterlist *sg; u64 last_end_dma_addr = 0; + unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; unsigned int last_page_off = 0; u64 page_mask = ~((u64)mr->page_size - 1); int i, ret; + if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0]))) + return -EINVAL; + mr->iova = sg_dma_address(&sgl[0]) + sg_offset; mr->length = 0; for_each_sg(sgl, sg, sg_nents, i) { u64 dma_addr = sg_dma_address(sg) + sg_offset; + u64 prev_addr = dma_addr; unsigned int dma_len = sg_dma_len(sg) - sg_offset; u64 end_dma_addr = dma_addr + dma_len; u64 page_addr = dma_addr & page_mask; @@ -1721,8 +1729,14 @@ int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, do { ret = set_page(mr, page_addr); - if (unlikely(ret < 0)) - return i ? : ret; + if (unlikely(ret < 0)) { + sg_offset = prev_addr - sg_dma_address(sg); + mr->length += prev_addr - dma_addr; + if (sg_offset_p) + *sg_offset_p = sg_offset; + return i || sg_offset ? i : ret; + } + prev_addr = page_addr; next_page: page_addr += mr->page_size; } while (page_addr < end_dma_addr); @@ -1734,6 +1748,8 @@ next_page: sg_offset = 0; } + if (sg_offset_p) + *sg_offset_p = 0; return i; } EXPORT_SYMBOL(ib_sg_to_pages); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 608aa0c16dc3..47cb927a0dd6 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -784,7 +784,7 @@ static int iwch_set_page(struct ib_mr *ibmr, u64 addr) } static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, - int sg_nents, unsigned sg_offset) + int sg_nents, unsigned int *sg_offset) { struct iwch_mr *mhp = to_iwch_mr(ibmr); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 067cb3f909c1..1ff3ba8ab67b 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -918,7 +918,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned int sg_offset); + unsigned int *sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 38afb3d2dd92..83960df6fe60 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -691,7 +691,7 @@ static int c4iw_set_page(struct ib_mr *ibmr, u64 addr) } int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned int sg_offset) + unsigned int *sg_offset) { struct c4iw_mr *mhp = to_c4iw_mr(ibmr); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 825430e376fc..4a740f7a0519 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1574,7 +1574,7 @@ static int i40iw_set_page(struct ib_mr *ibmr, u64 addr) * @sg_nents: number of sg pages */ static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, - int sg_nents, unsigned int sg_offset) + int sg_nents, unsigned int *sg_offset) { struct i40iw_mr *iwmr = to_iwmr(ibmr); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index ba328177eae9..6c5ac5d8f32f 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -718,7 +718,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned int sg_offset); + unsigned int *sg_offset); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index b04f6238e7e2..631272172a0b 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -529,7 +529,7 @@ static int mlx4_set_page(struct ib_mr *ibmr, u64 addr) } int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned int sg_offset) + unsigned int *sg_offset) { struct mlx4_ib_mr *mr = to_mmr(ibmr); int rc; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 8c835b2be39e..f05cf57f874c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -713,7 +713,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned int sg_offset); + unsigned int *sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b678eac0f8b3..8cf2ce50511f 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1752,10 +1752,11 @@ static int mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, struct scatterlist *sgl, unsigned short sg_nents, - unsigned int sg_offset) + unsigned int *sg_offset_p) { struct scatterlist *sg = sgl; struct mlx5_klm *klms = mr->descs; + unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; u32 lkey = mr->ibmr.pd->local_dma_lkey; int i; @@ -1774,6 +1775,9 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, sg_offset = 0; } + if (sg_offset_p) + *sg_offset_p = sg_offset; + return i; } @@ -1792,7 +1796,7 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) } int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned int sg_offset) + unsigned int *sg_offset) { struct mlx5_ib_mr *mr = to_mmr(ibmr); int n; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 698aab65a286..4ebea4c8c9b5 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -403,7 +403,7 @@ static int nes_set_page(struct ib_mr *ibmr, u64 addr) } static int nes_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, - int sg_nents, unsigned int sg_offset) + int sg_nents, unsigned int *sg_offset) { struct nes_mr *nesmr = to_nesmr(ibmr); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 9ddd55022baf..b1a3d91fe8b9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -3082,7 +3082,7 @@ static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr) } int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned int sg_offset) + unsigned int *sg_offset) { struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index b290e5dfc5f1..704ef1e9271b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -123,6 +123,6 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned sg_offset); + unsigned int *sg_offset); #endif /* __OCRDMA_VERBS_H__ */ diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 44cc85f206f3..90be56893414 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -236,7 +236,7 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, page_vec->npages = 0; page_vec->fake_mr.page_size = SIZE_4K; plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, - mem->size, 0, iser_set_page); + mem->size, NULL, iser_set_page); if (unlikely(plen < mem->size)) { iser_err("page vec too short to hold this SG\n"); iser_data_buf_dump(mem, device->ib_device); @@ -446,7 +446,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - n = ib_map_mr_sg(mr, mem->sg, mem->size, 0, SIZE_4K); + n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K); if (unlikely(n != mem->size)) { iser_err("failed to map sg (%d/%d)\n", n, mem->size); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 6b9c5688e26a..54f4c1310897 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1329,7 +1329,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, rkey = ib_inc_rkey(desc->mr->rkey); ib_update_fast_reg_key(desc->mr, rkey); - n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, 0, dev->mr_page_size); + n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, NULL, dev->mr_page_size); if (unlikely(n < 0)) { srp_fr_pool_put(ch->fr_pool, &desc, 1); pr_debug("%s: ib_map_mr_sg(%d) returned %d.\n", diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 544c55b4c84a..56bb0f39ce79 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1848,7 +1848,7 @@ struct ib_device { int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, - unsigned sg_offset); + unsigned int *sg_offset); struct ib_mw * (*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); @@ -3145,11 +3145,11 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, const struct sockaddr *addr); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, - unsigned int sg_offset, unsigned int page_size); + unsigned int *sg_offset, unsigned int page_size); static inline int ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, - unsigned int sg_offset, unsigned int page_size) + unsigned int *sg_offset, unsigned int page_size) { int n; @@ -3160,7 +3160,7 @@ ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, } int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, - unsigned int sg_offset, int (*set_page)(struct ib_mr *, u64)); + unsigned int *sg_offset, int (*set_page)(struct ib_mr *, u64)); void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 3274a4a33231..94c3fa910b85 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -421,7 +421,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, return -ENOMEM; } - n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, 0, PAGE_SIZE); + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE); if (unlikely(n != frmr->sg_nents)) { pr_err("RPC: %s: failed to map mr %p (%u/%u)\n", __func__, frmr->fr_mr, n, frmr->sg_nents); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 19a74e95cd38..fbe7444e7de6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -281,7 +281,7 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, } atomic_inc(&xprt->sc_dma_used); - n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, 0, PAGE_SIZE); + n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE); if (unlikely(n != frmr->sg_nents)) { pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", frmr->mr, n, frmr->sg_nents); -- cgit v1.2.3-59-g8ed1b From a647040ea85ae0bf5bd93f2a1b42dba57b0d0059 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 1 May 2016 14:07:23 +0200 Subject: i40e: constify i40e_client_ops structure The i40e_client_ops structure is never modified, so declare it as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_client.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 72a10a19880a..1f41b2c7333e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1892,7 +1892,7 @@ static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev, } /* client interface functions */ -static struct i40e_client_ops i40e_ops = { +static const struct i40e_client_ops i40e_ops = { .open = i40iw_open, .close = i40iw_close, .l2_param_change = i40iw_l2param_change, diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h index bf6b453d93a1..a4601d97fb24 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.h +++ b/drivers/net/ethernet/intel/i40e/i40e_client.h @@ -217,7 +217,7 @@ struct i40e_client { #define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0) #define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2) enum i40e_client_type type; - struct i40e_client_ops *ops; /* client ops provided by the client */ + const struct i40e_client_ops *ops; /* client ops provided by the client */ }; static inline bool i40e_client_is_registered(struct i40e_client *client) -- cgit v1.2.3-59-g8ed1b From dc1badf63026550ce8d099c3a4d1c803d23b038f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 1 May 2016 14:22:21 +0200 Subject: i40iw: constify i40iw_vf_cqp_ops structure The i40iw_vf_cqp_ops structure is never modified, so declare it as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_type.h | 2 +- drivers/infiniband/hw/i40iw/i40iw_vf.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_vf.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 937b7ee7d69b..16cc61720b53 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -479,7 +479,7 @@ struct i40iw_sc_dev { struct i40iw_virt_mem ieq_mem; struct i40iw_puda_rsrc *ieq; - struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; + const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; struct i40iw_hmc_fpm_misc hmc_fpm_misc; u16 qs_handle; diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.c b/drivers/infiniband/hw/i40iw/i40iw_vf.c index cb0f18340e14..e33d4810965c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_vf.c +++ b/drivers/infiniband/hw/i40iw/i40iw_vf.c @@ -80,6 +80,6 @@ enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp, return 0; } -struct i40iw_vf_cqp_ops iw_vf_cqp_ops = { +const struct i40iw_vf_cqp_ops iw_vf_cqp_ops = { i40iw_manage_vf_pble_bp }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.h b/drivers/infiniband/hw/i40iw/i40iw_vf.h index f649f3a62e13..4359559ece9c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_vf.h +++ b/drivers/infiniband/hw/i40iw/i40iw_vf.h @@ -57,6 +57,6 @@ enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp, u64 scratch, bool post_sq); -extern struct i40iw_vf_cqp_ops iw_vf_cqp_ops; +extern const struct i40iw_vf_cqp_ops iw_vf_cqp_ops; #endif -- cgit v1.2.3-59-g8ed1b From e381b3bbd7969a2bf134e10c1ead96644f985b10 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 28 Mar 2016 11:31:26 +0200 Subject: i40iw: Remove unnecessary synchronize_irq() before free_irq() Calling synchronize_irq() right before free_irq() is quite useless. On one hand the IRQ can easily fire again before free_irq() is entered, on the other hand free_irq() itself calls synchronize_irq() internally (in a race condition free way), before any state associated with the IRQ is freed. Patch was generated using the following semantic patch: // @@ expression irq; @@ -synchronize_irq(irq); free_irq(irq, ...); // Signed-off-by: Lars-Peter Clausen Reviewed-by: Leon Romanovsky Acked-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 1f41b2c7333e..c963cad92f5a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -270,7 +270,6 @@ static void i40iw_disable_irq(struct i40iw_sc_dev *dev, i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0); else i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0); - synchronize_irq(msix_vec->irq); free_irq(msix_vec->irq, dev_id); } -- cgit v1.2.3-59-g8ed1b From e9bb8af98a981fe404010706a192ca0450a87760 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 28 Mar 2016 12:14:46 +0100 Subject: i40iw: pass hw_stats by reference rather than by value passing hw_stats by value requires a 280 byte copy so instead pass it by reference is much more efficient. Signed-off-by: Colin Ian King Acked-by: Chien Tin Tung Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_virtchnl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index 4e1d7c665dc5..3041003c94d2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -254,7 +254,7 @@ static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev, static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, u32 vf_id, struct i40iw_virtchnl_op_buf *vchnl_msg, - struct i40iw_dev_hw_stats hw_stats) + struct i40iw_dev_hw_stats *hw_stats) { enum i40iw_status_code ret_code; u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1]; @@ -264,7 +264,7 @@ static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx; vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer); vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS; - *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = hw_stats; + *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = *hw_stats; ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer)); if (ret_code) i40iw_debug(dev, I40IW_DEBUG_VIRT, @@ -539,7 +539,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats); spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags); vf_dev->msg_count--; - vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, devstat->hw_stats); + vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats); break; default: i40iw_debug(dev, I40IW_DEBUG_VIRT, -- cgit v1.2.3-59-g8ed1b From 9ca6f7cf9c9e2ae9a28618be320d09407c312a46 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:17:54 +0530 Subject: RDMA/iw_cxgb4: Add few history bits for ep - add EP_DISC_FAIL history bit - add QP_REFED/DEREFED history bits - Add functions to ref/deref the cm_id and add history bit for the same - add CLOSE_CON_RPL history Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 46 +++++++++++++++++++++------------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 8 +++++- 2 files changed, 36 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d7f7ab34eeba..42983aadf57f 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -150,15 +150,30 @@ static int sched(struct c4iw_dev *dev, struct sk_buff *skb); static LIST_HEAD(timeout_list); static spinlock_t timeout_lock; +static void deref_cm_id(struct c4iw_ep_common *epc) +{ + epc->cm_id->rem_ref(epc->cm_id); + epc->cm_id = NULL; + set_bit(CM_ID_DEREFED, &epc->history); +} + +static void ref_cm_id(struct c4iw_ep_common *epc) +{ + set_bit(CM_ID_REFED, &epc->history); + epc->cm_id->add_ref(epc->cm_id); +} + static void deref_qp(struct c4iw_ep *ep) { c4iw_qp_rem_ref(&ep->com.qp->ibqp); clear_bit(QP_REFERENCED, &ep->com.flags); + set_bit(QP_DEREFED, &ep->com.history); } static void ref_qp(struct c4iw_ep *ep) { set_bit(QP_REFERENCED, &ep->com.flags); + set_bit(QP_REFED, &ep->com.history); c4iw_qp_add_ref(&ep->com.qp->ibqp); } @@ -1173,8 +1188,7 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status) PDBG("close complete delivered ep %p cm_id %p tid %u\n", ep, ep->com.cm_id, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; + deref_cm_id(&ep->com); set_bit(CLOSE_UPCALL, &ep->com.history); } } @@ -1206,8 +1220,7 @@ static void peer_abort_upcall(struct c4iw_ep *ep) PDBG("abort delivered ep %p cm_id %p tid %u\n", ep, ep->com.cm_id, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; + deref_cm_id(&ep->com); set_bit(ABORT_UPCALL, &ep->com.history); } } @@ -1250,10 +1263,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) set_bit(CONN_RPL_UPCALL, &ep->com.history); ep->com.cm_id->event_handler(ep->com.cm_id, &event); - if (status < 0) { - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - } + if (status < 0) + deref_cm_id(&ep->com); } static int connect_request_upcall(struct c4iw_ep *ep) @@ -2818,6 +2829,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) /* The cm_id may be null if we failed to connect */ mutex_lock(&ep->com.mutex); + set_bit(CLOSE_CON_RPL, &ep->com.history); switch (ep->com.state) { case CLOSING: __state_set(&ep->com, MORIBUND); @@ -2998,8 +3010,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); - cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; + ref_cm_id(&ep->com); ep->com.qp = qp; ref_qp(ep); @@ -3032,8 +3044,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) c4iw_put_ep(&ep->com); return 0; err_deref_cm_id: - ep->com.cm_id = NULL; - cm_id->rem_ref(cm_id); + deref_cm_id(&ep->com); err_abort: abort = 1; err_out: @@ -3139,9 +3150,9 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (peer2peer && ep->ord == 0) ep->ord = 1; - cm_id->add_ref(cm_id); - ep->com.dev = dev; ep->com.cm_id = cm_id; + ref_cm_id(&ep->com); + ep->com.dev = dev; ep->com.qp = get_qhp(dev, conn_param->qpn); if (!ep->com.qp) { PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); @@ -3248,7 +3259,7 @@ fail2: remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); fail1: - cm_id->rem_ref(cm_id); + deref_cm_id(&ep->com); c4iw_put_ep(&ep->com); out: return err; @@ -3342,8 +3353,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) goto fail1; } PDBG("%s ep %p\n", __func__, ep); - cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; + ref_cm_id(&ep->com); ep->com.dev = dev; ep->backlog = backlog; memcpy(&ep->com.local_addr, &cm_id->m_local_addr, @@ -3383,7 +3394,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, ep->com.local_addr.ss_family); fail2: - cm_id->rem_ref(cm_id); + deref_cm_id(&ep->com); c4iw_put_ep(&ep->com); fail1: out: @@ -3422,7 +3433,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, ep->com.local_addr.ss_family); done: - cm_id->rem_ref(cm_id); + deref_cm_id(&ep->com); c4iw_put_ep(&ep->com); return err; } @@ -3497,6 +3508,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) ret = send_halfclose(ep, gfp); } if (ret) { + set_bit(EP_DISC_FAIL, &ep->com.history); if (!abrupt) { stop_ep_timer(ep); close_complete_upcall(ep, -EIO); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index df43f871ab61..67710aeeb4f2 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -779,7 +779,13 @@ enum c4iw_ep_history { EP_DISC_ABORT = 18, CONN_RPL_UPCALL = 19, ACT_RETRY_NOMEM = 20, - ACT_RETRY_INUSE = 21 + ACT_RETRY_INUSE = 21, + CLOSE_CON_RPL = 22, + EP_DISC_FAIL = 24, + QP_REFED = 25, + QP_DEREFED = 26, + CM_ID_REFED = 27, + CM_ID_DEREFED = 28, }; struct c4iw_ep_common { -- cgit v1.2.3-59-g8ed1b From ccd2c30b4513905a0624a696ec3eeaa3bf93530d Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:17:55 +0530 Subject: RDMA/iw_cxgb4: Correct RFC number of MPA Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 42983aadf57f..e953f8fd36a0 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -119,7 +119,7 @@ MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " static int mpa_rev = 2; module_param(mpa_rev, int, 0644); MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " - "1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft" + "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft" " compliant (default=2)"); static int markers_enabled; -- cgit v1.2.3-59-g8ed1b From 92f850ec3a18d9d8bf2157a8509d435d49ce80b6 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:17:56 +0530 Subject: RDMA/iw_cxgb4: set the correct FID value in DSGL commands The FID value in a ULP_MEMIO command needs to be set to an IQ ID of a queue configured for our PF. The FID/IQ id is used to index into the PCIE FID table, to find out on which function the DMA needs to be issued. Essentially, every DMA needs to have the ingress queue. The exact ingress queue doesn't matter, but it needs to be an ingress queue associated with the function you want to see the DMA on. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/mem.c | 5 +++-- drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 008be07d5604..d495675ea68d 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -86,8 +86,9 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, (wait ? FW_WR_COMPL_F : 0)); req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L; req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16))); - req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE)); - req->cmd |= cpu_to_be32(T5_ULP_MEMIO_ORDER_V(1)); + req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE) | + T5_ULP_MEMIO_ORDER_V(1) | + T5_ULP_MEMIO_FID_V(rdev->lldi.rxq_ids[0])); req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(len>>5)); req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16)); req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(addr)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 80417fc564d4..4705e2dea423 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -1392,6 +1392,10 @@ struct ulp_mem_io { #define T5_ULP_MEMIO_ORDER_V(x) ((x) << T5_ULP_MEMIO_ORDER_S) #define T5_ULP_MEMIO_ORDER_F T5_ULP_MEMIO_ORDER_V(1U) +#define T5_ULP_MEMIO_FID_S 4 +#define T5_ULP_MEMIO_FID_M 0x7ff +#define T5_ULP_MEMIO_FID_V(x) ((x) << T5_ULP_MEMIO_FID_S) + /* ulp_mem_io.lock_addr fields */ #define ULP_MEMIO_ADDR_S 0 #define ULP_MEMIO_ADDR_V(x) ((x) << ULP_MEMIO_ADDR_S) -- cgit v1.2.3-59-g8ed1b From 8d1f1a6b3fccfce5d95ee0d6456b1437e93f2bba Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:17:57 +0530 Subject: RDMA/iw_cxgb4: parent_ep has to be dereferenced in case of passive accept failure -> On passive side of connection parent_ep referenced during connection request has to be dereferenced during the passive accept failure. -> As passive accept failure error handlinglogic runs in atomic context, the parent ep is dereferenced by scheduling work request. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e953f8fd36a0..cae1794cef83 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -453,8 +453,9 @@ static void arp_failure_discard(void *handle, struct sk_buff *skb) } enum { - NUM_FAKE_CPLS = 1, + NUM_FAKE_CPLS = 2, FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0, + FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1, }; static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) @@ -466,18 +467,29 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + + ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); + c4iw_put_ep(&ep->parent_ep->com); + release_ep_resources(ep); + return 0; +} + /* * Fake up a special CPL opcode and call sched() so process_work() will call * _put_ep_safe() in a safe context to free the ep resources. This is needed * because ARP error handlers are called in an ATOMIC context, and * _c4iw_free_ep() needs to block. */ -static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb) +static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb, + int cpl) { struct cpl_act_establish *rpl = cplhdr(skb); /* Set our special ARP_FAILURE opcode */ - rpl->ot.opcode = FAKE_CPL_PUT_EP_SAFE; + rpl->ot.opcode = cpl; /* * Save ep in the skb->cb area, after where sched() will save the dev @@ -496,7 +508,7 @@ static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb) ep->hwtid); __state_set(&ep->com, DEAD); - queue_arp_failure_cpl(ep, skb); + queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE); } /* @@ -517,7 +529,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) } remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); - queue_arp_failure_cpl(ep, skb); + queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); } /* @@ -3935,7 +3947,8 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = { [CPL_FW4_ACK] = fw4_ack, [CPL_FW6_MSG] = deferred_fw6_msg, [CPL_RX_PKT] = rx_pkt, - [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe + [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe, + [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe }; static void process_timeout(struct c4iw_ep *ep) -- cgit v1.2.3-59-g8ed1b From da1cecdffc13494bef012d598ed3dc1ed9572204 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:17:58 +0530 Subject: RDMA/iw_cxgb4: Do not stop timer in case of incomplete messages In case of incomplete mpa messages we should not stop timer as it results in return with timeout for the next mpa message Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index cae1794cef83..c7b3d5751963 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1395,21 +1395,13 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - /* - * Stop mpa timer. If it expired, then - * we ignore the MPA reply. process_timeout() - * will abort the connection. - */ - if (stop_ep_timer(ep)) - return 0; - /* * If we get more than the supported amount of private data * then we must fail this connection. */ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { err = -EINVAL; - goto err; + goto err_stop_timer; } /* @@ -1431,11 +1423,11 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," " Received = %d\n", __func__, mpa_rev, mpa->revision); err = -EPROTO; - goto err; + goto err_stop_timer; } if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { err = -EPROTO; - goto err; + goto err_stop_timer; } plen = ntohs(mpa->private_data_size); @@ -1445,7 +1437,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) */ if (plen > MPA_MAX_PRIVATE_DATA) { err = -EPROTO; - goto err; + goto err_stop_timer; } /* @@ -1453,7 +1445,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { err = -EPROTO; - goto err; + goto err_stop_timer; } ep->plen = (u8) plen; @@ -1467,9 +1459,17 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) if (mpa->flags & MPA_REJECT) { err = -ECONNREFUSED; - goto err; + goto err_stop_timer; } + /* + * Stop mpa timer. If it expired, then + * we ignore the MPA reply. process_timeout() + * will abort the connection. + */ + if (stop_ep_timer(ep)) + return 0; + /* * If we get here we have accumulated the entire mpa * start reply message including private data. And @@ -1609,6 +1609,8 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) goto out; } goto out; +err_stop_timer: + stop_ep_timer(ep); err: disconnect = 2; out: -- cgit v1.2.3-59-g8ed1b From e4b76a2a2619e95deb1ae2b088c0aa4f24a0bbee Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:17:59 +0530 Subject: RDMA/iw_cxgb4: stop_ep_timer() after MPA negotiation ->Stop the ep timer after MPA negotiation so that the arp failures during send_mpa_reply/reject will be handled by process_timeout() after the ep timer expires. ->Added case MPA_REP_SENT in process_timeout(). ->For MPA reject, c4iw_ep_disconnect tries to start an already started timer, which leads to warning message "timer already started". -> In case of mpa reject stop the timer and call send_mpa_reject(). -> Added new ep flag STOP_MPA_TIMER to tell fw4_ack() to stop the timer only for send_mpa_reply(), which is set in c4iw_accept_cr(). Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 39 +++++++++++++++++----------------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c7b3d5751963..410154c86061 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1047,7 +1047,6 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) */ skb_get(skb); set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; ep->snd_seq += mpalen; @@ -1132,7 +1131,6 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) * Function fw4_ack() will deref it. */ skb_get(skb); - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); ep->mpa_skb = skb; __state_set(&ep->com, MPA_REP_SENT); ep->snd_seq += mpalen; @@ -1744,25 +1742,17 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, ep->mpa_attr.p2p_type); - /* - * If the endpoint timer already expired, then we ignore - * the start request. process_timeout() will abort - * the connection. - */ - if (!stop_ep_timer(ep)) { - __state_set(&ep->com, MPA_REQ_RCVD); - - /* drive upcall */ - mutex_lock_nested(&ep->parent_ep->com.mutex, - SINGLE_DEPTH_NESTING); - if (ep->parent_ep->com.state != DEAD) { - if (connect_request_upcall(ep)) - goto err_unlock_parent; - } else { + __state_set(&ep->com, MPA_REQ_RCVD); + + /* drive upcall */ + mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING); + if (ep->parent_ep->com.state != DEAD) { + if (connect_request_upcall(ep)) goto err_unlock_parent; - } - mutex_unlock(&ep->parent_ep->com.mutex); + } else { + goto err_unlock_parent; } + mutex_unlock(&ep->parent_ep->com.mutex); return 0; err_unlock_parent: @@ -2926,6 +2916,10 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0); kfree_skb(ep->mpa_skb); ep->mpa_skb = NULL; + mutex_lock(&ep->com.mutex); + if (test_bit(STOP_MPA_TIMER, &ep->com.flags)) + stop_ep_timer(ep); + mutex_unlock(&ep->com.mutex); } return 0; } @@ -2952,8 +2946,10 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) disconnect = 1; } mutex_unlock(&ep->com.mutex); - if (disconnect) + if (disconnect) { + stop_ep_timer(ep); err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); + } c4iw_put_ep(&ep->com); return 0; } @@ -3047,6 +3043,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->com.qp, mask, &attrs, 1); if (err) goto err_deref_cm_id; + + set_bit(STOP_MPA_TIMER, &ep->com.flags); err = send_mpa_reply(ep, conn_param->private_data, conn_param->private_data_len); if (err) @@ -3968,6 +3966,7 @@ static void process_timeout(struct c4iw_ep *ep) connect_reply_upcall(ep, -ETIMEDOUT); break; case MPA_REQ_WAIT: + case MPA_REP_SENT: __state_set(&ep->com, ABORTING); break; case CLOSING: diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 67710aeeb4f2..d4e2b519a192 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -755,6 +755,7 @@ enum c4iw_ep_flags { CLOSE_SENT = 3, TIMEOUT = 4, QP_REFERENCED = 5, + STOP_MPA_TIMER = 7, }; enum c4iw_ep_history { -- cgit v1.2.3-59-g8ed1b From caa6c9f247d64f7f7c183514d71113f472124f55 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:18:00 +0530 Subject: RDMA/iw_cxgb4: handle return value of c4iw_l2t_send() and send_mpa_req() ->In act_open_rpl(), CPL_ERR_TCAM_FULL error handling branch, there is no handling of the return value of send_fw_act_open_req(). ->In send_fw_act_open_req(), there is no handling of return value of c4iw_l2t_send(), which may cause a ep leak and won't notify upper layers on connection establish failure. ->send_mpa_req() should act on the return from c4iw_l2t_send() and return the error to the caller. ->In case of c4iw_l2t_send() failure in send_mpa_req(), returns without starting the timer and not changing the ep state, which is further handled by act_establish() -> In act_establish()?if send_mpa_request's get_skb returns an error, may cause an ep leak. So handle return value of send_mpa_req() Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 410154c86061..658ea16d3265 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -217,6 +217,8 @@ static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e); if (error < 0) kfree_skb(skb); + else if (error == NET_XMIT_DROP) + return -ENOMEM; return error < 0 ? error : 0; } @@ -879,10 +881,10 @@ clip_release: return ret; } -static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, - u8 mpa_rev_to_use) +static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, + u8 mpa_rev_to_use) { - int mpalen, wrlen; + int mpalen, wrlen, ret; struct fw_ofld_tx_data_wr *req; struct mpa_message *mpa; struct mpa_v2_conn_params mpa_v2_params; @@ -898,7 +900,7 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, skb = get_skb(skb, wrlen, GFP_KERNEL); if (!skb) { connect_reply_upcall(ep, -ENOMEM); - return; + return -ENOMEM; } set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); @@ -966,12 +968,14 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, t4_set_arp_err_handler(skb, NULL, arp_failure_discard); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; - c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + if (ret) + return ret; start_ep_timer(ep); __state_set(&ep->com, MPA_REQ_SENT); ep->mpa_attr.initiator = 1; ep->snd_seq += mpalen; - return; + return ret; } static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) @@ -1174,9 +1178,11 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) if (ret) goto err; if (ep->retry_with_mpa_v1) - send_mpa_req(ep, skb, 1); + ret = send_mpa_req(ep, skb, 1); else - send_mpa_req(ep, skb, mpa_rev); + ret = send_mpa_req(ep, skb, mpa_rev); + if (ret) + goto err; mutex_unlock(&ep->com.mutex); return 0; err: @@ -1850,7 +1856,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } -static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) +static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) { struct sk_buff *skb; struct fw_ofld_connection_wr *req; @@ -1920,7 +1926,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2); set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); set_bit(ACT_OFLD_CONN, &ep->com.history); - c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } /* @@ -2146,6 +2152,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct sockaddr_in *ra; struct sockaddr_in6 *la6; struct sockaddr_in6 *ra6; + int ret = 0; ep = lookup_atid(t, atid); la = (struct sockaddr_in *)&ep->com.local_addr; @@ -2181,9 +2188,10 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) mutex_unlock(&dev->rdev.stats.lock); if (ep->com.local_addr.ss_family == AF_INET && dev->rdev.lldi.enable_fw_ofld_conn) { - send_fw_act_open_req(ep, - TID_TID_G(AOPEN_ATID_G( - ntohl(rpl->atid_status)))); + ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G( + ntohl(rpl->atid_status)))); + if (ret) + goto fail; return 0; } break; @@ -2223,6 +2231,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) break; } +fail: connect_reply_upcall(ep, status2errno(status)); state_set(&ep->com, DEAD); -- cgit v1.2.3-59-g8ed1b From 8a70f812b16ea4523938749a168817ffed232df9 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:18:01 +0530 Subject: RDMA/iw_cxgb4: in process_timeout() don't move ep state to ABORTING Moving the state to ABORTING causes the ep to get stuck because c4iw_ep_timeout() thinks the ABORT has already been done. So leave the state alone and let c4iw_ep_disconnect() do the right thing given the ep state. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 658ea16d3265..6c22bc95a112 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3971,12 +3971,10 @@ static void process_timeout(struct c4iw_ep *ep) set_bit(TIMEDOUT, &ep->com.history); switch (ep->com.state) { case MPA_REQ_SENT: - __state_set(&ep->com, ABORTING); connect_reply_upcall(ep, -ETIMEDOUT); break; case MPA_REQ_WAIT: case MPA_REP_SENT: - __state_set(&ep->com, ABORTING); break; case CLOSING: case MORIBUND: @@ -3986,7 +3984,6 @@ static void process_timeout(struct c4iw_ep *ep) ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - __state_set(&ep->com, ABORTING); close_complete_upcall(ep, -ETIMEDOUT); break; case ABORTING: -- cgit v1.2.3-59-g8ed1b From 761e19a504afa55ec0ede0ed490cddb99b2596a5 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:18:02 +0530 Subject: RDMA/iw_cxgb4: Handle return value of c4iw_ofld_send() in abort_arp_failure() In abort_arp_failure(), the return value from c4iw_ofld_send() is ignored and thus if the CPL isn't sent, the endpoint is stuck and never gets aborted. Failure of c4iw_ofld_send() is treated as fatal error, and the ep resources are released in a safer context through process_work(). Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6c22bc95a112..6129dbd0dc9e 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -540,12 +540,18 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) */ static void abort_arp_failure(void *handle, struct sk_buff *skb) { - struct c4iw_rdev *rdev = handle; + int ret; + struct c4iw_ep *ep = handle; + struct c4iw_rdev *rdev = &ep->com.dev->rdev; struct cpl_abort_req *req = cplhdr(skb); PDBG("%s rdev %p\n", __func__, rdev); req->cmd = CPL_ABORT_NO_RST; - c4iw_ofld_send(rdev, skb); + ret = c4iw_ofld_send(rdev, skb); + if (ret) { + __state_set(&ep->com, DEAD); + queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); + } } static int send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) @@ -642,7 +648,7 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return -ENOMEM; } set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure); + t4_set_arp_err_handler(skb, ep, abort_arp_failure); req = (struct cpl_abort_req *) skb_put(skb, wrlen); memset(req, 0, wrlen); INIT_TP_WR(req, ep->hwtid); -- cgit v1.2.3-59-g8ed1b From 944661dd97f4f257cd914fffec7eb80832ff9141 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:18:03 +0530 Subject: RDMA/iw_cxgb4: atomically lookup ep and get a reference There is a race between ULP threads calling c4iw_ep_disconnect() via c4iw_modify_rc_qp() and the ingress CPL thread where the ULP thread can free the endpoint just after the ingress CPL thread finds the ep pointer in the tid table. To avoid this, we now use the hwtid_idr table for lookups instead of the LLD tid table so we can lock around insert, remove, and lookup+get_ep to avoid the race. The CPL handlers now will either find the ep ptr and have a ref on it, or not find it and they can discard the CPL. Callers of get_ep_from_tid() will have a ref on the ep if found, and thus must deref when they are done. Negative advice in peer_abort_intr() need to dereference the ep. therefore peer_abort() is scheduled to dereference the ep later. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 110 +++++++++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6129dbd0dc9e..c247812fd38c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -308,6 +308,40 @@ static void *alloc_ep(int size, gfp_t gfp) return epc; } +static void remove_ep_tid(struct c4iw_ep *ep) +{ + unsigned long flags; + + spin_lock_irqsave(&ep->com.dev->lock, flags); + _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0); + spin_unlock_irqrestore(&ep->com.dev->lock, flags); +} + +static void insert_ep_tid(struct c4iw_ep *ep) +{ + unsigned long flags; + + spin_lock_irqsave(&ep->com.dev->lock, flags); + _insert_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep, ep->hwtid, 0); + spin_unlock_irqrestore(&ep->com.dev->lock, flags); +} + +/* + * Atomically lookup the ep ptr given the tid and grab a reference on the ep. + */ +static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid) +{ + struct c4iw_ep *ep; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + ep = idr_find(&dev->hwtid_idr, tid); + if (ep) + c4iw_get_ep(&ep->com); + spin_unlock_irqrestore(&dev->lock, flags); + return ep; +} + void _c4iw_free_ep(struct kref *kref) { struct c4iw_ep *ep; @@ -327,7 +361,6 @@ void _c4iw_free_ep(struct kref *kref) (const u32 *)&sin6->sin6_addr.s6_addr, 1); } - remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -338,6 +371,15 @@ void _c4iw_free_ep(struct kref *kref) static void release_ep_resources(struct c4iw_ep *ep) { set_bit(RELEASE_RESOURCES, &ep->com.flags); + + /* + * If we have a hwtid, then remove it from the idr table + * so lookups will no longer find this endpoint. Otherwise + * we have a race where one thread finds the ep ptr just + * before the other thread is freeing the ep memory. + */ + if (ep->hwtid != -1) + remove_ep_tid(ep); c4iw_put_ep(&ep->com); } @@ -1167,7 +1209,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) /* setup the hwtid for this connection */ ep->hwtid = tid; cxgb4_insert_tid(t, ep, tid); - insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); + insert_ep_tid(ep); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); @@ -1782,11 +1824,10 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_rx_data *hdr = cplhdr(skb); unsigned int dlen = ntohs(hdr->len); unsigned int tid = GET_TID(hdr); - struct tid_info *t = dev->rdev.lldi.tids; __u8 status = hdr->status; int disconnect = 0; - ep = lookup_tid(t, tid); + ep = get_ep_from_tid(dev, tid); if (!ep) return 0; PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); @@ -1826,6 +1867,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) mutex_unlock(&ep->com.mutex); if (disconnect) c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + c4iw_put_ep(&ep->com); return 0; } @@ -1835,9 +1877,8 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_abort_rpl_rss *rpl = cplhdr(skb); int release = 0; unsigned int tid = GET_TID(rpl); - struct tid_info *t = dev->rdev.lldi.tids; - ep = lookup_tid(t, tid); + ep = get_ep_from_tid(dev, tid); if (!ep) { printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n"); return 0; @@ -1859,6 +1900,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) if (release) release_ep_resources(ep); + c4iw_put_ep(&ep->com); return 0; } @@ -2559,7 +2601,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) init_timer(&child_ep->timer); cxgb4_insert_tid(t, child_ep, hwtid); - insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid); + insert_ep_tid(child_ep); if (accept_cr(child_ep, skb, req)) { c4iw_put_ep(&parent_ep->com); release_ep_resources(child_ep); @@ -2582,11 +2624,10 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; struct cpl_pass_establish *req = cplhdr(skb); - struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(req); int ret; - ep = lookup_tid(t, tid); + ep = get_ep_from_tid(dev, tid); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); @@ -2605,6 +2646,7 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) mutex_unlock(&ep->com.mutex); if (ret) c4iw_ep_disconnect(ep, 1, GFP_KERNEL); + c4iw_put_ep(&ep->com); return 0; } @@ -2616,11 +2658,13 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_qp_attributes attrs; int disconnect = 1; int release = 0; - struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(hdr); int ret; - ep = lookup_tid(t, tid); + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); dst_confirm(ep->dst); @@ -2692,6 +2736,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_ep_disconnect(ep, 0, GFP_KERNEL); if (release) release_ep_resources(ep); + c4iw_put_ep(&ep->com); return 0; } @@ -2704,10 +2749,12 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_qp_attributes attrs; int ret; int release = 0; - struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(req); - ep = lookup_tid(t, tid); + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; + if (is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, @@ -2716,7 +2763,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) mutex_lock(&dev->rdev.stats.lock); dev->rdev.stats.neg_adv++; mutex_unlock(&dev->rdev.stats.lock); - return 0; + goto deref_ep; } PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); @@ -2782,7 +2829,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) case DEAD: PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); mutex_unlock(&ep->com.mutex); - return 0; + goto deref_ep; default: BUG_ON(1); break; @@ -2829,6 +2876,10 @@ out: c4iw_reconnect(ep); } +deref_ep: + c4iw_put_ep(&ep->com); + /* Dereferencing ep, referenced in peer_abort_intr() */ + c4iw_put_ep(&ep->com); return 0; } @@ -2838,10 +2889,11 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_qp_attributes attrs; struct cpl_close_con_rpl *rpl = cplhdr(skb); int release = 0; - struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(rpl); - ep = lookup_tid(t, tid); + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); BUG_ON(!ep); @@ -2876,18 +2928,18 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) mutex_unlock(&ep->com.mutex); if (release) release_ep_resources(ep); + c4iw_put_ep(&ep->com); return 0; } static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_rdma_terminate *rpl = cplhdr(skb); - struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(rpl); struct c4iw_ep *ep; struct c4iw_qp_attributes attrs; - ep = lookup_tid(t, tid); + ep = get_ep_from_tid(dev, tid); BUG_ON(!ep); if (ep && ep->com.qp) { @@ -2898,6 +2950,7 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } else printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid); + c4iw_put_ep(&ep->com); return 0; } @@ -2913,15 +2966,16 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_fw4_ack *hdr = cplhdr(skb); u8 credits = hdr->credits; unsigned int tid = GET_TID(hdr); - struct tid_info *t = dev->rdev.lldi.tids; - ep = lookup_tid(t, tid); + ep = get_ep_from_tid(dev, tid); + if (!ep) + return 0; PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); if (credits == 0) { PDBG("%s 0 credit ack ep %p tid %u state %u\n", __func__, ep, ep->hwtid, state_read(&ep->com)); - return 0; + goto out; } dst_confirm(ep->dst); @@ -2936,6 +2990,8 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) stop_ep_timer(ep); mutex_unlock(&ep->com.mutex); } +out: + c4iw_put_ep(&ep->com); return 0; } @@ -4142,10 +4198,10 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); struct c4iw_ep *ep; - struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(req); - ep = lookup_tid(t, tid); + ep = get_ep_from_tid(dev, tid); + /* This EP will be dereferenced in peer_abort() */ if (!ep) { printk(KERN_WARNING MOD "Abort on non-existent endpoint, tid %d\n", tid); @@ -4156,10 +4212,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); - ep->stats.abort_neg_adv++; - dev->rdev.stats.neg_adv++; - kfree_skb(skb); - return 0; + goto out; } PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); @@ -4174,6 +4227,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); } else c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); +out: sched(dev, skb); return 0; } -- cgit v1.2.3-59-g8ed1b From c878b706ffcb1f71908491dcadb2ff556fc86d95 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:18:04 +0530 Subject: RDMA/iw_cxgb4: Free skb in case of arp failure in _c4iw_free_ep() Arp failure for send_mpa_reply/reject() is handled by freeing the mpa_skb in c4iw_free_ep() before releasing ep. Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c247812fd38c..6557240cc6b9 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -364,6 +364,8 @@ void _c4iw_free_ep(struct kref *kref) cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); + if (ep->mpa_skb) + kfree_skb(ep->mpa_skb); } kfree(ep); } -- cgit v1.2.3-59-g8ed1b From ceb110a8c1518f54913568bf84f84df573db99e4 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:18:05 +0530 Subject: RDMA/iw_cxgb4: Release ep for for FPDU_MODE and MPA_REQ_RCVD in process_timeout ARP failure may also happen when ep in FPDU_MODE and these failures need to be handled by process_timeout(). process_timeout() also has to handle case MPA_REQ_RCVD, setting abort to 1, leading to ep resource release. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6557240cc6b9..a972067d94cb 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -4038,7 +4038,9 @@ static void process_timeout(struct c4iw_ep *ep) connect_reply_upcall(ep, -ETIMEDOUT); break; case MPA_REQ_WAIT: + case MPA_REQ_RCVD: case MPA_REP_SENT: + case FPDU_MODE: break; case CLOSING: case MORIBUND: -- cgit v1.2.3-59-g8ed1b From e8667a9b35c550e3daf4519058a52252bf41d9db Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:18:06 +0530 Subject: RDMA/iw_cxgb4: Handle ULP accept/reject during ABORTING c4iw_reject() and c4iw_accept() need to handle the case where the endpoint has timed out and is in the middle of ABORTING the connection. Here is the flow that causes the BUG_ON() to fire on the server side: 1) offload connection setup and endpoint timer started 2) MPA_START request received from peer, CONNECT_REQUEST passed to ULP 3) endpoint timer fires, and process_timeout() aborts the connection, this moves the endpoint state to ABORTING until HW sends up the ABORT_RPL_RSS. 4) application exits closing the CONNECT_REQUEST cm_id. The IWCM calls c4iw_reject_cr() to destroy this connection request. 5) WHAMO: BUG_ON() because the state is ABORTING. The fix is to change c4iw_reject_cr() and c4iw_accept_cr() to fail the operation if the state is not in MPA_REQ_RCVD vs in DEAD. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a972067d94cb..e8edfeea84f9 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3005,13 +3005,12 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); mutex_lock(&ep->com.mutex); - if (ep->com.state == DEAD) { + if (ep->com.state != MPA_REQ_RCVD) { mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return -ECONNRESET; } set_bit(ULP_REJECT, &ep->com.history); - BUG_ON(ep->com.state != MPA_REQ_RCVD); if (mpa_rev == 0) disconnect = 2; else { @@ -3040,12 +3039,11 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); mutex_lock(&ep->com.mutex); - if (ep->com.state == DEAD) { + if (ep->com.state != MPA_REQ_RCVD) { err = -ECONNRESET; goto err_out; } - BUG_ON(ep->com.state != MPA_REQ_RCVD); BUG_ON(!qp); set_bit(ULP_ACCEPT, &ep->com.history); -- cgit v1.2.3-59-g8ed1b From f86fac79afecb41682886785364b15cb13f22280 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:18:07 +0530 Subject: RDMA/iw_cxgb4: atomic find and reference for listening endpoints Add get_ep_from_stid() which will atomically find and reference the endpoint struct if found. This avoids touch-after-free races between threads destroying listening endpoints and the CPL processing thread processing an incoming PASS_ACCEPT_REQ CPL. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e8edfeea84f9..4ee10547a4e4 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -342,6 +342,23 @@ static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid) return ep; } +/* + * Atomically lookup the ep ptr given the stid and grab a reference on the ep. + */ +static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev, + unsigned int stid) +{ + struct c4iw_listen_ep *ep; + unsigned long flags; + + spin_lock_irqsave(&dev->lock, flags); + ep = idr_find(&dev->stid_idr, stid); + if (ep) + c4iw_get_ep(&ep->com); + spin_unlock_irqrestore(&dev->lock, flags); + return ep; +} + void _c4iw_free_ep(struct kref *kref) { struct c4iw_ep *ep; @@ -2306,9 +2323,8 @@ fail: static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_pass_open_rpl *rpl = cplhdr(skb); - struct tid_info *t = dev->rdev.lldi.tids; unsigned int stid = GET_TID(rpl); - struct c4iw_listen_ep *ep = lookup_stid(t, stid); + struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); if (!ep) { PDBG("%s stid %d lookup failure!\n", __func__, stid); @@ -2317,7 +2333,7 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p status %d error %d\n", __func__, ep, rpl->status, status2errno(rpl->status)); c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); - + c4iw_put_ep(&ep->com); out: return 0; } @@ -2325,12 +2341,12 @@ out: static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_close_listsvr_rpl *rpl = cplhdr(skb); - struct tid_info *t = dev->rdev.lldi.tids; unsigned int stid = GET_TID(rpl); - struct c4iw_listen_ep *ep = lookup_stid(t, stid); + struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); PDBG("%s ep %p\n", __func__, ep); c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); + c4iw_put_ep(&ep->com); return 0; } @@ -2490,7 +2506,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) unsigned short hdrs; u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); - parent_ep = lookup_stid(t, stid); + parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); if (!parent_ep) { PDBG("%s connect request on invalid stid %d\n", __func__, stid); goto reject; @@ -2618,6 +2634,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto out; reject: reject_cr(dev, hwtid, skb); + if (parent_ep) + c4iw_put_ep(&parent_ep->com); out: return 0; } @@ -3868,7 +3886,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_pass_accept_req *req = (void *)(rss + 1); struct l2t_entry *e; struct dst_entry *dst; - struct c4iw_ep *lep; + struct c4iw_ep *lep = NULL; u16 window; struct port_info *pi; struct net_device *pdev; @@ -3893,7 +3911,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) */ stid = (__force int) cpu_to_be32((__force u32) rss->hash_val); - lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); + lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid); if (!lep) { PDBG("%s connect request on invalid stid %d\n", __func__, stid); goto reject; @@ -3994,6 +4012,8 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) free_dst: dst_release(dst); reject: + if (lep) + c4iw_put_ep(&lep->com); return 0; } -- cgit v1.2.3-59-g8ed1b From 4a4dd8db9dc15579edc62631326f37c43fda0942 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:18:08 +0530 Subject: RDMA/iw_cxgb4: Handle ret value of process_mpa_reply() in rx_data Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 4ee10547a4e4..0502fac4a214 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1864,7 +1864,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) break; case MPA_REQ_WAIT: ep->rcv_seq += dlen; - process_mpa_request(ep, skb); + disconnect = process_mpa_request(ep, skb); break; case FPDU_MODE: { struct c4iw_qp_attributes attrs; @@ -1885,7 +1885,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) } mutex_unlock(&ep->com.mutex); if (disconnect) - c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL); c4iw_put_ep(&ep->com); return 0; } -- cgit v1.2.3-59-g8ed1b From 093108cb3640844cfdabb0f506fa6b592b64272d Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:18:09 +0530 Subject: RDMA/iw_cxgb4: Always wake up waiter in c4iw_peer_abort_intr() Currently c4iw_peer_abort_intr() does not wake up the waiter if the endpoint state indicates we're using MPAv2 and we're currently trying to connect. This was introduced with commit 7c0a33d61187a ("RDMA/cxgb4: Don't wakeup threads for MPAv2") However, this original fix is flawed because it introduces a race that can cause a deadlock of the iwarp stack. Here is the race: ->local side sets up an active offload connection. ->local side sends MPA_START request. ->peer sends MPA_START response. ->local side ingress cpl thread begins processing the MPA_START response, but before it changes the state from MPA_REQ_SENT to FPDU_MODE: ->peer sends a RST which results in a ABORT_REQ_RSS. This triggers peer_abort_intr() which sees the state in MPA_REQ_SENT and since mpa_rev is 2, it will avoid waking up the endpoint with -ECONNRESET, assuming the stack will re-attempt the connection using MPAv1. ->Meanwhile, the cpl thread moves the state to FPDU_MODE and calls c4iw_modify_rc_qp() which calls rdma_init() which sends a RI_WR/INIT WR to firmware. But since HW sent an abort, FW correctly drops the RI_WR/INIT WR. ->So the cpl thread is stuck waiting for a reply and cannot process the ABORT_REQ_RSS cpl sitting in its input queue. Thus everything comes to a halt because no more ingress cpls are processed by the stack... The correct fix for the issue is to always do the wake up in c4iw_abort_intr() but reinitialize the wait object in c4iw_reconnect(). Fixes: 7c0a33d61187a ("RDMA/cxgb4: Don't wakeup threads for MPAv2") Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0502fac4a214..4f8afa2b8e21 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2136,6 +2136,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); init_timer(&ep->timer); + c4iw_init_wr_wait(&ep->com.wr_wait); /* * Allocate an active TID to initiate a TCP connection. @@ -4239,16 +4240,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); - /* - * Wake up any threads in rdma_init() or rdma_fini(). - * However, if we are on MPAv2 and want to retry with MPAv1 - * then, don't wake up yet. - */ - if (mpa_rev == 2 && !ep->tried_with_mpa_v1) { - if (ep->com.state != MPA_REQ_SENT) - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); - } else - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); out: sched(dev, skb); return 0; -- cgit v1.2.3-59-g8ed1b From 64bec74a9b3ccc0e9795161427ea49864462f612 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 6 May 2016 22:18:10 +0530 Subject: RDMA/iw_cxgb4: Add arp failure handlers to send_mpa_reply/reject() These handlers when called print error message to the kernel log, but the actual handling is done by _c4iw_free_ep() and process_timeout(). Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 4f8afa2b8e21..c4b7f1f8f8a5 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -515,6 +515,11 @@ static void arp_failure_discard(void *handle, struct sk_buff *skb) kfree_skb(skb); } +static void mpa_start_arp_failure(void *handle, struct sk_buff *skb) +{ + pr_err("ARP failure during MPA Negotiation - Closing Connection\n"); +} + enum { NUM_FAKE_CPLS = 2, FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0, @@ -1118,6 +1123,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) */ skb_get(skb); set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; ep->snd_seq += mpalen; @@ -1202,6 +1208,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) * Function fw4_ack() will deref it. */ skb_get(skb); + t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure); ep->mpa_skb = skb; __state_set(&ep->com, MPA_REP_SENT); ep->snd_seq += mpalen; -- cgit v1.2.3-59-g8ed1b From ba987e51a63713669ce6bdbe9b120d72e59eec8e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 12 Apr 2016 14:45:24 -0700 Subject: iw_cxgb4: Convert a __force cast __force casts should be avoided if there is a better alternative. Hence modify the comparison of s_addr with INADDR_ANY such that the __force cast is no longer necessary. Signed-off-by: Bart Van Assche Cc: Steve Wise Cc: Vipul Pandya Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c4b7f1f8f8a5..a3a67216bce6 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3300,7 +3300,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) /* * Handle loopback requests to INADDR_ANY. */ - if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) { + if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { err = pick_local_ipaddrs(dev, cm_id); if (err) goto fail1; -- cgit v1.2.3-59-g8ed1b From 2016de7ba013462d56fb344a5a8ed93f7a4c460a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 1 May 2016 14:22:21 +0200 Subject: i40iw: constify i40iw_vf_cqp_ops structure The i40iw_vf_cqp_ops structure is never modified, so declare it as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_type.h | 2 +- drivers/infiniband/hw/i40iw/i40iw_vf.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_vf.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 937b7ee7d69b..16cc61720b53 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -479,7 +479,7 @@ struct i40iw_sc_dev { struct i40iw_virt_mem ieq_mem; struct i40iw_puda_rsrc *ieq; - struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; + const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; struct i40iw_hmc_fpm_misc hmc_fpm_misc; u16 qs_handle; diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.c b/drivers/infiniband/hw/i40iw/i40iw_vf.c index cb0f18340e14..e33d4810965c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_vf.c +++ b/drivers/infiniband/hw/i40iw/i40iw_vf.c @@ -80,6 +80,6 @@ enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp, return 0; } -struct i40iw_vf_cqp_ops iw_vf_cqp_ops = { +const struct i40iw_vf_cqp_ops iw_vf_cqp_ops = { i40iw_manage_vf_pble_bp }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.h b/drivers/infiniband/hw/i40iw/i40iw_vf.h index f649f3a62e13..4359559ece9c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_vf.h +++ b/drivers/infiniband/hw/i40iw/i40iw_vf.h @@ -57,6 +57,6 @@ enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp, u64 scratch, bool post_sq); -extern struct i40iw_vf_cqp_ops iw_vf_cqp_ops; +extern const struct i40iw_vf_cqp_ops iw_vf_cqp_ops; #endif -- cgit v1.2.3-59-g8ed1b From 6d6c5c1eb39badb85be1df9b1d6aa5b345dac75f Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 28 Mar 2016 11:31:26 +0200 Subject: i40iw: Remove unnecessary synchronize_irq() before free_irq() Calling synchronize_irq() right before free_irq() is quite useless. On one hand the IRQ can easily fire again before free_irq() is entered, on the other hand free_irq() itself calls synchronize_irq() internally (in a race condition free way), before any state associated with the IRQ is freed. Patch was generated using the following semantic patch: // @@ expression irq; @@ -synchronize_irq(irq); free_irq(irq, ...); // Signed-off-by: Lars-Peter Clausen Reviewed-by: Leon Romanovsky Acked-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 72a10a19880a..53d4fd3ba1d0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -270,7 +270,6 @@ static void i40iw_disable_irq(struct i40iw_sc_dev *dev, i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0); else i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0); - synchronize_irq(msix_vec->irq); free_irq(msix_vec->irq, dev_id); } -- cgit v1.2.3-59-g8ed1b From 78c49f83ee282dd08fd264fecb90f4f9e2f21a6d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 28 Mar 2016 12:14:46 +0100 Subject: i40iw: pass hw_stats by reference rather than by value passing hw_stats by value requires a 280 byte copy so instead pass it by reference is much more efficient. Signed-off-by: Colin Ian King Acked-by: Chien Tin Tung Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_virtchnl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index 4e1d7c665dc5..3041003c94d2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -254,7 +254,7 @@ static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev, static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, u32 vf_id, struct i40iw_virtchnl_op_buf *vchnl_msg, - struct i40iw_dev_hw_stats hw_stats) + struct i40iw_dev_hw_stats *hw_stats) { enum i40iw_status_code ret_code; u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1]; @@ -264,7 +264,7 @@ static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx; vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer); vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS; - *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = hw_stats; + *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = *hw_stats; ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer)); if (ret_code) i40iw_debug(dev, I40IW_DEBUG_VIRT, @@ -539,7 +539,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats); spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags); vf_dev->msg_count--; - vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, devstat->hw_stats); + vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats); break; default: i40iw_debug(dev, I40IW_DEBUG_VIRT, -- cgit v1.2.3-59-g8ed1b From 1997412db64bae810edd9ef77d62aefccf965e80 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 29 Mar 2016 12:58:37 -0500 Subject: RDMA/nes: Adding queue drain functions Adding sq and rq drain functions, which block until all previously posted wr-s in the specified queue have completed. A completion object is signaled to unblock the thread, when the last cqe for the corresponding queue is processed. Signed-off-by: Tatyana Nikolova Signed-off-by: Faisal Latif Reviewed-by: Steve Wise Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_verbs.c | 34 ++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/nes/nes_verbs.h | 2 ++ 2 files changed, 36 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index fba69a39a7eb..7394224e99b2 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1315,6 +1315,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type); return ERR_PTR(-EINVAL); } + init_completion(&nesqp->sq_drained); + init_completion(&nesqp->rq_drained); nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR); init_timer(&nesqp->terminate_timer); @@ -3452,6 +3454,29 @@ out: return err; } +/** + * nes_drain_sq - drain sq + * @ibqp: pointer to ibqp + */ +static void nes_drain_sq(struct ib_qp *ibqp) +{ + struct nes_qp *nesqp = to_nesqp(ibqp); + + if (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head) + wait_for_completion(&nesqp->sq_drained); +} + +/** + * nes_drain_rq - drain rq + * @ibqp: pointer to ibqp + */ +static void nes_drain_rq(struct ib_qp *ibqp) +{ + struct nes_qp *nesqp = to_nesqp(ibqp); + + if (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head) + wait_for_completion(&nesqp->rq_drained); +} /** * nes_poll_cq @@ -3582,6 +3607,13 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) } } + if (nesqp->iwarp_state > NES_CQP_QP_IWARP_STATE_RTS) { + if (nesqp->hwqp.sq_tail == nesqp->hwqp.sq_head) + complete(&nesqp->sq_drained); + if (nesqp->hwqp.rq_tail == nesqp->hwqp.rq_head) + complete(&nesqp->rq_drained); + } + entry->wr_id = wrid; entry++; cqe_count++; @@ -3754,6 +3786,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; nesibdev->ibdev.post_send = nes_post_send; nesibdev->ibdev.post_recv = nes_post_recv; + nesibdev->ibdev.drain_sq = nes_drain_sq; + nesibdev->ibdev.drain_rq = nes_drain_rq; nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); if (nesibdev->ibdev.iwcm == NULL) { diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index 70290883d067..e02a5662dc20 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -189,6 +189,8 @@ struct nes_qp { u8 pau_pending; u8 pau_state; __u64 nesuqp_addr; + struct completion sq_drained; + struct completion rq_drained; }; struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, -- cgit v1.2.3-59-g8ed1b From da74bf4aea3d9ec6cf653ad0014c13e9680f3903 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 8 Apr 2016 20:58:42 +0200 Subject: IB/nes: Deinline nes_free_qp_mem, save 1072 bytes This function compiles to 550 bytes of machine code. Three callsites, all in nes_create_qp. Signed-off-by: Denys Vlasenko CC: Faisal Latif CC: Doug Ledford CC: linux-rdma@vger.kernel.org CC: linux-kernel@vger.kernel.org Reviewed-By: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 7394224e99b2..9229f168eca4 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -981,7 +981,7 @@ static int nes_setup_mmap_qp(struct nes_qp *nesqp, struct nes_vnic *nesvnic, /** * nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory. */ -static inline void nes_free_qp_mem(struct nes_device *nesdev, +static void nes_free_qp_mem(struct nes_device *nesdev, struct nes_qp *nesqp, int virt_wqs) { unsigned long flags; -- cgit v1.2.3-59-g8ed1b From aa70345369e251779c0372ef6dd2bd6325a3350c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 25 Apr 2016 20:26:50 +0100 Subject: IB/mlx4: trivial fix of spelling mistake on "argument" fix spelling mistake, argumant -> argument Signed-off-by: Colin Ian King Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f014eaf5969b..b01ef6eee6e8 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1601,7 +1601,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att else if (ret == -ENXIO) pr_err("Device managed flow steering is disabled. Fail to register network rule.\n"); else if (ret) - pr_err("Invalid argumant. Fail to register network rule.\n"); + pr_err("Invalid argument. Fail to register network rule.\n"); mlx4_free_cmd_mailbox(mdev->dev, mailbox); return ret; -- cgit v1.2.3-59-g8ed1b From faca88273b68b71a15749e04037a4d7ee98fff2d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 6 May 2016 20:19:42 +0300 Subject: RDMA/nes: replace custom print_hex_dump() There is no need to duplicate a lot of code that is in the kernel library for ages. Replace duplicating code by calling to print_hex_dump() directly. Note that output is slightly changed: - hex and ascii parts have just two spaces delimeter - there is no delimeter for ascii portions - file and line removed from prefix (they were redundant anyway since previous output shows same closer enough) Signed-off-by: Andy Shevchenko Reviewed-by: Tatyana Nikolova Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_utils.c | 60 ++--------------------------------- 1 file changed, 3 insertions(+), 57 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index 6d3a169c049b..37331e2fdc5f 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -903,70 +904,15 @@ void nes_clc(unsigned long parm) */ void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length) { - char xlate[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', - 'a', 'b', 'c', 'd', 'e', 'f'}; - char *ptr; - char hex_buf[80]; - char ascii_buf[20]; - int num_char; - int num_ascii; - int num_hex; - if (!(nes_debug_level & dump_debug_level)) { return; } - ptr = addr; if (length > 0x100) { nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100); length = 0x100; } - nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", ptr, length, length); - - memset(ascii_buf, 0, 20); - memset(hex_buf, 0, 80); - - num_ascii = 0; - num_hex = 0; - for (num_char = 0; num_char < length; num_char++) { - if (num_ascii == 8) { - ascii_buf[num_ascii++] = ' '; - hex_buf[num_hex++] = '-'; - hex_buf[num_hex++] = ' '; - } - - if (*ptr < 0x20 || *ptr > 0x7e) - ascii_buf[num_ascii++] = '.'; - else - ascii_buf[num_ascii++] = *ptr; - hex_buf[num_hex++] = xlate[((*ptr & 0xf0) >> 4)]; - hex_buf[num_hex++] = xlate[*ptr & 0x0f]; - hex_buf[num_hex++] = ' '; - ptr++; - - if (num_ascii >= 17) { - /* output line and reset */ - nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf); - memset(ascii_buf, 0, 20); - memset(hex_buf, 0, 80); - num_ascii = 0; - num_hex = 0; - } - } + nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", addr, length, length); - /* output the rest */ - if (num_ascii) { - while (num_ascii < 17) { - if (num_ascii == 8) { - hex_buf[num_hex++] = ' '; - hex_buf[num_hex++] = ' '; - } - hex_buf[num_hex++] = ' '; - hex_buf[num_hex++] = ' '; - hex_buf[num_hex++] = ' '; - num_ascii++; - } - - nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf); - } + print_hex_dump(KERN_ERR, PFX, DUMP_PREFIX_NONE, 16, 1, addr, length, true); } -- cgit v1.2.3-59-g8ed1b From ee71b9681df6b71b7fa110d42200bec05dfaf19a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 7 Dec 2015 23:04:43 +0800 Subject: IB/mlx4: Use list_for_each_entry_safe Simplify the code in search_relocate_mgid0_group with by using list_for_each_entry_safe(). Signed-off-by: Geliang Tang Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mcg.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 99451d887266..ebdca2b280ab 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -747,14 +747,11 @@ static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx __be64 tid, union ib_gid *new_mgid) { - struct mcast_group *group = NULL, *cur_group; + struct mcast_group *group = NULL, *cur_group, *n; struct mcast_req *req; - struct list_head *pos; - struct list_head *n; mutex_lock(&ctx->mcg_table_lock); - list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) { - group = list_entry(pos, struct mcast_group, mgid0_list); + list_for_each_entry_safe(group, n, &ctx->mcg_mgid0_list, mgid0_list) { mutex_lock(&group->lock); if (group->last_req_tid == tid) { if (memcmp(new_mgid, &mgid0, sizeof mgid0)) { -- cgit v1.2.3-59-g8ed1b From 6cbac1e4cd0e0110b4be38c201fc055249dfd365 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 14 Apr 2016 16:52:10 +0300 Subject: IB/mlx5: Allow mapping the free running counter on PROT_EXEC The current mlx5 code disallows mapping the free running counter of mlx5 based hardwares when PROT_EXEC is set. Although this behaviour is correct, Linux does add an implicit VM_EXEC to the vm_flags if the READ_IMPLIES_EXEC bit is set in the process personality. This happens for example if the process stack is executable. This causes libmlx5 to output a warning and prevents the user from reading the free running clock. Executing the init segment of the hardware isn't a security risk (at least no more than executing a process own stack), so we just prevent writes to there. Fixes: d69e3bcf7976 ('IB/mlx5: Mmap the HCA's core clock register to user-space') Signed-off-by: Matan Barak Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6ad0489cb3c5..f2a1ad9b1a7a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1108,7 +1108,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; - if (vma->vm_flags & (VM_WRITE | VM_EXEC)) + if (vma->vm_flags & VM_WRITE) return -EPERM; /* Don't expose to user-space information it shouldn't have */ -- cgit v1.2.3-59-g8ed1b From 37aa5c36aa70c9fc5f633b89cce990f04aaa3cd4 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Wed, 27 Apr 2016 16:49:50 +0300 Subject: IB/mlx5: Add UARs write-combining and non-cached mapping By this patch, the user space library will be able to improve performance using appropriate ringing DoorBell method according to the memory type it asked for. Currently only one mapping command is allowed for UARs: MLX5_IB_MMAP_REGULAR_PAGE. Using this mapping, the kernel maps the UARs to write-combining (WC) if the system supports it. If the system is not supporting WC the UARs are mapped to non-cached(NC). In this case the user space library can't tell which mapping is applied. This patch adds 2 new mapping commands: MLX5_IB_MMAP_WC_PAGE and MLX5_IB_MMAP_NC_PAGE. For these commands the kernel maps exactly as requested and fails if it can't. Since there is no generic way to check if the requested memory region can be mapped as WC, driver enables conclusive WC mapping only for x86, PowerPC and ARM which support WC for the device's memory region. Signed-off-by: Guy Levy Signed-off-by: Moshe Lazer Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 96 ++++++++++++++++++++++++++++-------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + 2 files changed, 77 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f2a1ad9b1a7a..7456e44efbaa 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -38,6 +38,9 @@ #include #include #include +#if defined(CONFIG_X86) +#include +#endif #include #include #include @@ -1068,38 +1071,89 @@ static int get_index(unsigned long offset) return get_arg(offset); } +static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) +{ + switch (cmd) { + case MLX5_IB_MMAP_WC_PAGE: + return "WC"; + case MLX5_IB_MMAP_REGULAR_PAGE: + return "best effort WC"; + case MLX5_IB_MMAP_NC_PAGE: + return "NC"; + default: + return NULL; + } +} + +static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, + struct vm_area_struct *vma, struct mlx5_uuar_info *uuari) +{ + int err; + unsigned long idx; + phys_addr_t pfn, pa; + pgprot_t prot; + + switch (cmd) { + case MLX5_IB_MMAP_WC_PAGE: +/* Some architectures don't support WC memory */ +#if defined(CONFIG_X86) + if (!pat_enabled()) + return -EPERM; +#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU))) + return -EPERM; +#endif + /* fall through */ + case MLX5_IB_MMAP_REGULAR_PAGE: + /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */ + prot = pgprot_writecombine(vma->vm_page_prot); + break; + case MLX5_IB_MMAP_NC_PAGE: + prot = pgprot_noncached(vma->vm_page_prot); + break; + default: + return -EINVAL; + } + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + idx = get_index(vma->vm_pgoff); + if (idx >= uuari->num_uars) + return -EINVAL; + + pfn = uar_index2pfn(dev, uuari->uars[idx].index); + mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); + + vma->vm_page_prot = prot; + err = io_remap_pfn_range(vma, vma->vm_start, pfn, + PAGE_SIZE, vma->vm_page_prot); + if (err) { + mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n", + err, vma->vm_start, &pfn, mmap_cmd2str(cmd)); + return -EAGAIN; + } + + pa = pfn << PAGE_SHIFT; + mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), + vma->vm_start, &pa); + + return 0; +} + static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_uuar_info *uuari = &context->uuari; unsigned long command; - unsigned long idx; phys_addr_t pfn; command = get_command(vma->vm_pgoff); switch (command) { + case MLX5_IB_MMAP_WC_PAGE: + case MLX5_IB_MMAP_NC_PAGE: case MLX5_IB_MMAP_REGULAR_PAGE: - if (vma->vm_end - vma->vm_start != PAGE_SIZE) - return -EINVAL; - - idx = get_index(vma->vm_pgoff); - if (idx >= uuari->num_uars) - return -EINVAL; - - pfn = uar_index2pfn(dev, uuari->uars[idx].index); - mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx, - (unsigned long long)pfn); - - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - if (io_remap_pfn_range(vma, vma->vm_start, pfn, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - - mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n", - vma->vm_start, - (unsigned long long)pfn << PAGE_SHIFT); - break; + return uar_mmap(dev, command, vma, uuari); case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: return -ENOSYS; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b46c25542a7c..6e81217b7c67 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -70,6 +70,8 @@ enum { enum mlx5_ib_mmap_cmd { MLX5_IB_MMAP_REGULAR_PAGE = 0, MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, + MLX5_IB_MMAP_WC_PAGE = 2, + MLX5_IB_MMAP_NC_PAGE = 3, /* 5 is chosen in order to be compatible with old versions of libmlx5 */ MLX5_IB_MMAP_CORE_CLOCK = 5, }; -- cgit v1.2.3-59-g8ed1b From fb997816fdf3475fd9bd9a76b6b77aba03864835 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 1 May 2016 14:22:21 +0200 Subject: i40iw: constify i40iw_vf_cqp_ops structure The i40iw_vf_cqp_ops structure is never modified, so declare it as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_type.h | 2 +- drivers/infiniband/hw/i40iw/i40iw_vf.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_vf.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 937b7ee7d69b..16cc61720b53 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -479,7 +479,7 @@ struct i40iw_sc_dev { struct i40iw_virt_mem ieq_mem; struct i40iw_puda_rsrc *ieq; - struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; + const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops; struct i40iw_hmc_fpm_misc hmc_fpm_misc; u16 qs_handle; diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.c b/drivers/infiniband/hw/i40iw/i40iw_vf.c index cb0f18340e14..e33d4810965c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_vf.c +++ b/drivers/infiniband/hw/i40iw/i40iw_vf.c @@ -80,6 +80,6 @@ enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp, return 0; } -struct i40iw_vf_cqp_ops iw_vf_cqp_ops = { +const struct i40iw_vf_cqp_ops iw_vf_cqp_ops = { i40iw_manage_vf_pble_bp }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.h b/drivers/infiniband/hw/i40iw/i40iw_vf.h index f649f3a62e13..4359559ece9c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_vf.h +++ b/drivers/infiniband/hw/i40iw/i40iw_vf.h @@ -57,6 +57,6 @@ enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp, u64 scratch, bool post_sq); -extern struct i40iw_vf_cqp_ops iw_vf_cqp_ops; +extern const struct i40iw_vf_cqp_ops iw_vf_cqp_ops; #endif -- cgit v1.2.3-59-g8ed1b From d42ed55aaf0e9040b063189b72928279848c0a1b Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 28 Mar 2016 11:31:26 +0200 Subject: i40iw: Remove unnecessary synchronize_irq() before free_irq() Calling synchronize_irq() right before free_irq() is quite useless. On one hand the IRQ can easily fire again before free_irq() is entered, on the other hand free_irq() itself calls synchronize_irq() internally (in a race condition free way), before any state associated with the IRQ is freed. Patch was generated using the following semantic patch: // @@ expression irq; @@ -synchronize_irq(irq); free_irq(irq, ...); // Signed-off-by: Lars-Peter Clausen Reviewed-by: Leon Romanovsky Acked-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 72a10a19880a..53d4fd3ba1d0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -270,7 +270,6 @@ static void i40iw_disable_irq(struct i40iw_sc_dev *dev, i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0); else i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0); - synchronize_irq(msix_vec->irq); free_irq(msix_vec->irq, dev_id); } -- cgit v1.2.3-59-g8ed1b From 73fbb4db6cf3682f12b68a64040897fe226fd409 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 28 Mar 2016 12:14:46 +0100 Subject: i40iw: pass hw_stats by reference rather than by value passing hw_stats by value requires a 280 byte copy so instead pass it by reference is much more efficient. Signed-off-by: Colin Ian King Acked-by: Chien Tin Tung Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_virtchnl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index 4e1d7c665dc5..3041003c94d2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -254,7 +254,7 @@ static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev, static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, u32 vf_id, struct i40iw_virtchnl_op_buf *vchnl_msg, - struct i40iw_dev_hw_stats hw_stats) + struct i40iw_dev_hw_stats *hw_stats) { enum i40iw_status_code ret_code; u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1]; @@ -264,7 +264,7 @@ static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx; vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer); vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS; - *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = hw_stats; + *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = *hw_stats; ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer)); if (ret_code) i40iw_debug(dev, I40IW_DEBUG_VIRT, @@ -539,7 +539,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats); spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags); vf_dev->msg_count--; - vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, devstat->hw_stats); + vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats); break; default: i40iw_debug(dev, I40IW_DEBUG_VIRT, -- cgit v1.2.3-59-g8ed1b From 358e42ea66e26d30a7a3e2c967c78f01ec31fe4f Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 17 Apr 2016 17:19:37 +0300 Subject: IB/mlx5: Add Scatter FCS support for Raw Packet QP Enable Scatter FCS in the RQ context when the user passes Scatter FCS create flag. Signed-off-by: Majd Dibbiny Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b46c25542a7c..fb0a110b66c0 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -356,6 +356,7 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, /* QP uses 1 as its source QP number */ MLX5_IB_QP_SQPN_QP1 = 1 << 6, + MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8dee8bc1e0fe..504117657d41 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1028,6 +1028,7 @@ static int get_rq_pas_size(void *qpc) static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin) { + struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; __be64 *qp_pas; void *in; @@ -1051,6 +1052,9 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); + if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS) + MLX5_SET(rqc, rqc, scatter_fcs, 1); + wq = MLX5_ADDR_OF(rqc, rqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, end_padding_mode, @@ -1136,11 +1140,12 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } if (qp->rq.wqe_cnt) { + rq->base.container_mibqp = qp; + err = create_raw_packet_qp_rq(dev, rq, in); if (err) goto err_destroy_sq; - rq->base.container_mibqp = qp; err = create_raw_packet_qp_tir(dev, rq, tdn); if (err) @@ -1252,6 +1257,19 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EOPNOTSUPP; } + if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) { + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs"); + return -EOPNOTSUPP; + } + if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) || + !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { + mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n"); + return -EOPNOTSUPP; + } + qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS; + } + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; -- cgit v1.2.3-59-g8ed1b From cff5a0f3a3cda0d852425093f92acca169eb5aea Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 17 Apr 2016 17:19:38 +0300 Subject: IB/mlx5: Report Scatter FCS device capability when supported Report Scatter FCS support when the Firmware supports as well. Signed-off-by: Majd Dibbiny Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6ad0489cb3c5..dbb3d66e62a1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -517,6 +517,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_UD_TSO; } + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && + MLX5_CAP_ETH(dev->mdev, scatter_fcs)) + props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; + props->vendor_part_id = mdev->pdev->device; props->hw_ver = mdev->pdev->revision; -- cgit v1.2.3-59-g8ed1b From 04ef0f1a0169a14b8e653af1178524ab4390133f Mon Sep 17 00:00:00 2001 From: shamir rabinovitch Date: Wed, 18 May 2016 06:18:10 -0400 Subject: IB/mlx4: Fix unaligned access in send_reply_to_slave The problem is that the function 'send_reply_to_slave' gets the 'req_sa_mad' as a pointer whose address is only aliged to 4 bytes but is 8 bytes in size. This can result in unaligned access faults on certain architectures. Sowmini Varadhan pointed to this reply from Dave Miller that say that memcpy should not be used to solve alignment issues: https://lkml.org/lkml/2015/10/21/352 Optimization of memcpy to 'ldx' instruction can only happen if the compiler knows that the size of the data we are copying is 8 bytes and it assumes it is aligned to 8 bytes. If the compiler know the type is not aligned to 8 it must not optimize the 8 byte copy. Defining the data type as aligned to 4 forces the compiler to treat all accesses as though they aren't aligned and avoids the 'ldx' optimization. Full credit for the idea goes to Jason Gunthorpe . Signed-off-by: Shamir Rabinovitch Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mcg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index ebdca2b280ab..8f7ad07915b0 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -96,7 +96,7 @@ struct ib_sa_mcmember_data { u8 scope_join_state; u8 proxy_join; u8 reserved[2]; -}; +} __packed __aligned(4); struct mcast_group { struct ib_sa_mcmember_data rec; -- cgit v1.2.3-59-g8ed1b From c16d2750a08c8ccaf98d65f287a8aec91bb9610d Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 17 Apr 2016 17:08:41 +0300 Subject: IB/mlx5: Fire the CQ completion handler from tasklet Previously, mlx5_ib_cq_comp was executed from interrupt context. Under heavy load, this could cause the CPU core to be in an interrupt context too long. Instead of executing the handler from the interrupt context we execute it from a much friendly tasklet context. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index a00ba4418de9..dabcc65bd65e 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -879,7 +879,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); cq->mcq.irqn = irqn; - cq->mcq.comp = mlx5_ib_cq_comp; + if (context) + cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; + else + cq->mcq.comp = mlx5_ib_cq_comp; cq->mcq.event = mlx5_ib_cq_event; INIT_LIST_HEAD(&cq->wc_list); -- cgit v1.2.3-59-g8ed1b