1 files changed, 127 insertions, 117 deletions
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 7065cb310553..93f6e5bf0ec5 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -130,7 +130,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	/* build fw_ri_res_wr */
 	wr_len = sizeof *res_wr + 2 * sizeof *res;
 
-	skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+	skb = alloc_skb(wr_len, GFP_KERNEL);
 	if (!skb) {
 		ret = -ENOMEM;
 		goto err7;
@@ -162,7 +162,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
 		V_FW_RI_RES_WR_DCAEN(0) |
 		V_FW_RI_RES_WR_DCACPU(0) |
-		V_FW_RI_RES_WR_FBMIN(3) |
+		V_FW_RI_RES_WR_FBMIN(2) |
 		V_FW_RI_RES_WR_FBMAX(3) |
 		V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
 		V_FW_RI_RES_WR_CIDXFTHRESH(0) |
@@ -185,7 +185,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
 		V_FW_RI_RES_WR_DCAEN(0) |
 		V_FW_RI_RES_WR_DCACPU(0) |
-		V_FW_RI_RES_WR_FBMIN(3) |
+		V_FW_RI_RES_WR_FBMIN(2) |
 		V_FW_RI_RES_WR_FBMAX(3) |
 		V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
 		V_FW_RI_RES_WR_CIDXFTHRESH(0) |
@@ -235,12 +235,78 @@ err1:
 	return -ENOMEM;
 }
 
-static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
+		      struct ib_send_wr *wr, int max, u32 *plenp)
 {
+	u8 *dstp, *srcp;
+	u32 plen = 0;
 	int i;
+	int rem, len;
+
+	dstp = (u8 *)immdp->data;
+	for (i = 0; i < wr->num_sge; i++) {
+		if ((plen + wr->sg_list[i].length) > max)
+			return -EMSGSIZE;
+		srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
+		plen += wr->sg_list[i].length;
+		rem = wr->sg_list[i].length;
+		while (rem) {
+			if (dstp == (u8 *)&sq->queue[sq->size])
+				dstp = (u8 *)sq->queue;
+			if (rem <= (u8 *)&sq->queue[sq->size] - dstp)
+				len = rem;
+			else
+				len = (u8 *)&sq->queue[sq->size] - dstp;
+			memcpy(dstp, srcp, len);
+			dstp += len;
+			srcp += len;
+			rem -= len;
+		}
+	}
+	immdp->op = FW_RI_DATA_IMMD;
+	immdp->r1 = 0;
+	immdp->r2 = 0;
+	immdp->immdlen = cpu_to_be32(plen);
+	*plenp = plen;
+	return 0;
+}
+
+static int build_isgl(__be64 *queue_start, __be64 *queue_end,
+		      struct fw_ri_isgl *isglp, struct ib_sge *sg_list,
+		      int num_sge, u32 *plenp)
+
+{
+	int i;
+	u32 plen = 0;
+	__be64 *flitp = (__be64 *)isglp->sge;
+
+	for (i = 0; i < num_sge; i++) {
+		if ((plen + sg_list[i].length) < plen)
+			return -EMSGSIZE;
+		plen += sg_list[i].length;
+		*flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) |
+				     sg_list[i].length);
+		if (++flitp == queue_end)
+			flitp = queue_start;
+		*flitp = cpu_to_be64(sg_list[i].addr);
+		if (++flitp == queue_end)
+			flitp = queue_start;
+	}
+	isglp->op = FW_RI_DATA_ISGL;
+	isglp->r1 = 0;
+	isglp->nsge = cpu_to_be16(num_sge);
+	isglp->r2 = 0;
+	if (plenp)
+		*plenp = plen;
+	return 0;
+}
+
+static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
+			   struct ib_send_wr *wr, u8 *len16)
+{
 	u32 plen;
 	int size;
-	u8 *datap;
+	int ret;
 
 	if (wr->num_sge > T4_MAX_SEND_SGE)
 		return -EINVAL;
@@ -267,43 +333,23 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 	default:
 		return -EINVAL;
 	}
+
 	plen = 0;
 	if (wr->num_sge) {
 		if (wr->send_flags & IB_SEND_INLINE) {
-			datap = (u8 *)wqe->send.u.immd_src[0].data;
-			for (i = 0; i < wr->num_sge; i++) {
-				if ((plen + wr->sg_list[i].length) >
-				    T4_MAX_SEND_INLINE) {
-					return -EMSGSIZE;
-				}
-				plen += wr->sg_list[i].length;
-				memcpy(datap,
-				     (void *)(unsigned long)wr->sg_list[i].addr,
-				     wr->sg_list[i].length);
-				datap += wr->sg_list[i].length;
-			}
-			wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
-			wqe->send.u.immd_src[0].r1 = 0;
-			wqe->send.u.immd_src[0].r2 = 0;
-			wqe->send.u.immd_src[0].immdlen = cpu_to_be32(plen);
+			ret = build_immd(sq, wqe->send.u.immd_src, wr,
+					 T4_MAX_SEND_INLINE, &plen);
+			if (ret)
+				return ret;
 			size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
 			       plen;
 		} else {
-			for (i = 0; i < wr->num_sge; i++) {
-				if ((plen + wr->sg_list[i].length) < plen)
-					return -EMSGSIZE;
-				plen += wr->sg_list[i].length;
-				wqe->send.u.isgl_src[0].sge[i].stag =
-					cpu_to_be32(wr->sg_list[i].lkey);
-				wqe->send.u.isgl_src[0].sge[i].len =
-					cpu_to_be32(wr->sg_list[i].length);
-				wqe->send.u.isgl_src[0].sge[i].to =
-					cpu_to_be64(wr->sg_list[i].addr);
-			}
-			wqe->send.u.isgl_src[0].op = FW_RI_DATA_ISGL;
-			wqe->send.u.isgl_src[0].r1 = 0;
-			wqe->send.u.isgl_src[0].nsge = cpu_to_be16(wr->num_sge);
-			wqe->send.u.isgl_src[0].r2 = 0;
+			ret = build_isgl((__be64 *)sq->queue,
+					 (__be64 *)&sq->queue[sq->size],
+					 wqe->send.u.isgl_src,
+					 wr->sg_list, wr->num_sge, &plen);
+			if (ret)
+				return ret;
 			size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
 			       wr->num_sge * sizeof(struct fw_ri_sge);
 		}
@@ -313,62 +359,40 @@ static int build_rdma_send(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 		wqe->send.u.immd_src[0].r2 = 0;
 		wqe->send.u.immd_src[0].immdlen = 0;
 		size = sizeof wqe->send + sizeof(struct fw_ri_immd);
+		plen = 0;
 	}
 	*len16 = DIV_ROUND_UP(size, 16);
 	wqe->send.plen = cpu_to_be32(plen);
 	return 0;
 }
 
-static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
+			    struct ib_send_wr *wr, u8 *len16)
 {
-	int i;
 	u32 plen;
 	int size;
-	u8 *datap;
+	int ret;
 
-	if (wr->num_sge > T4_MAX_WRITE_SGE)
+	if (wr->num_sge > T4_MAX_SEND_SGE)
 		return -EINVAL;
 	wqe->write.r2 = 0;
 	wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
 	wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
-	plen = 0;
 	if (wr->num_sge) {
 		if (wr->send_flags & IB_SEND_INLINE) {
-			datap = (u8 *)wqe->write.u.immd_src[0].data;
-			for (i = 0; i < wr->num_sge; i++) {
-				if ((plen + wr->sg_list[i].length) >
-				    T4_MAX_WRITE_INLINE) {
-					return -EMSGSIZE;
-				}
-				plen += wr->sg_list[i].length;
-				memcpy(datap,
-				     (void *)(unsigned long)wr->sg_list[i].addr,
-				     wr->sg_list[i].length);
-				datap += wr->sg_list[i].length;
-			}
-			wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
-			wqe->write.u.immd_src[0].r1 = 0;
-			wqe->write.u.immd_src[0].r2 = 0;
-			wqe->write.u.immd_src[0].immdlen = cpu_to_be32(plen);
+			ret = build_immd(sq, wqe->write.u.immd_src, wr,
+					 T4_MAX_WRITE_INLINE, &plen);
+			if (ret)
+				return ret;
 			size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
 			       plen;
 		} else {
-			for (i = 0; i < wr->num_sge; i++) {
-				if ((plen + wr->sg_list[i].length) < plen)
-					return -EMSGSIZE;
-				plen += wr->sg_list[i].length;
-				wqe->write.u.isgl_src[0].sge[i].stag =
-					cpu_to_be32(wr->sg_list[i].lkey);
-				wqe->write.u.isgl_src[0].sge[i].len =
-					cpu_to_be32(wr->sg_list[i].length);
-				wqe->write.u.isgl_src[0].sge[i].to =
-					cpu_to_be64(wr->sg_list[i].addr);
-			}
-			wqe->write.u.isgl_src[0].op = FW_RI_DATA_ISGL;
-			wqe->write.u.isgl_src[0].r1 = 0;
-			wqe->write.u.isgl_src[0].nsge =
-						       cpu_to_be16(wr->num_sge);
-			wqe->write.u.isgl_src[0].r2 = 0;
+			ret = build_isgl((__be64 *)sq->queue,
+					 (__be64 *)&sq->queue[sq->size],
+					 wqe->write.u.isgl_src,
+					 wr->sg_list, wr->num_sge, &plen);
+			if (ret)
+				return ret;
 			size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
 			       wr->num_sge * sizeof(struct fw_ri_sge);
 		}
@@ -378,6 +402,7 @@ static int build_rdma_write(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 		wqe->write.u.immd_src[0].r2 = 0;
 		wqe->write.u.immd_src[0].immdlen = 0;
 		size = sizeof wqe->write + sizeof(struct fw_ri_immd);
+		plen = 0;
 	}
 	*len16 = DIV_ROUND_UP(size, 16);
 	wqe->write.plen = cpu_to_be32(plen);
@@ -416,29 +441,13 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
 			   struct ib_recv_wr *wr, u8 *len16)
 {
-	int i;
-	int plen = 0;
+	int ret;
 
-	for (i = 0; i < wr->num_sge; i++) {
-		if ((plen + wr->sg_list[i].length) < plen)
-			return -EMSGSIZE;
-		plen += wr->sg_list[i].length;
-		wqe->recv.isgl.sge[i].stag =
-			cpu_to_be32(wr->sg_list[i].lkey);
-		wqe->recv.isgl.sge[i].len =
-			cpu_to_be32(wr->sg_list[i].length);
-		wqe->recv.isgl.sge[i].to =
-			cpu_to_be64(wr->sg_list[i].addr);
-	}
-	for (; i < T4_MAX_RECV_SGE; i++) {
-		wqe->recv.isgl.sge[i].stag = 0;
-		wqe->recv.isgl.sge[i].len = 0;
-		wqe->recv.isgl.sge[i].to = 0;
-	}
-	wqe->recv.isgl.op = FW_RI_DATA_ISGL;
-	wqe->recv.isgl.r1 = 0;
-	wqe->recv.isgl.nsge = cpu_to_be16(wr->num_sge);
-	wqe->recv.isgl.r2 = 0;
+	ret = build_isgl((__be64 *)qhp->wq.rq.queue,
+			 (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size],
+			 &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
+	if (ret)
+		return ret;
 	*len16 = DIV_ROUND_UP(sizeof wqe->recv +
 			      wr->num_sge * sizeof(struct fw_ri_sge), 16);
 	return 0;
@@ -547,7 +556,9 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			*bad_wr = wr;
 			break;
 		}
-		wqe = &qhp->wq.sq.queue[qhp->wq.sq.pidx];
+		wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
+		      qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
+
 		fw_flags = 0;
 		if (wr->send_flags & IB_SEND_SOLICITED)
 			fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
@@ -564,12 +575,12 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				swsqe->opcode = FW_RI_SEND;
 			else
 				swsqe->opcode = FW_RI_SEND_WITH_INV;
-			err = build_rdma_send(wqe, wr, &len16);
+			err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
 			break;
 		case IB_WR_RDMA_WRITE:
 			fw_opcode = FW_RI_RDMA_WRITE_WR;
 			swsqe->opcode = FW_RI_RDMA_WRITE;
-			err = build_rdma_write(wqe, wr, &len16);
+			err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
 			break;
 		case IB_WR_RDMA_READ:
 		case IB_WR_RDMA_READ_WITH_INV:
@@ -619,8 +630,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		     swsqe->opcode, swsqe->read_len);
 		wr = wr->next;
 		num_wrs--;
-		t4_sq_produce(&qhp->wq);
-		idx++;
+		t4_sq_produce(&qhp->wq, len16);
+		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
 	}
 	if (t4_wq_db_enabled(&qhp->wq))
 		t4_ring_sq_db(&qhp->wq, idx);
@@ -656,7 +667,9 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 			*bad_wr = wr;
 			break;
 		}
-		wqe = &qhp->wq.rq.queue[qhp->wq.rq.pidx];
+		wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue +
+					   qhp->wq.rq.wq_pidx *
+					   T4_EQ_ENTRY_SIZE);
 		if (num_wrs)
 			err = build_rdma_recv(qhp, wqe, wr, &len16);
 		else
@@ -675,15 +688,12 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		wqe->recv.r2[1] = 0;
 		wqe->recv.r2[2] = 0;
 		wqe->recv.len16 = len16;
-		if (len16 < 5)
-			wqe->flits[8] = 0;
-
 		PDBG("%s cookie 0x%llx pidx %u\n", __func__,
 		     (unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
-		t4_rq_produce(&qhp->wq);
+		t4_rq_produce(&qhp->wq, len16);
+		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
 		wr = wr->next;
 		num_wrs--;
-		idx++;
 	}
 	if (t4_wq_db_enabled(&qhp->wq))
 		t4_ring_rq_db(&qhp->wq, idx);
@@ -905,7 +915,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 	atomic_inc(&qhp->refcnt);
 	spin_unlock_irqrestore(&qhp->lock, *flag);
 
-	/* locking heirarchy: cq lock first, then qp lock. */
+	/* locking hierarchy: cq lock first, then qp lock. */
 	spin_lock_irqsave(&rchp->lock, *flag);
 	spin_lock(&qhp->lock);
 	c4iw_flush_hw_cq(&rchp->cq);
@@ -916,7 +926,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 	if (flushed)
 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
 
-	/* locking heirarchy: cq lock first, then qp lock. */
+	/* locking hierarchy: cq lock first, then qp lock. */
 	spin_lock_irqsave(&schp->lock, *flag);
 	spin_lock(&qhp->lock);
 	c4iw_flush_hw_cq(&schp->cq);
@@ -951,7 +961,8 @@ static void flush_qp(struct c4iw_qp *qhp, unsigned long *flag)
 	__flush_qp(qhp, rchp, schp, flag);
 }
 
-static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
+static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
+		     struct c4iw_ep *ep)
 {
 	struct fw_ri_wr *wqe;
 	int ret;
@@ -959,12 +970,12 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
 	struct sk_buff *skb;
 
 	PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
-	     qhp->ep->hwtid);
+	     ep->hwtid);
 
-	skb = alloc_skb(sizeof *wqe, GFP_KERNEL | __GFP_NOFAIL);
+	skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
-	set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
+	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 
 	wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
 	memset(wqe, 0, sizeof *wqe);
@@ -972,7 +983,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
 		FW_WR_OP(FW_RI_INIT_WR) |
 		FW_WR_COMPL(1));
 	wqe->flowid_len16 = cpu_to_be32(
-		FW_WR_FLOWID(qhp->ep->hwtid) |
+		FW_WR_FLOWID(ep->hwtid) |
 		FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16)));
 	wqe->cookie = (u64)&wr_wait;
 
@@ -1035,7 +1046,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
 	PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
 	     qhp->ep->hwtid);
 
-	skb = alloc_skb(sizeof *wqe, GFP_KERNEL | __GFP_NOFAIL);
+	skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 	set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
@@ -1202,17 +1213,16 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 		case C4IW_QP_STATE_CLOSING:
 			BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
 			qhp->attr.state = C4IW_QP_STATE_CLOSING;
+			ep = qhp->ep;
 			if (!internal) {
 				abort = 0;
 				disconnect = 1;
-				ep = qhp->ep;
 				c4iw_get_ep(&ep->com);
 			}
 			spin_unlock_irqrestore(&qhp->lock, flag);
-			ret = rdma_fini(rhp, qhp);
+			ret = rdma_fini(rhp, qhp, ep);
 			spin_lock_irqsave(&qhp->lock, flag);
 			if (ret) {
-				ep = qhp->ep;
 				c4iw_get_ep(&ep->com);
 				disconnect = abort = 1;
 				goto err;