Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_req.c'):
 drivers/infiniband/sw/rxe/rxe_req.c | 224 ++++++++++++++--------
 1 file changed, 142 insertions(+), 82 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 0c9d2af15f3d..f63771207970 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -15,8 +15,7 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
u32 opcode);
static inline void retry_first_write_send(struct rxe_qp *qp,
- struct rxe_send_wqe *wqe,
- unsigned int mask, int npsn)
+ struct rxe_send_wqe *wqe, int npsn)
{
int i;
@@ -33,8 +32,6 @@ static inline void retry_first_write_send(struct rxe_qp *qp,
} else {
advance_dma_data(&wqe->dma, to_send);
}
- if (mask & WR_WRITE_MASK)
- wqe->iova += qp->mtu;
}
}
@@ -85,7 +82,7 @@ static void req_retry(struct rxe_qp *qp)
if (mask & WR_WRITE_OR_SEND_MASK) {
npsn = (qp->comp.psn - wqe->first_psn) &
BTH_PSN_MASK;
- retry_first_write_send(qp, wqe, mask, npsn);
+ retry_first_write_send(qp, wqe, npsn);
}
if (mask & WR_READ_MASK) {
@@ -103,14 +100,17 @@ void rnr_nak_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
- pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp));
+ pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp));
+
+ /* request a send queue retry */
+ qp->req.need_retry = 1;
+ qp->req.wait_for_rnr_timer = 0;
rxe_run_task(&qp->req.task, 1);
}
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
struct rxe_send_wqe *wqe;
- unsigned long flags;
struct rxe_queue *q = qp->sq.queue;
unsigned int index = qp->req.wqe_index;
unsigned int cons;
@@ -124,25 +124,23 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
/* check to see if we are drained;
* state_lock used by requester and completer
*/
- spin_lock_irqsave(&qp->state_lock, flags);
+ spin_lock_bh(&qp->state_lock);
do {
if (qp->req.state != QP_STATE_DRAIN) {
/* comp just finished */
- spin_unlock_irqrestore(&qp->state_lock,
- flags);
+ spin_unlock_bh(&qp->state_lock);
break;
}
if (wqe && ((index != cons) ||
(wqe->state != wqe_state_posted))) {
/* comp not done yet */
- spin_unlock_irqrestore(&qp->state_lock,
- flags);
+ spin_unlock_bh(&qp->state_lock);
break;
}
qp->req.state = QP_STATE_DRAINED;
- spin_unlock_irqrestore(&qp->state_lock, flags);
+ spin_unlock_bh(&qp->state_lock);
if (qp->ibqp.event_handler) {
struct ib_event ev;
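
The drain handshake above is what a consumer observes as an IB_EVENT_SQ_DRAINED event. Below is a minimal consumer-side sketch, assuming the event handler was registered at qp creation time; the completion variable and helper names are hypothetical and not part of this patch:

    /* hypothetical consumer sketch: move the qp to SQD, then wait for
     * the IB_EVENT_SQ_DRAINED event that the drain logic above raises
     */
    static DECLARE_COMPLETION(sq_drained);

    static void my_qp_event_handler(struct ib_event *ev, void *context)
    {
            if (ev->event == IB_EVENT_SQ_DRAINED)
                    complete(&sq_drained);
    }

    static int drain_sq(struct ib_qp *qp)
    {
            struct ib_qp_attr attr = { .qp_state = IB_QPS_SQD };
            int ret;

            ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
            if (ret)
                    return ret;
            wait_for_completion(&sq_drained);
            return 0;
    }
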
@@ -166,16 +164,36 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
(wqe->state != wqe_state_processing)))
return NULL;
- if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
- (index != cons))) {
- qp->req.wait_fence = 1;
- return NULL;
- }
-
wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
return wqe;
}
+/**
+ * rxe_wqe_is_fenced - check if next wqe is fenced
+ * @qp: the queue pair
+ * @wqe: the next wqe
+ *
+ * Returns: 1 if wqe needs to wait
+ * 0 if wqe is ready to go
+ */
+static int rxe_wqe_is_fenced(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ /* Local invalidate fence (LIF), see IBA 10.6.5.1.
+ * Requires that ALL previous operations on the send queue
+ * be complete. Made mandatory for the rxe driver.
+ */
+ if (wqe->wr.opcode == IB_WR_LOCAL_INV)
+ return qp->req.wqe_index != queue_get_consumer(qp->sq.queue,
+ QUEUE_TYPE_FROM_CLIENT);
+
+ /* Fence, see IBA 10.8.3.3.
+ * Requires that all previous read and atomic operations
+ * are complete.
+ */
+ return (wqe->wr.send_flags & IB_SEND_FENCE) &&
+ atomic_read(&qp->req.rd_atomic) != qp->attr.max_rd_atomic;
+}
+
static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
{
switch (opcode) {
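
The fence the new helper tests is requested by the consumer at post time. A hypothetical sketch (qp, lkey/rkey, and the local/remote addresses are assumed to exist) of an RDMA READ followed by a fenced SEND, which is exactly the case the rd_atomic check above holds back:

    /* hypothetical sketch: IB_SEND_FENCE on the SEND keeps the rxe
     * requester from starting it until the preceding READ completes
     * (rxe_wqe_is_fenced() returns 1 while reads are outstanding)
     */
    struct ib_sge sge = {
            .addr = laddr, .length = len, .lkey = lkey,     /* assumed */
    };
    struct ib_rdma_wr read_wr = {
            .wr = {
                    .opcode     = IB_WR_RDMA_READ,
                    .sg_list    = &sge,
                    .num_sge    = 1,
                    .send_flags = IB_SEND_SIGNALED,
            },
            .remote_addr = raddr,                           /* assumed */
            .rkey        = rkey,                            /* assumed */
    };
    struct ib_send_wr send_wr = {
            .opcode     = IB_WR_SEND,
            .sg_list    = &sge,
            .num_sge    = 1,
            .send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE,
    };
    const struct ib_send_wr *bad_wr;
    int ret = ib_post_send(qp, &read_wr.wr, &bad_wr);

    if (!ret)
            ret = ib_post_send(qp, &send_wr, &bad_wr);
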
@@ -311,7 +329,6 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
case IB_QPT_UC:
return next_opcode_uc(qp, opcode, fits);
- case IB_QPT_SMI:
case IB_QPT_UD:
case IB_QPT_GSI:
switch (opcode) {
@@ -361,38 +378,25 @@ static inline int get_mtu(struct rxe_qp *qp)
}
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
+ struct rxe_av *av,
struct rxe_send_wqe *wqe,
- int opcode, int payload,
+ int opcode, u32 payload,
struct rxe_pkt_info *pkt)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct sk_buff *skb;
struct rxe_send_wr *ibwr = &wqe->wr;
- struct rxe_av *av;
int pad = (-payload) & 0x3;
int paylen;
int solicited;
- u16 pkey;
u32 qp_num;
int ack_req;
/* length from start of bth to end of icrc */
paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
-
- /* pkt->hdr, port_num and mask are initialized in ifc layer */
- pkt->rxe = rxe;
- pkt->opcode = opcode;
- pkt->qp = qp;
- pkt->psn = qp->req.psn;
- pkt->mask = rxe_opcode[opcode].mask;
- pkt->paylen = paylen;
- pkt->wqe = wqe;
+ pkt->paylen = paylen;
/* init skb */
- av = rxe_get_av(pkt);
- if (!av)
- return NULL;
-
skb = rxe_init_packet(rxe, av, paylen, pkt);
if (unlikely(!skb))
return NULL;
@@ -404,8 +408,6 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
(RXE_WRITE_MASK | RXE_IMMDT_MASK));
- pkey = IB_DEFAULT_PKEY_FULL;
-
qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
qp->attr.dest_qp_num;
@@ -414,7 +416,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
if (ack_req)
qp->req.noack_pkts = 0;
- bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num,
+ bth_init(pkt, pkt->opcode, solicited, 0, pad, IB_DEFAULT_PKEY_FULL, qp_num,
ack_req, pkt->psn);
/* init optional headers */
@@ -432,8 +434,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
if (pkt->mask & RXE_ATMETH_MASK) {
atmeth_set_va(pkt, wqe->iova);
- if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
- opcode == IB_OPCODE_RD_COMPARE_SWAP) {
+ if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
} else {
@@ -453,13 +454,13 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
return skb;
}
-static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_pkt_info *pkt, struct sk_buff *skb,
- int paylen)
+static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
+ struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb, u32 payload)
{
int err;
- err = rxe_prepare(pkt, skb);
+ err = rxe_prepare(av, pkt, skb);
if (err)
return err;
@@ -467,19 +468,19 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (wqe->wr.send_flags & IB_SEND_INLINE) {
u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
- memcpy(payload_addr(pkt), tmp, paylen);
+ memcpy(payload_addr(pkt), tmp, payload);
- wqe->dma.resid -= paylen;
- wqe->dma.sge_offset += paylen;
+ wqe->dma.resid -= payload;
+ wqe->dma.sge_offset += payload;
} else {
err = copy_data(qp->pd, 0, &wqe->dma,
- payload_addr(pkt), paylen,
+ payload_addr(pkt), payload,
RXE_FROM_MR_OBJ);
if (err)
return err;
}
if (bth_pad(pkt)) {
- u8 *pad = payload_addr(pkt) + paylen;
+ u8 *pad = payload_addr(pkt) + payload;
memset(pad, 0, bth_pad(pkt));
}
@@ -503,7 +504,7 @@ static void update_wqe_state(struct rxe_qp *qp,
static void update_wqe_psn(struct rxe_qp *qp,
struct rxe_send_wqe *wqe,
struct rxe_pkt_info *pkt,
- int payload)
+ u32 payload)
{
/* number of packets left to send including current one */
int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
@@ -545,8 +546,7 @@ static void rollback_state(struct rxe_send_wqe *wqe,
qp->req.psn = rollback_psn;
}
-static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_pkt_info *pkt, int payload)
+static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
qp->req.opcode = pkt->opcode;
@@ -604,9 +604,11 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
wqe->status = IB_WC_SUCCESS;
qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
- if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- qp->sq_sig_type == IB_SIGNAL_ALL_WR)
- rxe_run_task(&qp->comp.task, 1);
+ /* No ack will arrive for local work requests, which can
+ * lead to a deadlock. So go ahead and complete it now.
+ */
+ rxe_run_task(&qp->comp.task, 1);
return 0;
}
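
A local operation such as IB_WR_LOCAL_INV or IB_WR_REG_MR never leaves the node, so nothing on the wire can ever trigger its completion; that is the deadlock the unconditional rxe_run_task() above avoids. A hypothetical posting (the rkey is assumed):

    /* hypothetical sketch: no packet is generated for this wqe, so its
     * completion can only come from the requester scheduling the
     * completer directly, as the change above now always does
     */
    struct ib_send_wr inv_wr = {
            .opcode             = IB_WR_LOCAL_INV,
            .send_flags         = IB_SEND_SIGNALED,
            .ex.invalidate_rkey = rkey,         /* assumed */
    };
    const struct ib_send_wr *bad_wr;
    int ret = ib_post_send(qp, &inv_wr, &bad_wr);
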
@@ -614,24 +616,39 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
int rxe_requester(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_pkt_info pkt;
struct sk_buff *skb;
struct rxe_send_wqe *wqe;
enum rxe_hdr_mask mask;
- int payload;
+ u32 payload;
int mtu;
int opcode;
+ int err;
int ret;
struct rxe_send_wqe rollback_wqe;
u32 rollback_psn;
struct rxe_queue *q = qp->sq.queue;
+ struct rxe_ah *ah;
+ struct rxe_av *av;
- rxe_add_ref(qp);
+ if (!rxe_get(qp))
+ return -EAGAIN;
-next_wqe:
- if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
+ if (unlikely(!qp->valid))
goto exit;
+ if (unlikely(qp->req.state == QP_STATE_ERROR)) {
+ wqe = req_next_wqe(qp);
+ if (wqe)
+ /* generate an error completion for error qp state */
+ goto err;
+ else
+ goto exit;
+ }
+
if (unlikely(qp->req.state == QP_STATE_RESET)) {
qp->req.wqe_index = queue_get_consumer(q,
QUEUE_TYPE_FROM_CLIENT);
@@ -639,10 +656,17 @@ next_wqe:
qp->req.need_rd_atomic = 0;
qp->req.wait_psn = 0;
qp->req.need_retry = 0;
+ qp->req.wait_for_rnr_timer = 0;
goto exit;
}
- if (unlikely(qp->req.need_retry)) {
+ /* we come here if the retransmit timer or the rnr timer
+ * has fired. If the retransmit timer fires while we are
+ * processing an RNR NAK, wait until the rnr timer has
+ * fired before starting the retry flow.
+ */
+ if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) {
req_retry(qp);
qp->req.need_retry = 0;
}
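
The wait_for_rnr_timer flag is the requester's half of a handshake with the completer. A simplified sketch of the completer side follows; this is not the literal rxe_comp.c code, and the timeout argument is assumed to have been decoded from the AETH syndrome of the RNR NAK:

    /* simplified sketch: on an RNR NAK the completer parks the
     * requester behind the rnr timer; rnr_nak_timer() above then sets
     * need_retry and clears wait_for_rnr_timer when it fires
     */
    static void handle_rnr_nak(struct rxe_qp *qp, unsigned long rnr_timeout)
    {
            qp->req.wait_for_rnr_timer = 1;
            mod_timer(&qp->rnr_nak_timer, jiffies + rnr_timeout);
    }
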
@@ -651,12 +675,17 @@ next_wqe:
if (unlikely(!wqe))
goto exit;
+ if (rxe_wqe_is_fenced(qp, wqe)) {
+ qp->req.wait_fence = 1;
+ goto exit;
+ }
+
if (wqe->mask & WR_LOCAL_OP_MASK) {
- ret = rxe_do_local_ops(qp, wqe);
- if (unlikely(ret))
+ err = rxe_do_local_ops(qp, wqe);
+ if (unlikely(err))
goto err;
else
- goto next_wqe;
+ goto done;
}
if (unlikely(qp_type(qp) == IB_QPT_RC &&
@@ -676,7 +705,7 @@ next_wqe:
opcode = next_opcode(qp, wqe, wqe->wr.opcode);
if (unlikely(opcode < 0)) {
wqe->status = IB_WC_LOC_QP_OP_ERR;
- goto exit;
+ goto err;
}
mask = rxe_opcode[opcode].mask;
@@ -704,31 +733,51 @@ next_wqe:
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- __rxe_do_task(&qp->comp.task);
- rxe_drop_ref(qp);
- return 0;
+ rxe_run_task(&qp->comp.task, 0);
+ goto done;
}
payload = mtu;
}
- skb = init_req_packet(qp, wqe, opcode, payload, &pkt);
+ pkt.rxe = rxe;
+ pkt.opcode = opcode;
+ pkt.qp = qp;
+ pkt.psn = qp->req.psn;
+ pkt.mask = rxe_opcode[opcode].mask;
+ pkt.wqe = wqe;
+
+ av = rxe_get_av(&pkt, &ah);
+ if (unlikely(!av)) {
+ pr_err("qp#%d Failed to get an address vector\n", qp_num(qp));
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+ }
+
+ skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
if (unlikely(!skb)) {
pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
wqe->status = IB_WC_LOC_QP_OP_ERR;
+ if (ah)
+ rxe_put(ah);
goto err;
}
- ret = finish_packet(qp, wqe, &pkt, skb, payload);
- if (unlikely(ret)) {
+ err = finish_packet(qp, av, wqe, &pkt, skb, payload);
+ if (unlikely(err)) {
pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
- if (ret == -EFAULT)
+ if (err == -EFAULT)
wqe->status = IB_WC_LOC_PROT_ERR;
else
wqe->status = IB_WC_LOC_QP_OP_ERR;
kfree_skb(skb);
+ if (ah)
+ rxe_put(ah);
goto err;
}
+ if (ah)
+ rxe_put(ah);
+
/*
* To prevent a race on wqe access between requester and completer,
* wqe members state and psn need to be set before calling
@@ -738,13 +787,14 @@ next_wqe:
save_state(wqe, qp, &rollback_wqe, &rollback_psn);
update_wqe_state(qp, wqe, &pkt);
update_wqe_psn(qp, wqe, &pkt, payload);
- ret = rxe_xmit_packet(qp, &pkt, skb);
- if (ret) {
+
+ err = rxe_xmit_packet(qp, &pkt, skb);
+ if (err) {
qp->need_req_skb = 1;
rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
- if (ret == -EAGAIN) {
+ if (err == -EAGAIN) {
rxe_run_task(&qp->req.task, 1);
goto exit;
}
@@ -753,15 +803,25 @@ next_wqe:
goto err;
}
- update_state(qp, wqe, &pkt, payload);
-
- goto next_wqe;
+ update_state(qp, &pkt);
+ /* A non-zero return value will cause rxe_do_task to
+ * exit its loop and end the tasklet. A zero return
+ * keeps the loop going, re-entering rxe_requester.
+ */
+done:
+ ret = 0;
+ goto out;
err:
+ /* update wqe_index for each wqe completion */
+ qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
wqe->state = wqe_state_error;
- __rxe_do_task(&qp->comp.task);
-
+ qp->req.state = QP_STATE_ERROR;
+ rxe_run_task(&qp->comp.task, 0);
exit:
- rxe_drop_ref(qp);
- return -EAGAIN;
+ ret = -EAGAIN;
+out:
+ rxe_put(qp);
+
+ return ret;
}
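
The done/err/exit rework hinges on the contract described in the comment above the done label. A simplified sketch of the consuming loop, modeled on rxe_do_task() in rxe_task.c and reduced to the part that interprets the return value:

    /* simplified: the task loop keeps calling the work function while
     * it returns 0; any non-zero value (here -EAGAIN) ends this run
     * of the tasklet
     */
    int ret;

    do {
            ret = task->func(task->arg);    /* rxe_requester(qp) */
    } while (!ret);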