Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--  drivers/infiniband/sw/rdmavt/mr.c           |   4
-rw-r--r--  drivers/infiniband/sw/rdmavt/qp.c           |   3
-rw-r--r--  drivers/infiniband/sw/rdmavt/vt.c           |   6
-rw-r--r--  drivers/infiniband/sw/rxe/Makefile          |   1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c             |   1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_comp.c        |  36
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_cq.c          |  32
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hw_counters.c |   7
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hw_counters.h |   4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h         |  38
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c          | 130
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mw.c          | 343
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c         |  10
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_opcode.c      |  11
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_opcode.h      |   3
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_param.h       |  19
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.c        |  45
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.h        |   8
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c          |  23
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.c       |  21
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.h       | 272
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c         | 159
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c        | 157
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_srq.c         |   5
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c       | 101
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h       |  53
26 files changed, 1143 insertions(+), 349 deletions(-)
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 601d18dda1f5..34b7af6ab9c2 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -101,8 +101,8 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi)
}
/**
- *rvt_mr_exit: clean up MR
- *@rdi: rvt dev structure
+ * rvt_mr_exit - clean up MR
+ * @rdi: rvt dev structure
*
* called when drivers have unregistered or perhaps failed to register with us
*/
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 9d13db68283c..e9f3d356b361 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -984,7 +984,8 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
spin_unlock_irq(&qp->r_lock);
}
-/** rvt_free_qpn - Free a qpn from the bit map
+/**
+ * rvt_free_qpn - Free a qpn from the bit map
* @qpt: QP table
* @qpn: queue pair number to free
*/
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 12ebe041a5da..ac17209816cd 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -144,7 +144,7 @@ static int rvt_modify_device(struct ib_device *device,
}
/**
- * rvt_query_port: Passes the query port call to the driver
+ * rvt_query_port - Passes the query port call to the driver
* @ibdev: Verbs IB dev
* @port_num: port number, 1 based from ib core
* @props: structure to hold returned properties
@@ -175,7 +175,7 @@ static int rvt_query_port(struct ib_device *ibdev, u32 port_num,
}
/**
- * rvt_modify_port
+ * rvt_modify_port - modify port
* @ibdev: Verbs IB dev
* @port_num: Port number, 1 based from ib core
* @port_modify_mask: How to change the port
@@ -418,7 +418,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
* These functions are not part of verbs specifically but are
* required for rdmavt to function.
*/
- if ((!rdi->ibdev.ops.init_port) ||
+ if ((!rdi->ibdev.ops.port_groups) ||
(!rdi->driver_f.get_pci_dev))
return -EINVAL;
break;
diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index 66af72dca759..1e24673e9318 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -15,6 +15,7 @@ rdma_rxe-y := \
rxe_qp.o \
rxe_cq.o \
rxe_mr.o \
+ rxe_mw.o \
rxe_opcode.o \
rxe_mmap.o \
rxe_icrc.o \
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 95f0de0c8b49..8e0f9c489cab 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -54,6 +54,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
rxe->attr.max_cq = RXE_MAX_CQ;
rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1;
rxe->attr.max_mr = RXE_MAX_MR;
+ rxe->attr.max_mw = RXE_MAX_MW;
rxe->attr.max_pd = RXE_MAX_PD;
rxe->attr.max_qp_rd_atom = RXE_MAX_QP_RD_ATOM;
rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM;
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index a6712e373eed..58ad9c2644f3 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -103,6 +103,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
case IB_WR_RDMA_READ_WITH_INV: return IB_WC_RDMA_READ;
case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV;
case IB_WR_REG_MR: return IB_WC_REG_MR;
+ case IB_WR_BIND_MW: return IB_WC_BIND_MW;
default:
return 0xff;
@@ -141,7 +142,10 @@ static inline enum comp_state get_wqe(struct rxe_qp *qp,
/* we come here whether or not we found a response packet to see if
* there are any posted WQEs
*/
- wqe = queue_head(qp->sq.queue);
+ if (qp->is_user)
+ wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_USER);
+ else
+ wqe = queue_head(qp->sq.queue, QUEUE_TYPE_KERNEL);
*wqe_p = wqe;
/* no WQE or requester has not started it yet */
@@ -345,7 +349,7 @@ static inline enum comp_state do_read(struct rxe_qp *qp,
ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
&wqe->dma, payload_addr(pkt),
- payload_size(pkt), to_mr_obj, NULL);
+ payload_size(pkt), RXE_TO_MR_OBJ, NULL);
if (ret) {
wqe->status = IB_WC_LOC_PROT_ERR;
return COMPST_ERROR;
@@ -367,7 +371,7 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp,
ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
&wqe->dma, &atomic_orig,
- sizeof(u64), to_mr_obj, NULL);
+ sizeof(u64), RXE_TO_MR_OBJ, NULL);
if (ret) {
wqe->status = IB_WC_LOC_PROT_ERR;
return COMPST_ERROR;
@@ -418,16 +422,23 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_cqe cqe;
+ bool post;
+
+ /* do we need to post a completion */
+ post = ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+ wqe->status != IB_WC_SUCCESS);
- if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- wqe->status != IB_WC_SUCCESS) {
+ if (post)
make_send_cqe(qp, wqe, &cqe);
- advance_consumer(qp->sq.queue);
+
+ if (qp->is_user)
+ advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_USER);
+ else
+ advance_consumer(qp->sq.queue, QUEUE_TYPE_KERNEL);
+
+ if (post)
rxe_cq_post(qp->scq, &cqe, 0);
- } else {
- advance_consumer(qp->sq.queue);
- }
if (wqe->wr.opcode == IB_WR_SEND ||
wqe->wr.opcode == IB_WR_SEND_WITH_IMM ||
@@ -515,6 +526,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
{
struct sk_buff *skb;
struct rxe_send_wqe *wqe;
+ struct rxe_queue *q = qp->sq.queue;
while ((skb = skb_dequeue(&qp->resp_pkts))) {
rxe_drop_ref(qp);
@@ -522,12 +534,12 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
ib_device_put(qp->ibqp.device);
}
- while ((wqe = queue_head(qp->sq.queue))) {
+ while ((wqe = queue_head(q, q->type))) {
if (notify) {
wqe->status = IB_WC_WR_FLUSH_ERR;
do_complete(qp, wqe);
} else {
- advance_consumer(qp->sq.queue);
+ advance_consumer(q, q->type);
}
}
}
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index b315ebf041ac..aef288f164fd 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -25,7 +25,11 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
}
if (cq) {
- count = queue_count(cq->queue);
+ if (cq->is_user)
+ count = queue_count(cq->queue, QUEUE_TYPE_TO_USER);
+ else
+ count = queue_count(cq->queue, QUEUE_TYPE_KERNEL);
+
if (cqe < count) {
pr_warn("cqe(%d) < current # elements in queue (%d)",
cqe, count);
@@ -59,9 +63,11 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
struct rxe_create_cq_resp __user *uresp)
{
int err;
+ enum queue_type type;
+ type = uresp ? QUEUE_TYPE_TO_USER : QUEUE_TYPE_KERNEL;
cq->queue = rxe_queue_init(rxe, &cqe,
- sizeof(struct rxe_cqe));
+ sizeof(struct rxe_cqe), type);
if (!cq->queue) {
pr_warn("unable to create cq\n");
return -ENOMEM;
@@ -106,10 +112,17 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
{
struct ib_event ev;
unsigned long flags;
+ int full;
+ void *addr;
spin_lock_irqsave(&cq->cq_lock, flags);
- if (unlikely(queue_full(cq->queue))) {
+ if (cq->is_user)
+ full = queue_full(cq->queue, QUEUE_TYPE_TO_USER);
+ else
+ full = queue_full(cq->queue, QUEUE_TYPE_KERNEL);
+
+ if (unlikely(full)) {
spin_unlock_irqrestore(&cq->cq_lock, flags);
if (cq->ibcq.event_handler) {
ev.device = cq->ibcq.device;
@@ -121,9 +134,18 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
return -EBUSY;
}
- memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));
+ if (cq->is_user)
+ addr = producer_addr(cq->queue, QUEUE_TYPE_TO_USER);
+ else
+ addr = producer_addr(cq->queue, QUEUE_TYPE_KERNEL);
+
+ memcpy(addr, cqe, sizeof(*cqe));
+
+ if (cq->is_user)
+ advance_producer(cq->queue, QUEUE_TYPE_TO_USER);
+ else
+ advance_producer(cq->queue, QUEUE_TYPE_KERNEL);
- advance_producer(cq->queue);
spin_unlock_irqrestore(&cq->cq_lock, flags);
if ((cq->notify == IB_CQ_NEXT_COMP) ||
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
index f469fd1c753d..d5ceb706d964 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -40,13 +40,10 @@ int rxe_ib_get_hw_stats(struct ib_device *ibdev,
return ARRAY_SIZE(rxe_counter_name);
}
-struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev,
- u32 port_num)
+struct rdma_hw_stats *rxe_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
{
BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_name) != RXE_NUM_OF_COUNTERS);
- /* We support only per port stats */
- if (!port_num)
- return NULL;
return rdma_alloc_hw_stats_struct(rxe_counter_name,
ARRAY_SIZE(rxe_counter_name),
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
index 2f369acb46d7..71f4d4fa9dc8 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
@@ -29,8 +29,8 @@ enum rxe_counters {
RXE_NUM_OF_COUNTERS
};
-struct rdma_hw_stats *rxe_ib_alloc_hw_stats(struct ib_device *ibdev,
- u32 port_num);
+struct rdma_hw_stats *rxe_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num);
int rxe_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port, int index);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index ef8061d2fbe0..1ddb20855dee 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -71,40 +71,32 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev, u32 size,
int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
/* rxe_mr.c */
-enum copy_direction {
- to_mr_obj,
- from_mr_obj,
-};
-
+u8 rxe_get_next_key(u32 last_key);
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr);
-
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
- int access, struct ib_udata *udata, struct rxe_mr *mr);
-
+ int access, struct rxe_mr *mr);
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr);
-
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
- enum copy_direction dir, u32 *crcp);
-
+ enum rxe_mr_copy_dir dir, u32 *crcp);
int copy_data(struct rxe_pd *pd, int access,
struct rxe_dma_info *dma, void *addr, int length,
- enum copy_direction dir, u32 *crcp);
-
+ enum rxe_mr_copy_dir dir, u32 *crcp);
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length);
-
-enum lookup_type {
- lookup_local,
- lookup_remote,
-};
-
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
- enum lookup_type type);
-
+ enum rxe_mr_lookup_type type);
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
-
+int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
+int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey);
+int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
void rxe_mr_cleanup(struct rxe_pool_entry *arg);
-int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
+/* rxe_mw.c */
+int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
+int rxe_dealloc_mw(struct ib_mw *ibmw);
+int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
+int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey);
+struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey);
+void rxe_mw_cleanup(struct rxe_pool_entry *arg);
/* rxe_net.c */
void rxe_loopback(struct sk_buff *skb);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 9f63947bab12..6aabcb4de235 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -7,19 +7,17 @@
#include "rxe.h"
#include "rxe_loc.h"
-/*
- * lfsr (linear feedback shift register) with period 255
+/* Return a random 8 bit key value that is
+ * different than the last_key. Set last_key to -1
+ * if this is the first key for an MR or MW
*/
-static u8 rxe_get_key(void)
+u8 rxe_get_next_key(u32 last_key)
{
- static u32 key = 1;
-
- key = key << 1;
+ u8 key;
- key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
- ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));
-
- key &= 0xff;
+ do {
+ get_random_bytes(&key, 1);
+ } while (key == last_key);
return key;
}
@@ -47,7 +45,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
- u32 lkey = mr->pelem.index << 8 | rxe_get_key();
+ u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
mr->ibmr.lkey = lkey;
@@ -57,21 +55,6 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
-void rxe_mr_cleanup(struct rxe_pool_entry *arg)
-{
- struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
- int i;
-
- ib_umem_release(mr->umem);
-
- if (mr->map) {
- for (i = 0; i < mr->num_map; i++)
- kfree(mr->map[i]);
-
- kfree(mr->map);
- }
-}
-
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
int i;
@@ -121,7 +104,7 @@ void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
}
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
- int access, struct ib_udata *udata, struct rxe_mr *mr)
+ int access, struct rxe_mr *mr)
{
struct rxe_map **map;
struct rxe_phys_buf *buf = NULL;
@@ -135,7 +118,7 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
if (IS_ERR(umem)) {
pr_warn("err %d from rxe_umem_get\n",
(int)PTR_ERR(umem));
- err = -EINVAL;
+ err = PTR_ERR(umem);
goto err1;
}
@@ -300,7 +283,7 @@ out:
* crc32 if crcp is not zero. caller must hold a reference to mr
*/
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
- enum copy_direction dir, u32 *crcp)
+ enum rxe_mr_copy_dir dir, u32 *crcp)
{
int err;
int bytes;
@@ -318,9 +301,9 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
if (mr->type == RXE_MR_TYPE_DMA) {
u8 *src, *dest;
- src = (dir == to_mr_obj) ? addr : ((void *)(uintptr_t)iova);
+ src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
- dest = (dir == to_mr_obj) ? ((void *)(uintptr_t)iova) : addr;
+ dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;
memcpy(dest, src, length);
@@ -348,8 +331,8 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
u8 *src, *dest;
va = (u8 *)(uintptr_t)buf->addr + offset;
- src = (dir == to_mr_obj) ? addr : va;
- dest = (dir == to_mr_obj) ? va : addr;
+ src = (dir == RXE_TO_MR_OBJ) ? addr : va;
+ dest = (dir == RXE_TO_MR_OBJ) ? va : addr;
bytes = buf->size - offset;
@@ -394,7 +377,7 @@ int copy_data(
struct rxe_dma_info *dma,
void *addr,
int length,
- enum copy_direction dir,
+ enum rxe_mr_copy_dir dir,
u32 *crcp)
{
int bytes;
@@ -414,7 +397,7 @@ int copy_data(
}
if (sge->length && (offset < sge->length)) {
- mr = lookup_mr(pd, access, sge->lkey, lookup_local);
+ mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
if (!mr) {
err = -EINVAL;
goto err1;
@@ -440,7 +423,7 @@ int copy_data(
if (sge->length) {
mr = lookup_mr(pd, access, sge->lkey,
- lookup_local);
+ RXE_LOOKUP_LOCAL);
if (!mr) {
err = -EINVAL;
goto err1;
@@ -522,7 +505,7 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
* (4) verify that mr state is valid
*/
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
- enum lookup_type type)
+ enum rxe_mr_lookup_type type)
{
struct rxe_mr *mr;
struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
@@ -532,8 +515,8 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
if (!mr)
return NULL;
- if (unlikely((type == lookup_local && mr_lkey(mr) != key) ||
- (type == lookup_remote && mr_rkey(mr) != key) ||
+ if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
+ (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
mr_pd(mr) != pd || (access && !(access & mr->access)) ||
mr->state != RXE_MR_STATE_VALID)) {
rxe_drop_ref(mr);
@@ -542,3 +525,72 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
return mr;
}
+
+int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_mr *mr;
+ int ret;
+
+ mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
+ if (!mr) {
+ pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (rkey != mr->ibmr.rkey) {
+ pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
+ __func__, rkey, mr->ibmr.rkey);
+ ret = -EINVAL;
+ goto err_drop_ref;
+ }
+
+ if (atomic_read(&mr->num_mw) > 0) {
+ pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
+ __func__);
+ ret = -EINVAL;
+ goto err_drop_ref;
+ }
+
+ mr->state = RXE_MR_STATE_FREE;
+ ret = 0;
+
+err_drop_ref:
+ rxe_drop_ref(mr);
+err:
+ return ret;
+}
+
+int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct rxe_mr *mr = to_rmr(ibmr);
+
+ if (atomic_read(&mr->num_mw) > 0) {
+ pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ mr->state = RXE_MR_STATE_ZOMBIE;
+ rxe_drop_ref(mr_pd(mr));
+ rxe_drop_index(mr);
+ rxe_drop_ref(mr);
+
+ return 0;
+}
+
+void rxe_mr_cleanup(struct rxe_pool_entry *arg)
+{
+ struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
+ int i;
+
+ ib_umem_release(mr->umem);
+
+ if (mr->map) {
+ for (i = 0; i < mr->num_map; i++)
+ kfree(mr->map[i]);
+
+ kfree(mr->map);
+ }
+}
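For reference, the lkey/rkey layout used throughout this series packs the object's pool index into the upper 24 bits and the 8-bit key byte produced by rxe_get_next_key() into the low byte, which is why the lookups above shift the key right by 8. A minimal stand-alone sketch of that split (illustration only, not driver code):

#include <stdint.h>
#include <stdio.h>

static uint32_t make_rxe_key(uint32_t pool_index, uint8_t key_byte)
{
	/* mirrors: lkey = mr->pelem.index << 8 | rxe_get_next_key(-1) */
	return (pool_index << 8) | key_byte;
}

int main(void)
{
	uint32_t rkey = make_rxe_key(0x12345, 0xab);

	printf("rkey       = %#x\n", rkey);        /* 0x12345ab */
	printf("pool index = %#x\n", rkey >> 8);   /* what rxe_pool_get_index() is given */
	printf("key byte   = %#x\n", rkey & 0xff); /* compared against the stored key */
	return 0;
}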
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
new file mode 100644
index 000000000000..5ba77df7598e
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved.
+ */
+
+#include "rxe.h"
+
+int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+{
+ struct rxe_mw *mw = to_rmw(ibmw);
+ struct rxe_pd *pd = to_rpd(ibmw->pd);
+ struct rxe_dev *rxe = to_rdev(ibmw->device);
+ int ret;
+
+ rxe_add_ref(pd);
+
+ ret = rxe_add_to_pool(&rxe->mw_pool, mw);
+ if (ret) {
+ rxe_drop_ref(pd);
+ return ret;
+ }
+
+ rxe_add_index(mw);
+ ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1);
+ mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ?
+ RXE_MW_STATE_FREE : RXE_MW_STATE_VALID;
+ spin_lock_init(&mw->lock);
+
+ return 0;
+}
+
+static void rxe_do_dealloc_mw(struct rxe_mw *mw)
+{
+ if (mw->mr) {
+ struct rxe_mr *mr = mw->mr;
+
+ mw->mr = NULL;
+ atomic_dec(&mr->num_mw);
+ rxe_drop_ref(mr);
+ }
+
+ if (mw->qp) {
+ struct rxe_qp *qp = mw->qp;
+
+ mw->qp = NULL;
+ rxe_drop_ref(qp);
+ }
+
+ mw->access = 0;
+ mw->addr = 0;
+ mw->length = 0;
+ mw->state = RXE_MW_STATE_INVALID;
+}
+
+int rxe_dealloc_mw(struct ib_mw *ibmw)
+{
+ struct rxe_mw *mw = to_rmw(ibmw);
+ struct rxe_pd *pd = to_rpd(ibmw->pd);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mw->lock, flags);
+ rxe_do_dealloc_mw(mw);
+ spin_unlock_irqrestore(&mw->lock, flags);
+
+ rxe_drop_ref(mw);
+ rxe_drop_ref(pd);
+
+ return 0;
+}
+
+static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ struct rxe_mw *mw, struct rxe_mr *mr)
+{
+ if (mw->ibmw.type == IB_MW_TYPE_1) {
+ if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
+ pr_err_once(
+ "attempt to bind a type 1 MW not in the valid state\n");
+ return -EINVAL;
+ }
+
+ /* o10-36.2.2 */
+ if (unlikely((mw->access & IB_ZERO_BASED))) {
+ pr_err_once("attempt to bind a zero based type 1 MW\n");
+ return -EINVAL;
+ }
+ }
+
+ if (mw->ibmw.type == IB_MW_TYPE_2) {
+ /* o10-37.2.30 */
+ if (unlikely(mw->state != RXE_MW_STATE_FREE)) {
+ pr_err_once(
+ "attempt to bind a type 2 MW not in the free state\n");
+ return -EINVAL;
+ }
+
+ /* C10-72 */
+ if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) {
+ pr_err_once(
+ "attempt to bind type 2 MW with qp with different PD\n");
+ return -EINVAL;
+ }
+
+ /* o10-37.2.40 */
+ if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) {
+ pr_err_once(
+ "attempt to invalidate type 2 MW by binding with NULL or zero length MR\n");
+ return -EINVAL;
+ }
+ }
+
+ if (unlikely((wqe->wr.wr.mw.rkey & 0xff) == (mw->ibmw.rkey & 0xff))) {
+ pr_err_once("attempt to bind MW with same key\n");
+ return -EINVAL;
+ }
+
+ /* remaining checks only apply to a nonzero MR */
+ if (!mr)
+ return 0;
+
+ if (unlikely(mr->access & IB_ZERO_BASED)) {
+ pr_err_once("attempt to bind MW to zero based MR\n");
+ return -EINVAL;
+ }
+
+ /* C10-73 */
+ if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) {
+ pr_err_once(
+ "attempt to bind an MW to an MR without bind access\n");
+ return -EINVAL;
+ }
+
+ /* C10-74 */
+ if (unlikely((mw->access &
+ (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
+ !(mr->access & IB_ACCESS_LOCAL_WRITE))) {
+ pr_err_once(
+ "attempt to bind an writeable MW to an MR without local write access\n");
+ return -EINVAL;
+ }
+
+ /* C10-75 */
+ if (mw->access & IB_ZERO_BASED) {
+ if (unlikely(wqe->wr.wr.mw.length > mr->length)) {
+ pr_err_once(
+ "attempt to bind a ZB MW outside of the MR\n");
+ return -EINVAL;
+ }
+ } else {
+ if (unlikely((wqe->wr.wr.mw.addr < mr->iova) ||
+ ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
+ (mr->iova + mr->length)))) {
+ pr_err_once(
+ "attempt to bind a VA MW outside of the MR\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ struct rxe_mw *mw, struct rxe_mr *mr)
+{
+ u32 rkey;
+ u32 new_rkey;
+
+ rkey = mw->ibmw.rkey;
+ new_rkey = (rkey & 0xffffff00) | (wqe->wr.wr.mw.rkey & 0x000000ff);
+
+ mw->ibmw.rkey = new_rkey;
+ mw->access = wqe->wr.wr.mw.access;
+ mw->state = RXE_MW_STATE_VALID;
+ mw->addr = wqe->wr.wr.mw.addr;
+ mw->length = wqe->wr.wr.mw.length;
+
+ if (mw->mr) {
+ rxe_drop_ref(mw->mr);
+ atomic_dec(&mw->mr->num_mw);
+ mw->mr = NULL;
+ }
+
+ if (mw->length) {
+ mw->mr = mr;
+ atomic_inc(&mr->num_mw);
+ rxe_add_ref(mr);
+ }
+
+ if (mw->ibmw.type == IB_MW_TYPE_2) {
+ rxe_add_ref(qp);
+ mw->qp = qp;
+ }
+}
+
+int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ int ret;
+ struct rxe_mw *mw;
+ struct rxe_mr *mr;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ unsigned long flags;
+
+ mw = rxe_pool_get_index(&rxe->mw_pool,
+ wqe->wr.wr.mw.mw_rkey >> 8);
+ if (unlikely(!mw)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (unlikely(mw->ibmw.rkey != wqe->wr.wr.mw.mw_rkey)) {
+ ret = -EINVAL;
+ goto err_drop_mw;
+ }
+
+ if (likely(wqe->wr.wr.mw.length)) {
+ mr = rxe_pool_get_index(&rxe->mr_pool,
+ wqe->wr.wr.mw.mr_lkey >> 8);
+ if (unlikely(!mr)) {
+ ret = -EINVAL;
+ goto err_drop_mw;
+ }
+
+ if (unlikely(mr->ibmr.lkey != wqe->wr.wr.mw.mr_lkey)) {
+ ret = -EINVAL;
+ goto err_drop_mr;
+ }
+ } else {
+ mr = NULL;
+ }
+
+ spin_lock_irqsave(&mw->lock, flags);
+
+ ret = rxe_check_bind_mw(qp, wqe, mw, mr);
+ if (ret)
+ goto err_unlock;
+
+ rxe_do_bind_mw(qp, wqe, mw, mr);
+err_unlock:
+ spin_unlock_irqrestore(&mw->lock, flags);
+err_drop_mr:
+ if (mr)
+ rxe_drop_ref(mr);
+err_drop_mw:
+ rxe_drop_ref(mw);
+err:
+ return ret;
+}
+
+static int rxe_check_invalidate_mw(struct rxe_qp *qp, struct rxe_mw *mw)
+{
+ if (unlikely(mw->state == RXE_MW_STATE_INVALID))
+ return -EINVAL;
+
+ /* o10-37.2.26 */
+ if (unlikely(mw->ibmw.type == IB_MW_TYPE_1))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void rxe_do_invalidate_mw(struct rxe_mw *mw)
+{
+ struct rxe_qp *qp;
+ struct rxe_mr *mr;
+
+ /* valid type 2 MW will always have a QP pointer */
+ qp = mw->qp;
+ mw->qp = NULL;
+ rxe_drop_ref(qp);
+
+ /* valid type 2 MW will always have an MR pointer */
+ mr = mw->mr;
+ mw->mr = NULL;
+ atomic_dec(&mr->num_mw);
+ rxe_drop_ref(mr);
+
+ mw->access = 0;
+ mw->addr = 0;
+ mw->length = 0;
+ mw->state = RXE_MW_STATE_FREE;
+}
+
+int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ unsigned long flags;
+ struct rxe_mw *mw;
+ int ret;
+
+ mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
+ if (!mw) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (rkey != mw->ibmw.rkey) {
+ ret = -EINVAL;
+ goto err_drop_ref;
+ }
+
+ spin_lock_irqsave(&mw->lock, flags);
+
+ ret = rxe_check_invalidate_mw(qp, mw);
+ if (ret)
+ goto err_unlock;
+
+ rxe_do_invalidate_mw(mw);
+err_unlock:
+ spin_unlock_irqrestore(&mw->lock, flags);
+err_drop_ref:
+ rxe_drop_ref(mw);
+err:
+ return ret;
+}
+
+struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ struct rxe_pd *pd = to_rpd(qp->ibqp.pd);
+ struct rxe_mw *mw;
+ int index = rkey >> 8;
+
+ mw = rxe_pool_get_index(&rxe->mw_pool, index);
+ if (!mw)
+ return NULL;
+
+ if (unlikely((rxe_mw_rkey(mw) != rkey) || rxe_mw_pd(mw) != pd ||
+ (mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) ||
+ (mw->length == 0) ||
+ (access && !(access & mw->access)) ||
+ mw->state != RXE_MW_STATE_VALID)) {
+ rxe_drop_ref(mw);
+ return NULL;
+ }
+
+ return mw;
+}
+
+void rxe_mw_cleanup(struct rxe_pool_entry *elem)
+{
+ struct rxe_mw *mw = container_of(elem, typeof(*mw), pelem);
+
+ rxe_drop_index(mw);
+}
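The new rxe_mw.c above is driven from user space through the usual libibverbs memory-window calls: ibv_alloc_mw() ends up in rxe_alloc_mw(), and a posted IBV_WR_BIND_MW work request is handled by rxe_bind_mw(). A hedged sketch of that caller side, assuming an RC QP in RTS and an MR registered with IBV_ACCESS_MW_BIND already exist:

#include <infiniband/verbs.h>
#include <stdio.h>

static int bind_type2_mw(struct ibv_pd *pd, struct ibv_qp *qp,
			 struct ibv_mr *mr, uint64_t addr, uint64_t length)
{
	struct ibv_send_wr wr = {0}, *bad_wr = NULL;
	struct ibv_mw *mw;
	int ret;

	mw = ibv_alloc_mw(pd, IBV_MW_TYPE_2);
	if (!mw)
		return -1;

	wr.opcode = IBV_WR_BIND_MW;
	wr.send_flags = IBV_SEND_SIGNALED;
	wr.bind_mw.mw = mw;
	/* the new rkey must differ from the current one in the low 8 bits,
	 * matching the "bind MW with same key" check in rxe_check_bind_mw()
	 */
	wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);
	wr.bind_mw.bind_info.mr = mr;
	wr.bind_mw.bind_info.addr = addr;
	wr.bind_mw.bind_info.length = length;
	wr.bind_mw.bind_info.mw_access_flags = IBV_ACCESS_REMOTE_WRITE;

	ret = ibv_post_send(qp, &wr, &bad_wr);
	if (ret) {
		fprintf(stderr, "bind_mw post failed: %d\n", ret);
		ibv_dealloc_mw(mw);
		return ret;
	}

	/* after polling an IBV_WC_BIND_MW completion, wr.bind_mw.rkey is the
	 * rkey to hand to the remote peer
	 */
	return 0;
}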
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 01662727dca0..fc1ba4904279 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -207,10 +207,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
/* Create UDP socket */
err = udp_sock_create(net, &udp_cfg, &sock);
- if (err < 0) {
- pr_err("failed to create udp socket. err = %d\n", err);
+ if (err < 0)
return ERR_PTR(err);
- }
tnl_cfg.encap_type = 1;
tnl_cfg.encap_rcv = rxe_udp_encap_recv;
@@ -619,6 +617,12 @@ static int rxe_net_ipv6_init(void)
recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
htons(ROCE_V2_UDP_DPORT), true);
+ if (PTR_ERR(recv_sockets.sk6) == -EAFNOSUPPORT) {
+ recv_sockets.sk6 = NULL;
+ pr_warn("IPv6 is not supported, can not create a UDPv6 socket\n");
+ return 0;
+ }
+
if (IS_ERR(recv_sockets.sk6)) {
recv_sockets.sk6 = NULL;
pr_err("Failed to create IPv6 UDP tunnel\n");
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index 0cb4b01fd910..3ef5a10a6efd 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -87,13 +87,20 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_WR_LOCAL_INV] = {
.name = "IB_WR_LOCAL_INV",
.mask = {
- [IB_QPT_RC] = WR_REG_MASK,
+ [IB_QPT_RC] = WR_LOCAL_OP_MASK,
},
},
[IB_WR_REG_MR] = {
.name = "IB_WR_REG_MR",
.mask = {
- [IB_QPT_RC] = WR_REG_MASK,
+ [IB_QPT_RC] = WR_LOCAL_OP_MASK,
+ },
+ },
+ [IB_WR_BIND_MW] = {
+ .name = "IB_WR_BIND_MW",
+ .mask = {
+ [IB_QPT_RC] = WR_LOCAL_OP_MASK,
+ [IB_QPT_UC] = WR_LOCAL_OP_MASK,
},
},
};
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index 1041ac9a9233..e02f039b8c44 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -19,8 +19,7 @@ enum rxe_wr_mask {
WR_SEND_MASK = BIT(2),
WR_READ_MASK = BIT(3),
WR_WRITE_MASK = BIT(4),
- WR_LOCAL_MASK = BIT(5),
- WR_REG_MASK = BIT(6),
+ WR_LOCAL_OP_MASK = BIT(5),
WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK,
WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK,
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 25ab50d9b7c2..742e6ec93686 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -37,7 +37,6 @@ static inline enum ib_mtu eth_mtu_int_to_enum(int mtu)
enum rxe_device_param {
RXE_MAX_MR_SIZE = -1ull,
RXE_PAGE_SIZE_CAP = 0xfffff000,
- RXE_MAX_QP = 0x10000,
RXE_MAX_QP_WR = 0x4000,
RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR
| IB_DEVICE_BAD_QKEY_CNTR
@@ -49,7 +48,10 @@ enum rxe_device_param {
| IB_DEVICE_RC_RNR_NAK_GEN
| IB_DEVICE_SRQ_RESIZE
| IB_DEVICE_MEM_MGT_EXTENSIONS
- | IB_DEVICE_ALLOW_USER_UNREG,
+ | IB_DEVICE_ALLOW_USER_UNREG
+ | IB_DEVICE_MEM_WINDOW
+ | IB_DEVICE_MEM_WINDOW_TYPE_2A
+ | IB_DEVICE_MEM_WINDOW_TYPE_2B,
RXE_MAX_SGE = 32,
RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) +
sizeof(struct ib_sge) * RXE_MAX_SGE,
@@ -58,7 +60,6 @@ enum rxe_device_param {
RXE_MAX_SGE_RD = 32,
RXE_MAX_CQ = 16384,
RXE_MAX_LOG_CQE = 15,
- RXE_MAX_MR = 256 * 1024,
RXE_MAX_PD = 0x7ffc,
RXE_MAX_QP_RD_ATOM = 128,
RXE_MAX_RES_RD_ATOM = 0x3f000,
@@ -67,7 +68,6 @@ enum rxe_device_param {
RXE_MAX_MCAST_QP_ATTACH = 56,
RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000,
RXE_MAX_AH = 100,
- RXE_MAX_SRQ = 960,
RXE_MAX_SRQ_WR = 0x4000,
RXE_MIN_SRQ_WR = 1,
RXE_MAX_SRQ_SGE = 27,
@@ -80,16 +80,21 @@ enum rxe_device_param {
RXE_NUM_PORT = 1,
+ RXE_MAX_QP = 0x10000,
RXE_MIN_QP_INDEX = 16,
RXE_MAX_QP_INDEX = 0x00020000,
+ RXE_MAX_SRQ = 0x00001000,
RXE_MIN_SRQ_INDEX = 0x00020001,
RXE_MAX_SRQ_INDEX = 0x00040000,
+ RXE_MAX_MR = 0x00001000,
+ RXE_MAX_MW = 0x00001000,
RXE_MIN_MR_INDEX = 0x00000001,
- RXE_MAX_MR_INDEX = 0x00040000,
- RXE_MIN_MW_INDEX = 0x00040001,
- RXE_MAX_MW_INDEX = 0x00060000,
+ RXE_MAX_MR_INDEX = 0x00010000,
+ RXE_MIN_MW_INDEX = 0x00010001,
+ RXE_MAX_MW_INDEX = 0x00020000,
+
RXE_MAX_PKT_PER_ACK = 64,
RXE_MAX_UNACKED_PSNS = 128,
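With the reworked ranges above, MR indices ([RXE_MIN_MR_INDEX, RXE_MAX_MR_INDEX]) and MW indices ([RXE_MIN_MW_INDEX, RXE_MAX_MW_INDEX]) are disjoint, which is what lets the rkey_is_mw() helper used in rxe_req.c and rxe_resp.c below classify an rkey from its index bits alone. That helper is not part of the hunks shown here; a plausible kernel-style sketch consistent with these constants:

static inline bool rkey_is_mw(u32 rkey)
{
	u32 index = rkey >> 8;	/* upper 24 bits are the pool index */

	return index >= RXE_MIN_MW_INDEX && index <= RXE_MAX_MW_INDEX;
}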
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index d24901f2af3f..0b8e7c6255a2 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -65,6 +65,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
.name = "rxe-mw",
.size = sizeof(struct rxe_mw),
.elem_offset = offsetof(struct rxe_mw, pelem),
+ .cleanup = rxe_mw_cleanup,
.flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC,
.max_index = RXE_MAX_MW_INDEX,
.min_index = RXE_MIN_MW_INDEX,
@@ -183,7 +184,7 @@ static u32 alloc_index(struct rxe_pool *pool)
return index + pool->index.min_index;
}
-static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
+static int rxe_insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
{
struct rb_node **link = &pool->index.tree.rb_node;
struct rb_node *parent = NULL;
@@ -195,7 +196,7 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
if (elem->index == new->index) {
pr_warn("element already exists!\n");
- goto out;
+ return -EINVAL;
}
if (elem->index > new->index)
@@ -206,11 +207,11 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
rb_link_node(&new->index_node, parent, link);
rb_insert_color(&new->index_node, &pool->index.tree);
-out:
- return;
+
+ return 0;
}
-static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
+static int rxe_insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
{
struct rb_node **link = &pool->key.tree.rb_node;
struct rb_node *parent = NULL;
@@ -226,7 +227,7 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
if (cmp == 0) {
pr_warn("key already exists!\n");
- goto out;
+ return -EINVAL;
}
if (cmp > 0)
@@ -237,26 +238,32 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
rb_link_node(&new->key_node, parent, link);
rb_insert_color(&new->key_node, &pool->key.tree);
-out:
- return;
+
+ return 0;
}
-void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key)
+int __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key)
{
struct rxe_pool *pool = elem->pool;
+ int err;
memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size);
- insert_key(pool, elem);
+ err = rxe_insert_key(pool, elem);
+
+ return err;
}
-void __rxe_add_key(struct rxe_pool_entry *elem, void *key)
+int __rxe_add_key(struct rxe_pool_entry *elem, void *key)
{
struct rxe_pool *pool = elem->pool;
unsigned long flags;
+ int err;
write_lock_irqsave(&pool->pool_lock, flags);
- __rxe_add_key_locked(elem, key);
+ err = __rxe_add_key_locked(elem, key);
write_unlock_irqrestore(&pool->pool_lock, flags);
+
+ return err;
}
void __rxe_drop_key_locked(struct rxe_pool_entry *elem)
@@ -276,22 +283,28 @@ void __rxe_drop_key(struct rxe_pool_entry *elem)
write_unlock_irqrestore(&pool->pool_lock, flags);
}
-void __rxe_add_index_locked(struct rxe_pool_entry *elem)
+int __rxe_add_index_locked(struct rxe_pool_entry *elem)
{
struct rxe_pool *pool = elem->pool;
+ int err;
elem->index = alloc_index(pool);
- insert_index(pool, elem);
+ err = rxe_insert_index(pool, elem);
+
+ return err;
}
-void __rxe_add_index(struct rxe_pool_entry *elem)
+int __rxe_add_index(struct rxe_pool_entry *elem)
{
struct rxe_pool *pool = elem->pool;
unsigned long flags;
+ int err;
write_lock_irqsave(&pool->pool_lock, flags);
- __rxe_add_index_locked(elem);
+ err = __rxe_add_index_locked(elem);
write_unlock_irqrestore(&pool->pool_lock, flags);
+
+ return err;
}
void __rxe_drop_index_locked(struct rxe_pool_entry *elem)
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 61210b300a78..1feca1bffced 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -111,11 +111,11 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
/* assign an index to an indexed object and insert object into
* pool's rb tree holding and not holding the pool_lock
*/
-void __rxe_add_index_locked(struct rxe_pool_entry *elem);
+int __rxe_add_index_locked(struct rxe_pool_entry *elem);
#define rxe_add_index_locked(obj) __rxe_add_index_locked(&(obj)->pelem)
-void __rxe_add_index(struct rxe_pool_entry *elem);
+int __rxe_add_index(struct rxe_pool_entry *elem);
#define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem)
@@ -133,11 +133,11 @@ void __rxe_drop_index(struct rxe_pool_entry *elem);
/* assign a key to a keyed object and insert object into
* pool's rb tree holding and not holding pool_lock
*/
-void __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key);
+int __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key);
#define rxe_add_key_locked(obj, key) __rxe_add_key_locked(&(obj)->pelem, key)
-void __rxe_add_key(struct rxe_pool_entry *elem, void *key);
+int __rxe_add_key(struct rxe_pool_entry *elem, void *key);
#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key)
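Since __rxe_add_index() and __rxe_add_key() now report index or key collisions instead of only warning, callers can unwind on failure. A hypothetical caller pattern (not taken from the hunks in this diff) for an indexed object:

static int add_indexed_object(struct rxe_pool_entry *elem)
{
	int err;

	err = __rxe_add_index(elem);	/* may now fail on a duplicate index */
	if (err)
		return err;		/* caller drops its pool reference */

	/* ... continue initializing the object ... */
	return 0;
}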
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index b0f350d674fd..1ab6af7ddb25 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -136,7 +136,6 @@ static void free_rd_atomic_resources(struct rxe_qp *qp)
void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)
{
if (res->type == RXE_ATOMIC_MASK) {
- rxe_drop_ref(qp);
kfree_skb(res->atomic.skb);
} else if (res->type == RXE_READ_MASK) {
if (res->read.mr)
@@ -206,6 +205,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
{
int err;
int wqe_size;
+ enum queue_type type;
err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
if (err < 0)
@@ -231,7 +231,9 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->sq.max_inline = init->cap.max_inline_data = wqe_size;
wqe_size += sizeof(struct rxe_send_wqe);
- qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size);
+ type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
+ qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr,
+ wqe_size, type);
if (!qp->sq.queue)
return -ENOMEM;
@@ -246,7 +248,13 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
return err;
}
- qp->req.wqe_index = producer_index(qp->sq.queue);
+ if (qp->is_user)
+ qp->req.wqe_index = producer_index(qp->sq.queue,
+ QUEUE_TYPE_FROM_USER);
+ else
+ qp->req.wqe_index = producer_index(qp->sq.queue,
+ QUEUE_TYPE_KERNEL);
+
qp->req.state = QP_STATE_RESET;
qp->req.opcode = -1;
qp->comp.opcode = -1;
@@ -274,6 +282,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
{
int err;
int wqe_size;
+ enum queue_type type;
if (!qp->srq) {
qp->rq.max_wr = init->cap.max_recv_wr;
@@ -284,9 +293,9 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
- qp->rq.queue = rxe_queue_init(rxe,
- &qp->rq.max_wr,
- wqe_size);
+ type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
+ qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
+ wqe_size, type);
if (!qp->rq.queue)
return -ENOMEM;
@@ -304,6 +313,8 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
spin_lock_init(&qp->rq.producer_lock);
spin_lock_init(&qp->rq.consumer_lock);
+ qp->rq.is_user = qp->is_user;
+
skb_queue_head_init(&qp->resp_pkts);
rxe_init_task(rxe, &qp->resp.task, qp,
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index fa69241b1187..85b812586ed4 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -52,9 +52,8 @@ inline void rxe_queue_reset(struct rxe_queue *q)
memset(q->buf->data, 0, q->buf_size - sizeof(struct rxe_queue_buf));
}
-struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
- int *num_elem,
- unsigned int elem_size)
+struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem,
+ unsigned int elem_size, enum queue_type type)
{
struct rxe_queue *q;
size_t buf_size;
@@ -69,6 +68,7 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
goto err1;
q->rxe = rxe;
+ q->type = type;
/* used in resize, only need to copy used part of queue */
q->elem_size = elem_size;
@@ -111,14 +111,15 @@ err1:
static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q,
unsigned int num_elem)
{
- if (!queue_empty(q) && (num_elem < queue_count(q)))
+ if (!queue_empty(q, q->type) && (num_elem < queue_count(q, q->type)))
return -EINVAL;
- while (!queue_empty(q)) {
- memcpy(producer_addr(new_q), consumer_addr(q),
- new_q->elem_size);
- advance_producer(new_q);
- advance_consumer(q);
+ while (!queue_empty(q, q->type)) {
+ memcpy(producer_addr(new_q, new_q->type),
+ consumer_addr(q, q->type),
+ new_q->elem_size);
+ advance_producer(new_q, new_q->type);
+ advance_consumer(q, q->type);
}
swap(*q, *new_q);
@@ -136,7 +137,7 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p,
int err;
unsigned long flags = 0, flags1;
- new_q = rxe_queue_init(q->rxe, &num_elem, elem_size);
+ new_q = rxe_queue_init(q->rxe, &num_elem, elem_size, q->type);
if (!new_q)
return -ENOMEM;
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 2902ca7b288c..2702b0e55fc3 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -17,8 +17,29 @@
* up to a power of 2. Since the queue is empty when the
* producer and consumer indices match the maximum capacity
* of the queue is one less than the number of element slots
+ *
+ * Notes:
+ * - Kernel space indices are always masked off to q->index_mask
+ * before storing so do not need to be checked on reads.
+ * - User space indices may be out of range and must be
+ * masked before use when read.
+ * - The kernel indices for shared queues must not be written
+ * by user space so a local copy is used and a shared copy is
+ * stored when the local copy changes.
+ * - By passing the type in the parameter list separate from q
+ * the compiler can eliminate the switch statement when the
+ * actual queue type is known when the function is called.
+ * In the performance path this is done. In less critical
+ * paths just q->type is passed.
*/
+/* type of queue */
+enum queue_type {
+ QUEUE_TYPE_KERNEL,
+ QUEUE_TYPE_TO_USER,
+ QUEUE_TYPE_FROM_USER,
+};
+
struct rxe_queue {
struct rxe_dev *rxe;
struct rxe_queue_buf *buf;
@@ -27,6 +48,13 @@ struct rxe_queue {
size_t elem_size;
unsigned int log2_elem_size;
u32 index_mask;
+ enum queue_type type;
+ /* private copy of index for shared queues between
+ * kernel space and user space. Kernel reads and writes
+ * this copy and then replicates to rxe_queue_buf
+ * for read access by user space.
+ */
+ u32 index;
};
int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf,
@@ -35,9 +63,8 @@ int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf,
void rxe_queue_reset(struct rxe_queue *q);
-struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
- int *num_elem,
- unsigned int elem_size);
+struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem,
+ unsigned int elem_size, enum queue_type type);
int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p,
unsigned int elem_size, struct ib_udata *udata,
@@ -54,120 +81,235 @@ static inline int next_index(struct rxe_queue *q, int index)
return (index + 1) & q->buf->index_mask;
}
-static inline int queue_empty(struct rxe_queue *q)
+static inline int queue_empty(struct rxe_queue *q, enum queue_type type)
{
u32 prod;
u32 cons;
- /* make sure all changes to queue complete before
- * testing queue empty
- */
- prod = smp_load_acquire(&q->buf->producer_index);
- /* same */
- cons = smp_load_acquire(&q->buf->consumer_index);
+ switch (type) {
+ case QUEUE_TYPE_FROM_USER:
+ /* protect user space index */
+ prod = smp_load_acquire(&q->buf->producer_index);
+ cons = q->index;
+ break;
+ case QUEUE_TYPE_TO_USER:
+ prod = q->index;
+ /* protect user space index */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+ break;
+ case QUEUE_TYPE_KERNEL:
+ prod = q->buf->producer_index;
+ cons = q->buf->consumer_index;
+ break;
+ }
return ((prod - cons) & q->index_mask) == 0;
}
-static inline int queue_full(struct rxe_queue *q)
+static inline int queue_full(struct rxe_queue *q, enum queue_type type)
{
u32 prod;
u32 cons;
- /* make sure all changes to queue complete before
- * testing queue full
- */
- prod = smp_load_acquire(&q->buf->producer_index);
- /* same */
- cons = smp_load_acquire(&q->buf->consumer_index);
+ switch (type) {
+ case QUEUE_TYPE_FROM_USER:
+ /* protect user space index */
+ prod = smp_load_acquire(&q->buf->producer_index);
+ cons = q->index;
+ break;
+ case QUEUE_TYPE_TO_USER:
+ prod = q->index;
+ /* protect user space index */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+ break;
+ case QUEUE_TYPE_KERNEL:
+ prod = q->buf->producer_index;
+ cons = q->buf->consumer_index;
+ break;
+ }
return ((prod + 1 - cons) & q->index_mask) == 0;
}
-static inline void advance_producer(struct rxe_queue *q)
+static inline unsigned int queue_count(const struct rxe_queue *q,
+ enum queue_type type)
{
u32 prod;
+ u32 cons;
- prod = (q->buf->producer_index + 1) & q->index_mask;
+ switch (type) {
+ case QUEUE_TYPE_FROM_USER:
+ /* protect user space index */
+ prod = smp_load_acquire(&q->buf->producer_index);
+ cons = q->index;
+ break;
+ case QUEUE_TYPE_TO_USER:
+ prod = q->index;
+ /* protect user space index */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+ break;
+ case QUEUE_TYPE_KERNEL:
+ prod = q->buf->producer_index;
+ cons = q->buf->consumer_index;
+ break;
+ }
+
+ return (prod - cons) & q->index_mask;
+}
- /* make sure all changes to queue complete before
- * changing producer index
- */
- smp_store_release(&q->buf->producer_index, prod);
+static inline void advance_producer(struct rxe_queue *q, enum queue_type type)
+{
+ u32 prod;
+
+ switch (type) {
+ case QUEUE_TYPE_FROM_USER:
+ pr_warn_once("Normally kernel should not write user space index\n");
+ /* protect user space index */
+ prod = smp_load_acquire(&q->buf->producer_index);
+ prod = (prod + 1) & q->index_mask;
+ /* same */
+ smp_store_release(&q->buf->producer_index, prod);
+ break;
+ case QUEUE_TYPE_TO_USER:
+ prod = q->index;
+ q->index = (prod + 1) & q->index_mask;
+ q->buf->producer_index = q->index;
+ break;
+ case QUEUE_TYPE_KERNEL:
+ prod = q->buf->producer_index;
+ q->buf->producer_index = (prod + 1) & q->index_mask;
+ break;
+ }
}
-static inline void advance_consumer(struct rxe_queue *q)
+static inline void advance_consumer(struct rxe_queue *q, enum queue_type type)
{
u32 cons;
- cons = (q->buf->consumer_index + 1) & q->index_mask;
-
- /* make sure all changes to queue complete before
- * changing consumer index
- */
- smp_store_release(&q->buf->consumer_index, cons);
+ switch (type) {
+ case QUEUE_TYPE_FROM_USER:
+ cons = q->index;
+ q->index = (cons + 1) & q->index_mask;
+ q->buf->consumer_index = q->index;
+ break;
+ case QUEUE_TYPE_TO_USER:
+ pr_warn_once("Normally kernel should not write user space index\n");
+ /* protect user space index */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+ cons = (cons + 1) & q->index_mask;
+ /* same */
+ smp_store_release(&q->buf->consumer_index, cons);
+ break;
+ case QUEUE_TYPE_KERNEL:
+ cons = q->buf->consumer_index;
+ q->buf->consumer_index = (cons + 1) & q->index_mask;
+ break;
+ }
}
-static inline void *producer_addr(struct rxe_queue *q)
+static inline void *producer_addr(struct rxe_queue *q, enum queue_type type)
{
- return q->buf->data + ((q->buf->producer_index & q->index_mask)
- << q->log2_elem_size);
+ u32 prod;
+
+ switch (type) {
+ case QUEUE_TYPE_FROM_USER:
+ /* protect user space index */
+ prod = smp_load_acquire(&q->buf->producer_index);
+ prod &= q->index_mask;
+ break;
+ case QUEUE_TYPE_TO_USER:
+ prod = q->index;
+ break;
+ case QUEUE_TYPE_KERNEL:
+ prod = q->buf->producer_index;
+ break;
+ }
+
+ return q->buf->data + (prod << q->log2_elem_size);
}
-static inline void *consumer_addr(struct rxe_queue *q)
+static inline void *consumer_addr(struct rxe_queue *q, enum queue_type type)
{
- return q->buf->data + ((q->buf->consumer_index & q->index_mask)
- << q->log2_elem_size);
+ u32 cons;
+
+ switch (type) {
+ case QUEUE_TYPE_FROM_USER:
+ cons = q->index;
+ break;
+ case QUEUE_TYPE_TO_USER:
+ /* protect user space index */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+ cons &= q->index_mask;
+ break;
+ case QUEUE_TYPE_KERNEL:
+ cons = q->buf->consumer_index;
+ break;
+ }
+
+ return q->buf->data + (cons << q->log2_elem_size);
}
-static inline unsigned int producer_index(struct rxe_queue *q)
+static inline unsigned int producer_index(struct rxe_queue *q,
+ enum queue_type type)
{
- u32 index;
-
- /* make sure all changes to queue
- * complete before getting producer index
- */
- index = smp_load_acquire(&q->buf->producer_index);
- index &= q->index_mask;
+ u32 prod;
- return index;
+ switch (type) {
+ case QUEUE_TYPE_FROM_USER:
+ /* protect user space index */
+ prod = smp_load_acquire(&q->buf->producer_index);
+ prod &= q->index_mask;
+ break;
+ case QUEUE_TYPE_TO_USER:
+ prod = q->index;
+ break;
+ case QUEUE_TYPE_KERNEL:
+ prod = q->buf->producer_index;
+ break;
+ }
+
+ return prod;
}
-static inline unsigned int consumer_index(struct rxe_queue *q)
+static inline unsigned int consumer_index(struct rxe_queue *q,
+ enum queue_type type)
{
- u32 index;
-
- /* make sure all changes to queue
- * complete before getting consumer index
- */
- index = smp_load_acquire(&q->buf->consumer_index);
- index &= q->index_mask;
+ u32 cons;
- return index;
+ switch (type) {
+ case QUEUE_TYPE_FROM_USER:
+ cons = q->index;
+ break;
+ case QUEUE_TYPE_TO_USER:
+ /* protect user space index */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+ cons &= q->index_mask;
+ break;
+ case QUEUE_TYPE_KERNEL:
+ cons = q->buf->consumer_index;
+ break;
+ }
+
+ return cons;
}
-static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
+static inline void *addr_from_index(struct rxe_queue *q,
+ unsigned int index)
{
return q->buf->data + ((index & q->index_mask)
<< q->buf->log2_elem_size);
}
static inline unsigned int index_from_addr(const struct rxe_queue *q,
- const void *addr)
+ const void *addr)
{
return (((u8 *)addr - q->buf->data) >> q->log2_elem_size)
- & q->index_mask;
-}
-
-static inline unsigned int queue_count(const struct rxe_queue *q)
-{
- return (q->buf->producer_index - q->buf->consumer_index)
- & q->index_mask;
+ & q->index_mask;
}
-static inline void *queue_head(struct rxe_queue *q)
+static inline void *queue_head(struct rxe_queue *q, enum queue_type type)
{
- return queue_empty(q) ? NULL : consumer_addr(q);
+ return queue_empty(q, type) ? NULL : consumer_addr(q, type);
}
#endif /* RXE_QUEUE_H */
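The comment block added at the top of this header describes the convention: the kernel owns one index of a shared queue, masks it before storing, treats the peer index written by user space as untrusted, and receives the queue type as an explicit parameter so the switch folds away when the type is known at the call site. A hedged kernel-style sketch of a producer written against these helpers (hypothetical, mirroring the rxe_cq_post() hunk earlier in this diff):

static int produce_one(struct rxe_queue *q, const void *elem,
		       bool shared_with_user)
{
	/* a queue the kernel produces into and user space consumes is
	 * QUEUE_TYPE_TO_USER; a purely in-kernel queue is QUEUE_TYPE_KERNEL
	 */
	enum queue_type type = shared_with_user ? QUEUE_TYPE_TO_USER
						: QUEUE_TYPE_KERNEL;

	if (queue_full(q, type))
		return -EBUSY;

	memcpy(producer_addr(q, type), elem, q->elem_size);
	advance_producer(q, type);

	return 0;
}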
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 3664cdae7e1f..c57699cc6578 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -45,14 +45,24 @@ static void req_retry(struct rxe_qp *qp)
unsigned int mask;
int npsn;
int first = 1;
+ struct rxe_queue *q = qp->sq.queue;
+ unsigned int cons;
+ unsigned int prod;
- qp->req.wqe_index = consumer_index(qp->sq.queue);
+ if (qp->is_user) {
+ cons = consumer_index(q, QUEUE_TYPE_FROM_USER);
+ prod = producer_index(q, QUEUE_TYPE_FROM_USER);
+ } else {
+ cons = consumer_index(q, QUEUE_TYPE_KERNEL);
+ prod = producer_index(q, QUEUE_TYPE_KERNEL);
+ }
+
+ qp->req.wqe_index = cons;
qp->req.psn = qp->comp.psn;
qp->req.opcode = -1;
- for (wqe_index = consumer_index(qp->sq.queue);
- wqe_index != producer_index(qp->sq.queue);
- wqe_index = next_index(qp->sq.queue, wqe_index)) {
+ for (wqe_index = cons; wqe_index != prod;
+ wqe_index = next_index(q, wqe_index)) {
wqe = addr_from_index(qp->sq.queue, wqe_index);
mask = wr_opcode_mask(wqe->wr.opcode, qp);
@@ -104,8 +114,22 @@ void rnr_nak_timer(struct timer_list *t)
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
- struct rxe_send_wqe *wqe = queue_head(qp->sq.queue);
+ struct rxe_send_wqe *wqe;
unsigned long flags;
+ struct rxe_queue *q = qp->sq.queue;
+ unsigned int index = qp->req.wqe_index;
+ unsigned int cons;
+ unsigned int prod;
+
+ if (qp->is_user) {
+ wqe = queue_head(q, QUEUE_TYPE_FROM_USER);
+ cons = consumer_index(q, QUEUE_TYPE_FROM_USER);
+ prod = producer_index(q, QUEUE_TYPE_FROM_USER);
+ } else {
+ wqe = queue_head(q, QUEUE_TYPE_KERNEL);
+ cons = consumer_index(q, QUEUE_TYPE_KERNEL);
+ prod = producer_index(q, QUEUE_TYPE_KERNEL);
+ }
if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
/* check to see if we are drained;
@@ -120,8 +144,7 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
break;
}
- if (wqe && ((qp->req.wqe_index !=
- consumer_index(qp->sq.queue)) ||
+ if (wqe && ((index != cons) ||
(wqe->state != wqe_state_posted))) {
/* comp not done yet */
spin_unlock_irqrestore(&qp->state_lock,
@@ -144,10 +167,10 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
} while (0);
}
- if (qp->req.wqe_index == producer_index(qp->sq.queue))
+ if (index == prod)
return NULL;
- wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index);
+ wqe = addr_from_index(q, index);
if (unlikely((qp->req.state == QP_STATE_DRAIN ||
qp->req.state == QP_STATE_DRAINED) &&
@@ -155,7 +178,7 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
return NULL;
if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
- (qp->req.wqe_index != consumer_index(qp->sq.queue)))) {
+ (index != cons))) {
qp->req.wait_fence = 1;
return NULL;
}
@@ -439,7 +462,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
return skb;
}
-static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
struct rxe_pkt_info *pkt, struct sk_buff *skb,
int paylen)
{
@@ -464,7 +487,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
} else {
err = copy_data(qp->pd, 0, &wqe->dma,
payload_addr(pkt), paylen,
- from_mr_obj,
+ RXE_FROM_MR_OBJ,
&crc);
if (err)
return err;
@@ -555,6 +578,60 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
jiffies + qp->qp_timeout_jiffies);
}
+static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ u8 opcode = wqe->wr.opcode;
+ struct rxe_mr *mr;
+ u32 rkey;
+ int ret;
+
+ switch (opcode) {
+ case IB_WR_LOCAL_INV:
+ rkey = wqe->wr.ex.invalidate_rkey;
+ if (rkey_is_mw(rkey))
+ ret = rxe_invalidate_mw(qp, rkey);
+ else
+ ret = rxe_invalidate_mr(qp, rkey);
+
+ if (unlikely(ret)) {
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
+ return ret;
+ }
+ break;
+ case IB_WR_REG_MR:
+ mr = to_rmr(wqe->wr.wr.reg.mr);
+ rxe_add_ref(mr);
+ mr->state = RXE_MR_STATE_VALID;
+ mr->access = wqe->wr.wr.reg.access;
+ mr->ibmr.lkey = wqe->wr.wr.reg.key;
+ mr->ibmr.rkey = wqe->wr.wr.reg.key;
+ mr->iova = wqe->wr.wr.reg.mr->iova;
+ rxe_drop_ref(mr);
+ break;
+ case IB_WR_BIND_MW:
+ ret = rxe_bind_mw(qp, wqe);
+ if (unlikely(ret)) {
+ wqe->status = IB_WC_MW_BIND_ERR;
+ return ret;
+ }
+ break;
+ default:
+ pr_err("Unexpected send wqe opcode %d\n", opcode);
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
+ return -EINVAL;
+ }
+
+ wqe->state = wqe_state_done;
+ wqe->status = IB_WC_SUCCESS;
+ qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
+
+ if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+ qp->sq_sig_type == IB_SIGNAL_ALL_WR)
+ rxe_run_task(&qp->comp.task, 1);
+
+ return 0;
+}
+
int rxe_requester(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
@@ -568,6 +645,7 @@ int rxe_requester(void *arg)
int ret;
struct rxe_send_wqe rollback_wqe;
u32 rollback_psn;
+ struct rxe_queue *q = qp->sq.queue;
rxe_add_ref(qp);
@@ -576,7 +654,7 @@ next_wqe:
goto exit;
if (unlikely(qp->req.state == QP_STATE_RESET)) {
- qp->req.wqe_index = consumer_index(qp->sq.queue);
+ qp->req.wqe_index = consumer_index(q, q->type);
qp->req.opcode = -1;
qp->req.need_rd_atomic = 0;
qp->req.wait_psn = 0;
@@ -593,43 +671,12 @@ next_wqe:
if (unlikely(!wqe))
goto exit;
- if (wqe->mask & WR_REG_MASK) {
- if (wqe->wr.opcode == IB_WR_LOCAL_INV) {
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- struct rxe_mr *rmr;
-
- rmr = rxe_pool_get_index(&rxe->mr_pool,
- wqe->wr.ex.invalidate_rkey >> 8);
- if (!rmr) {
- pr_err("No mr for key %#x\n",
- wqe->wr.ex.invalidate_rkey);
- wqe->state = wqe_state_error;
- wqe->status = IB_WC_MW_BIND_ERR;
- goto exit;
- }
- rmr->state = RXE_MR_STATE_FREE;
- rxe_drop_ref(rmr);
- wqe->state = wqe_state_done;
- wqe->status = IB_WC_SUCCESS;
- } else if (wqe->wr.opcode == IB_WR_REG_MR) {
- struct rxe_mr *rmr = to_rmr(wqe->wr.wr.reg.mr);
-
- rmr->state = RXE_MR_STATE_VALID;
- rmr->access = wqe->wr.wr.reg.access;
- rmr->ibmr.lkey = wqe->wr.wr.reg.key;
- rmr->ibmr.rkey = wqe->wr.wr.reg.key;
- rmr->iova = wqe->wr.wr.reg.mr->iova;
- wqe->state = wqe_state_done;
- wqe->status = IB_WC_SUCCESS;
- } else {
- goto exit;
- }
- if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- qp->sq_sig_type == IB_SIGNAL_ALL_WR)
- rxe_run_task(&qp->comp.task, 1);
- qp->req.wqe_index = next_index(qp->sq.queue,
- qp->req.wqe_index);
- goto next_wqe;
+ if (wqe->mask & WR_LOCAL_OP_MASK) {
+ ret = rxe_do_local_ops(qp, wqe);
+ if (unlikely(ret))
+ goto err;
+ else
+ goto next_wqe;
}
if (unlikely(qp_type(qp) == IB_QPT_RC &&
@@ -687,11 +734,17 @@ next_wqe:
skb = init_req_packet(qp, wqe, opcode, payload, &pkt);
if (unlikely(!skb)) {
pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
goto err;
}
- if (fill_packet(qp, wqe, &pkt, skb, payload)) {
- pr_debug("qp#%d Error during fill packet\n", qp_num(qp));
+ ret = finish_packet(qp, wqe, &pkt, skb, payload);
+ if (unlikely(ret)) {
+ pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
+ if (ret == -EFAULT)
+ wqe->status = IB_WC_LOC_PROT_ERR;
+ else
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
kfree_skb(skb);
goto err;
}
@@ -716,6 +769,7 @@ next_wqe:
goto exit;
}
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
goto err;
}
@@ -724,7 +778,6 @@ next_wqe:
goto next_wqe;
err:
- wqe->status = IB_WC_LOC_PROT_ERR;
wqe->state = wqe_state_error;
__rxe_do_task(&qp->comp.task);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 2b220659bddb..f8a7ccd4d8b7 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -35,6 +35,7 @@ enum resp_states {
RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
RESPST_ERR_RNR,
RESPST_ERR_RKEY_VIOLATION,
+ RESPST_ERR_INVALIDATE_RKEY,
RESPST_ERR_LENGTH,
RESPST_ERR_CQ_OVERFLOW,
RESPST_ERROR,
@@ -68,6 +69,7 @@ static char *resp_state_name[] = {
[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
[RESPST_ERR_RNR] = "ERR_RNR",
[RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
+ [RESPST_ERR_INVALIDATE_RKEY] = "ERR_INVALIDATE_RKEY_VIOLATION",
[RESPST_ERR_LENGTH] = "ERR_LENGTH",
[RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
[RESPST_ERROR] = "ERROR",
@@ -293,13 +295,17 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
struct rxe_queue *q = srq->rq.queue;
struct rxe_recv_wqe *wqe;
struct ib_event ev;
+ unsigned int count;
if (srq->error)
return RESPST_ERR_RNR;
spin_lock_bh(&srq->rq.consumer_lock);
- wqe = queue_head(q);
+ if (qp->is_user)
+ wqe = queue_head(q, QUEUE_TYPE_FROM_USER);
+ else
+ wqe = queue_head(q, QUEUE_TYPE_KERNEL);
if (!wqe) {
spin_unlock_bh(&srq->rq.consumer_lock);
return RESPST_ERR_RNR;
@@ -309,10 +315,15 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe));
qp->resp.wqe = &qp->resp.srq_wqe.wqe;
- advance_consumer(q);
+ if (qp->is_user) {
+ advance_consumer(q, QUEUE_TYPE_FROM_USER);
+ count = queue_count(q, QUEUE_TYPE_FROM_USER);
+ } else {
+ advance_consumer(q, QUEUE_TYPE_KERNEL);
+ count = queue_count(q, QUEUE_TYPE_KERNEL);
+ }
- if (srq->limit && srq->ibsrq.event_handler &&
- (queue_count(q) < srq->limit)) {
+ if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
srq->limit = 0;
goto event;
}
@@ -339,7 +350,12 @@ static enum resp_states check_resource(struct rxe_qp *qp,
qp->resp.status = IB_WC_WR_FLUSH_ERR;
return RESPST_COMPLETE;
} else if (!srq) {
- qp->resp.wqe = queue_head(qp->rq.queue);
+ if (qp->is_user)
+ qp->resp.wqe = queue_head(qp->rq.queue,
+ QUEUE_TYPE_FROM_USER);
+ else
+ qp->resp.wqe = queue_head(qp->rq.queue,
+ QUEUE_TYPE_KERNEL);
if (qp->resp.wqe) {
qp->resp.status = IB_WC_WR_FLUSH_ERR;
return RESPST_COMPLETE;
@@ -366,7 +382,12 @@ static enum resp_states check_resource(struct rxe_qp *qp,
if (srq)
return get_srq_wqe(qp);
- qp->resp.wqe = queue_head(qp->rq.queue);
+ if (qp->is_user)
+ qp->resp.wqe = queue_head(qp->rq.queue,
+ QUEUE_TYPE_FROM_USER);
+ else
+ qp->resp.wqe = queue_head(qp->rq.queue,
+ QUEUE_TYPE_KERNEL);
return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
}
@@ -392,6 +413,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
struct rxe_mr *mr = NULL;
+ struct rxe_mw *mw = NULL;
u64 va;
u32 rkey;
u32 resid;
@@ -403,6 +425,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {
if (pkt->mask & RXE_RETH_MASK) {
qp->resp.va = reth_va(pkt);
+ qp->resp.offset = 0;
qp->resp.rkey = reth_rkey(pkt);
qp->resp.resid = reth_len(pkt);
qp->resp.length = reth_len(pkt);
@@ -411,6 +434,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
: IB_ACCESS_REMOTE_WRITE;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
qp->resp.va = atmeth_va(pkt);
+ qp->resp.offset = 0;
qp->resp.rkey = atmeth_rkey(pkt);
qp->resp.resid = sizeof(u64);
access = IB_ACCESS_REMOTE_ATOMIC;
@@ -430,18 +454,36 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
resid = qp->resp.resid;
pktlen = payload_size(pkt);
- mr = lookup_mr(qp->pd, access, rkey, lookup_remote);
- if (!mr) {
- state = RESPST_ERR_RKEY_VIOLATION;
- goto err;
- }
+ if (rkey_is_mw(rkey)) {
+ mw = rxe_lookup_mw(qp, access, rkey);
+ if (!mw) {
+ pr_err("%s: no MW matches rkey %#x\n", __func__, rkey);
+ state = RESPST_ERR_RKEY_VIOLATION;
+ goto err;
+ }
- if (unlikely(mr->state == RXE_MR_STATE_FREE)) {
- state = RESPST_ERR_RKEY_VIOLATION;
- goto err;
+ mr = mw->mr;
+ if (!mr) {
+ pr_err("%s: MW doesn't have an MR\n", __func__);
+ state = RESPST_ERR_RKEY_VIOLATION;
+ goto err;
+ }
+
+ if (mw->access & IB_ZERO_BASED)
+ qp->resp.offset = mw->addr;
+
+ rxe_drop_ref(mw);
+ rxe_add_ref(mr);
+ } else {
+ mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
+ if (!mr) {
+ pr_err("%s: no MR matches rkey %#x\n", __func__, rkey);
+ state = RESPST_ERR_RKEY_VIOLATION;
+ goto err;
+ }
}
- if (mr_check_range(mr, va, resid)) {
+ if (mr_check_range(mr, va + qp->resp.offset, resid)) {
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@@ -475,6 +517,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
err:
if (mr)
rxe_drop_ref(mr);
+ if (mw)
+ rxe_drop_ref(mw);
+
return state;
}
@@ -484,7 +529,7 @@ static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
int err;
err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
- data_addr, data_len, to_mr_obj, NULL);
+ data_addr, data_len, RXE_TO_MR_OBJ, NULL);
if (unlikely(err))
return (err == -ENOSPC) ? RESPST_ERR_LENGTH
: RESPST_ERR_MALFORMED_WQE;
@@ -499,8 +544,8 @@ static enum resp_states write_data_in(struct rxe_qp *qp,
int err;
int data_len = payload_size(pkt);
- err = rxe_mr_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt), data_len,
- to_mr_obj, NULL);
+ err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
+ payload_addr(pkt), data_len, RXE_TO_MR_OBJ, NULL);
if (err) {
rc = RESPST_ERR_RKEY_VIOLATION;
goto out;
@@ -519,7 +564,6 @@ static DEFINE_SPINLOCK(atomic_ops_lock);
static enum resp_states process_atomic(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
- u64 iova = atmeth_va(pkt);
u64 *vaddr;
enum resp_states ret;
struct rxe_mr *mr = qp->resp.mr;
@@ -529,7 +573,7 @@ static enum resp_states process_atomic(struct rxe_qp *qp,
goto out;
}
- vaddr = iova_to_vaddr(mr, iova, sizeof(u64));
+ vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, sizeof(u64));
	/* check vaddr is 8-byte aligned. */
if (!vaddr || (uintptr_t)vaddr & 7) {
@@ -653,8 +697,10 @@ static enum resp_states read_reply(struct rxe_qp *qp,
res->type = RXE_READ_MASK;
res->replay = 0;
- res->read.va = qp->resp.va;
- res->read.va_org = qp->resp.va;
+ res->read.va = qp->resp.va +
+ qp->resp.offset;
+ res->read.va_org = qp->resp.va +
+ qp->resp.offset;
res->first_psn = req_pkt->psn;
@@ -701,7 +747,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
return RESPST_ERR_RNR;
err = rxe_mr_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
- payload, from_mr_obj, &icrc);
+ payload, RXE_FROM_MR_OBJ, &icrc);
if (err)
pr_err("Failed copying memory\n");
@@ -751,6 +797,14 @@ static void build_rdma_network_hdr(union rdma_network_hdr *hdr,
memcpy(&hdr->ibgrh, ipv6_hdr(skb), sizeof(hdr->ibgrh));
}
+static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
+{
+ if (rkey_is_mw(rkey))
+ return rxe_invalidate_mw(qp, rkey);
+ else
+ return rxe_invalidate_mr(qp, rkey);
+}
+
/* Executes a new request. A retried request never reaches this function
 * (sends and writes are discarded, and reads and atomics are retried
 * elsewhere).
 */
@@ -790,6 +844,14 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
WARN_ON_ONCE(1);
}
+ if (pkt->mask & RXE_IETH_MASK) {
+ u32 rkey = ieth_rkey(pkt);
+
+ err = invalidate_rkey(qp, rkey);
+ if (err)
+ return RESPST_ERR_INVALIDATE_RKEY;
+ }
+
/* next expected psn, read handles this separately */
qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
qp->resp.ack_psn = qp->resp.psn;
@@ -822,13 +884,13 @@ static enum resp_states do_complete(struct rxe_qp *qp,
memset(&cqe, 0, sizeof(cqe));
if (qp->rcq->is_user) {
- uwc->status = qp->resp.status;
- uwc->qp_num = qp->ibqp.qp_num;
- uwc->wr_id = wqe->wr_id;
+ uwc->status = qp->resp.status;
+ uwc->qp_num = qp->ibqp.qp_num;
+ uwc->wr_id = wqe->wr_id;
} else {
- wc->status = qp->resp.status;
- wc->qp = &qp->ibqp;
- wc->wr_id = wqe->wr_id;
+ wc->status = qp->resp.status;
+ wc->qp = &qp->ibqp;
+ wc->wr_id = wqe->wr_id;
}
if (wc->status == IB_WC_SUCCESS) {
@@ -883,34 +945,25 @@ static enum resp_states do_complete(struct rxe_qp *qp,
}
if (pkt->mask & RXE_IETH_MASK) {
- struct rxe_mr *rmr;
-
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
wc->ex.invalidate_rkey = ieth_rkey(pkt);
-
- rmr = rxe_pool_get_index(&rxe->mr_pool,
- wc->ex.invalidate_rkey >> 8);
- if (unlikely(!rmr)) {
- pr_err("Bad rkey %#x invalidation\n",
- wc->ex.invalidate_rkey);
- return RESPST_ERROR;
- }
- rmr->state = RXE_MR_STATE_FREE;
- rxe_drop_ref(rmr);
}
- wc->qp = &qp->ibqp;
-
if (pkt->mask & RXE_DETH_MASK)
wc->src_qp = deth_sqp(pkt);
+ wc->qp = &qp->ibqp;
wc->port_num = qp->attr.port_num;
}
}
/* have copy for srq and reference for !srq */
- if (!qp->srq)
- advance_consumer(qp->rq.queue);
+ if (!qp->srq) {
+ if (qp->is_user)
+ advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_USER);
+ else
+ advance_consumer(qp->rq.queue, QUEUE_TYPE_KERNEL);
+ }
qp->resp.wqe = NULL;
@@ -966,8 +1019,6 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
goto out;
}
- rxe_add_ref(qp);
-
res = &qp->resp.resources[qp->resp.res_head];
free_rd_atomic_resource(qp, res);
rxe_advance_resp_resource(qp);
@@ -1176,6 +1227,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
{
struct sk_buff *skb;
+ struct rxe_queue *q = qp->rq.queue;
while ((skb = skb_dequeue(&qp->req_pkts))) {
rxe_drop_ref(qp);
@@ -1186,8 +1238,8 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
if (notify)
return;
- while (!qp->srq && qp->rq.queue && queue_head(qp->rq.queue))
- advance_consumer(qp->rq.queue);
+ while (!qp->srq && q && queue_head(q, q->type))
+ advance_consumer(q, q->type);
}
int rxe_responder(void *arg)
@@ -1314,6 +1366,13 @@ int rxe_responder(void *arg)
}
break;
+ case RESPST_ERR_INVALIDATE_RKEY:
+ /* RC - Class J. */
+ qp->resp.goto_error = 1;
+ qp->resp.status = IB_WC_REM_INV_REQ_ERR;
+ state = RESPST_COMPLETE;
+ break;
+
case RESPST_ERR_LENGTH:
if (qp_type(qp) == IB_QPT_RC) {
/* Class C */
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 41b0d1e11baf..610c98d24b5c 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -78,6 +78,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
int err;
int srq_wqe_size;
struct rxe_queue *q;
+ enum queue_type type;
srq->ibsrq.event_handler = init->event_handler;
srq->ibsrq.srq_context = init->srq_context;
@@ -85,14 +86,16 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
srq->srq_num = srq->pelem.index;
srq->rq.max_wr = init->attr.max_wr;
srq->rq.max_sge = init->attr.max_sge;
+ srq->rq.is_user = srq->is_user;
srq_wqe_size = rcv_wqe_size(srq->rq.max_sge);
spin_lock_init(&srq->rq.producer_lock);
spin_lock_init(&srq->rq.consumer_lock);
+ type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
q = rxe_queue_init(rxe, &srq->rq.max_wr,
- srq_wqe_size);
+ srq_wqe_size, type);
if (!q) {
pr_warn("unable to allocate queue for srq\n");
return -ENOMEM;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index aeb5e232c195..c223959ac174 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -216,8 +216,14 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
u32 length;
struct rxe_recv_wqe *recv_wqe;
int num_sge = ibwr->num_sge;
+ int full;
- if (unlikely(queue_full(rq->queue))) {
+ if (rq->is_user)
+ full = queue_full(rq->queue, QUEUE_TYPE_FROM_USER);
+ else
+ full = queue_full(rq->queue, QUEUE_TYPE_KERNEL);
+
+ if (unlikely(full)) {
err = -ENOMEM;
goto err1;
}
@@ -231,7 +237,11 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
for (i = 0; i < num_sge; i++)
length += ibwr->sg_list[i].length;
- recv_wqe = producer_addr(rq->queue);
+ if (rq->is_user)
+ recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_FROM_USER);
+ else
+ recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_KERNEL);
+
recv_wqe->wr_id = ibwr->wr_id;
recv_wqe->num_sge = num_sge;
@@ -244,7 +254,11 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
recv_wqe->dma.cur_sge = 0;
recv_wqe->dma.sge_offset = 0;
- advance_producer(rq->queue);
+ if (rq->is_user)
+ advance_producer(rq->queue, QUEUE_TYPE_FROM_USER);
+ else
+ advance_producer(rq->queue, QUEUE_TYPE_KERNEL);
+
return 0;
err1:
@@ -267,6 +281,9 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
if (udata->outlen < sizeof(*uresp))
return -EINVAL;
uresp = udata->outbuf;
+ srq->is_user = true;
+ } else {
+ srq->is_user = false;
}
err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
@@ -408,7 +425,9 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
err = -EINVAL;
goto err2;
}
- qp->is_user = 1;
+ qp->is_user = true;
+ } else {
+ qp->is_user = false;
}
rxe_add_index(qp);
@@ -577,7 +596,7 @@ static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
init_send_wr(qp, &wqe->wr, ibwr);
/* local operation */
- if (unlikely(mask & WR_REG_MASK)) {
+ if (unlikely(mask & WR_LOCAL_OP_MASK)) {
wqe->mask = mask;
wqe->state = wqe_state_posted;
return;
@@ -613,6 +632,7 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
struct rxe_sq *sq = &qp->sq;
struct rxe_send_wqe *send_wqe;
unsigned long flags;
+ int full;
err = validate_send_wr(qp, ibwr, mask, length);
if (err)
@@ -620,22 +640,31 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
spin_lock_irqsave(&qp->sq.sq_lock, flags);
- if (unlikely(queue_full(sq->queue))) {
- err = -ENOMEM;
- goto err1;
+ if (qp->is_user)
+ full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER);
+ else
+ full = queue_full(sq->queue, QUEUE_TYPE_KERNEL);
+
+ if (unlikely(full)) {
+ spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
+ return -ENOMEM;
}
- send_wqe = producer_addr(sq->queue);
+ if (qp->is_user)
+ send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER);
+ else
+ send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
+
init_send_wqe(qp, ibwr, mask, length, send_wqe);
- advance_producer(sq->queue);
+ if (qp->is_user)
+ advance_producer(sq->queue, QUEUE_TYPE_FROM_USER);
+ else
+ advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
+
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
return 0;
-
-err1:
- spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
- return err;
}
static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
@@ -823,12 +852,18 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
spin_lock_irqsave(&cq->cq_lock, flags);
for (i = 0; i < num_entries; i++) {
- cqe = queue_head(cq->queue);
+ if (cq->is_user)
+ cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER);
+ else
+ cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
if (!cqe)
break;
memcpy(wc++, &cqe->ibwc, sizeof(*wc));
- advance_consumer(cq->queue);
+ if (cq->is_user)
+ advance_consumer(cq->queue, QUEUE_TYPE_TO_USER);
+ else
+ advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
}
spin_unlock_irqrestore(&cq->cq_lock, flags);
@@ -838,7 +873,12 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
struct rxe_cq *cq = to_rcq(ibcq);
- int count = queue_count(cq->queue);
+ int count;
+
+ if (cq->is_user)
+ count = queue_count(cq->queue, QUEUE_TYPE_TO_USER);
+ else
+ count = queue_count(cq->queue, QUEUE_TYPE_KERNEL);
return (count > wc_cnt) ? wc_cnt : count;
}
@@ -848,12 +888,18 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
struct rxe_cq *cq = to_rcq(ibcq);
unsigned long irq_flags;
int ret = 0;
+ int empty;
spin_lock_irqsave(&cq->cq_lock, irq_flags);
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = flags & IB_CQ_SOLICITED_MASK;
- if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
+ if (cq->is_user)
+ empty = queue_empty(cq->queue, QUEUE_TYPE_TO_USER);
+ else
+ empty = queue_empty(cq->queue, QUEUE_TYPE_KERNEL);
+
+ if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
ret = 1;
spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
@@ -899,7 +945,7 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
rxe_add_ref(pd);
- err = rxe_mr_init_user(pd, start, length, iova, access, udata, mr);
+ err = rxe_mr_init_user(pd, start, length, iova, access, mr);
if (err)
goto err3;
@@ -913,17 +959,6 @@ err2:
return ERR_PTR(err);
}
-static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
-{
- struct rxe_mr *mr = to_rmr(ibmr);
-
- mr->state = RXE_MR_STATE_ZOMBIE;
- rxe_drop_ref(mr_pd(mr));
- rxe_drop_index(mr);
- rxe_drop_ref(mr);
- return 0;
-}
-
static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
u32 max_num_sg)
{
@@ -1058,8 +1093,9 @@ static const struct ib_device_ops rxe_dev_ops = {
.driver_id = RDMA_DRIVER_RXE,
.uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,
- .alloc_hw_stats = rxe_ib_alloc_hw_stats,
+ .alloc_hw_port_stats = rxe_ib_alloc_hw_port_stats,
.alloc_mr = rxe_alloc_mr,
+ .alloc_mw = rxe_alloc_mw,
.alloc_pd = rxe_alloc_pd,
.alloc_ucontext = rxe_alloc_ucontext,
.attach_mcast = rxe_attach_mcast,
@@ -1069,6 +1105,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.create_srq = rxe_create_srq,
.create_user_ah = rxe_create_ah,
.dealloc_driver = rxe_dealloc,
+ .dealloc_mw = rxe_dealloc_mw,
.dealloc_pd = rxe_dealloc_pd,
.dealloc_ucontext = rxe_dealloc_ucontext,
.dereg_mr = rxe_dereg_mr,
@@ -1077,6 +1114,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.destroy_qp = rxe_destroy_qp,
.destroy_srq = rxe_destroy_srq,
.detach_mcast = rxe_detach_mcast,
+ .device_group = &rxe_attr_group,
.enable_driver = rxe_enable_driver,
.get_dma_mr = rxe_get_dma_mr,
.get_hw_stats = rxe_ib_get_hw_stats,
@@ -1143,7 +1181,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
}
rxe->tfm = tfm;
- rdma_set_device_sysfs_group(dev, &rxe_attr_group);
err = ib_register_device(dev, ibdev_name, NULL);
if (err)
pr_warn("%s failed with error %d\n", __func__, err);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 11eba7a3ba8f..959a3260fcab 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -77,6 +77,7 @@ enum wqe_state {
};
struct rxe_sq {
+ bool is_user;
int max_wr;
int max_sge;
int max_inline;
@@ -85,6 +86,7 @@ struct rxe_sq {
};
struct rxe_rq {
+ bool is_user;
int max_wr;
int max_sge;
spinlock_t producer_lock; /* guard queue producer */
@@ -98,6 +100,7 @@ struct rxe_srq {
struct rxe_pd *pd;
struct rxe_rq rq;
u32 srq_num;
+ bool is_user;
int limit;
int error;
@@ -183,6 +186,7 @@ struct rxe_resp_info {
/* RDMA read / atomic only */
u64 va;
+ u64 offset;
struct rxe_mr *mr;
u32 resid;
u32 rkey;
@@ -211,7 +215,7 @@ struct rxe_qp {
struct ib_qp_attr attr;
unsigned int valid;
unsigned int mtu;
- int is_user;
+ bool is_user;
struct rxe_pd *pd;
struct rxe_srq *srq;
@@ -273,7 +277,16 @@ enum rxe_mr_type {
RXE_MR_TYPE_NONE,
RXE_MR_TYPE_DMA,
RXE_MR_TYPE_MR,
- RXE_MR_TYPE_MW,
+};
+
+enum rxe_mr_copy_dir {
+ RXE_TO_MR_OBJ,
+ RXE_FROM_MR_OBJ,
+};
+
+enum rxe_mr_lookup_type {
+ RXE_LOOKUP_LOCAL,
+ RXE_LOOKUP_REMOTE,
};
#define RXE_BUF_PER_MAP (PAGE_SIZE / sizeof(struct rxe_phys_buf))
@@ -287,6 +300,13 @@ struct rxe_map {
struct rxe_phys_buf buf[RXE_BUF_PER_MAP];
};
+static inline int rkey_is_mw(u32 rkey)
+{
+ u32 index = rkey >> 8;
+
+ return (index >= RXE_MIN_MW_INDEX) && (index <= RXE_MAX_MW_INDEX);
+}
+
struct rxe_mr {
struct rxe_pool_entry pelem;
struct ib_mr ibmr;
@@ -312,18 +332,27 @@ struct rxe_mr {
u32 max_buf;
u32 num_map;
+ atomic_t num_mw;
+
struct rxe_map **map;
};
enum rxe_mw_state {
- RXE_MW_STATE_INVALID = RXE_MR_STATE_INVALID,
- RXE_MW_STATE_FREE = RXE_MR_STATE_FREE,
- RXE_MW_STATE_VALID = RXE_MR_STATE_VALID,
+ RXE_MW_STATE_INVALID = RXE_MR_STATE_INVALID,
+ RXE_MW_STATE_FREE = RXE_MR_STATE_FREE,
+ RXE_MW_STATE_VALID = RXE_MR_STATE_VALID,
};
struct rxe_mw {
- struct ib_mw ibmw;
- struct rxe_pool_entry pelem;
+ struct ib_mw ibmw;
+ struct rxe_pool_entry pelem;
+ spinlock_t lock;
+ enum rxe_mw_state state;
+ struct rxe_qp *qp; /* Type 2 only */
+ struct rxe_mr *mr;
+ int access;
+ u64 addr;
+ u64 length;
};
struct rxe_mc_grp {
@@ -455,6 +484,16 @@ static inline u32 mr_rkey(struct rxe_mr *mr)
return mr->ibmr.rkey;
}
+static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw)
+{
+ return to_rpd(mw->ibmw.pd);
+}
+
+static inline u32 rxe_mw_rkey(struct rxe_mw *mw)
+{
+ return mw->ibmw.rkey;
+}
+
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
void rxe_mc_cleanup(struct rxe_pool_entry *arg);