aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/sw/rdmavt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/sw/rdmavt')
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c6
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c250
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.h7
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c6
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c402
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.h2
-rw-r--r--drivers/infiniband/sw/rdmavt/rc.c41
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.c69
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_mr.h56
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c7
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.h9
11 files changed, 553 insertions, 302 deletions
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index 0e147b32cbe9..fe99da0ff060 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2019 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -119,8 +119,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
rdma_copy_ah_attr(&ah->attr, ah_attr);
- atomic_set(&ah->refcount, 0);
-
if (dev->driver_f.notify_new_ah)
dev->driver_f.notify_new_ah(ibah->device, ah_attr, ah);
@@ -141,8 +139,6 @@ void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
struct rvt_ah *ah = ibah_to_rvtah(ibah);
unsigned long flags;
- WARN_ON_ONCE(atomic_read(&ah->refcount));
-
spin_lock_irqsave(&dev->n_ahs_lock, flags);
dev->n_ahs_allocated--;
spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index a06e6da7a026..a85571a4cf57 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -60,22 +60,39 @@ static struct workqueue_struct *comp_vector_wq;
* @solicited: true if @entry is solicited
*
* This may be called with qp->s_lock held.
+ *
+ * Return: return true on success, else return
+ * false if cq is full.
*/
-void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
+bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
{
- struct rvt_cq_wc *wc;
+ struct ib_uverbs_wc *uqueue = NULL;
+ struct ib_wc *kqueue = NULL;
+ struct rvt_cq_wc *u_wc = NULL;
+ struct rvt_k_cq_wc *k_wc = NULL;
unsigned long flags;
u32 head;
u32 next;
+ u32 tail;
spin_lock_irqsave(&cq->lock, flags);
+ if (cq->ip) {
+ u_wc = cq->queue;
+ uqueue = &u_wc->uqueue[0];
+ head = RDMA_READ_UAPI_ATOMIC(u_wc->head);
+ tail = RDMA_READ_UAPI_ATOMIC(u_wc->tail);
+ } else {
+ k_wc = cq->kqueue;
+ kqueue = &k_wc->kqueue[0];
+ head = k_wc->head;
+ tail = k_wc->tail;
+ }
+
/*
- * Note that the head pointer might be writable by user processes.
- * Take care to verify it is a sane value.
+ * Note that the head pointer might be writable by
+ * user processes.Take care to verify it is a sane value.
*/
- wc = cq->queue;
- head = wc->head;
if (head >= (unsigned)cq->ibcq.cqe) {
head = cq->ibcq.cqe;
next = 0;
@@ -83,7 +100,12 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
next = head + 1;
}
- if (unlikely(next == wc->tail)) {
+ if (unlikely(next == tail || cq->cq_full)) {
+ struct rvt_dev_info *rdi = cq->rdi;
+
+ if (!cq->cq_full)
+ rvt_pr_err_ratelimited(rdi, "CQ is full!\n");
+ cq->cq_full = true;
spin_unlock_irqrestore(&cq->lock, flags);
if (cq->ibcq.event_handler) {
struct ib_event ev;
@@ -93,30 +115,30 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
ev.event = IB_EVENT_CQ_ERR;
cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
}
- return;
+ return false;
}
trace_rvt_cq_enter(cq, entry, head);
- if (cq->ip) {
- wc->uqueue[head].wr_id = entry->wr_id;
- wc->uqueue[head].status = entry->status;
- wc->uqueue[head].opcode = entry->opcode;
- wc->uqueue[head].vendor_err = entry->vendor_err;
- wc->uqueue[head].byte_len = entry->byte_len;
- wc->uqueue[head].ex.imm_data = entry->ex.imm_data;
- wc->uqueue[head].qp_num = entry->qp->qp_num;
- wc->uqueue[head].src_qp = entry->src_qp;
- wc->uqueue[head].wc_flags = entry->wc_flags;
- wc->uqueue[head].pkey_index = entry->pkey_index;
- wc->uqueue[head].slid = ib_lid_cpu16(entry->slid);
- wc->uqueue[head].sl = entry->sl;
- wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
- wc->uqueue[head].port_num = entry->port_num;
+ if (uqueue) {
+ uqueue[head].wr_id = entry->wr_id;
+ uqueue[head].status = entry->status;
+ uqueue[head].opcode = entry->opcode;
+ uqueue[head].vendor_err = entry->vendor_err;
+ uqueue[head].byte_len = entry->byte_len;
+ uqueue[head].ex.imm_data = entry->ex.imm_data;
+ uqueue[head].qp_num = entry->qp->qp_num;
+ uqueue[head].src_qp = entry->src_qp;
+ uqueue[head].wc_flags = entry->wc_flags;
+ uqueue[head].pkey_index = entry->pkey_index;
+ uqueue[head].slid = ib_lid_cpu16(entry->slid);
+ uqueue[head].sl = entry->sl;
+ uqueue[head].dlid_path_bits = entry->dlid_path_bits;
+ uqueue[head].port_num = entry->port_num;
/* Make sure entry is written before the head index. */
- smp_wmb();
+ RDMA_WRITE_UAPI_ATOMIC(u_wc->head, next);
} else {
- wc->kqueue[head] = *entry;
+ kqueue[head] = *entry;
+ k_wc->head = next;
}
- wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED &&
@@ -132,6 +154,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
}
spin_unlock_irqrestore(&cq->lock, flags);
+ return true;
}
EXPORT_SYMBOL(rvt_cq_enter);
@@ -166,43 +189,38 @@ static void send_complete(struct work_struct *work)
/**
* rvt_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
+ * @ibcq: Allocated CQ
* @attr: creation attributes
* @udata: user data for libibverbs.so
*
* Called by ib_create_cq() in the generic verbs code.
*
- * Return: pointer to the completion queue or negative errno values
- * for failure.
+ * Return: 0 on success
*/
-struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibcq->device;
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
- struct rvt_cq *cq;
- struct rvt_cq_wc *wc;
- struct ib_cq *ret;
+ struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+ struct rvt_cq_wc *u_wc = NULL;
+ struct rvt_k_cq_wc *k_wc = NULL;
u32 sz;
unsigned int entries = attr->cqe;
int comp_vector = attr->comp_vector;
+ int err;
if (attr->flags)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (entries < 1 || entries > rdi->dparms.props.max_cqe)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (comp_vector < 0)
comp_vector = 0;
comp_vector = comp_vector % rdi->ibdev.num_comp_vectors;
- /* Allocate the completion queue structure. */
- cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node);
- if (!cq)
- return ERR_PTR(-ENOMEM);
-
/*
* Allocate the completion queue entries and head/tail pointers.
* This is allocated separately so that it can be resized and
@@ -210,17 +228,18 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
* We need to use vmalloc() in order to support mmap and large
* numbers of entries.
*/
- sz = sizeof(*wc);
- if (udata && udata->outlen >= sizeof(__u64))
- sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
- else
- sz += sizeof(struct ib_wc) * (entries + 1);
- wc = udata ?
- vmalloc_user(sz) :
- vzalloc_node(sz, rdi->dparms.node);
- if (!wc) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_cq;
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ sz = sizeof(struct ib_uverbs_wc) * (entries + 1);
+ sz += sizeof(*u_wc);
+ u_wc = vmalloc_user(sz);
+ if (!u_wc)
+ return -ENOMEM;
+ } else {
+ sz = sizeof(struct ib_wc) * (entries + 1);
+ sz += sizeof(*k_wc);
+ k_wc = vzalloc_node(sz, rdi->dparms.node);
+ if (!k_wc)
+ return -ENOMEM;
}
/*
@@ -228,26 +247,22 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
* See rvt_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
- int err;
-
- cq->ip = rvt_create_mmap_info(rdi, sz, udata, wc);
+ cq->ip = rvt_create_mmap_info(rdi, sz, udata, u_wc);
if (!cq->ip) {
- ret = ERR_PTR(-ENOMEM);
+ err = -ENOMEM;
goto bail_wc;
}
err = ib_copy_to_udata(udata, &cq->ip->offset,
sizeof(cq->ip->offset));
- if (err) {
- ret = ERR_PTR(err);
+ if (err)
goto bail_ip;
- }
}
spin_lock_irq(&rdi->n_cqs_lock);
if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
spin_unlock_irq(&rdi->n_cqs_lock);
- ret = ERR_PTR(-ENOMEM);
+ err = -ENOMEM;
goto bail_ip;
}
@@ -277,21 +292,20 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
cq->notify = RVT_CQ_NONE;
spin_lock_init(&cq->lock);
INIT_WORK(&cq->comptask, send_complete);
- cq->queue = wc;
-
- ret = &cq->ibcq;
+ if (u_wc)
+ cq->queue = u_wc;
+ else
+ cq->kqueue = k_wc;
trace_rvt_create_cq(cq, attr);
- goto done;
+ return 0;
bail_ip:
kfree(cq->ip);
bail_wc:
- vfree(wc);
-bail_cq:
- kfree(cq);
-done:
- return ret;
+ vfree(u_wc);
+ vfree(k_wc);
+ return err;
}
/**
@@ -300,10 +314,8 @@ done:
* @udata: user data or NULL for kernel object
*
* Called by ib_destroy_cq() in the generic verbs code.
- *
- * Return: always 0
*/
-int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
struct rvt_dev_info *rdi = cq->rdi;
@@ -316,9 +328,6 @@ int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
vfree(cq->queue);
- kfree(cq);
-
- return 0;
}
/**
@@ -345,9 +354,16 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
- if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
- cq->queue->head != cq->queue->tail)
- ret = 1;
+ if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
+ if (cq->queue) {
+ if (RDMA_READ_UAPI_ATOMIC(cq->queue->head) !=
+ RDMA_READ_UAPI_ATOMIC(cq->queue->tail))
+ ret = 1;
+ } else {
+ if (cq->kqueue->head != cq->kqueue->tail)
+ ret = 1;
+ }
+ }
spin_unlock_irqrestore(&cq->lock, flags);
@@ -363,12 +379,14 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
- struct rvt_cq_wc *old_wc;
- struct rvt_cq_wc *wc;
u32 head, tail, n;
int ret;
u32 sz;
struct rvt_dev_info *rdi = cq->rdi;
+ struct rvt_cq_wc *u_wc = NULL;
+ struct rvt_cq_wc *old_u_wc = NULL;
+ struct rvt_k_cq_wc *k_wc = NULL;
+ struct rvt_k_cq_wc *old_k_wc = NULL;
if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
return -EINVAL;
@@ -376,17 +394,19 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
- sz = sizeof(*wc);
- if (udata && udata->outlen >= sizeof(__u64))
- sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
- else
- sz += sizeof(struct ib_wc) * (cqe + 1);
- wc = udata ?
- vmalloc_user(sz) :
- vzalloc_node(sz, rdi->dparms.node);
- if (!wc)
- return -ENOMEM;
-
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ sz = sizeof(struct ib_uverbs_wc) * (cqe + 1);
+ sz += sizeof(*u_wc);
+ u_wc = vmalloc_user(sz);
+ if (!u_wc)
+ return -ENOMEM;
+ } else {
+ sz = sizeof(struct ib_wc) * (cqe + 1);
+ sz += sizeof(*k_wc);
+ k_wc = vzalloc_node(sz, rdi->dparms.node);
+ if (!k_wc)
+ return -ENOMEM;
+ }
/* Check that we can write the offset to mmap. */
if (udata && udata->outlen >= sizeof(__u64)) {
__u64 offset = 0;
@@ -401,11 +421,18 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
* Make sure head and tail are sane since they
* might be user writable.
*/
- old_wc = cq->queue;
- head = old_wc->head;
+ if (u_wc) {
+ old_u_wc = cq->queue;
+ head = RDMA_READ_UAPI_ATOMIC(old_u_wc->head);
+ tail = RDMA_READ_UAPI_ATOMIC(old_u_wc->tail);
+ } else {
+ old_k_wc = cq->kqueue;
+ head = old_k_wc->head;
+ tail = old_k_wc->tail;
+ }
+
if (head > (u32)cq->ibcq.cqe)
head = (u32)cq->ibcq.cqe;
- tail = old_wc->tail;
if (tail > (u32)cq->ibcq.cqe)
tail = (u32)cq->ibcq.cqe;
if (head < tail)
@@ -417,27 +444,36 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
goto bail_unlock;
}
for (n = 0; tail != head; n++) {
- if (cq->ip)
- wc->uqueue[n] = old_wc->uqueue[tail];
+ if (u_wc)
+ u_wc->uqueue[n] = old_u_wc->uqueue[tail];
else
- wc->kqueue[n] = old_wc->kqueue[tail];
+ k_wc->kqueue[n] = old_k_wc->kqueue[tail];
if (tail == (u32)cq->ibcq.cqe)
tail = 0;
else
tail++;
}
cq->ibcq.cqe = cqe;
- wc->head = n;
- wc->tail = 0;
- cq->queue = wc;
+ if (u_wc) {
+ RDMA_WRITE_UAPI_ATOMIC(u_wc->head, n);
+ RDMA_WRITE_UAPI_ATOMIC(u_wc->tail, 0);
+ cq->queue = u_wc;
+ } else {
+ k_wc->head = n;
+ k_wc->tail = 0;
+ cq->kqueue = k_wc;
+ }
spin_unlock_irq(&cq->lock);
- vfree(old_wc);
+ if (u_wc)
+ vfree(old_u_wc);
+ else
+ vfree(old_k_wc);
if (cq->ip) {
struct rvt_mmap_info *ip = cq->ip;
- rvt_update_mmap_info(rdi, ip, sz, wc);
+ rvt_update_mmap_info(rdi, ip, sz, u_wc);
/*
* Return the offset to mmap.
@@ -461,7 +497,9 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
bail_unlock:
spin_unlock_irq(&cq->lock);
bail_free:
- vfree(wc);
+ vfree(u_wc);
+ vfree(k_wc);
+
return ret;
}
@@ -479,7 +517,7 @@ bail_free:
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
- struct rvt_cq_wc *wc;
+ struct rvt_k_cq_wc *wc;
unsigned long flags;
int npolled;
u32 tail;
@@ -490,7 +528,7 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
spin_lock_irqsave(&cq->lock, flags);
- wc = cq->queue;
+ wc = cq->kqueue;
tail = wc->tail;
if (tail > (u32)cq->ibcq.cqe)
tail = (u32)cq->ibcq.cqe;
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
index 3ad6faf18ecb..5e26a2eb19a4 100644
--- a/drivers/infiniband/sw/rdmavt/cq.h
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -51,10 +51,9 @@
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_cq.h>
-struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
-int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index f48240f66b8f..a6a39f01dca3 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -562,8 +562,7 @@ int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
if (ret)
goto out;
rvt_deinit_mregion(&mr->mr);
- if (mr->umem)
- ib_umem_release(mr->umem);
+ ib_umem_release(mr->umem);
kfree(mr);
out:
return ret;
@@ -613,8 +612,8 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
n = mapped_segs % RVT_SEGSZ;
mr->mr.map[m]->segs[n].vaddr = (void *)addr;
mr->mr.map[m]->segs[n].length = ps;
- trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
mr->mr.length += ps;
+ trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
return 0;
}
@@ -643,6 +642,7 @@ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
mr->mr.iova = ibmr->iova;
mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
mr->mr.length = (size_t)ibmr->length;
+ trace_rvt_map_mr_sg(ibmr, sg_nents, sg_offset);
return ret;
}
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index c5a50614a6c6..0b0a241c57ff 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 - 2018 Intel Corporation.
+ * Copyright(c) 2016 - 2019 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -58,6 +58,8 @@
#include "vt.h"
#include "trace.h"
+#define RVT_RWQ_COUNT_THRESHOLD 16
+
static void rvt_rc_timeout(struct timer_list *t);
/*
@@ -803,6 +805,47 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
}
/**
+ * rvt_alloc_rq - allocate memory for user or kernel buffer
+ * @rq: receive queue data structure
+ * @size: number of request queue entries
+ * @node: The NUMA node
+ * @udata: True if user data is available or not false
+ *
+ * Return: If memory allocation failed, return -ENONEM
+ * This function is used by both shared receive
+ * queues and non-shared receive queues to allocate
+ * memory.
+ */
+int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node,
+ struct ib_udata *udata)
+{
+ if (udata) {
+ rq->wq = vmalloc_user(sizeof(struct rvt_rwq) + size);
+ if (!rq->wq)
+ goto bail;
+ /* need kwq with no buffers */
+ rq->kwq = kzalloc_node(sizeof(*rq->kwq), GFP_KERNEL, node);
+ if (!rq->kwq)
+ goto bail;
+ rq->kwq->curr_wq = rq->wq->wq;
+ } else {
+ /* need kwq with buffers */
+ rq->kwq =
+ vzalloc_node(sizeof(struct rvt_krwq) + size, node);
+ if (!rq->kwq)
+ goto bail;
+ rq->kwq->curr_wq = rq->kwq->wq;
+ }
+
+ spin_lock_init(&rq->kwq->p_lock);
+ spin_lock_init(&rq->kwq->c_lock);
+ return 0;
+bail:
+ rvt_free_rq(rq);
+ return -ENOMEM;
+}
+
+/**
* rvt_init_qp - initialize the QP state to the reset state
* @qp: the QP to init or reinit
* @type: the QP type
@@ -852,10 +895,8 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
qp->s_tail_ack_queue = 0;
qp->s_acked_ack_queue = 0;
qp->s_num_rd_atomic = 0;
- if (qp->r_rq.wq) {
- qp->r_rq.wq->head = 0;
- qp->r_rq.wq->tail = 0;
- }
+ if (qp->r_rq.kwq)
+ qp->r_rq.kwq->count = qp->r_rq.size;
qp->r_sge.num_sge = 0;
atomic_set(&qp->s_reserved_used, 0);
}
@@ -928,6 +969,61 @@ static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
}
/**
+ * get_allowed_ops - Given a QP type return the appropriate allowed OP
+ * @type: valid, supported, QP type
+ */
+static u8 get_allowed_ops(enum ib_qp_type type)
+{
+ return type == IB_QPT_RC ? IB_OPCODE_RC : type == IB_QPT_UC ?
+ IB_OPCODE_UC : IB_OPCODE_UD;
+}
+
+/**
+ * free_ud_wq_attr - Clean up AH attribute cache for UD QPs
+ * @qp: Valid QP with allowed_ops set
+ *
+ * The rvt_swqe data structure being used is a union, so this is
+ * only valid for UD QPs.
+ */
+static void free_ud_wq_attr(struct rvt_qp *qp)
+{
+ struct rvt_swqe *wqe;
+ int i;
+
+ for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) {
+ wqe = rvt_get_swqe_ptr(qp, i);
+ kfree(wqe->ud_wr.attr);
+ wqe->ud_wr.attr = NULL;
+ }
+}
+
+/**
+ * alloc_ud_wq_attr - AH attribute cache for UD QPs
+ * @qp: Valid QP with allowed_ops set
+ * @node: Numa node for allocation
+ *
+ * The rvt_swqe data structure being used is a union, so this is
+ * only valid for UD QPs.
+ */
+static int alloc_ud_wq_attr(struct rvt_qp *qp, int node)
+{
+ struct rvt_swqe *wqe;
+ int i;
+
+ for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) {
+ wqe = rvt_get_swqe_ptr(qp, i);
+ wqe->ud_wr.attr = kzalloc_node(sizeof(*wqe->ud_wr.attr),
+ GFP_KERNEL, node);
+ if (!wqe->ud_wr.attr) {
+ free_ud_wq_attr(qp);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/**
* rvt_create_qp - create a queue pair for a device
* @ibpd: the protection domain who's device we create the queue pair for
* @init_attr: the attributes of the queue pair
@@ -989,9 +1085,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
case IB_QPT_UC:
case IB_QPT_RC:
case IB_QPT_UD:
- sz = sizeof(struct rvt_sge) *
- init_attr->cap.max_send_sge +
- sizeof(struct rvt_swqe);
+ sz = struct_size(swq, sg_list, init_attr->cap.max_send_sge);
swq = vzalloc_node(array_size(sz, sqsize), rdi->dparms.node);
if (!swq)
return ERR_PTR(-ENOMEM);
@@ -1011,6 +1105,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
rdi->dparms.node);
if (!qp)
goto bail_swq;
+ qp->allowed_ops = get_allowed_ops(init_attr->qp_type);
RCU_INIT_POINTER(qp->next, NULL);
if (init_attr->qp_type == IB_QPT_RC) {
@@ -1048,17 +1143,12 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
sizeof(struct rvt_rwqe);
- if (udata)
- qp->r_rq.wq = vmalloc_user(
- sizeof(struct rvt_rwq) +
- qp->r_rq.size * sz);
- else
- qp->r_rq.wq = vzalloc_node(
- sizeof(struct rvt_rwq) +
- qp->r_rq.size * sz,
- rdi->dparms.node);
- if (!qp->r_rq.wq)
+ err = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz,
+ rdi->dparms.node, udata);
+ if (err) {
+ ret = ERR_PTR(err);
goto bail_driver_priv;
+ }
}
/*
@@ -1068,7 +1158,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
spin_lock_init(&qp->r_lock);
spin_lock_init(&qp->s_hlock);
spin_lock_init(&qp->s_lock);
- spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
atomic_set(&qp->local_ops_pending, 0);
init_waitqueue_head(&qp->wait);
@@ -1080,6 +1169,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
qp->s_max_sge = init_attr->cap.max_send_sge;
if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
qp->s_flags = RVT_S_SIGNAL_REQ_WR;
+ err = alloc_ud_wq_attr(qp, rdi->dparms.node);
+ if (err) {
+ ret = (ERR_PTR(err));
+ goto bail_driver_priv;
+ }
err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
init_attr->qp_type,
@@ -1172,28 +1266,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
ret = &qp->ibqp;
- /*
- * We have our QP and its good, now keep track of what types of opcodes
- * can be processed on this QP. We do this by keeping track of what the
- * 3 high order bits of the opcode are.
- */
- switch (init_attr->qp_type) {
- case IB_QPT_SMI:
- case IB_QPT_GSI:
- case IB_QPT_UD:
- qp->allowed_ops = IB_OPCODE_UD;
- break;
- case IB_QPT_RC:
- qp->allowed_ops = IB_OPCODE_RC;
- break;
- case IB_QPT_UC:
- qp->allowed_ops = IB_OPCODE_UC;
- break;
- default:
- ret = ERR_PTR(-EINVAL);
- goto bail_ip;
- }
-
return ret;
bail_ip:
@@ -1204,8 +1276,8 @@ bail_qpn:
rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
bail_rq_wq:
- if (!qp->ip)
- vfree(qp->r_rq.wq);
+ rvt_free_rq(&qp->r_rq);
+ free_ud_wq_attr(qp);
bail_driver_priv:
rdi->driver_f.qp_priv_free(rdi, qp);
@@ -1271,19 +1343,26 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
}
wc.status = IB_WC_WR_FLUSH_ERR;
- if (qp->r_rq.wq) {
- struct rvt_rwq *wq;
+ if (qp->r_rq.kwq) {
u32 head;
u32 tail;
-
- spin_lock(&qp->r_rq.lock);
-
+ struct rvt_rwq *wq = NULL;
+ struct rvt_krwq *kwq = NULL;
+
+ spin_lock(&qp->r_rq.kwq->c_lock);
+ /* qp->ip used to validate if there is a user buffer mmaped */
+ if (qp->ip) {
+ wq = qp->r_rq.wq;
+ head = RDMA_READ_UAPI_ATOMIC(wq->head);
+ tail = RDMA_READ_UAPI_ATOMIC(wq->tail);
+ } else {
+ kwq = qp->r_rq.kwq;
+ head = kwq->head;
+ tail = kwq->tail;
+ }
/* sanity check pointers before trusting them */
- wq = qp->r_rq.wq;
- head = wq->head;
if (head >= qp->r_rq.size)
head = 0;
- tail = wq->tail;
if (tail >= qp->r_rq.size)
tail = 0;
while (tail != head) {
@@ -1292,9 +1371,11 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
tail = 0;
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
}
- wq->tail = tail;
-
- spin_unlock(&qp->r_rq.lock);
+ if (qp->ip)
+ RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail);
+ else
+ kwq->tail = tail;
+ spin_unlock(&qp->r_rq.kwq->c_lock);
} else if (qp->ibqp.event_handler) {
ret = 1;
}
@@ -1636,12 +1717,12 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (qp->ip)
kref_put(&qp->ip->ref, rvt_release_mmap_info);
- else
- vfree(qp->r_rq.wq);
+ kvfree(qp->r_rq.kwq);
rdi->driver_f.qp_priv_free(rdi, qp);
kfree(qp->s_ack_queue);
rdma_destroy_ah_attr(&qp->remote_ah_attr);
rdma_destroy_ah_attr(&qp->alt_ah_attr);
+ free_ud_wq_attr(qp);
vfree(qp->s_wq);
kfree(qp);
return 0;
@@ -1723,7 +1804,7 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
- struct rvt_rwq *wq = qp->r_rq.wq;
+ struct rvt_krwq *wq = qp->r_rq.kwq;
unsigned long flags;
int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
!qp->ibqp.srq;
@@ -1744,12 +1825,12 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
return -EINVAL;
}
- spin_lock_irqsave(&qp->r_rq.lock, flags);
+ spin_lock_irqsave(&qp->r_rq.kwq->p_lock, flags);
next = wq->head + 1;
if (next >= qp->r_rq.size)
next = 0;
- if (next == wq->tail) {
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ if (next == READ_ONCE(wq->tail)) {
+ spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags);
*bad_wr = wr;
return -ENOMEM;
}
@@ -1766,16 +1847,18 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
wqe->wr_id = wr->wr_id;
wqe->num_sge = wr->num_sge;
- for (i = 0; i < wr->num_sge; i++)
- wqe->sg_list[i] = wr->sg_list[i];
+ for (i = 0; i < wr->num_sge; i++) {
+ wqe->sg_list[i].addr = wr->sg_list[i].addr;
+ wqe->sg_list[i].length = wr->sg_list[i].length;
+ wqe->sg_list[i].lkey = wr->sg_list[i].lkey;
+ }
/*
* Make sure queue entry is written
* before the head index.
*/
- smp_wmb();
- wq->head = next;
+ smp_store_release(&wq->head, next);
}
- spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags);
}
return 0;
}
@@ -1856,10 +1939,9 @@ static inline int rvt_qp_is_avail(
/* see rvt_qp_wqe_unreserve() */
smp_mb__before_atomic();
- reserved_used = atomic_read(&qp->s_reserved_used);
if (unlikely(reserved_op)) {
/* see rvt_qp_wqe_unreserve() */
- smp_mb__before_atomic();
+ reserved_used = atomic_read(&qp->s_reserved_used);
if (reserved_used >= rdi->dparms.reserved_operations)
return -ENOMEM;
return 0;
@@ -1867,14 +1949,13 @@ static inline int rvt_qp_is_avail(
/* non-reserved operations */
if (likely(qp->s_avail))
return 0;
- slast = READ_ONCE(qp->s_last);
+ /* See rvt_qp_complete_swqe() */
+ slast = smp_load_acquire(&qp->s_last);
if (qp->s_head >= slast)
avail = qp->s_size - (qp->s_head - slast);
else
avail = slast - qp->s_head;
- /* see rvt_qp_wqe_unreserve() */
- smp_mb__before_atomic();
reserved_used = atomic_read(&qp->s_reserved_used);
avail = avail - 1 -
(rdi->dparms.reserved_operations - reserved_used);
@@ -2011,10 +2092,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
*/
log_pmtu = qp->log_pmtu;
if (qp->allowed_ops == IB_OPCODE_UD) {
- struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
+ struct rvt_ah *ah = rvt_get_swqe_ah(wqe);
log_pmtu = ah->log_pmtu;
- atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
+ rdma_copy_ah_attr(wqe->ud_wr.attr, &ah->attr);
}
if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) {
@@ -2059,7 +2140,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
bail_inval_free_ref:
if (qp->allowed_ops == IB_OPCODE_UD)
- atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
+ rdma_destroy_ah_attr(wqe->ud_wr.attr);
bail_inval_free:
/* release mr holds */
while (j) {
@@ -2145,7 +2226,7 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
- struct rvt_rwq *wq;
+ struct rvt_krwq *wq;
unsigned long flags;
for (; wr; wr = wr->next) {
@@ -2158,13 +2239,13 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
return -EINVAL;
}
- spin_lock_irqsave(&srq->rq.lock, flags);
- wq = srq->rq.wq;
+ spin_lock_irqsave(&srq->rq.kwq->p_lock, flags);
+ wq = srq->rq.kwq;
next = wq->head + 1;
if (next >= srq->rq.size)
next = 0;
- if (next == wq->tail) {
- spin_unlock_irqrestore(&srq->rq.lock, flags);
+ if (next == READ_ONCE(wq->tail)) {
+ spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags);
*bad_wr = wr;
return -ENOMEM;
}
@@ -2172,17 +2253,35 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head);
wqe->wr_id = wr->wr_id;
wqe->num_sge = wr->num_sge;
- for (i = 0; i < wr->num_sge; i++)
- wqe->sg_list[i] = wr->sg_list[i];
+ for (i = 0; i < wr->num_sge; i++) {
+ wqe->sg_list[i].addr = wr->sg_list[i].addr;
+ wqe->sg_list[i].length = wr->sg_list[i].length;
+ wqe->sg_list[i].lkey = wr->sg_list[i].lkey;
+ }
/* Make sure queue entry is written before the head index. */
- smp_wmb();
- wq->head = next;
- spin_unlock_irqrestore(&srq->rq.lock, flags);
+ smp_store_release(&wq->head, next);
+ spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags);
}
return 0;
}
/*
+ * rvt used the internal kernel struct as part of its ABI, for now make sure
+ * the kernel struct does not change layout. FIXME: rvt should never cast the
+ * user struct to a kernel struct.
+ */
+static struct ib_sge *rvt_cast_sge(struct rvt_wqe_sge *sge)
+{
+ BUILD_BUG_ON(offsetof(struct ib_sge, addr) !=
+ offsetof(struct rvt_wqe_sge, addr));
+ BUILD_BUG_ON(offsetof(struct ib_sge, length) !=
+ offsetof(struct rvt_wqe_sge, length));
+ BUILD_BUG_ON(offsetof(struct ib_sge, lkey) !=
+ offsetof(struct rvt_wqe_sge, lkey));
+ return (struct ib_sge *)sge;
+}
+
+/*
* Validate a RWQE and fill in the SGE state.
* Return 1 if OK.
*/
@@ -2205,7 +2304,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
continue;
/* Check LKEY */
ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
- NULL, &wqe->sg_list[i],
+ NULL, rvt_cast_sge(&wqe->sg_list[i]),
IB_ACCESS_LOCAL_WRITE);
if (unlikely(ret <= 0))
goto bad_lkey;
@@ -2234,6 +2333,50 @@ bad_lkey:
}
/**
+ * get_count - count numbers of request work queue entries
+ * in circular buffer
+ * @rq: data structure for request queue entry
+ * @tail: tail indices of the circular buffer
+ * @head: head indices of the circular buffer
+ *
+ * Return - total number of entries in the circular buffer
+ */
+static u32 get_count(struct rvt_rq *rq, u32 tail, u32 head)
+{
+ u32 count;
+
+ count = head;
+
+ if (count >= rq->size)
+ count = 0;
+ if (count < tail)
+ count += rq->size - tail;
+ else
+ count -= tail;
+
+ return count;
+}
+
+/**
+ * get_rvt_head - get head indices of the circular buffer
+ * @rq: data structure for request queue entry
+ * @ip: the QP
+ *
+ * Return - head index value
+ */
+static inline u32 get_rvt_head(struct rvt_rq *rq, void *ip)
+{
+ u32 head;
+
+ if (ip)
+ head = RDMA_READ_UAPI_ATOMIC(rq->wq->head);
+ else
+ head = rq->kwq->head;
+
+ return head;
+}
+
+/**
* rvt_get_rwqe - copy the next RWQE into the QP's RWQE
* @qp: the QP
* @wr_id_only: update qp->r_wr_id only, not qp->r_sge
@@ -2247,39 +2390,54 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only)
{
unsigned long flags;
struct rvt_rq *rq;
+ struct rvt_krwq *kwq = NULL;
struct rvt_rwq *wq;
struct rvt_srq *srq;
struct rvt_rwqe *wqe;
void (*handler)(struct ib_event *, void *);
u32 tail;
+ u32 head;
int ret;
+ void *ip = NULL;
if (qp->ibqp.srq) {
srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
handler = srq->ibsrq.event_handler;
rq = &srq->rq;
+ ip = srq->ip;
} else {
srq = NULL;
handler = NULL;
rq = &qp->r_rq;
+ ip = qp->ip;
}
- spin_lock_irqsave(&rq->lock, flags);
+ spin_lock_irqsave(&rq->kwq->c_lock, flags);
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
ret = 0;
goto unlock;
}
+ kwq = rq->kwq;
+ if (ip) {
+ wq = rq->wq;
+ tail = RDMA_READ_UAPI_ATOMIC(wq->tail);
+ } else {
+ tail = kwq->tail;
+ }
- wq = rq->wq;
- tail = wq->tail;
/* Validate tail before using it since it is user writable. */
if (tail >= rq->size)
tail = 0;
- if (unlikely(tail == wq->head)) {
+
+ if (kwq->count < RVT_RWQ_COUNT_THRESHOLD) {
+ head = get_rvt_head(rq, ip);
+ kwq->count = get_count(rq, tail, head);
+ }
+ if (unlikely(kwq->count == 0)) {
ret = 0;
goto unlock;
}
- /* Make sure entry is read after head index is read. */
+ /* Make sure entry is read after the count is read. */
smp_rmb();
wqe = rvt_get_rwqe_ptr(rq, tail);
/*
@@ -2289,43 +2447,41 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only)
*/
if (++tail >= rq->size)
tail = 0;
- wq->tail = tail;
+ if (ip)
+ RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail);
+ else
+ kwq->tail = tail;
if (!wr_id_only && !init_sge(qp, wqe)) {
ret = -1;
goto unlock;
}
qp->r_wr_id = wqe->wr_id;
+ kwq->count--;
ret = 1;
set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
if (handler) {
- u32 n;
-
/*
* Validate head pointer value and compute
* the number of remaining WQEs.
*/
- n = wq->head;
- if (n >= rq->size)
- n = 0;
- if (n < tail)
- n += rq->size - tail;
- else
- n -= tail;
- if (n < srq->limit) {
- struct ib_event ev;
-
- srq->limit = 0;
- spin_unlock_irqrestore(&rq->lock, flags);
- ev.device = qp->ibqp.device;
- ev.element.srq = qp->ibqp.srq;
- ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- handler(&ev, srq->ibsrq.srq_context);
- goto bail;
+ if (kwq->count < srq->limit) {
+ kwq->count = get_count(rq, tail, get_rvt_head(rq, ip));
+ if (kwq->count < srq->limit) {
+ struct ib_event ev;
+
+ srq->limit = 0;
+ spin_unlock_irqrestore(&rq->kwq->c_lock, flags);
+ ev.device = qp->ibqp.device;
+ ev.element.srq = qp->ibqp.srq;
+ ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ handler(&ev, srq->ibsrq.srq_context);
+ goto bail;
+ }
}
}
unlock:
- spin_unlock_irqrestore(&rq->lock, flags);
+ spin_unlock_irqrestore(&rq->kwq->c_lock, flags);
bail:
return ret;
}
@@ -2667,27 +2823,16 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
- struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ struct rvt_dev_info *rdi;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
+ rdi = ib_to_rvt(qp->ibqp.device);
- last = qp->s_last;
- old_last = last;
- trace_rvt_qp_send_completion(qp, wqe, last);
- if (++last >= qp->s_size)
- last = 0;
- trace_rvt_qp_send_completion(qp, wqe, last);
- qp->s_last = last;
- /* See post_send() */
- barrier();
- rvt_put_qp_swqe(qp, wqe);
-
- rvt_qp_swqe_complete(qp,
- wqe,
- rdi->wc_opcode[wqe->wr.opcode],
- status);
-
+ old_last = qp->s_last;
+ trace_rvt_qp_send_completion(qp, wqe, old_last);
+ last = rvt_qp_complete_swqe(qp, wqe, rdi->wc_opcode[wqe->wr.opcode],
+ status);
if (qp->s_acked == old_last)
qp->s_acked = last;
if (qp->s_cur == old_last)
@@ -3021,8 +3166,7 @@ do_write:
wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- wqe->wr.send_flags & IB_SEND_SOLICITED);
+ rvt_recv_cq(qp, &wc, wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
spin_unlock_irqrestore(&qp->r_lock, flags);
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
index 6db1619389b0..2cdba1283bf6 100644
--- a/drivers/infiniband/sw/rdmavt/qp.h
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -68,4 +68,6 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
int rvt_wss_init(struct rvt_dev_info *rdi);
void rvt_wss_exit(struct rvt_dev_info *rdi);
+int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node,
+ struct ib_udata *udata);
#endif /* DEF_RVTQP_H */
diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c
index 09f0cf538be6..890d7b760d2e 100644
--- a/drivers/infiniband/sw/rdmavt/rc.c
+++ b/drivers/infiniband/sw/rdmavt/rc.c
@@ -104,26 +104,33 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp)
} else {
u32 min, max, x;
u32 credits;
- struct rvt_rwq *wq = qp->r_rq.wq;
u32 head;
u32 tail;
- /* sanity check pointers before trusting them */
- head = wq->head;
- if (head >= qp->r_rq.size)
- head = 0;
- tail = wq->tail;
- if (tail >= qp->r_rq.size)
- tail = 0;
- /*
- * Compute the number of credits available (RWQEs).
- * There is a small chance that the pair of reads are
- * not atomic, which is OK, since the fuzziness is
- * resolved as further ACKs go out.
- */
- credits = head - tail;
- if ((int)credits < 0)
- credits += qp->r_rq.size;
+ credits = READ_ONCE(qp->r_rq.kwq->count);
+ if (credits == 0) {
+ /* sanity check pointers before trusting them */
+ if (qp->ip) {
+ head = RDMA_READ_UAPI_ATOMIC(qp->r_rq.wq->head);
+ tail = RDMA_READ_UAPI_ATOMIC(qp->r_rq.wq->tail);
+ } else {
+ head = READ_ONCE(qp->r_rq.kwq->head);
+ tail = READ_ONCE(qp->r_rq.kwq->tail);
+ }
+ if (head >= qp->r_rq.size)
+ head = 0;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
+ /*
+ * Compute the number of credits available (RWQEs).
+ * There is a small chance that the pair of reads are
+ * not atomic, which is OK, since the fuzziness is
+ * resolved as further ACKs go out.
+ */
+ credits = head - tail;
+ if ((int)credits < 0)
+ credits += qp->r_rq.size;
+ }
/*
* Binary search the credit table to find the code to
* use.
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 8d6b3e764255..24fef021d51d 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -52,7 +52,7 @@
#include "srq.h"
#include "vt.h"
-
+#include "qp.h"
/**
* rvt_driver_srq_init - init srq resources on a per driver basis
* @rdi: rvt dev structure
@@ -97,11 +97,8 @@ int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr,
srq->rq.max_sge = srq_init_attr->attr.max_sge;
sz = sizeof(struct ib_sge) * srq->rq.max_sge +
sizeof(struct rvt_rwqe);
- srq->rq.wq = udata ?
- vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz) :
- vzalloc_node(sizeof(struct rvt_rwq) + srq->rq.size * sz,
- dev->dparms.node);
- if (!srq->rq.wq) {
+ if (rvt_alloc_rq(&srq->rq, srq->rq.size * sz,
+ dev->dparms.node, udata)) {
ret = -ENOMEM;
goto bail_srq;
}
@@ -152,7 +149,7 @@ int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr,
bail_ip:
kfree(srq->ip);
bail_wq:
- vfree(srq->rq.wq);
+ rvt_free_rq(&srq->rq);
bail_srq:
return ret;
}
@@ -172,11 +169,12 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
{
struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
- struct rvt_rwq *wq;
+ struct rvt_rq tmp_rq = {};
int ret = 0;
if (attr_mask & IB_SRQ_MAX_WR) {
- struct rvt_rwq *owq;
+ struct rvt_krwq *okwq = NULL;
+ struct rvt_rwq *owq = NULL;
struct rvt_rwqe *p;
u32 sz, size, n, head, tail;
@@ -185,17 +183,12 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
((attr_mask & IB_SRQ_LIMIT) ?
attr->srq_limit : srq->limit) > attr->max_wr)
return -EINVAL;
-
sz = sizeof(struct rvt_rwqe) +
srq->rq.max_sge * sizeof(struct ib_sge);
size = attr->max_wr + 1;
- wq = udata ?
- vmalloc_user(sizeof(struct rvt_rwq) + size * sz) :
- vzalloc_node(sizeof(struct rvt_rwq) + size * sz,
- dev->dparms.node);
- if (!wq)
+ if (rvt_alloc_rq(&tmp_rq, size * sz, dev->dparms.node,
+ udata))
return -ENOMEM;
-
/* Check that we can write the offset to mmap. */
if (udata && udata->inlen >= sizeof(__u64)) {
__u64 offset_addr;
@@ -213,14 +206,20 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
goto bail_free;
}
- spin_lock_irq(&srq->rq.lock);
+ spin_lock_irq(&srq->rq.kwq->c_lock);
/*
* validate head and tail pointer values and compute
* the number of remaining WQEs.
*/
- owq = srq->rq.wq;
- head = owq->head;
- tail = owq->tail;
+ if (udata) {
+ owq = srq->rq.wq;
+ head = RDMA_READ_UAPI_ATOMIC(owq->head);
+ tail = RDMA_READ_UAPI_ATOMIC(owq->tail);
+ } else {
+ okwq = srq->rq.kwq;
+ head = okwq->head;
+ tail = okwq->tail;
+ }
if (head >= srq->rq.size || tail >= srq->rq.size) {
ret = -EINVAL;
goto bail_unlock;
@@ -235,7 +234,7 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
goto bail_unlock;
}
n = 0;
- p = wq->wq;
+ p = tmp_rq.kwq->curr_wq;
while (tail != head) {
struct rvt_rwqe *wqe;
int i;
@@ -250,22 +249,29 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
if (++tail >= srq->rq.size)
tail = 0;
}
- srq->rq.wq = wq;
+ srq->rq.kwq = tmp_rq.kwq;
+ if (udata) {
+ srq->rq.wq = tmp_rq.wq;
+ RDMA_WRITE_UAPI_ATOMIC(tmp_rq.wq->head, n);
+ RDMA_WRITE_UAPI_ATOMIC(tmp_rq.wq->tail, 0);
+ } else {
+ tmp_rq.kwq->head = n;
+ tmp_rq.kwq->tail = 0;
+ }
srq->rq.size = size;
- wq->head = n;
- wq->tail = 0;
if (attr_mask & IB_SRQ_LIMIT)
srq->limit = attr->srq_limit;
- spin_unlock_irq(&srq->rq.lock);
+ spin_unlock_irq(&srq->rq.kwq->c_lock);
vfree(owq);
+ kvfree(okwq);
if (srq->ip) {
struct rvt_mmap_info *ip = srq->ip;
struct rvt_dev_info *dev = ib_to_rvt(srq->ibsrq.device);
u32 s = sizeof(struct rvt_rwq) + size * sz;
- rvt_update_mmap_info(dev, ip, s, wq);
+ rvt_update_mmap_info(dev, ip, s, tmp_rq.wq);
/*
* Return the offset to mmap.
@@ -289,19 +295,19 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
spin_unlock_irq(&dev->pending_lock);
}
} else if (attr_mask & IB_SRQ_LIMIT) {
- spin_lock_irq(&srq->rq.lock);
+ spin_lock_irq(&srq->rq.kwq->c_lock);
if (attr->srq_limit >= srq->rq.size)
ret = -EINVAL;
else
srq->limit = attr->srq_limit;
- spin_unlock_irq(&srq->rq.lock);
+ spin_unlock_irq(&srq->rq.kwq->c_lock);
}
return ret;
bail_unlock:
- spin_unlock_irq(&srq->rq.lock);
+ spin_unlock_irq(&srq->rq.kwq->c_lock);
bail_free:
- vfree(wq);
+ rvt_free_rq(&tmp_rq);
return ret;
}
@@ -336,6 +342,5 @@ void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
spin_unlock(&dev->n_srqs_lock);
if (srq->ip)
kref_put(&srq->ip->ref, rvt_release_mmap_info);
- else
- vfree(srq->rq.wq);
+ kvfree(srq->rq.kwq);
}
diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h
index 976e482930a3..95b8a0e3b8bd 100644
--- a/drivers/infiniband/sw/rdmavt/trace_mr.h
+++ b/drivers/infiniband/sw/rdmavt/trace_mr.h
@@ -54,6 +54,8 @@
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_mr.h>
+#include "mr.h"
+
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rvt_mr
DECLARE_EVENT_CLASS(
@@ -64,8 +66,12 @@ DECLARE_EVENT_CLASS(
RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device))
__field(void *, vaddr)
__field(struct page *, page)
+ __field(u64, iova)
+ __field(u64, user_base)
__field(size_t, len)
+ __field(size_t, length)
__field(u32, lkey)
+ __field(u32, offset)
__field(u16, m)
__field(u16, n)
),
@@ -73,18 +79,28 @@ DECLARE_EVENT_CLASS(
RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device));
__entry->vaddr = v;
__entry->page = virt_to_page(v);
+ __entry->iova = mr->iova;
+ __entry->user_base = mr->user_base;
+ __entry->lkey = mr->lkey;
__entry->m = m;
__entry->n = n;
__entry->len = len;
+ __entry->length = mr->length;
+ __entry->offset = mr->offset;
),
TP_printk(
- "[%s] vaddr %p page %p m %u n %u len %ld",
+ "[%s] lkey %x iova %llx user_base %llx mr_len %lu vaddr %llx page %p m %u n %u len %lu off %u",
__get_str(dev),
- __entry->vaddr,
+ __entry->lkey,
+ __entry->iova,
+ __entry->user_base,
+ __entry->length,
+ (unsigned long long)__entry->vaddr,
__entry->page,
__entry->m,
__entry->n,
- __entry->len
+ __entry->len,
+ __entry->offset
)
);
@@ -165,6 +181,40 @@ DEFINE_EVENT(
TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
TP_ARGS(sge, isge));
+TRACE_EVENT(
+ rvt_map_mr_sg,
+ TP_PROTO(struct ib_mr *ibmr, int sg_nents, unsigned int *sg_offset),
+ TP_ARGS(ibmr, sg_nents, sg_offset),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(to_imr(ibmr)->mr.pd->device))
+ __field(u64, iova)
+ __field(u64, ibmr_iova)
+ __field(u64, user_base)
+ __field(u64, ibmr_length)
+ __field(int, sg_nents)
+ __field(uint, sg_offset)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(to_imr(ibmr)->mr.pd->device))
+ __entry->ibmr_iova = ibmr->iova;
+ __entry->iova = to_imr(ibmr)->mr.iova;
+ __entry->user_base = to_imr(ibmr)->mr.user_base;
+ __entry->ibmr_length = to_imr(ibmr)->mr.length;
+ __entry->sg_nents = sg_nents;
+ __entry->sg_offset = sg_offset ? *sg_offset : 0;
+ ),
+ TP_printk(
+ "[%s] ibmr_iova %llx iova %llx user_base %llx length %llx sg_nents %d sg_offset %u",
+ __get_str(dev),
+ __entry->ibmr_iova,
+ __entry->iova,
+ __entry->user_base,
+ __entry->ibmr_length,
+ __entry->sg_nents,
+ __entry->sg_offset
+ )
+);
+
#endif /* __RVT_TRACE_MR_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 9546a837a8ac..18da1e1ea979 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -382,6 +382,8 @@ enum {
};
static const struct ib_device_ops rvt_dev_ops = {
+ .uverbs_abi_ver = RVT_UVERBS_ABI_VERSION,
+
.alloc_fmr = rvt_alloc_fmr,
.alloc_mr = rvt_alloc_mr,
.alloc_pd = rvt_alloc_pd,
@@ -427,6 +429,7 @@ static const struct ib_device_ops rvt_dev_ops = {
.unmap_fmr = rvt_unmap_fmr,
INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, rvt_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, rvt_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext),
@@ -530,7 +533,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
*
* Return: 0 on success otherwise an errno.
*/
-int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
+int rvt_register_device(struct rvt_dev_info *rdi)
{
int ret = 0, i;
@@ -600,7 +603,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
* exactly which functions rdmavt supports, nor do they know the ABI
* version, so we do all of this sort of stuff here.
*/
- rdi->ibdev.uverbs_abi_ver = RVT_UVERBS_ABI_VERSION;
rdi->ibdev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -636,7 +638,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
if (!rdi->ibdev.num_comp_vectors)
rdi->ibdev.num_comp_vectors = 1;
- rdi->ibdev.driver_id = driver_id;
/* We are now good to announce we exist */
ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev));
if (ret) {
diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
index 0675ea6c3872..d19ff817c2c7 100644
--- a/drivers/infiniband/sw/rdmavt/vt.h
+++ b/drivers/infiniband/sw/rdmavt/vt.h
@@ -78,6 +78,12 @@
fmt, \
##__VA_ARGS__)
+#define rvt_pr_err_ratelimited(rdi, fmt, ...) \
+ __rvt_pr_err_ratelimited((rdi)->driver_f.get_pci_dev(rdi), \
+ rvt_get_ibdev_name(rdi), \
+ fmt, \
+ ##__VA_ARGS__)
+
#define __rvt_pr_info(pdev, name, fmt, ...) \
dev_info(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
@@ -87,6 +93,9 @@
#define __rvt_pr_err(pdev, name, fmt, ...) \
dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
+#define __rvt_pr_err_ratelimited(pdev, name, fmt, ...) \
+ dev_err_ratelimited(&(pdev)->dev, "%s: " fmt, name, ##__VA_ARGS__)
+
static inline int ibport_num_to_idx(struct ib_device *ibdev, u8 port_num)
{
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);