about summary refs log tree commit diff stats
path: root/drivers/infiniband/sw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r-- drivers/infiniband/sw/rdmavt/mr.c 21
-rw-r--r-- drivers/infiniband/sw/rdmavt/pd.c 29
-rw-r--r-- drivers/infiniband/sw/rdmavt/pd.h 7
-rw-r--r-- drivers/infiniband/sw/rdmavt/qp.c 111
-rw-r--r-- drivers/infiniband/sw/rdmavt/rc.c 13
-rw-r--r-- drivers/infiniband/sw/rdmavt/srq.c 5
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace_cq.h 10
-rw-r--r-- drivers/infiniband/sw/rdmavt/vt.c 34
-rw-r--r-- drivers/infiniband/sw/rxe/rxe.c 67
-rw-r--r-- drivers/infiniband/sw/rxe/rxe.h 16
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_av.c 7
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_comp.c 6
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_loc.h 9
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_mr.c 15
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_net.c 97
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_net.h 2
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_param.h 3
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_pool.c 77
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_pool.h 4
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_qp.c 15
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_recv.c 12
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_resp.c 3
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_sysfs.c 40
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_verbs.c 103
-rw-r--r-- drivers/infiniband/sw/rxe/rxe_verbs.h 9
25 files changed, 348 insertions, 367 deletions
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 49c9541050d4..728795043496 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -381,15 +381,14 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct rvt_mr *mr;
struct ib_umem *umem;
- struct scatterlist *sg;
- int n, m, entry;
+ struct sg_page_iter sg_iter;
+ int n, m;
struct ib_mr *ret;
if (length == 0)
return ERR_PTR(-EINVAL);
- umem = ib_umem_get(pd->uobject->context, start, length,
- mr_access_flags, 0);
+ umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
if (IS_ERR(umem))
return (void *)umem;
@@ -408,23 +407,21 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.access_flags = mr_access_flags;
mr->umem = umem;
- mr->mr.page_shift = umem->page_shift;
+ mr->mr.page_shift = PAGE_SHIFT;
m = 0;
n = 0;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ for_each_sg_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
void *vaddr;
- vaddr = page_address(sg_page(sg));
+ vaddr = page_address(sg_page_iter_page(&sg_iter));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail_inval;
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
- mr->mr.map[m]->segs[n].length = BIT(umem->page_shift);
- trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr,
- BIT(umem->page_shift));
- n++;
- if (n == RVT_SEGSZ) {
+ mr->mr.map[m]->segs[n].length = PAGE_SIZE;
+ trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, PAGE_SIZE);
+ if (++n == RVT_SEGSZ) {
m++;
n = 0;
}
diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c
index 8a89afff3363..6033054b22fa 100644
--- a/drivers/infiniband/sw/rdmavt/pd.c
+++ b/drivers/infiniband/sw/rdmavt/pd.c
@@ -50,7 +50,7 @@
/**
* rvt_alloc_pd - allocate a protection domain
- * @ibdev: ib device
+ * @ibpd: PD
* @context: optional user context
* @udata: optional user data
*
@@ -58,19 +58,14 @@
*
* Return: 0 on success
*/
-struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+int rvt_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibpd->device;
struct rvt_dev_info *dev = ib_to_rvt(ibdev);
- struct rvt_pd *pd;
- struct ib_pd *ret;
+ struct rvt_pd *pd = ibpd_to_rvtpd(ibpd);
+ int ret = 0;
- pd = kmalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
/*
* While we could continue allocating protecetion domains, being
* constrained only by system resources. The IBTA spec defines that
@@ -81,8 +76,7 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
spin_lock(&dev->n_pds_lock);
if (dev->n_pds_allocated == dev->dparms.props.max_pd) {
spin_unlock(&dev->n_pds_lock);
- kfree(pd);
- ret = ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
goto bail;
}
@@ -92,8 +86,6 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
/* ib_alloc_pd() will initialize pd->ibpd. */
pd->user = !!udata;
- ret = &pd->ibpd;
-
bail:
return ret;
}
@@ -104,16 +96,11 @@ bail:
*
* Return: always 0
*/
-int rvt_dealloc_pd(struct ib_pd *ibpd)
+void rvt_dealloc_pd(struct ib_pd *ibpd)
{
- struct rvt_pd *pd = ibpd_to_rvtpd(ibpd);
struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
spin_lock(&dev->n_pds_lock);
dev->n_pds_allocated--;
spin_unlock(&dev->n_pds_lock);
-
- kfree(pd);
-
- return 0;
}
diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h
index 1892ca4a9746..7a887e4a45e7 100644
--- a/drivers/infiniband/sw/rdmavt/pd.h
+++ b/drivers/infiniband/sw/rdmavt/pd.h
@@ -50,9 +50,8 @@
#include <rdma/rdma_vt.h>
-struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata);
-int rvt_dealloc_pd(struct ib_pd *ibpd);
+int rvt_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
+ struct ib_udata *udata);
+void rvt_dealloc_pd(struct ib_pd *ibpd);
#endif /* DEF_RDMAVTPD_H */
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index a1bd8cfc2c25..a34b9a2a32b6 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -53,6 +53,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_hdrs.h>
#include <rdma/opa_addr.h>
+#include <rdma/uverbs_ioctl.h>
#include "qp.h"
#include "vt.h"
#include "trace.h"
@@ -854,6 +855,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
qp->s_mig_state = IB_MIG_MIGRATED;
qp->r_head_ack_queue = 0;
qp->s_tail_ack_queue = 0;
+ qp->s_acked_ack_queue = 0;
qp->s_num_rd_atomic = 0;
if (qp->r_rq.wq) {
qp->r_rq.wq->head = 0;
@@ -955,6 +957,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
size_t sg_list_sz;
struct ib_qp *ret = ERR_PTR(-ENOMEM);
struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
+ struct rvt_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct rvt_ucontext, ibucontext);
void *priv = NULL;
size_t sqsize;
@@ -1128,7 +1132,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
qp->ip = rvt_create_mmap_info(rdi, s,
- ibpd->uobject->context,
+ &ucontext->ibucontext,
qp->r_rq.wq);
if (!qp->ip) {
ret = ERR_PTR(-ENOMEM);
@@ -1642,11 +1646,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
kref_put(&qp->ip->ref, rvt_release_mmap_info);
else
vfree(qp->r_rq.wq);
- vfree(qp->s_wq);
rdi->driver_f.qp_priv_free(rdi, qp);
kfree(qp->s_ack_queue);
rdma_destroy_ah_attr(&qp->remote_ah_attr);
rdma_destroy_ah_attr(&qp->alt_ah_attr);
+ vfree(qp->s_wq);
kfree(qp);
return 0;
}
@@ -2393,11 +2397,12 @@ static inline unsigned long rvt_aeth_to_usec(u32 aeth)
}
/*
- * rvt_add_retry_timer - add/start a retry timer
+ * rvt_add_retry_timer_ext - add/start a retry timer
* @qp - the QP
+ * @shift - timeout shift to wait for multiple packets
* add a retry timer on the QP
*/
-void rvt_add_retry_timer(struct rvt_qp *qp)
+void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift)
{
struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
@@ -2405,11 +2410,11 @@ void rvt_add_retry_timer(struct rvt_qp *qp)
lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */
- qp->s_timer.expires = jiffies + qp->timeout_jiffies +
- rdi->busy_jiffies;
+ qp->s_timer.expires = jiffies + rdi->busy_jiffies +
+ (qp->timeout_jiffies << shift);
add_timer(&qp->s_timer);
}
-EXPORT_SYMBOL(rvt_add_retry_timer);
+EXPORT_SYMBOL(rvt_add_retry_timer_ext);
/**
* rvt_add_rnr_timer - add/start an rnr timer
@@ -2785,6 +2790,18 @@ again:
}
EXPORT_SYMBOL(rvt_copy_sge);
+static enum ib_wc_status loopback_qp_drop(struct rvt_ibport *rvp,
+ struct rvt_qp *sqp)
+{
+ rvp->n_pkt_drops++;
+ /*
+ * For RC, the requester would timeout and retry so
+ * shortcut the timeouts and just signal too many retries.
+ */
+ return sqp->ibqp.qp_type == IB_QPT_RC ?
+ IB_WC_RETRY_EXC_ERR : IB_WC_SUCCESS;
+}
+
/**
* ruc_loopback - handle UC and RC loopback requests
* @sqp: the sending QP
@@ -2857,17 +2874,14 @@ again:
}
spin_unlock_irqrestore(&sqp->s_lock, flags);
- if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
+ if (!qp) {
+ send_status = loopback_qp_drop(rvp, sqp);
+ goto serr_no_r_lock;
+ }
+ spin_lock_irqsave(&qp->r_lock, flags);
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
qp->ibqp.qp_type != sqp->ibqp.qp_type) {
- rvp->n_pkt_drops++;
- /*
- * For RC, the requester would timeout and retry so
- * shortcut the timeouts and just signal too many retries.
- */
- if (sqp->ibqp.qp_type == IB_QPT_RC)
- send_status = IB_WC_RETRY_EXC_ERR;
- else
- send_status = IB_WC_SUCCESS;
+ send_status = loopback_qp_drop(rvp, sqp);
goto serr;
}
@@ -2893,23 +2907,31 @@ again:
goto send_comp;
case IB_WR_SEND_WITH_INV:
- if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
- wc.wc_flags = IB_WC_WITH_INVALIDATE;
- wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
- }
- goto send;
-
case IB_WR_SEND_WITH_IMM:
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- /* FALLTHROUGH */
case IB_WR_SEND:
-send:
ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto op_err;
if (!ret)
goto rnr_nak;
+ if (wqe->length > qp->r_len)
+ goto inv_err;
+ switch (wqe->wr.opcode) {
+ case IB_WR_SEND_WITH_INV:
+ if (!rvt_invalidate_rkey(qp,
+ wqe->wr.ex.invalidate_rkey)) {
+ wc.wc_flags = IB_WC_WITH_INVALIDATE;
+ wc.ex.invalidate_rkey =
+ wqe->wr.ex.invalidate_rkey;
+ }
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
+ break;
+ default:
+ break;
+ }
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -2986,34 +3008,12 @@ do_write:
sge = &sqp->s_sge.sge;
while (sqp->s_len) {
- u32 len = sqp->s_len;
+ u32 len = rvt_get_sge_length(sge, sqp->s_len);
- if (len > sge->length)
- len = sge->length;
- if (len > sge->sge_length)
- len = sge->sge_length;
WARN_ON_ONCE(len == 0);
rvt_copy_sge(qp, &qp->r_sge, sge->vaddr,
len, release, copy_last);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (!release)
- rvt_put_mr(sge->mr);
- if (--sqp->s_sge.num_sge)
- *sge = *sqp->s_sge.sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
+ rvt_update_sge(&sqp->s_sge, len, !release);
sqp->s_len -= len;
}
if (release)
@@ -3039,6 +3039,7 @@ do_write:
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
+ spin_unlock_irqrestore(&qp->r_lock, flags);
spin_lock_irqsave(&sqp->s_lock, flags);
rvp->n_loop_pkts++;
flush_send:
@@ -3065,6 +3066,7 @@ rnr_nak:
}
if (sqp->s_rnr_retry_cnt < 7)
sqp->s_rnr_retry--;
+ spin_unlock_irqrestore(&qp->r_lock, flags);
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
goto clr_busy;
@@ -3078,7 +3080,10 @@ op_err:
goto err;
inv_err:
- send_status = IB_WC_REM_INV_REQ_ERR;
+ send_status =
+ sqp->ibqp.qp_type == IB_QPT_RC ?
+ IB_WC_REM_INV_REQ_ERR :
+ IB_WC_SUCCESS;
wc.status = IB_WC_LOC_QP_OP_ERR;
goto err;
@@ -3090,6 +3095,8 @@ err:
rvt_rc_error(qp, wc.status);
serr:
+ spin_unlock_irqrestore(&qp->r_lock, flags);
+serr_no_r_lock:
spin_lock_irqsave(&sqp->s_lock, flags);
rvt_send_complete(sqp, wqe, send_status);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c
index 6131cc558bdb..8d71647820a8 100644
--- a/drivers/infiniband/sw/rdmavt/rc.c
+++ b/drivers/infiniband/sw/rdmavt/rc.c
@@ -187,3 +187,16 @@ void rvt_get_credit(struct rvt_qp *qp, u32 aeth)
}
}
EXPORT_SYMBOL(rvt_get_credit);
+
+/* rvt_restart_sge - rewind the sge state for a wqe */
+u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len)
+{
+ ss->sge = wqe->sg_list[0];
+ ss->sg_list = wqe->sg_list + 1;
+ ss->num_sge = wqe->wr.num_sge;
+ ss->total_len = wqe->length;
+ rvt_skip_sge(ss, len, false);
+ return wqe->length - len;
+}
+EXPORT_SYMBOL(rvt_restart_sge);
+
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 78e06fc456c5..895b3fabd0bf 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -48,6 +48,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <rdma/uverbs_ioctl.h>
#include "srq.h"
#include "vt.h"
@@ -77,6 +78,8 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
struct ib_udata *udata)
{
struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
+ struct rvt_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct rvt_ucontext, ibucontext);
struct rvt_srq *srq;
u32 sz;
struct ib_srq *ret;
@@ -119,7 +122,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz;
srq->ip =
- rvt_create_mmap_info(dev, s, ibpd->uobject->context,
+ rvt_create_mmap_info(dev, s, &ucontext->ibucontext,
srq->rq.wq);
if (!srq->ip) {
ret = ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h
index df8e1adbef9d..e3c416c6f900 100644
--- a/drivers/infiniband/sw/rdmavt/trace_cq.h
+++ b/drivers/infiniband/sw/rdmavt/trace_cq.h
@@ -105,7 +105,7 @@ DEFINE_EVENT(rvt_cq_template, rvt_create_cq,
TP_ARGS(cq, attr));
#define CQ_PRN \
-"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x"
+"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x flags %x imm %x"
DECLARE_EVENT_CLASS(
rvt_cq_entry_template,
@@ -119,6 +119,8 @@ DECLARE_EVENT_CLASS(
__field(u32, qpn)
__field(u32, length)
__field(u32, idx)
+ __field(u32, flags)
+ __field(u32, imm)
),
TP_fast_assign(
RDI_DEV_ASSIGN(cq->rdi)
@@ -128,6 +130,8 @@ DECLARE_EVENT_CLASS(
__entry->length = wc->byte_len;
__entry->qpn = wc->qp->qp_num;
__entry->idx = idx;
+ __entry->flags = wc->wc_flags;
+ __entry->imm = be32_to_cpu(wc->ex.imm_data);
),
TP_printk(
CQ_PRN,
@@ -137,7 +141,9 @@ DECLARE_EVENT_CLASS(
__entry->status,
__entry->opcode, show_wc_opcode(__entry->opcode),
__entry->length,
- __entry->qpn
+ __entry->qpn,
+ __entry->flags,
+ __entry->imm
)
);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index aef3aa3fe667..42c9d35f832d 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -91,7 +91,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
{
struct rvt_dev_info *rdi;
- rdi = (struct rvt_dev_info *)ib_alloc_device(size);
+ rdi = container_of(_ib_alloc_device(size), struct rvt_dev_info, ibdev);
if (!rdi)
return rdi;
@@ -284,10 +284,6 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num,
&gid->global.interface_id);
}
-struct rvt_ucontext {
- struct ib_ucontext ibucontext;
-};
-
static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
*ibucontext)
{
@@ -296,28 +292,21 @@ static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
/**
* rvt_alloc_ucontext - Allocate a user context
- * @ibdev: Verbs IB dev
+ * @uctx: Verbs context
* @udata: User data allocated
*/
-static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+static int rvt_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
- struct rvt_ucontext *context;
-
- context = kmalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
- return &context->ibucontext;
+ return 0;
}
/**
- *rvt_dealloc_ucontext - Free a user context
- *@context - Free this
+ * rvt_dealloc_ucontext - Release a user context (nothing is freed here)
+ * @context: user context being released
*/
-static int rvt_dealloc_ucontext(struct ib_ucontext *context)
+static void rvt_dealloc_ucontext(struct ib_ucontext *context)
{
- kfree(to_iucontext(context));
- return 0;
+ return;
}
static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -436,6 +425,8 @@ static const struct ib_device_ops rvt_dev_ops = {
.req_notify_cq = rvt_req_notify_cq,
.resize_cq = rvt_resize_cq,
.unmap_fmr = rvt_unmap_fmr,
+ INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext),
};
static noinline int check_support(struct rvt_dev_info *rdi, int verb)
@@ -446,7 +437,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
* These functions are not part of verbs specifically but are
* required for rdmavt to function.
*/
- if ((!rdi->driver_f.port_callback) ||
+ if ((!rdi->ibdev.ops.init_port) ||
(!rdi->driver_f.get_pci_dev))
return -EINVAL;
break;
@@ -644,8 +635,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
rdi->ibdev.driver_id = driver_id;
/* We are now good to announce we exist */
- ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev),
- rdi->driver_f.port_callback);
+ ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev));
if (ret) {
rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
goto bail_wss;
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 383e65c7bbc0..a8c11b5e1e94 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <rdma/rdma_netlink.h>
#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
@@ -50,8 +51,10 @@ static void rxe_cleanup_ports(struct rxe_dev *rxe)
/* free resources for a rxe device all objects created for this device must
* have been destroyed
*/
-static void rxe_cleanup(struct rxe_dev *rxe)
+void rxe_dealloc(struct ib_device *ib_dev)
{
+ struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
+
rxe_pool_cleanup(&rxe->uc_pool);
rxe_pool_cleanup(&rxe->pd_pool);
rxe_pool_cleanup(&rxe->ah_pool);
@@ -65,16 +68,8 @@ static void rxe_cleanup(struct rxe_dev *rxe)
rxe_cleanup_ports(rxe);
- crypto_free_shash(rxe->tfm);
-}
-
-/* called when all references have been dropped */
-void rxe_release(struct kref *kref)
-{
- struct rxe_dev *rxe = container_of(kref, struct rxe_dev, ref_cnt);
-
- rxe_cleanup(rxe);
- ib_dealloc_device(&rxe->ib_dev);
+ if (rxe->tfm)
+ crypto_free_shash(rxe->tfm);
}
/* initialize rxe device parameters */
@@ -279,7 +274,6 @@ static int rxe_init(struct rxe_dev *rxe)
spin_lock_init(&rxe->mmap_offset_lock);
spin_lock_init(&rxe->pending_lock);
INIT_LIST_HEAD(&rxe->pending_mmaps);
- INIT_LIST_HEAD(&rxe->list);
mutex_init(&rxe->usdev_lock);
@@ -308,37 +302,46 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
/* called by ifc layer to create new rxe device.
* The caller should allocate memory for rxe by calling ib_alloc_device.
*/
-int rxe_add(struct rxe_dev *rxe, unsigned int mtu)
+int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name)
{
int err;
- kref_init(&rxe->ref_cnt);
-
err = rxe_init(rxe);
if (err)
- goto err1;
+ return err;
rxe_set_mtu(rxe, mtu);
- err = rxe_register_device(rxe);
- if (err)
- goto err1;
-
- return 0;
-
-err1:
- rxe_dev_put(rxe);
- return err;
+ return rxe_register_device(rxe, ibdev_name);
}
-/* called by the ifc layer to remove a device */
-void rxe_remove(struct rxe_dev *rxe)
+static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
{
- rxe_unregister_device(rxe);
+ struct rxe_dev *exists;
+ int err = 0;
+
+ exists = rxe_get_dev_from_net(ndev);
+ if (exists) {
+ ib_device_put(&exists->ib_dev);
+ pr_err("already configured on %s\n", ndev->name);
+ err = -EEXIST;
+ goto err;
+ }
- rxe_dev_put(rxe);
+ err = rxe_net_add(ibdev_name, ndev);
+ if (err) {
+ pr_err("failed to add %s\n", ndev->name);
+ goto err;
+ }
+err:
+ return err;
}
+static struct rdma_link_ops rxe_link_ops = {
+ .type = "rxe",
+ .newlink = rxe_newlink,
+};
+
static int __init rxe_module_init(void)
{
int err;
@@ -354,13 +357,15 @@ static int __init rxe_module_init(void)
if (err)
return err;
+ rdma_link_register(&rxe_link_ops);
pr_info("loaded\n");
return 0;
}
static void __exit rxe_module_exit(void)
{
- rxe_remove_all();
+ rdma_link_unregister(&rxe_link_ops);
+ ib_unregister_driver(RDMA_DRIVER_RXE);
rxe_net_exit();
rxe_cache_exit();
@@ -369,3 +374,5 @@ static void __exit rxe_module_exit(void)
late_initcall(rxe_module_init);
module_exit(rxe_module_exit);
+
+MODULE_ALIAS_RDMA_LINK("rxe");
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 5bde2ad964d2..2e2dff478833 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -95,18 +95,20 @@ static inline u32 rxe_crc32(struct rxe_dev *rxe,
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
-int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
-void rxe_remove(struct rxe_dev *rxe);
-void rxe_remove_all(void);
+int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name);
void rxe_rcv(struct sk_buff *skb);
-static inline void rxe_dev_put(struct rxe_dev *rxe)
+/* The caller must do a matching ib_device_put(&dev->ib_dev) */
+static inline struct rxe_dev *rxe_get_dev_from_net(struct net_device *ndev)
{
- kref_put(&rxe->ref_cnt, rxe_release);
+ struct ib_device *ibdev =
+ ib_device_get_by_netdev(ndev, RDMA_DRIVER_RXE);
+
+ if (!ibdev)
+ return NULL;
+ return container_of(ibdev, struct rxe_dev, ib_dev);
}
-struct rxe_dev *net_to_rxe(struct net_device *ndev);
-struct rxe_dev *get_rxe_by_name(const char *name);
void rxe_port_up(struct rxe_dev *rxe);
void rxe_port_down(struct rxe_dev *rxe);
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 26fe8d7dbc55..81ee756c19b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -34,6 +34,13 @@
#include "rxe.h"
#include "rxe_loc.h"
+void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av)
+{
+ rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
+ rxe_av_fill_ip_info(av, attr);
+ memcpy(av->dmac, attr->roce.dmac, ETH_ALEN);
+}
+
int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
{
struct rxe_port *port;
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index e996da67a851..00eb99d3df86 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -146,8 +146,7 @@ void retransmit_timer(struct timer_list *t)
}
}
-void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
- struct sk_buff *skb)
+void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
int must_sched;
@@ -155,7 +154,8 @@ void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
must_sched = skb_queue_len(&qp->resp_pkts) > 1;
if (must_sched != 0)
- rxe_counter_inc(rxe, RXE_CNT_COMPLETER_SCHED);
+ rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
+
rxe_run_task(&qp->comp.task, must_sched);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 01b74597b36a..3d8cef836f0d 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -35,6 +35,7 @@
#define RXE_LOC_H
/* rxe_av.c */
+void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av);
int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr);
@@ -231,7 +232,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct rxe_modify_srq_cmd *ucmd);
-void rxe_release(struct kref *kref);
+void rxe_dealloc(struct ib_device *ib_dev);
int rxe_completer(void *arg);
int rxe_requester(void *arg);
@@ -239,11 +240,9 @@ int rxe_responder(void *arg);
u32 rxe_icrc_hdr(struct rxe_pkt_info *pkt, struct sk_buff *skb);
-void rxe_resp_queue_pkt(struct rxe_dev *rxe,
- struct rxe_qp *qp, struct sk_buff *skb);
+void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb);
-void rxe_comp_queue_pkt(struct rxe_dev *rxe,
- struct rxe_qp *qp, struct sk_buff *skb);
+void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb);
static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
{
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 9d3916b93f23..42f0f25e396c 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -162,16 +162,15 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
u64 length, u64 iova, int access, struct ib_udata *udata,
struct rxe_mem *mem)
{
- int entry;
struct rxe_map **map;
struct rxe_phys_buf *buf = NULL;
struct ib_umem *umem;
- struct scatterlist *sg;
+ struct sg_page_iter sg_iter;
int num_buf;
void *vaddr;
int err;
- umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0);
+ umem = ib_umem_get(udata, start, length, access, 0);
if (IS_ERR(umem)) {
pr_warn("err %d from rxe_umem_get\n",
(int)PTR_ERR(umem));
@@ -191,16 +190,16 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
goto err1;
}
- mem->page_shift = umem->page_shift;
- mem->page_mask = BIT(umem->page_shift) - 1;
+ mem->page_shift = PAGE_SHIFT;
+ mem->page_mask = PAGE_SIZE - 1;
num_buf = 0;
map = mem->map;
if (length > 0) {
buf = map[0]->buf;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- vaddr = page_address(sg_page(sg));
+ for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
+ vaddr = page_address(sg_page_iter_page(&sg_iter));
if (!vaddr) {
pr_warn("null vaddr\n");
err = -ENOMEM;
@@ -208,7 +207,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
}
buf->addr = (uintptr_t)vaddr;
- buf->size = BIT(umem->page_shift);
+ buf->size = PAGE_SIZE;
num_buf++;
buf++;
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 8fd03ae20efc..753cabcd441c 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -45,43 +45,6 @@
#include "rxe_net.h"
#include "rxe_loc.h"
-static LIST_HEAD(rxe_dev_list);
-static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */
-
-struct rxe_dev *net_to_rxe(struct net_device *ndev)
-{
- struct rxe_dev *rxe;
- struct rxe_dev *found = NULL;
-
- spin_lock_bh(&dev_list_lock);
- list_for_each_entry(rxe, &rxe_dev_list, list) {
- if (rxe->ndev == ndev) {
- found = rxe;
- break;
- }
- }
- spin_unlock_bh(&dev_list_lock);
-
- return found;
-}
-
-struct rxe_dev *get_rxe_by_name(const char *name)
-{
- struct rxe_dev *rxe;
- struct rxe_dev *found = NULL;
-
- spin_lock_bh(&dev_list_lock);
- list_for_each_entry(rxe, &rxe_dev_list, list) {
- if (!strcmp(name, dev_name(&rxe->ib_dev.dev))) {
- found = rxe;
- break;
- }
- }
- spin_unlock_bh(&dev_list_lock);
- return found;
-}
-
-
static struct rxe_recv_sockets recv_sockets;
struct device *rxe_dma_device(struct rxe_dev *rxe)
@@ -229,18 +192,19 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
struct udphdr *udph;
struct net_device *ndev = skb->dev;
struct net_device *rdev = ndev;
- struct rxe_dev *rxe = net_to_rxe(ndev);
+ struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
if (!rxe && is_vlan_dev(rdev)) {
rdev = vlan_dev_real_dev(ndev);
- rxe = net_to_rxe(rdev);
+ rxe = rxe_get_dev_from_net(rdev);
}
if (!rxe)
goto drop;
if (skb_linearize(skb)) {
pr_err("skb_linearize failed\n");
+ ib_device_put(&rxe->ib_dev);
goto drop;
}
@@ -253,6 +217,12 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
rxe_rcv(skb);
+ /*
+ * FIXME: this is in the wrong place, it needs to be done when pkt is
+ * destroyed
+ */
+ ib_device_put(&rxe->ib_dev);
+
return 0;
drop:
kfree_skb(skb);
@@ -384,9 +354,6 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb,
return -EHOSTUNREACH;
}
- if (!memcmp(saddr, daddr, sizeof(*daddr)))
- pkt->mask |= RXE_LOOPBACK_MASK;
-
prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
cpu_to_be16(ROCE_V2_UDP_DPORT));
@@ -411,9 +378,6 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb,
return -EHOSTUNREACH;
}
- if (!memcmp(saddr, daddr, sizeof(*daddr)))
- pkt->mask |= RXE_LOOPBACK_MASK;
-
prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
cpu_to_be16(ROCE_V2_UDP_DPORT));
@@ -437,6 +401,9 @@ int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc)
*crc = rxe_icrc_hdr(pkt, skb);
+ if (ether_addr_equal(skb->dev->dev_addr, av->dmac))
+ pkt->mask |= RXE_LOOPBACK_MASK;
+
return err;
}
@@ -550,42 +517,24 @@ enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num)
return IB_LINK_LAYER_ETHERNET;
}
-struct rxe_dev *rxe_net_add(struct net_device *ndev)
+int rxe_net_add(const char *ibdev_name, struct net_device *ndev)
{
int err;
struct rxe_dev *rxe = NULL;
- rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));
+ rxe = ib_alloc_device(rxe_dev, ib_dev);
if (!rxe)
- return NULL;
+ return -ENOMEM;
rxe->ndev = ndev;
- err = rxe_add(rxe, ndev->mtu);
+ err = rxe_add(rxe, ndev->mtu, ibdev_name);
if (err) {
ib_dealloc_device(&rxe->ib_dev);
- return NULL;
+ return err;
}
- spin_lock_bh(&dev_list_lock);
- list_add_tail(&rxe->list, &rxe_dev_list);
- spin_unlock_bh(&dev_list_lock);
- return rxe;
-}
-
-void rxe_remove_all(void)
-{
- spin_lock_bh(&dev_list_lock);
- while (!list_empty(&rxe_dev_list)) {
- struct rxe_dev *rxe =
- list_first_entry(&rxe_dev_list, struct rxe_dev, list);
-
- list_del(&rxe->list);
- spin_unlock_bh(&dev_list_lock);
- rxe_remove(rxe);
- spin_lock_bh(&dev_list_lock);
- }
- spin_unlock_bh(&dev_list_lock);
+ return 0;
}
static void rxe_port_event(struct rxe_dev *rxe,
@@ -638,15 +587,14 @@ static int rxe_notify(struct notifier_block *not_blk,
void *arg)
{
struct net_device *ndev = netdev_notifier_info_to_dev(arg);
- struct rxe_dev *rxe = net_to_rxe(ndev);
+ struct rxe_dev *rxe = rxe_get_dev_from_net(ndev);
if (!rxe)
- goto out;
+ return NOTIFY_OK;
switch (event) {
case NETDEV_UNREGISTER:
- list_del(&rxe->list);
- rxe_remove(rxe);
+ ib_unregister_device_queued(&rxe->ib_dev);
break;
case NETDEV_UP:
rxe_port_up(rxe);
@@ -671,7 +619,8 @@ static int rxe_notify(struct notifier_block *not_blk,
event, ndev->name);
break;
}
-out:
+
+ ib_device_put(&rxe->ib_dev);
return NOTIFY_OK;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 106c586dbb26..2ca71d3d245c 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -43,7 +43,7 @@ struct rxe_recv_sockets {
struct socket *sk6;
};
-struct rxe_dev *rxe_net_add(struct net_device *ndev);
+int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
int rxe_net_init(void);
void rxe_net_exit(void);
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index bdea899a58ac..1abed47ca221 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -78,7 +78,8 @@ enum rxe_device_param {
| IB_DEVICE_SYS_IMAGE_GUID
| IB_DEVICE_RC_RNR_NAK_GEN
| IB_DEVICE_SRQ_RESIZE
- | IB_DEVICE_MEM_MGT_EXTENSIONS,
+ | IB_DEVICE_MEM_MGT_EXTENSIONS
+ | IB_DEVICE_ALLOW_USER_UNREG,
RXE_MAX_SGE = 32,
RXE_MAX_SGE_RD = 32,
RXE_MAX_CQ = 16384,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index b5c91df22047..120fa9005954 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -42,10 +42,12 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_UC] = {
.name = "rxe-uc",
.size = sizeof(struct rxe_ucontext),
+ .flags = RXE_POOL_NO_ALLOC,
},
[RXE_TYPE_PD] = {
.name = "rxe-pd",
.size = sizeof(struct rxe_pd),
+ .flags = RXE_POOL_NO_ALLOC,
},
[RXE_TYPE_AH] = {
.name = "rxe-ah",
@@ -119,8 +121,10 @@ static void rxe_cache_clean(size_t cnt)
for (i = 0; i < cnt; i++) {
type = &rxe_type_info[i];
- kmem_cache_destroy(type->cache);
- type->cache = NULL;
+ if (!(type->flags & RXE_POOL_NO_ALLOC)) {
+ kmem_cache_destroy(type->cache);
+ type->cache = NULL;
+ }
}
}
@@ -134,14 +138,17 @@ int rxe_cache_init(void)
for (i = 0; i < RXE_NUM_TYPES; i++) {
type = &rxe_type_info[i];
size = ALIGN(type->size, RXE_POOL_ALIGN);
- type->cache = kmem_cache_create(type->name, size,
- RXE_POOL_ALIGN,
- RXE_POOL_CACHE_FLAGS, NULL);
- if (!type->cache) {
- pr_err("Unable to init kmem cache for %s\n",
- type->name);
- err = -ENOMEM;
- goto err1;
+ if (!(type->flags & RXE_POOL_NO_ALLOC)) {
+ type->cache =
+ kmem_cache_create(type->name, size,
+ RXE_POOL_ALIGN,
+ RXE_POOL_CACHE_FLAGS, NULL);
+ if (!type->cache) {
+ pr_err("Unable to init kmem cache for %s\n",
+ type->name);
+ err = -ENOMEM;
+ goto err1;
+ }
}
}
@@ -392,29 +399,64 @@ void *rxe_alloc(struct rxe_pool *pool)
kref_get(&pool->ref_cnt);
read_unlock_irqrestore(&pool->pool_lock, flags);
- kref_get(&pool->rxe->ref_cnt);
+ if (!ib_device_try_get(&pool->rxe->ib_dev))
+ goto out_put_pool;
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
- goto out_put_pool;
+ goto out_cnt;
elem = kmem_cache_zalloc(pool_cache(pool),
(pool->flags & RXE_POOL_ATOMIC) ?
GFP_ATOMIC : GFP_KERNEL);
if (!elem)
- goto out_put_pool;
+ goto out_cnt;
elem->pool = pool;
kref_init(&elem->ref_cnt);
return elem;
-out_put_pool:
+out_cnt:
atomic_dec(&pool->num_elem);
- rxe_dev_put(pool->rxe);
+ ib_device_put(&pool->rxe->ib_dev);
+out_put_pool:
rxe_pool_put(pool);
return NULL;
}
+int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
+{
+ unsigned long flags;
+
+ might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
+
+ read_lock_irqsave(&pool->pool_lock, flags);
+ if (pool->state != RXE_POOL_STATE_VALID) {
+ read_unlock_irqrestore(&pool->pool_lock, flags);
+ return -EINVAL;
+ }
+ kref_get(&pool->ref_cnt);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
+
+ if (!ib_device_try_get(&pool->rxe->ib_dev))
+ goto out_put_pool;
+
+ if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
+ goto out_cnt;
+
+ elem->pool = pool;
+ kref_init(&elem->ref_cnt);
+
+ return 0;
+
+out_cnt:
+ atomic_dec(&pool->num_elem);
+ ib_device_put(&pool->rxe->ib_dev);
+out_put_pool:
+ rxe_pool_put(pool);
+ return -EINVAL;
+}
+
void rxe_elem_release(struct kref *kref)
{
struct rxe_pool_entry *elem =
@@ -424,9 +466,10 @@ void rxe_elem_release(struct kref *kref)
if (pool->cleanup)
pool->cleanup(elem);
- kmem_cache_free(pool_cache(pool), elem);
+ if (!(pool->flags & RXE_POOL_NO_ALLOC))
+ kmem_cache_free(pool_cache(pool), elem);
atomic_dec(&pool->num_elem);
- rxe_dev_put(pool->rxe);
+ ib_device_put(&pool->rxe->ib_dev);
rxe_pool_put(pool);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 72968c29e01f..2f2cff1cbe43 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -41,6 +41,7 @@ enum rxe_pool_flags {
RXE_POOL_ATOMIC = BIT(0),
RXE_POOL_INDEX = BIT(1),
RXE_POOL_KEY = BIT(2),
+ RXE_POOL_NO_ALLOC = BIT(4),
};
enum rxe_elem_type {
@@ -131,6 +132,9 @@ void rxe_pool_cleanup(struct rxe_pool *pool);
/* allocate an object from pool */
void *rxe_alloc(struct rxe_pool *pool);
+/* connect already allocated object to pool */
+int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
+
/* assign an index to an indexed object and insert object into
* pool's rb tree
*/
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index fd86fd2fbb26..09ede70dc1e8 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -35,6 +35,7 @@
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
+#include <rdma/uverbs_ioctl.h>
#include "rxe.h"
#include "rxe_loc.h"
@@ -343,7 +344,8 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct rxe_cq *rcq = to_rcq(init->recv_cq);
struct rxe_cq *scq = to_rcq(init->send_cq);
struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
- struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
+ struct rxe_ucontext *ucontext =
+ rdma_udata_to_drv_context(udata, struct rxe_ucontext, ibuc);
rxe_add_ref(pd);
rxe_add_ref(rcq);
@@ -358,11 +360,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
rxe_qp_init_misc(rxe, qp, init);
- err = rxe_qp_init_req(rxe, qp, init, context, uresp);
+ err = rxe_qp_init_req(rxe, qp, init, &ucontext->ibuc, uresp);
if (err)
goto err1;
- err = rxe_qp_init_resp(rxe, qp, init, context, uresp);
+ err = rxe_qp_init_resp(rxe, qp, init, &ucontext->ibuc, uresp);
if (err)
goto err2;
@@ -631,14 +633,11 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
qp->attr.qkey = attr->qkey;
if (mask & IB_QP_AV) {
- rxe_av_from_attr(attr->port_num, &qp->pri_av, &attr->ah_attr);
- rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr);
+ rxe_init_av(&attr->ah_attr, &qp->pri_av);
}
if (mask & IB_QP_ALT_PATH) {
- rxe_av_from_attr(attr->alt_port_num, &qp->alt_av,
- &attr->alt_ah_attr);
- rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr);
+ rxe_init_av(&attr->alt_ah_attr, &qp->alt_av);
qp->attr.alt_port_num = attr->alt_port_num;
qp->attr.alt_pkey_index = attr->alt_pkey_index;
qp->attr.alt_timeout = attr->alt_timeout;
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 5c29a1bb575a..f9a492ed900b 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -266,14 +266,12 @@ err1:
return -EINVAL;
}
-static inline void rxe_rcv_pkt(struct rxe_dev *rxe,
- struct rxe_pkt_info *pkt,
- struct sk_buff *skb)
+static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb)
{
if (pkt->mask & RXE_REQ_MASK)
- rxe_resp_queue_pkt(rxe, pkt->qp, skb);
+ rxe_resp_queue_pkt(pkt->qp, skb);
else
- rxe_comp_queue_pkt(rxe, pkt->qp, skb);
+ rxe_comp_queue_pkt(pkt->qp, skb);
}
static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
@@ -319,7 +317,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
pkt->qp = qp;
rxe_add_ref(qp);
- rxe_rcv_pkt(rxe, pkt, skb);
+ rxe_rcv_pkt(pkt, skb);
}
spin_unlock_bh(&mcg->mcg_lock);
@@ -411,7 +409,7 @@ void rxe_rcv(struct sk_buff *skb)
if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
rxe_rcv_mcast_pkt(rxe, skb);
else
- rxe_rcv_pkt(rxe, pkt, skb);
+ rxe_rcv_pkt(pkt, skb);
return;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 231528188250..aca9f60f9b21 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -104,8 +104,7 @@ static char *resp_state_name[] = {
};
/* rxe_recv calls here to add a request packet to the input queue */
-void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
- struct sk_buff *skb)
+void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
int must_sched;
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index 95a15892f7e6..ccda5f5a3bc0 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -58,41 +58,37 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
int len;
int err = 0;
char intf[32];
- struct net_device *ndev = NULL;
- struct rxe_dev *rxe;
+ struct net_device *ndev;
+ struct rxe_dev *exists;
len = sanitize_arg(val, intf, sizeof(intf));
if (!len) {
pr_err("add: invalid interface name\n");
- err = -EINVAL;
- goto err;
+ return -EINVAL;
}
ndev = dev_get_by_name(&init_net, intf);
if (!ndev) {
pr_err("interface %s not found\n", intf);
- err = -EINVAL;
- goto err;
+ return -EINVAL;
}
- if (net_to_rxe(ndev)) {
+ exists = rxe_get_dev_from_net(ndev);
+ if (exists) {
+ ib_device_put(&exists->ib_dev);
pr_err("already configured on %s\n", intf);
err = -EINVAL;
goto err;
}
- rxe = rxe_net_add(ndev);
- if (!rxe) {
+ err = rxe_net_add("rxe%d", ndev);
+ if (err) {
pr_err("failed to add %s\n", intf);
- err = -EINVAL;
goto err;
}
- rxe_set_port_state(rxe);
- dev_info(&rxe->ib_dev.dev, "added %s\n", intf);
err:
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
return err;
}
@@ -100,7 +96,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
{
int len;
char intf[32];
- struct rxe_dev *rxe;
+ struct ib_device *ib_dev;
len = sanitize_arg(val, intf, sizeof(intf));
if (!len) {
@@ -110,19 +106,17 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
if (strncmp("all", intf, len) == 0) {
pr_info("rxe_sys: remove all");
- rxe_remove_all();
+ ib_unregister_driver(RDMA_DRIVER_RXE);
return 0;
}
- rxe = get_rxe_by_name(intf);
-
- if (!rxe) {
+ ib_dev = ib_device_get_by_name(intf, RDMA_DRIVER_RXE);
+ if (!ib_dev) {
pr_err("not configured on %s\n", intf);
return -EINVAL;
}
- list_del(&rxe->list);
- rxe_remove(rxe);
+ ib_unregister_device_and_put(ib_dev);
return 0;
}
@@ -136,6 +130,6 @@ static const struct kernel_param_ops rxe_remove_ops = {
};
module_param_cb(add, &rxe_add_ops, NULL, 0200);
-MODULE_PARM_DESC(add, "Create RXE device over network interface");
+MODULE_PARM_DESC(add, "DEPRECATED. Create RXE device over network interface");
module_param_cb(remove, &rxe_remove_ops, NULL, 0200);
-MODULE_PARM_DESC(remove, "Remove RXE device over network interface");
+MODULE_PARM_DESC(remove, "DEPRECATED. Remove RXE device over network interface");
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index b20e6e0415f5..6ecf28570ff0 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -33,6 +33,7 @@
#include <linux/dma-mapping.h>
#include <net/addrconf.h>
+#include <rdma/uverbs_ioctl.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
@@ -79,19 +80,6 @@ static int rxe_query_port(struct ib_device *dev,
return rc;
}
-static struct net_device *rxe_get_netdev(struct ib_device *device,
- u8 port_num)
-{
- struct rxe_dev *rxe = to_rdev(device);
-
- if (rxe->ndev) {
- dev_hold(rxe->ndev);
- return rxe->ndev;
- }
-
- return NULL;
-}
-
static int rxe_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey)
{
@@ -154,22 +142,19 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
return rxe_link_layer(rxe, port_num);
}
-static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
- struct ib_udata *udata)
+static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
- struct rxe_dev *rxe = to_rdev(dev);
- struct rxe_ucontext *uc;
+ struct rxe_dev *rxe = to_rdev(uctx->device);
+ struct rxe_ucontext *uc = to_ruc(uctx);
- uc = rxe_alloc(&rxe->uc_pool);
- return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
+ return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem);
}
-static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
+static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
struct rxe_ucontext *uc = to_ruc(ibuc);
rxe_drop_ref(uc);
- return 0;
}
static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
@@ -191,30 +176,20 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
return 0;
}
-static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+ struct ib_udata *udata)
{
- struct rxe_dev *rxe = to_rdev(dev);
- struct rxe_pd *pd;
+ struct rxe_dev *rxe = to_rdev(ibpd->device);
+ struct rxe_pd *pd = to_rpd(ibpd);
- pd = rxe_alloc(&rxe->pd_pool);
- return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
+ return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem);
}
-static int rxe_dealloc_pd(struct ib_pd *ibpd)
+static void rxe_dealloc_pd(struct ib_pd *ibpd)
{
struct rxe_pd *pd = to_rpd(ibpd);
rxe_drop_ref(pd);
- return 0;
-}
-
-static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
- struct rxe_av *av)
-{
- rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
- rxe_av_fill_ip_info(av, attr);
}
static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
@@ -239,7 +214,7 @@ static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
rxe_add_ref(pd);
ah->pd = pd;
- rxe_init_av(rxe, attr, &ah->av);
+ rxe_init_av(attr, &ah->av);
return &ah->ibah;
}
@@ -253,7 +228,7 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
if (err)
return err;
- rxe_init_av(rxe, attr, &ah->av);
+ rxe_init_av(attr, &ah->av);
return 0;
}
@@ -330,8 +305,9 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
int err;
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_pd *pd = to_rpd(ibpd);
+ struct rxe_ucontext *ucontext =
+ rdma_udata_to_drv_context(udata, struct rxe_ucontext, ibuc);
struct rxe_srq *srq;
- struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
struct rxe_create_srq_resp __user *uresp = NULL;
if (udata) {
@@ -354,7 +330,7 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
rxe_add_ref(pd);
srq->pd = pd;
- err = rxe_srq_from_init(rxe, srq, init, context, uresp);
+ err = rxe_srq_from_init(rxe, srq, init, &ucontext->ibuc, uresp);
if (err)
goto err2;
@@ -1129,8 +1105,8 @@ static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
static ssize_t parent_show(struct device *device,
struct device_attribute *attr, char *buf)
{
- struct rxe_dev *rxe = container_of(device, struct rxe_dev,
- ib_dev.dev);
+ struct rxe_dev *rxe =
+ rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);
return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}
@@ -1146,6 +1122,15 @@ static const struct attribute_group rxe_attr_group = {
.attrs = rxe_dev_attributes,
};
+static int rxe_enable_driver(struct ib_device *ib_dev)
+{
+ struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
+
+ rxe_set_port_state(rxe);
+ dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));
+ return 0;
+}
+
static const struct ib_device_ops rxe_dev_ops = {
.alloc_hw_stats = rxe_ib_alloc_hw_stats,
.alloc_mr = rxe_alloc_mr,
@@ -1156,6 +1141,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.create_cq = rxe_create_cq,
.create_qp = rxe_create_qp,
.create_srq = rxe_create_srq,
+ .dealloc_driver = rxe_dealloc,
.dealloc_pd = rxe_dealloc_pd,
.dealloc_ucontext = rxe_dealloc_ucontext,
.dereg_mr = rxe_dereg_mr,
@@ -1164,10 +1150,10 @@ static const struct ib_device_ops rxe_dev_ops = {
.destroy_qp = rxe_destroy_qp,
.destroy_srq = rxe_destroy_srq,
.detach_mcast = rxe_detach_mcast,
+ .enable_driver = rxe_enable_driver,
.get_dma_mr = rxe_get_dma_mr,
.get_hw_stats = rxe_ib_get_hw_stats,
.get_link_layer = rxe_get_link_layer,
- .get_netdev = rxe_get_netdev,
.get_port_immutable = rxe_port_immutable,
.map_mr_sg = rxe_map_mr_sg,
.mmap = rxe_mmap,
@@ -1190,9 +1176,11 @@ static const struct ib_device_ops rxe_dev_ops = {
.reg_user_mr = rxe_reg_user_mr,
.req_notify_cq = rxe_req_notify_cq,
.resize_cq = rxe_resize_cq,
+ INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
};
-int rxe_register_device(struct rxe_dev *rxe)
+int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
{
int err;
struct ib_device *dev = &rxe->ib_dev;
@@ -1247,6 +1235,9 @@ int rxe_register_device(struct rxe_dev *rxe)
;
ib_set_device_ops(dev, &rxe_dev_ops);
+ err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
+ if (err)
+ return err;
tfm = crypto_alloc_shash("crc32", 0, 0);
if (IS_ERR(tfm)) {
@@ -1258,23 +1249,13 @@ int rxe_register_device(struct rxe_dev *rxe)
rdma_set_device_sysfs_group(dev, &rxe_attr_group);
dev->driver_id = RDMA_DRIVER_RXE;
- err = ib_register_device(dev, "rxe%d", NULL);
- if (err) {
+ err = ib_register_device(dev, ibdev_name);
+ if (err)
pr_warn("%s failed with error %d\n", __func__, err);
- goto err1;
- }
-
- return 0;
-
-err1:
- crypto_free_shash(rxe->tfm);
+ /*
+ * Note that rxe may be invalid at this point if another thread
+ * unregistered it.
+ */
return err;
}
-
-void rxe_unregister_device(struct rxe_dev *rxe)
-{
- struct ib_device *dev = &rxe->ib_dev;
-
- ib_unregister_device(dev);
-}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 74e04801d34d..157e51aeb1e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -61,13 +61,13 @@ static inline int psn_compare(u32 psn_a, u32 psn_b)
}
struct rxe_ucontext {
+ struct ib_ucontext ibuc;
struct rxe_pool_entry pelem;
- struct ib_ucontext ibuc;
};
struct rxe_pd {
+ struct ib_pd ibpd;
struct rxe_pool_entry pelem;
- struct ib_pd ibpd;
};
struct rxe_ah {
@@ -385,7 +385,6 @@ struct rxe_dev {
struct ib_device_attr attr;
int max_ucontext;
int max_inline_data;
- struct kref ref_cnt;
struct mutex usdev_lock;
struct net_device *ndev;
@@ -412,7 +411,6 @@ struct rxe_dev {
atomic64_t stats_counters[RXE_NUM_OF_COUNTERS];
struct rxe_port port;
- struct list_head list;
struct crypto_shash *tfm;
};
@@ -466,8 +464,7 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL;
}
-int rxe_register_device(struct rxe_dev *rxe);
-void rxe_unregister_device(struct rxe_dev *rxe);
+int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
void rxe_mc_cleanup(struct rxe_pool_entry *arg);