aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/sw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--drivers/infiniband/sw/rdmavt/Kconfig2
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c677
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.h2
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_tx.h42
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c15
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c39
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c35
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c49
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c55
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c18
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c17
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c10
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c10
-rw-r--r--drivers/infiniband/sw/rxe/rxe_sysfs.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h4
22 files changed, 874 insertions, 168 deletions
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
index 98e798007f75..7df896a18d38 100644
--- a/drivers/infiniband/sw/rdmavt/Kconfig
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_RDMAVT
tristate "RDMA verbs transport library"
- depends on 64BIT && ARCH_DMA_ADDR_T_64BIT
+ depends on X86_64 && ARCH_DMA_ADDR_T_64BIT
depends on PCI
select DMA_VIRT_OPS
---help---
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 5ce403c6cddb..1735deb1a9d4 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -118,6 +118,187 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
};
EXPORT_SYMBOL(ib_rvt_state_ops);
+/* platform specific: return the last level cache (llc) size, in KiB */
+static int rvt_wss_llc_size(void)
+{
+ /* assume that the boot CPU value is universal for all CPUs */
+ return boot_cpu_data.x86_cache_size;
+}
+
+/* platform specific: cacheless copy */
+static void cacheless_memcpy(void *dst, void *src, size_t n)
+{
+ /*
+ * Use the only available X64 cacheless copy. Add a __user cast
+ * to quiet sparse. The src agument is already in the kernel so
+ * there are no security issues. The extra fault recovery machinery
+ * is not invoked.
+ */
+ __copy_user_nocache(dst, (void __user *)src, n, 0);
+}
+
+void rvt_wss_exit(struct rvt_dev_info *rdi)
+{
+ struct rvt_wss *wss = rdi->wss;
+
+ if (!wss)
+ return;
+
+ /* coded to handle partially initialized and repeat callers */
+ kfree(wss->entries);
+ wss->entries = NULL;
+ kfree(rdi->wss);
+ rdi->wss = NULL;
+}
+
+/**
+ * rvt_wss_init - Init wss data structures
+ *
+ * Return: 0 on success
+ */
+int rvt_wss_init(struct rvt_dev_info *rdi)
+{
+ unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode;
+ unsigned int wss_threshold = rdi->dparms.wss_threshold;
+ unsigned int wss_clean_period = rdi->dparms.wss_clean_period;
+ long llc_size;
+ long llc_bits;
+ long table_size;
+ long table_bits;
+ struct rvt_wss *wss;
+ int node = rdi->dparms.node;
+
+ if (sge_copy_mode != RVT_SGE_COPY_ADAPTIVE) {
+ rdi->wss = NULL;
+ return 0;
+ }
+
+ rdi->wss = kzalloc_node(sizeof(*rdi->wss), GFP_KERNEL, node);
+ if (!rdi->wss)
+ return -ENOMEM;
+ wss = rdi->wss;
+
+ /* check for a valid percent range - default to 80 if none or invalid */
+ if (wss_threshold < 1 || wss_threshold > 100)
+ wss_threshold = 80;
+
+ /* reject a wildly large period */
+ if (wss_clean_period > 1000000)
+ wss_clean_period = 256;
+
+ /* reject a zero period */
+ if (wss_clean_period == 0)
+ wss_clean_period = 1;
+
+ /*
+ * Calculate the table size - the next power of 2 larger than the
+ * LLC size. LLC size is in KiB.
+ */
+ llc_size = rvt_wss_llc_size() * 1024;
+ table_size = roundup_pow_of_two(llc_size);
+
+ /* one bit per page in rounded up table */
+ llc_bits = llc_size / PAGE_SIZE;
+ table_bits = table_size / PAGE_SIZE;
+ wss->pages_mask = table_bits - 1;
+ wss->num_entries = table_bits / BITS_PER_LONG;
+
+ wss->threshold = (llc_bits * wss_threshold) / 100;
+ if (wss->threshold == 0)
+ wss->threshold = 1;
+
+ wss->clean_period = wss_clean_period;
+ atomic_set(&wss->clean_counter, wss_clean_period);
+
+ wss->entries = kcalloc_node(wss->num_entries, sizeof(*wss->entries),
+ GFP_KERNEL, node);
+ if (!wss->entries) {
+ rvt_wss_exit(rdi);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Advance the clean counter. When the clean period has expired,
+ * clean an entry.
+ *
+ * This is implemented in atomics to avoid locking. Because multiple
+ * variables are involved, it can be racy which can lead to slightly
+ * inaccurate information. Since this is only a heuristic, this is
+ * OK. Any innaccuracies will clean themselves out as the counter
+ * advances. That said, it is unlikely the entry clean operation will
+ * race - the next possible racer will not start until the next clean
+ * period.
+ *
+ * The clean counter is implemented as a decrement to zero. When zero
+ * is reached an entry is cleaned.
+ */
+static void wss_advance_clean_counter(struct rvt_wss *wss)
+{
+ int entry;
+ int weight;
+ unsigned long bits;
+
+ /* become the cleaner if we decrement the counter to zero */
+ if (atomic_dec_and_test(&wss->clean_counter)) {
+ /*
+ * Set, not add, the clean period. This avoids an issue
+ * where the counter could decrement below the clean period.
+ * Doing a set can result in lost decrements, slowing the
+ * clean advance. Since this a heuristic, this possible
+ * slowdown is OK.
+ *
+ * An alternative is to loop, advancing the counter by a
+ * clean period until the result is > 0. However, this could
+ * lead to several threads keeping another in the clean loop.
+ * This could be mitigated by limiting the number of times
+ * we stay in the loop.
+ */
+ atomic_set(&wss->clean_counter, wss->clean_period);
+
+ /*
+ * Uniquely grab the entry to clean and move to next.
+ * The current entry is always the lower bits of
+ * wss.clean_entry. The table size, wss.num_entries,
+ * is always a power-of-2.
+ */
+ entry = (atomic_inc_return(&wss->clean_entry) - 1)
+ & (wss->num_entries - 1);
+
+ /* clear the entry and count the bits */
+ bits = xchg(&wss->entries[entry], 0);
+ weight = hweight64((u64)bits);
+ /* only adjust the contended total count if needed */
+ if (weight)
+ atomic_sub(weight, &wss->total_count);
+ }
+}
+
+/*
+ * Insert the given address into the working set array.
+ */
+static void wss_insert(struct rvt_wss *wss, void *address)
+{
+ u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss->pages_mask;
+ u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
+ u32 nr = page & (BITS_PER_LONG - 1);
+
+ if (!test_and_set_bit(nr, &wss->entries[entry]))
+ atomic_inc(&wss->total_count);
+
+ wss_advance_clean_counter(wss);
+}
+
+/*
+ * Is the working set larger than the threshold?
+ */
+static inline bool wss_exceeds_threshold(struct rvt_wss *wss)
+{
+ return atomic_read(&wss->total_count) >= wss->threshold;
+}
+
static void get_map_page(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map)
{
@@ -1164,11 +1345,8 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int lastwqe = 0;
int mig = 0;
int pmtu = 0; /* for gcc warning only */
- enum rdma_link_layer link;
int opa_ah;
- link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
-
spin_lock_irq(&qp->r_lock);
spin_lock(&qp->s_hlock);
spin_lock(&qp->s_lock);
@@ -1179,7 +1357,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num);
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask, link))
+ attr_mask))
goto inval;
if (rdi->driver_f.check_modify_qp &&
@@ -1718,7 +1896,7 @@ static inline int rvt_qp_is_avail(
*/
static int rvt_post_one_wr(struct rvt_qp *qp,
const struct ib_send_wr *wr,
- int *call_send)
+ bool *call_send)
{
struct rvt_swqe *wqe;
u32 next;
@@ -1823,15 +2001,11 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
wqe->wr.num_sge = j;
}
- /* general part of wqe valid - allow for driver checks */
- if (rdi->driver_f.check_send_wqe) {
- ret = rdi->driver_f.check_send_wqe(qp, wqe);
- if (ret < 0)
- goto bail_inval_free;
- if (ret)
- *call_send = ret;
- }
-
+ /*
+ * Calculate and set SWQE PSN values prior to handing it off
+ * to the driver's check routine. This give the driver the
+ * opportunity to adjust PSN values based on internal checks.
+ */
log_pmtu = qp->log_pmtu;
if (qp->ibqp.qp_type != IB_QPT_UC &&
qp->ibqp.qp_type != IB_QPT_RC) {
@@ -1856,8 +2030,18 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
(wqe->length ?
((wqe->length - 1) >> log_pmtu) :
0);
- qp->s_next_psn = wqe->lpsn + 1;
}
+
+ /* general part of wqe valid - allow for driver checks */
+ if (rdi->driver_f.setup_wqe) {
+ ret = rdi->driver_f.setup_wqe(qp, wqe, call_send);
+ if (ret < 0)
+ goto bail_inval_free_ref;
+ }
+
+ if (!(rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL))
+ qp->s_next_psn = wqe->lpsn + 1;
+
if (unlikely(reserved_op)) {
wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
rvt_qp_wqe_reserve(qp, wqe);
@@ -1871,6 +2055,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
return 0;
+bail_inval_free_ref:
+ if (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC)
+ atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
bail_inval_free:
/* release mr holds */
while (j) {
@@ -1897,7 +2085,7 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
unsigned long flags = 0;
- int call_send;
+ bool call_send;
unsigned nreq = 0;
int err = 0;
@@ -1930,7 +2118,11 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
bail:
spin_unlock_irqrestore(&qp->s_hlock, flags);
if (nreq) {
- if (call_send)
+ /*
+ * Only call do_send if there is exactly one packet, and the
+ * driver said it was ok.
+ */
+ if (nreq == 1 && call_send)
rdi->driver_f.do_send(qp);
else
rdi->driver_f.schedule_send_no_lock(qp);
@@ -2465,3 +2657,454 @@ void rvt_qp_iter(struct rvt_dev_info *rdi,
rcu_read_unlock();
}
EXPORT_SYMBOL(rvt_qp_iter);
+
+/*
+ * This should be called with s_lock held.
+ */
+void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ enum ib_wc_status status)
+{
+ u32 old_last, last;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
+ return;
+
+ last = qp->s_last;
+ old_last = last;
+ trace_rvt_qp_send_completion(qp, wqe, last);
+ if (++last >= qp->s_size)
+ last = 0;
+ trace_rvt_qp_send_completion(qp, wqe, last);
+ qp->s_last = last;
+ /* See post_send() */
+ barrier();
+ rvt_put_swqe(wqe);
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI)
+ atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
+
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ rdi->wc_opcode[wqe->wr.opcode],
+ status);
+
+ if (qp->s_acked == old_last)
+ qp->s_acked = last;
+ if (qp->s_cur == old_last)
+ qp->s_cur = last;
+ if (qp->s_tail == old_last)
+ qp->s_tail = last;
+ if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+ qp->s_draining = 0;
+}
+EXPORT_SYMBOL(rvt_send_complete);
+
+/**
+ * rvt_copy_sge - copy data to SGE memory
+ * @qp: associated QP
+ * @ss: the SGE state
+ * @data: the data to copy
+ * @length: the length of the data
+ * @release: boolean to release MR
+ * @copy_last: do a separate copy of the last 8 bytes
+ */
+void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
+ void *data, u32 length,
+ bool release, bool copy_last)
+{
+ struct rvt_sge *sge = &ss->sge;
+ int i;
+ bool in_last = false;
+ bool cacheless_copy = false;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ struct rvt_wss *wss = rdi->wss;
+ unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode;
+
+ if (sge_copy_mode == RVT_SGE_COPY_CACHELESS) {
+ cacheless_copy = length >= PAGE_SIZE;
+ } else if (sge_copy_mode == RVT_SGE_COPY_ADAPTIVE) {
+ if (length >= PAGE_SIZE) {
+ /*
+ * NOTE: this *assumes*:
+ * o The first vaddr is the dest.
+ * o If multiple pages, then vaddr is sequential.
+ */
+ wss_insert(wss, sge->vaddr);
+ if (length >= (2 * PAGE_SIZE))
+ wss_insert(wss, (sge->vaddr + PAGE_SIZE));
+
+ cacheless_copy = wss_exceeds_threshold(wss);
+ } else {
+ wss_advance_clean_counter(wss);
+ }
+ }
+
+ if (copy_last) {
+ if (length > 8) {
+ length -= 8;
+ } else {
+ copy_last = false;
+ in_last = true;
+ }
+ }
+
+again:
+ while (length) {
+ u32 len = rvt_get_sge_length(sge, length);
+
+ WARN_ON_ONCE(len == 0);
+ if (unlikely(in_last)) {
+ /* enforce byte transfer ordering */
+ for (i = 0; i < len; i++)
+ ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
+ } else if (cacheless_copy) {
+ cacheless_memcpy(sge->vaddr, data, len);
+ } else {
+ memcpy(sge->vaddr, data, len);
+ }
+ rvt_update_sge(ss, len, release);
+ data += len;
+ length -= len;
+ }
+
+ if (copy_last) {
+ copy_last = false;
+ in_last = true;
+ length = 8;
+ goto again;
+ }
+}
+EXPORT_SYMBOL(rvt_copy_sge);
+
+/**
+ * ruc_loopback - handle UC and RC loopback requests
+ * @sqp: the sending QP
+ *
+ * This is called from rvt_do_send() to forward a WQE addressed to the same HFI
+ * Note that although we are single threaded due to the send engine, we still
+ * have to protect against post_send(). We don't have to worry about
+ * receive interrupts since this is a connected protocol and all packets
+ * will pass through here.
+ */
+void rvt_ruc_loopback(struct rvt_qp *sqp)
+{
+ struct rvt_ibport *rvp = NULL;
+ struct rvt_dev_info *rdi = ib_to_rvt(sqp->ibqp.device);
+ struct rvt_qp *qp;
+ struct rvt_swqe *wqe;
+ struct rvt_sge *sge;
+ unsigned long flags;
+ struct ib_wc wc;
+ u64 sdata;
+ atomic64_t *maddr;
+ enum ib_wc_status send_status;
+ bool release;
+ int ret;
+ bool copy_last = false;
+ int local_ops = 0;
+
+ rcu_read_lock();
+ rvp = rdi->ports[sqp->port_num - 1];
+
+ /*
+ * Note that we check the responder QP state after
+ * checking the requester's state.
+ */
+
+ qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), rvp,
+ sqp->remote_qpn);
+
+ spin_lock_irqsave(&sqp->s_lock, flags);
+
+ /* Return if we are already busy processing a work request. */
+ if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+ !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
+ goto unlock;
+
+ sqp->s_flags |= RVT_S_BUSY;
+
+again:
+ if (sqp->s_last == READ_ONCE(sqp->s_head))
+ goto clr_busy;
+ wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
+
+ /* Return if it is not OK to start a new work request. */
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
+ goto clr_busy;
+ /* We are in the error state, flush the work request. */
+ send_status = IB_WC_WR_FLUSH_ERR;
+ goto flush_send;
+ }
+
+ /*
+ * We can rely on the entry not changing without the s_lock
+ * being held until we update s_last.
+ * We increment s_cur to indicate s_last is in progress.
+ */
+ if (sqp->s_last == sqp->s_cur) {
+ if (++sqp->s_cur >= sqp->s_size)
+ sqp->s_cur = 0;
+ }
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+
+ if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
+ qp->ibqp.qp_type != sqp->ibqp.qp_type) {
+ rvp->n_pkt_drops++;
+ /*
+ * For RC, the requester would timeout and retry so
+ * shortcut the timeouts and just signal too many retries.
+ */
+ if (sqp->ibqp.qp_type == IB_QPT_RC)
+ send_status = IB_WC_RETRY_EXC_ERR;
+ else
+ send_status = IB_WC_SUCCESS;
+ goto serr;
+ }
+
+ memset(&wc, 0, sizeof(wc));
+ send_status = IB_WC_SUCCESS;
+
+ release = true;
+ sqp->s_sge.sge = wqe->sg_list[0];
+ sqp->s_sge.sg_list = wqe->sg_list + 1;
+ sqp->s_sge.num_sge = wqe->wr.num_sge;
+ sqp->s_len = wqe->length;
+ switch (wqe->wr.opcode) {
+ case IB_WR_REG_MR:
+ goto send_comp;
+
+ case IB_WR_LOCAL_INV:
+ if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
+ if (rvt_invalidate_rkey(sqp,
+ wqe->wr.ex.invalidate_rkey))
+ send_status = IB_WC_LOC_PROT_ERR;
+ local_ops = 1;
+ }
+ goto send_comp;
+
+ case IB_WR_SEND_WITH_INV:
+ if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
+ wc.wc_flags = IB_WC_WITH_INVALIDATE;
+ wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
+ }
+ goto send;
+
+ case IB_WR_SEND_WITH_IMM:
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
+ /* FALLTHROUGH */
+ case IB_WR_SEND:
+send:
+ ret = rvt_get_rwqe(qp, false);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto rnr_nak;
+ break;
+
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
+ ret = rvt_get_rwqe(qp, true);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto rnr_nak;
+ /* skip copy_last set and qp_access_flags recheck */
+ goto do_write;
+ case IB_WR_RDMA_WRITE:
+ copy_last = rvt_is_user_qp(qp);
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
+do_write:
+ if (wqe->length == 0)
+ break;
+ if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
+ IB_ACCESS_REMOTE_WRITE)))
+ goto acc_err;
+ qp->r_sge.sg_list = NULL;
+ qp->r_sge.num_sge = 1;
+ qp->r_sge.total_len = wqe->length;
+ break;
+
+ case IB_WR_RDMA_READ:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto inv_err;
+ if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+ wqe->rdma_wr.remote_addr,
+ wqe->rdma_wr.rkey,
+ IB_ACCESS_REMOTE_READ)))
+ goto acc_err;
+ release = false;
+ sqp->s_sge.sg_list = NULL;
+ sqp->s_sge.num_sge = 1;
+ qp->r_sge.sge = wqe->sg_list[0];
+ qp->r_sge.sg_list = wqe->sg_list + 1;
+ qp->r_sge.num_sge = wqe->wr.num_sge;
+ qp->r_sge.total_len = wqe->length;
+ break;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ goto inv_err;
+ if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+ wqe->atomic_wr.remote_addr,
+ wqe->atomic_wr.rkey,
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto acc_err;
+ /* Perform atomic OP and save result. */
+ maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
+ sdata = wqe->atomic_wr.compare_add;
+ *(u64 *)sqp->s_sge.sge.vaddr =
+ (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+ (u64)atomic64_add_return(sdata, maddr) - sdata :
+ (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
+ sdata, wqe->atomic_wr.swap);
+ rvt_put_mr(qp->r_sge.sge.mr);
+ qp->r_sge.num_sge = 0;
+ goto send_comp;
+
+ default:
+ send_status = IB_WC_LOC_QP_OP_ERR;
+ goto serr;
+ }
+
+ sge = &sqp->s_sge.sge;
+ while (sqp->s_len) {
+ u32 len = sqp->s_len;
+
+ if (len > sge->length)
+ len = sge->length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ WARN_ON_ONCE(len == 0);
+ rvt_copy_sge(qp, &qp->r_sge, sge->vaddr,
+ len, release, copy_last);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (!release)
+ rvt_put_mr(sge->mr);
+ if (--sqp->s_sge.num_sge)
+ *sge = *sqp->s_sge.sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= RVT_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ sqp->s_len -= len;
+ }
+ if (release)
+ rvt_put_ss(&qp->r_sge);
+
+ if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
+ goto send_comp;
+
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ else
+ wc.opcode = IB_WC_RECV;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.byte_len = wqe->length;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
+ wc.port_num = 1;
+ /* Signal completion event if the solicited bit is set. */
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+ wqe->wr.send_flags & IB_SEND_SOLICITED);
+
+send_comp:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ rvp->n_loop_pkts++;
+flush_send:
+ sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
+ rvt_send_complete(sqp, wqe, send_status);
+ if (local_ops) {
+ atomic_dec(&sqp->local_ops_pending);
+ local_ops = 0;
+ }
+ goto again;
+
+rnr_nak:
+ /* Handle RNR NAK */
+ if (qp->ibqp.qp_type == IB_QPT_UC)
+ goto send_comp;
+ rvp->n_rnr_naks++;
+ /*
+ * Note: we don't need the s_lock held since the BUSY flag
+ * makes this single threaded.
+ */
+ if (sqp->s_rnr_retry == 0) {
+ send_status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto serr;
+ }
+ if (sqp->s_rnr_retry_cnt < 7)
+ sqp->s_rnr_retry--;
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
+ goto clr_busy;
+ rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
+ IB_AETH_CREDIT_SHIFT);
+ goto clr_busy;
+
+op_err:
+ send_status = IB_WC_REM_OP_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+inv_err:
+ send_status = IB_WC_REM_INV_REQ_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+acc_err:
+ send_status = IB_WC_REM_ACCESS_ERR;
+ wc.status = IB_WC_LOC_PROT_ERR;
+err:
+ /* responder goes to error state */
+ rvt_rc_error(qp, wc.status);
+
+serr:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ rvt_send_complete(sqp, wqe, send_status);
+ if (sqp->ibqp.qp_type == IB_QPT_RC) {
+ int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+ sqp->s_flags &= ~RVT_S_BUSY;
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = sqp->ibqp.device;
+ ev.element.qp = &sqp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+ }
+ goto done;
+ }
+clr_busy:
+ sqp->s_flags &= ~RVT_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+done:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(rvt_ruc_loopback);
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
index 264811fdc530..6d883972e0b8 100644
--- a/drivers/infiniband/sw/rdmavt/qp.h
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -66,4 +66,6 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
+int rvt_wss_init(struct rvt_dev_info *rdi);
+void rvt_wss_exit(struct rvt_dev_info *rdi);
#endif /* DEF_RVTQP_H */
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
index 0ef25fc49f25..d5df352eadb1 100644
--- a/drivers/infiniband/sw/rdmavt/trace_tx.h
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -153,6 +153,48 @@ TRACE_EVENT(
)
);
+TRACE_EVENT(
+ rvt_qp_send_completion,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx),
+ TP_ARGS(qp, wqe, idx),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(struct rvt_swqe *, wqe)
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, qpt)
+ __field(u32, length)
+ __field(u32, idx)
+ __field(u32, ssn)
+ __field(enum ib_wr_opcode, opcode)
+ __field(int, send_flags)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->wqe = wqe;
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->qpt = qp->ibqp.qp_type;
+ __entry->length = wqe->length;
+ __entry->idx = idx;
+ __entry->ssn = wqe->ssn;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->send_flags = wqe->wr.send_flags;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->qpt,
+ __entry->wqe,
+ __entry->idx,
+ __entry->wr_id,
+ __entry->length,
+ __entry->ssn,
+ __entry->opcode,
+ __entry->send_flags
+ )
+);
#endif /* __RVT_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 17e4abc067af..723d3daf2eba 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -774,6 +774,13 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
goto bail_no_mr;
}
+ /* Memory Working Set Size */
+ ret = rvt_wss_init(rdi);
+ if (ret) {
+ rvt_pr_err(rdi, "Error in WSS init.\n");
+ goto bail_mr;
+ }
+
/* Completion queues */
spin_lock_init(&rdi->n_cqs_lock);
@@ -828,10 +835,11 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
rdi->ibdev.driver_id = driver_id;
/* We are now good to announce we exist */
- ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback);
+ ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev),
+ rdi->driver_f.port_callback);
if (ret) {
rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
- goto bail_mr;
+ goto bail_wss;
}
rvt_create_mad_agents(rdi);
@@ -839,6 +847,8 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
rvt_pr_info(rdi, "Registration with rdmavt done.\n");
return ret;
+bail_wss:
+ rvt_wss_exit(rdi);
bail_mr:
rvt_mr_exit(rdi);
@@ -862,6 +872,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi)
rvt_free_mad_agents(rdi);
ib_unregister_device(&rdi->ibdev);
+ rvt_wss_exit(rdi);
rvt_mr_exit(rdi);
rvt_qp_exit(rdi);
}
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 10999fa69281..383e65c7bbc0 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -103,7 +103,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM;
rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM;
rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM;
- rxe->attr.atomic_cap = RXE_ATOMIC_CAP;
+ rxe->attr.atomic_cap = IB_ATOMIC_HCA;
rxe->attr.max_ee = RXE_MAX_EE;
rxe->attr.max_rdd = RXE_MAX_RDD;
rxe->attr.max_mw = RXE_MAX_MW;
@@ -128,9 +128,9 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
/* initialize port attributes */
static int rxe_init_port_param(struct rxe_port *port)
{
- port->attr.state = RXE_PORT_STATE;
- port->attr.max_mtu = RXE_PORT_MAX_MTU;
- port->attr.active_mtu = RXE_PORT_ACTIVE_MTU;
+ port->attr.state = IB_PORT_DOWN;
+ port->attr.max_mtu = IB_MTU_4096;
+ port->attr.active_mtu = IB_MTU_256;
port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN;
port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS;
port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ;
@@ -147,8 +147,7 @@ static int rxe_init_port_param(struct rxe_port *port)
port->attr.active_width = RXE_PORT_ACTIVE_WIDTH;
port->attr.active_speed = RXE_PORT_ACTIVE_SPEED;
port->attr.phys_state = RXE_PORT_PHYS_STATE;
- port->mtu_cap =
- ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU);
+ port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256);
port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX);
return 0;
@@ -300,7 +299,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
mtu = eth_mtu_int_to_enum(ndev_mtu);
/* Make sure that new MTU in range */
- mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256;
+ mtu = mtu ? min_t(enum ib_mtu, mtu, IB_MTU_4096) : IB_MTU_256;
port->attr.active_mtu = mtu;
port->mtu_cap = ib_mtu_enum_to_int(mtu);
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 83311dd07019..ea089cb091ad 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -191,6 +191,7 @@ static inline void reset_retry_counters(struct rxe_qp *qp)
{
qp->comp.retry_cnt = qp->attr.retry_cnt;
qp->comp.rnr_retry = qp->attr.rnr_retry;
+ qp->comp.started_retry = 0;
}
static inline enum comp_state check_psn(struct rxe_qp *qp,
@@ -253,6 +254,17 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
+ /* read retries of partial data may restart from
+ * read response first or response only.
+ */
+ if ((pkt->psn == wqe->first_psn &&
+ pkt->opcode ==
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) ||
+ (wqe->first_psn == wqe->last_psn &&
+ pkt->opcode ==
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY))
+ break;
+
return COMPST_ERROR;
}
break;
@@ -499,11 +511,11 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
struct rxe_pkt_info *pkt,
struct rxe_send_wqe *wqe)
{
- qp->comp.opcode = -1;
-
- if (pkt) {
- if (psn_compare(pkt->psn, qp->comp.psn) >= 0)
- qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ if (pkt && wqe->state == wqe_state_pending) {
+ if (psn_compare(wqe->last_psn, qp->comp.psn) >= 0) {
+ qp->comp.psn = (wqe->last_psn + 1) & BTH_PSN_MASK;
+ qp->comp.opcode = -1;
+ }
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
@@ -676,6 +688,20 @@ int rxe_completer(void *arg)
goto exit;
}
+ /* if we've started a retry, don't start another
+ * retry sequence, unless this is a timeout.
+ */
+ if (qp->comp.started_retry &&
+ !qp->comp.timeout_retry) {
+ if (pkt) {
+ rxe_drop_ref(pkt->qp);
+ kfree_skb(skb);
+ skb = NULL;
+ }
+
+ goto done;
+ }
+
if (qp->comp.retry_cnt > 0) {
if (qp->comp.retry_cnt != 7)
qp->comp.retry_cnt--;
@@ -692,6 +718,7 @@ int rxe_completer(void *arg)
rxe_counter_inc(rxe,
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
+ qp->comp.started_retry = 1;
rxe_run_task(&qp->req.task, 1);
}
@@ -701,7 +728,7 @@ int rxe_completer(void *arg)
skb = NULL;
}
- goto exit;
+ goto done;
} else {
rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED);
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 2ee4b08b00ea..a57276f2cb84 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -30,7 +30,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
+#include <linux/vmalloc.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
@@ -97,7 +97,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context,
cq->queue->buf, cq->queue->buf_size, &cq->queue->ip);
if (err) {
- kvfree(cq->queue->buf);
+ vfree(cq->queue->buf);
kfree(cq->queue);
return err;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 87d14f7ef21b..afd53f57a62b 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -144,8 +144,7 @@ void rxe_loopback(struct sk_buff *skb);
int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb);
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt);
-int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, u32 *crc);
+int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc);
enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num);
const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
struct device *rxe_dma_device(struct rxe_dev *rxe);
@@ -196,7 +195,7 @@ static inline int qp_mtu(struct rxe_qp *qp)
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
return qp->attr.path_mtu;
else
- return RXE_PORT_MAX_MTU;
+ return IB_MTU_4096;
}
static inline int rcv_wqe_size(int max_sge)
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index dff605fdf60f..9d3916b93f23 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -573,33 +573,20 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
int index = key >> 8;
- if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) {
- mem = rxe_pool_get_index(&rxe->mr_pool, index);
- if (!mem)
- goto err1;
- } else {
- goto err1;
+ mem = rxe_pool_get_index(&rxe->mr_pool, index);
+ if (!mem)
+ return NULL;
+
+ if (unlikely((type == lookup_local && mem->lkey != key) ||
+ (type == lookup_remote && mem->rkey != key) ||
+ mem->pd != pd ||
+ (access && !(access & mem->access)) ||
+ mem->state != RXE_MEM_STATE_VALID)) {
+ rxe_drop_ref(mem);
+ mem = NULL;
}
- if ((type == lookup_local && mem->lkey != key) ||
- (type == lookup_remote && mem->rkey != key))
- goto err2;
-
- if (mem->pd != pd)
- goto err2;
-
- if (access && !(access & mem->access))
- goto err2;
-
- if (mem->state != RXE_MEM_STATE_VALID)
- goto err2;
-
return mem;
-
-err2:
- rxe_drop_ref(mem);
-err1:
- return NULL;
}
int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 8094cbaa54a9..40e82e0f6c2d 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -72,7 +72,7 @@ struct rxe_dev *get_rxe_by_name(const char *name)
spin_lock_bh(&dev_list_lock);
list_for_each_entry(rxe, &rxe_dev_list, list) {
- if (!strcmp(name, rxe->ib_dev.name)) {
+ if (!strcmp(name, dev_name(&rxe->ib_dev.dev))) {
found = rxe;
break;
}
@@ -182,19 +182,11 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
#endif
-static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
+static struct dst_entry *rxe_find_route(struct net_device *ndev,
struct rxe_qp *qp,
struct rxe_av *av)
{
- const struct ib_gid_attr *attr;
struct dst_entry *dst = NULL;
- struct net_device *ndev;
-
- attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num,
- av->grh.sgid_index);
- if (IS_ERR(attr))
- return NULL;
- ndev = attr->ndev;
if (qp_type(qp) == IB_QPT_RC)
dst = sk_dst_get(qp->sk->sk);
@@ -229,7 +221,6 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
sk_dst_set(qp->sk->sk, dst);
}
}
- rdma_put_gid_attr(attr);
return dst;
}
@@ -377,8 +368,8 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
}
-static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, struct rxe_av *av)
+static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb,
+ struct rxe_av *av)
{
struct rxe_qp *qp = pkt->qp;
struct dst_entry *dst;
@@ -387,7 +378,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
- dst = rxe_find_route(rxe, qp, av);
+ dst = rxe_find_route(skb->dev, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
return -EHOSTUNREACH;
@@ -396,8 +387,8 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
if (!memcmp(saddr, daddr, sizeof(*daddr)))
pkt->mask |= RXE_LOOPBACK_MASK;
- prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
- htons(ROCE_V2_UDP_DPORT));
+ prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
+ cpu_to_be16(ROCE_V2_UDP_DPORT));
prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
av->grh.traffic_class, av->grh.hop_limit, df, xnet);
@@ -406,15 +397,15 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return 0;
}
-static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, struct rxe_av *av)
+static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb,
+ struct rxe_av *av)
{
struct rxe_qp *qp = pkt->qp;
struct dst_entry *dst;
struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
- dst = rxe_find_route(rxe, qp, av);
+ dst = rxe_find_route(skb->dev, qp, av);
if (!dst) {
pr_err("Host not reachable\n");
return -EHOSTUNREACH;
@@ -423,8 +414,8 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
if (!memcmp(saddr, daddr, sizeof(*daddr)))
pkt->mask |= RXE_LOOPBACK_MASK;
- prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
- htons(ROCE_V2_UDP_DPORT));
+ prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
+ cpu_to_be16(ROCE_V2_UDP_DPORT));
prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
av->grh.traffic_class,
@@ -434,16 +425,15 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return 0;
}
-int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, u32 *crc)
+int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc)
{
int err = 0;
struct rxe_av *av = rxe_get_av(pkt);
if (av->network_type == RDMA_NETWORK_IPV4)
- err = prepare4(rxe, pkt, skb, av);
+ err = prepare4(pkt, skb, av);
else if (av->network_type == RDMA_NETWORK_IPV6)
- err = prepare6(rxe, pkt, skb, av);
+ err = prepare6(pkt, skb, av);
*crc = rxe_icrc_hdr(pkt, skb);
@@ -501,11 +491,6 @@ void rxe_loopback(struct sk_buff *skb)
rxe_rcv(skb);
}
-static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
-{
- return rxe->port.port_guid == av->grh.dgid.global.interface_id;
-}
-
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt)
{
@@ -625,7 +610,7 @@ void rxe_port_up(struct rxe_dev *rxe)
port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
- pr_info("set %s active\n", rxe->ib_dev.name);
+ dev_info(&rxe->ib_dev.dev, "set active\n");
}
/* Caller must hold net_info_lock */
@@ -638,7 +623,7 @@ void rxe_port_down(struct rxe_dev *rxe)
port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
rxe_port_event(rxe, IB_EVENT_PORT_ERR);
- pr_info("set %s down\n", rxe->ib_dev.name);
+ dev_info(&rxe->ib_dev.dev, "set down\n");
}
static int rxe_notify(struct notifier_block *not_blk,
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 4555510d86c4..bdea899a58ac 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -90,7 +90,6 @@ enum rxe_device_param {
RXE_MAX_RES_RD_ATOM = 0x3f000,
RXE_MAX_QP_INIT_RD_ATOM = 128,
RXE_MAX_EE_INIT_RD_ATOM = 0,
- RXE_ATOMIC_CAP = 1,
RXE_MAX_EE = 0,
RXE_MAX_RDD = 0,
RXE_MAX_MW = 0,
@@ -139,9 +138,6 @@ enum rxe_device_param {
/* default/initial rxe port parameters */
enum rxe_port_param {
- RXE_PORT_STATE = IB_PORT_DOWN,
- RXE_PORT_MAX_MTU = IB_MTU_4096,
- RXE_PORT_ACTIVE_MTU = IB_MTU_256,
RXE_PORT_GID_TBL_LEN = 1024,
RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,
RXE_PORT_MAX_MSG_SZ = 0x800000,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index b4a8acc7bb7d..36b53fb94a49 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -207,7 +207,7 @@ int rxe_pool_init(
kref_init(&pool->ref_cnt);
- spin_lock_init(&pool->pool_lock);
+ rwlock_init(&pool->pool_lock);
if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
err = rxe_pool_init_index(pool,
@@ -222,7 +222,7 @@ int rxe_pool_init(
pool->key_size = rxe_type_info[type].key_size;
}
- pool->state = rxe_pool_valid;
+ pool->state = RXE_POOL_STATE_VALID;
out:
return err;
@@ -232,7 +232,7 @@ static void rxe_pool_release(struct kref *kref)
{
struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
- pool->state = rxe_pool_invalid;
+ pool->state = RXE_POOL_STATE_INVALID;
kfree(pool->table);
}
@@ -245,12 +245,12 @@ int rxe_pool_cleanup(struct rxe_pool *pool)
{
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
- pool->state = rxe_pool_invalid;
+ write_lock_irqsave(&pool->pool_lock, flags);
+ pool->state = RXE_POOL_STATE_INVALID;
if (atomic_read(&pool->num_elem) > 0)
pr_warn("%s pool destroyed with unfree'd elem\n",
pool_name(pool));
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
rxe_pool_put(pool);
@@ -336,10 +336,10 @@ void rxe_add_key(void *arg, void *key)
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
insert_key(pool, elem);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void rxe_drop_key(void *arg)
@@ -348,9 +348,9 @@ void rxe_drop_key(void *arg)
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
rb_erase(&elem->node, &pool->tree);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void rxe_add_index(void *arg)
@@ -359,10 +359,10 @@ void rxe_add_index(void *arg)
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
elem->index = alloc_index(pool);
insert_index(pool, elem);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void rxe_drop_index(void *arg)
@@ -371,10 +371,10 @@ void rxe_drop_index(void *arg)
struct rxe_pool *pool = elem->pool;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ write_lock_irqsave(&pool->pool_lock, flags);
clear_bit(elem->index - pool->min_index, pool->table);
rb_erase(&elem->node, &pool->tree);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ write_unlock_irqrestore(&pool->pool_lock, flags);
}
void *rxe_alloc(struct rxe_pool *pool)
@@ -384,13 +384,13 @@ void *rxe_alloc(struct rxe_pool *pool)
might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
- spin_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != rxe_pool_valid) {
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_lock_irqsave(&pool->pool_lock, flags);
+ if (pool->state != RXE_POOL_STATE_VALID) {
+ read_unlock_irqrestore(&pool->pool_lock, flags);
return NULL;
}
kref_get(&pool->ref_cnt);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
kref_get(&pool->rxe->ref_cnt);
@@ -436,9 +436,9 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
struct rxe_pool_entry *elem = NULL;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ read_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != rxe_pool_valid)
+ if (pool->state != RXE_POOL_STATE_VALID)
goto out;
node = pool->tree.rb_node;
@@ -450,15 +450,14 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
node = node->rb_left;
else if (elem->index < index)
node = node->rb_right;
- else
+ else {
+ kref_get(&elem->ref_cnt);
break;
+ }
}
- if (node)
- kref_get(&elem->ref_cnt);
-
out:
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
return node ? elem : NULL;
}
@@ -469,9 +468,9 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
int cmp;
unsigned long flags;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ read_lock_irqsave(&pool->pool_lock, flags);
- if (pool->state != rxe_pool_valid)
+ if (pool->state != RXE_POOL_STATE_VALID)
goto out;
node = pool->tree.rb_node;
@@ -494,6 +493,6 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
kref_get(&elem->ref_cnt);
out:
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ read_unlock_irqrestore(&pool->pool_lock, flags);
return node ? elem : NULL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 47df28e43acf..aa4ba307097b 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -74,8 +74,8 @@ struct rxe_type_info {
extern struct rxe_type_info rxe_type_info[];
enum rxe_pool_state {
- rxe_pool_invalid,
- rxe_pool_valid,
+ RXE_POOL_STATE_INVALID,
+ RXE_POOL_STATE_VALID,
};
struct rxe_pool_entry {
@@ -90,7 +90,7 @@ struct rxe_pool_entry {
struct rxe_pool {
struct rxe_dev *rxe;
- spinlock_t pool_lock; /* pool spinlock */
+ rwlock_t pool_lock; /* protects pool add/del/search */
size_t elem_size;
struct kref ref_cnt;
void (*cleanup)(struct rxe_pool_entry *obj);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index c58452daffc7..b9710907dac2 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -34,6 +34,7 @@
#include <linux/skbuff.h>
#include <linux/delay.h>
#include <linux/sched.h>
+#include <linux/vmalloc.h>
#include "rxe.h"
#include "rxe_loc.h"
@@ -227,6 +228,16 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
return err;
qp->sk->sk->sk_user_data = qp;
+ /* pick a source UDP port number for this QP based on
+ * the source QPN. this spreads traffic for different QPs
+ * across different NIC RX queues (while using a single
+ * flow for a given QP to maintain packet order).
+ * the port number must be in the Dynamic Ports range
+ * (0xc000 - 0xffff).
+ */
+ qp->src_port = RXE_ROCE_V2_SPORT +
+ (hash_32_generic(qp_num(qp), 14) & 0x3fff);
+
qp->sq.max_wr = init->cap.max_send_wr;
qp->sq.max_sge = init->cap.max_send_sge;
qp->sq.max_inline = init->cap.max_inline_data;
@@ -247,7 +258,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
&qp->sq.queue->ip);
if (err) {
- kvfree(qp->sq.queue->buf);
+ vfree(qp->sq.queue->buf);
kfree(qp->sq.queue);
return err;
}
@@ -300,7 +311,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->rq.queue->buf, qp->rq.queue->buf_size,
&qp->rq.queue->ip);
if (err) {
- kvfree(qp->rq.queue->buf);
+ vfree(qp->rq.queue->buf);
kfree(qp->rq.queue);
return err;
}
@@ -408,8 +419,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
enum ib_qp_state new_state = (mask & IB_QP_STATE) ?
attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask,
- IB_LINK_LAYER_ETHERNET)) {
+ if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
pr_warn("invalid mask or state for qp\n");
goto err1;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index d30dbac24583..5c29a1bb575a 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -122,7 +122,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
set_bad_pkey_cntr(port);
goto err1;
}
- } else if (qpn != 0) {
+ } else {
if (unlikely(!pkey_match(pkey,
port->pkey_tbl[qp->attr.pkey_index]
))) {
@@ -134,7 +134,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
}
if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) &&
- qpn != 0 && pkt->mask) {
+ pkt->mask) {
u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey;
if (unlikely(deth_qkey(pkt) != qkey)) {
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 8be27238a86e..6c361d70d7cd 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -73,9 +73,6 @@ static void req_retry(struct rxe_qp *qp)
int npsn;
int first = 1;
- wqe = queue_head(qp->sq.queue);
- npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK;
-
qp->req.wqe_index = consumer_index(qp->sq.queue);
qp->req.psn = qp->comp.psn;
qp->req.opcode = -1;
@@ -107,11 +104,17 @@ static void req_retry(struct rxe_qp *qp)
if (first) {
first = 0;
- if (mask & WR_WRITE_OR_SEND_MASK)
+ if (mask & WR_WRITE_OR_SEND_MASK) {
+ npsn = (qp->comp.psn - wqe->first_psn) &
+ BTH_PSN_MASK;
retry_first_write_send(qp, wqe, mask, npsn);
+ }
- if (mask & WR_READ_MASK)
+ if (mask & WR_READ_MASK) {
+ npsn = (wqe->dma.length - wqe->dma.resid) /
+ qp->mtu;
wqe->iova += npsn * qp->mtu;
+ }
}
wqe->state = wqe_state_posted;
@@ -435,7 +438,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
if (pkt->mask & RXE_RETH_MASK) {
reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
reth_set_va(pkt, wqe->iova);
- reth_set_len(pkt, wqe->dma.length);
+ reth_set_len(pkt, wqe->dma.resid);
}
if (pkt->mask & RXE_IMMDT_MASK)
@@ -476,7 +479,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
u32 *p;
int err;
- err = rxe_prepare(rxe, pkt, skb, &crc);
+ err = rxe_prepare(pkt, skb, &crc);
if (err)
return err;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index aa5833318372..c962160292f4 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -637,7 +637,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
if (ack->mask & RXE_ATMACK_MASK)
atmack_set_orig(ack, qp->resp.atomic_orig);
- err = rxe_prepare(rxe, ack, skb, &crc);
+ err = rxe_prepare(ack, skb, &crc);
if (err) {
kfree_skb(skb);
return NULL;
@@ -682,6 +682,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
rxe_advance_resp_resource(qp);
res->type = RXE_READ_MASK;
+ res->replay = 0;
res->read.va = qp->resp.va;
res->read.va_org = qp->resp.va;
@@ -752,7 +753,8 @@ static enum resp_states read_reply(struct rxe_qp *qp,
state = RESPST_DONE;
} else {
qp->resp.res = NULL;
- qp->resp.opcode = -1;
+ if (!res->replay)
+ qp->resp.opcode = -1;
if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
qp->resp.psn = res->cur_psn;
state = RESPST_CLEANUP;
@@ -814,6 +816,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
/* next expected psn, read handles this separately */
qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
qp->resp.opcode = pkt->opcode;
qp->resp.status = IB_WC_SUCCESS;
@@ -1065,7 +1068,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
enum resp_states rc;
- u32 prev_psn = (qp->resp.psn - 1) & BTH_PSN_MASK;
+ u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;
if (pkt->mask & RXE_SEND_MASK ||
pkt->mask & RXE_WRITE_MASK) {
@@ -1108,6 +1111,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
res->state = (pkt->psn == res->first_psn) ?
rdatm_res_state_new :
rdatm_res_state_replay;
+ res->replay = 1;
/* Reset the resource, except length. */
res->read.va_org = iova;
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 0d6c04ba7fc3..c41a5fee81f7 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <linux/vmalloc.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
@@ -129,13 +130,18 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, q->buf,
q->buf_size, &q->ip);
- if (err)
+ if (err) {
+ vfree(q->buf);
+ kfree(q);
return err;
+ }
if (uresp) {
if (copy_to_user(&uresp->srq_num, &srq->srq_num,
- sizeof(uresp->srq_num)))
+ sizeof(uresp->srq_num))) {
+ rxe_queue_cleanup(q);
return -EFAULT;
+ }
}
return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index d5ed7571128f..73a19f808e1b 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -105,7 +105,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
}
rxe_set_port_state(ndev);
- pr_info("added %s to %s\n", rxe->ib_dev.name, intf);
+ dev_info(&rxe->ib_dev.dev, "added %s\n", intf);
err:
if (ndev)
dev_put(ndev);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index f5b1e0ad6142..9c19f2027511 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1148,18 +1148,21 @@ static ssize_t parent_show(struct device *device,
static DEVICE_ATTR_RO(parent);
-static struct device_attribute *rxe_dev_attributes[] = {
- &dev_attr_parent,
+static struct attribute *rxe_dev_attributes[] = {
+ &dev_attr_parent.attr,
+ NULL
+};
+
+static const struct attribute_group rxe_attr_group = {
+ .attrs = rxe_dev_attributes,
};
int rxe_register_device(struct rxe_dev *rxe)
{
int err;
- int i;
struct ib_device *dev = &rxe->ib_dev;
struct crypto_shash *tfm;
- strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
dev->owner = THIS_MODULE;
@@ -1260,26 +1263,16 @@ int rxe_register_device(struct rxe_dev *rxe)
}
rxe->tfm = tfm;
+ rdma_set_device_sysfs_group(dev, &rxe_attr_group);
dev->driver_id = RDMA_DRIVER_RXE;
- err = ib_register_device(dev, NULL);
+ err = ib_register_device(dev, "rxe%d", NULL);
if (err) {
pr_warn("%s failed with error %d\n", __func__, err);
goto err1;
}
- for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
- err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
- if (err) {
- pr_warn("%s failed with error %d for attr number %d\n",
- __func__, err, i);
- goto err2;
- }
- }
-
return 0;
-err2:
- ib_unregister_device(dev);
err1:
crypto_free_shash(rxe->tfm);
@@ -1288,12 +1281,8 @@ err1:
int rxe_unregister_device(struct rxe_dev *rxe)
{
- int i;
struct ib_device *dev = &rxe->ib_dev;
- for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
- device_remove_file(&dev->dev, rxe_dev_attributes[i]);
-
ib_unregister_device(dev);
return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index af1470d29391..82e670d6eeea 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -158,6 +158,7 @@ struct rxe_comp_info {
int opcode;
int timeout;
int timeout_retry;
+ int started_retry;
u32 retry_cnt;
u32 rnr_retry;
struct rxe_task task;
@@ -171,6 +172,7 @@ enum rdatm_res_state {
struct resp_res {
int type;
+ int replay;
u32 first_psn;
u32 last_psn;
u32 cur_psn;
@@ -195,6 +197,7 @@ struct rxe_resp_info {
enum rxe_qp_state state;
u32 msn;
u32 psn;
+ u32 ack_psn;
int opcode;
int drop_msg;
int goto_error;
@@ -248,6 +251,7 @@ struct rxe_qp {
struct socket *sk;
u32 dst_cookie;
+ u16 src_port;
struct rxe_av pri_av;
struct rxe_av alt_av;