Diffstat (limited to 'include/rdma')

-rw-r--r--  include/rdma/ib.h              2
-rw-r--r--  include/rdma/ib_umem.h         2
-rw-r--r--  include/rdma/ib_umem_odp.h    58
-rw-r--r--  include/rdma/ib_verbs.h       85
-rw-r--r--  include/rdma/iw_portmap.h      3
-rw-r--r--  include/rdma/opa_port_info.h   2
-rw-r--r--  include/rdma/rdma_netlink.h   10
-rw-r--r--  include/rdma/rdma_vt.h         1
-rw-r--r--  include/rdma/rdmavt_cq.h       1
-rw-r--r--  include/rdma/rdmavt_qp.h      44
-rw-r--r--  include/rdma/restrack.h        3
-rw-r--r--  include/rdma/signature.h       2

12 files changed, 168 insertions(+), 45 deletions(-)
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index 4f385ec54f80..fe2fc9e91588 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -36,6 +36,8 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/cred.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
struct ib_addr {
union {
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 1052d0d62be7..a91b2af64ec4 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -42,7 +42,7 @@ struct ib_ucontext;
struct ib_umem_odp;
struct ib_umem {
- struct ib_ucontext *context;
+ struct ib_device *ibdev;
struct mm_struct *owning_mm;
size_t length;
unsigned long address;
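With the ucontext pointer replaced by ibdev, drivers reach the device straight from the umem. A minimal sketch, assuming the helper name is hypothetical and not part of this patch:

    /* Hypothetical helper: obtain the DMA device for a registered umem. */
    static struct device *umem_dma_device(struct ib_umem *umem)
    {
        /* Previously reached as umem->context->device->dma_device. */
        return umem->ibdev->dma_device;
    }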
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 479db5c98ff6..253df1a1fa54 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -37,11 +37,6 @@
#include <rdma/ib_verbs.h>
#include <linux/interval_tree.h>
-struct umem_odp_node {
- u64 __subtree_last;
- struct rb_node rb;
-};
-
struct ib_umem_odp {
struct ib_umem umem;
struct ib_ucontext_per_mm *per_mm;
@@ -72,7 +67,15 @@ struct ib_umem_odp {
int npages;
/* Tree tracking */
- struct umem_odp_node interval_tree;
+ struct interval_tree_node interval_tree;
+
+ /*
+ * An implicit odp umem cannot be DMA mapped, has 0 length, and serves
+ * only as an anchor for the driver to hold onto the per_mm. FIXME:
+ * This should be removed and drivers should work with the per_mm
+ * directly.
+ */
+ bool is_implicit_odp;
struct completion notifier_completion;
int dying;
@@ -88,14 +91,13 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
/* Returns the first page of an ODP umem. */
static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp)
{
- return ALIGN_DOWN(umem_odp->umem.address, 1UL << umem_odp->page_shift);
+ return umem_odp->interval_tree.start;
}
/* Returns the address of the page after the last one of an ODP umem. */
static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp)
{
- return ALIGN(umem_odp->umem.address + umem_odp->umem.length,
- 1UL << umem_odp->page_shift);
+ return umem_odp->interval_tree.last + 1;
}
static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
@@ -120,25 +122,20 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_ucontext_per_mm {
- struct ib_ucontext *context;
- struct mm_struct *mm;
+ struct mmu_notifier mn;
struct pid *tgid;
- bool active;
struct rb_root_cached umem_tree;
/* Protects umem_tree */
struct rw_semaphore umem_rwsem;
-
- struct mmu_notifier mn;
- unsigned int odp_mrs_count;
-
- struct list_head ucontext_list;
- struct rcu_head rcu;
};
-int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access);
-struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root_umem,
- unsigned long addr, size_t size);
+struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
+ size_t size, int access);
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
+ int access);
+struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem,
+ unsigned long addr, size_t size);
void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
@@ -163,8 +160,17 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
* Find first region intersecting with address range.
* Return NULL if not found
*/
-struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
- u64 addr, u64 length);
+static inline struct ib_umem_odp *
+rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length)
+{
+ struct interval_tree_node *node;
+
+ node = interval_tree_iter_first(root, addr, addr + length - 1);
+ if (!node)
+ return NULL;
+ return container_of(node, struct ib_umem_odp, interval_tree);
+}
static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
unsigned long mmu_seq)
@@ -185,9 +191,11 @@ static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
-static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
+static inline struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata,
+ unsigned long addr,
+ size_t size, int access)
{
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
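The stub mirrors the reworked API: ib_umem_odp_get() now returns an ERR_PTR-encoded pointer rather than an int, so callers test with IS_ERR(). A hedged call-site sketch, with the function and variable names illustrative rather than taken from this patch:

    /* Illustrative call site for the reworked ODP API. */
    static int example_get_odp(struct ib_udata *udata, unsigned long addr,
                               size_t size, int access)
    {
        struct ib_umem_odp *odp;

        odp = ib_umem_odp_get(udata, addr, size, access);
        if (IS_ERR(odp))
            return PTR_ERR(odp); /* -EINVAL when ODP is compiled out */

        ib_umem_odp_release(odp);
        return 0;
    }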
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c5f8a9f17063..6a47ba85c54c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -98,15 +98,54 @@ void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
#if defined(CONFIG_DYNAMIC_DEBUG)
#define ibdev_dbg(__dev, format, args...) \
dynamic_ibdev_dbg(__dev, format, ##args)
-#elif defined(DEBUG)
-#define ibdev_dbg(__dev, format, args...) \
- ibdev_printk(KERN_DEBUG, __dev, format, ##args)
#else
__printf(2, 3) __cold
static inline
void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
#endif
+#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ if (__ratelimit(&_rs)) \
+ ibdev_level(ibdev, fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_alert_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_crit_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_err_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_warn_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_notice_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_info_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
+#define ibdev_dbg_ratelimited(ibdev, fmt, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
+ if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) \
+ __dynamic_ibdev_dbg(&descriptor, ibdev, fmt, \
+ ##__VA_ARGS__); \
+} while (0)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
+
union ib_gid {
u8 raw[16];
struct {
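The new wrappers follow the familiar dev_*_ratelimited() pattern, keeping a per-call-site ratelimit state. A hedged usage sketch inside a driver, with the function name and message text purely illustrative:

    /* Illustrative: throttle a noisy completion-error message. */
    static void handle_cqe_error(struct ib_device *ibdev, u32 qpn)
    {
        ibdev_err_ratelimited(ibdev, "QP %u: completion error\n", qpn);
        ibdev_dbg_ratelimited(ibdev, "QP %u: dumping state\n", qpn);
    }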
@@ -451,6 +490,16 @@ enum ib_port_state {
IB_PORT_ACTIVE_DEFER = 5
};
+enum ib_port_phys_state {
+ IB_PORT_PHYS_STATE_SLEEP = 1,
+ IB_PORT_PHYS_STATE_POLLING = 2,
+ IB_PORT_PHYS_STATE_DISABLED = 3,
+ IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4,
+ IB_PORT_PHYS_STATE_LINK_UP = 5,
+ IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6,
+ IB_PORT_PHYS_STATE_PHY_TEST = 7,
+};
+
enum ib_port_width {
IB_WIDTH_1X = 1,
IB_WIDTH_2X = 16,
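Drivers that previously hard-coded numeric physical-state values can use the named constants above. A one-line sketch, assuming attr is the struct ib_port_attr argument of a driver's query_port() method:

    /* Illustrative: report link-up from a query_port() implementation. */
    attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;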
@@ -1417,11 +1466,6 @@ struct ib_ucontext {
bool cleanup_retryable;
- void (*invalidate_range)(struct ib_umem_odp *umem_odp,
- unsigned long start, unsigned long end);
- struct mutex per_mm_list_lock;
- struct list_head per_mm_list;
-
struct ib_rdmacg_object cg_obj;
/*
* Implementation details of the RDMA core, don't use in drivers:
@@ -2378,6 +2422,8 @@ struct ib_device_ops {
u64 iova);
int (*unmap_fmr)(struct list_head *fmr_list);
int (*dealloc_fmr)(struct ib_fmr *fmr);
+ void (*invalidate_range)(struct ib_umem_odp *umem_odp,
+ unsigned long start, unsigned long end);
int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
@@ -2647,7 +2693,9 @@ struct ib_client {
const union ib_gid *gid,
const struct sockaddr *addr,
void *client_data);
- struct list_head list;
+
+ refcount_t uses;
+ struct completion uses_zero;
u32 client_id;
/* kverbs are not required by the client */
@@ -3711,6 +3759,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
NULL);
}
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+ int nr_cqe, enum ib_poll_context poll_ctx,
+ const char *caller);
+
+/**
+ * ib_alloc_cq_any - Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @poll_ctx: Context used for polling the CQ
+ */
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
+ void *private, int nr_cqe,
+ enum ib_poll_context poll_ctx)
+{
+ return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
+ KBUILD_MODNAME);
+}
+
/**
* ib_free_cq_user - Free kernel/user CQ
* @cq: The CQ to free
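ib_alloc_cq_any() lets a ULP allocate a CQ without picking a completion vector, and KBUILD_MODNAME is recorded automatically. A minimal sketch, assuming dev and drv_priv are the caller's ib_device and private context:

    /* Illustrative: allocate a 128-entry CQ polled from soft-IRQ context. */
    struct ib_cq *cq;

    cq = ib_alloc_cq_any(dev, drv_priv, 128, IB_POLL_SOFTIRQ);
    if (IS_ERR(cq))
        return PTR_ERR(cq);

The CQ is released with ib_free_cq() as before.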
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
index b9fee7feeeb5..c89535047c42 100644
--- a/include/rdma/iw_portmap.h
+++ b/include/rdma/iw_portmap.h
@@ -33,6 +33,9 @@
#ifndef _IW_PORTMAP_H
#define _IW_PORTMAP_H
+#include <linux/socket.h>
+#include <linux/netlink.h>
+
#define IWPM_ULIBNAME_SIZE 32
#define IWPM_DEVNAME_SIZE 32
#define IWPM_IFNAME_SIZE 16
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index 7147a9263011..bdbfe25d3854 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h
@@ -33,6 +33,8 @@
#if !defined(OPA_PORT_INFO_H)
#define OPA_PORT_INFO_H
+#include <rdma/opa_smi.h>
+
#define OPA_PORT_LINK_MODE_NOP 0 /* No change */
#define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 6631624e4d7c..ab22759de7ea 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -76,28 +76,32 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
/**
* Send the supplied skb to a specific userspace PID.
+ * @net: Net namespace in which to send the skb
* @skb: The netlink skb
* @pid: Userspace netlink process ID
* Returns 0 on success or a negative error code.
*/
-int rdma_nl_unicast(struct sk_buff *skb, u32 pid);
+int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid);
/**
* Send the supplied skb to a specific userspace PID, waiting and retrying once on failure.
+ * @net: Net namespace in which to send the skb
* @skb: The netlink skb
* @pid: Userspace netlink process ID
* Returns 0 on success or a negative error code.
*/
-int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid);
+int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid);
/**
* Send the supplied skb to a netlink group.
+ * @net: Net namespace in which to send the skb
* @skb: The netlink skb
* @group: Netlink group ID
* @flags: allocation flags
* Returns 0 on success or a negative error code.
*/
-int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags);
+int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
+ unsigned int group, gfp_t flags);
/**
* Check if there are any listeners to the netlink group
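Callers now name the target network namespace explicitly instead of the core assuming init_net. A hedged sketch, assuming skb, nlmsg_pid, and err already exist at the call site:

    /* Illustrative: send a reply within the init network namespace. */
    err = rdma_nl_unicast(&init_net, skb, nlmsg_pid);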
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 525848e227dc..ac5a9430abb6 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -116,6 +116,7 @@ struct rvt_ibport {
u64 n_unaligned;
u64 n_rc_dupreq;
u64 n_rc_seqnak;
+ u64 n_rc_crwaits;
u16 pkey_violations;
u16 qkey_violations;
u16 mkey_violations;
diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h
index 04c519ef6d71..574eb7278f46 100644
--- a/include/rdma/rdmavt_cq.h
+++ b/include/rdma/rdmavt_cq.h
@@ -53,6 +53,7 @@
#include <linux/kthread.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
/*
* Define an ib_cq_notify value that is not valid so we know when CQ
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 0eeea520a853..b550ae89bf85 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -608,7 +608,7 @@ static inline void rvt_qp_wqe_reserve(
/**
* rvt_qp_wqe_unreserve - clean reserved operation
* @qp - the rvt qp
- * @wqe - the send wqe
+ * @flags - send wqe flags
*
* This decrements the reserve use count.
*
@@ -620,11 +620,9 @@ static inline void rvt_qp_wqe_reserve(
* the compiler does not juggle the order of the s_last
* ring index and the decrementing of s_reserved_used.
*/
-static inline void rvt_qp_wqe_unreserve(
- struct rvt_qp *qp,
- struct rvt_swqe *wqe)
+static inline void rvt_qp_wqe_unreserve(struct rvt_qp *qp, int flags)
{
- if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) {
+ if (unlikely(flags & RVT_SEND_RESERVE_USED)) {
atomic_dec(&qp->s_reserved_used);
/* ensure no compiler re-order up to s_last change */
smp_mb__after_atomic();
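Existing callers pass a snapshot of the send flags instead of the wqe itself, so the wqe may be recycled before the unreserve runs. A hedged before/after sketch:

    /* Before this patch (illustrative): the wqe itself was passed. */
    rvt_qp_wqe_unreserve(qp, wqe);

    /* After: the caller passes the flags word, as rvt_qp_complete_swqe()
     * does below with its local snapshot. */
    rvt_qp_wqe_unreserve(qp, wqe->wr.send_flags);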
@@ -853,6 +851,7 @@ rvt_qp_complete_swqe(struct rvt_qp *qp,
u32 byte_len, last;
int flags = wqe->wr.send_flags;
+ rvt_qp_wqe_unreserve(qp, flags);
rvt_put_qp_swqe(qp, wqe);
need_completion =
@@ -974,6 +973,41 @@ static inline void rvt_free_rq(struct rvt_rq *rq)
rq->wq = NULL;
}
+/**
+ * rvt_to_iport - Get the ibport pointer
+ * @qp: the qp pointer
+ *
+ * This function returns the ibport pointer from the qp pointer.
+ */
+static inline struct rvt_ibport *rvt_to_iport(struct rvt_qp *qp)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+ return rdi->ports[qp->port_num - 1];
+}
+
+/**
+ * rvt_rc_credit_avail - Check if there are enough RC credits for the request
+ * @qp: the qp
+ * @wqe: the request
+ *
+ * This function returns false when there are not enough credits for the given
+ * request and true otherwise.
+ */
+static inline bool rvt_rc_credit_avail(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+ lockdep_assert_held(&qp->s_lock);
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
+ rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
+ struct rvt_ibport *rvp = rvt_to_iport(qp);
+
+ qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+ rvp->n_rc_crwaits++;
+ return false;
+ }
+ return true;
+}
+
struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi,
u64 v,
void (*cb)(struct rvt_qp *qp, u64 v));
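A hedged sketch of how a driver send engine might gate an RC request on credits; qp->s_lock must be held, and the surrounding loop and bail label are assumed, not shown:

    /* Illustrative: stall the send loop until credits return. */
    if (!rvt_rc_credit_avail(qp, wqe))
        goto bail; /* RVT_S_WAIT_SSN_CREDIT is set; a credit ACK resumes */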
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index b0fc6b26bdf5..83df1ec6664e 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -105,8 +105,7 @@ struct rdma_restrack_entry {
};
int rdma_restrack_count(struct ib_device *dev,
- enum rdma_restrack_type type,
- struct pid_namespace *ns);
+ enum rdma_restrack_type type);
void rdma_restrack_kadd(struct rdma_restrack_entry *res);
void rdma_restrack_uadd(struct rdma_restrack_entry *res);
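With namespace filtering handled inside the core, callers simply drop the ns argument. A one-line sketch, assuming dev is a valid ib_device:

    /* Illustrative: count tracked QPs on a device. */
    int qp_count = rdma_restrack_count(dev, RDMA_RESTRACK_QP);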
diff --git a/include/rdma/signature.h b/include/rdma/signature.h
index f24cc2a1d3c5..d16b0fcc8344 100644
--- a/include/rdma/signature.h
+++ b/include/rdma/signature.h
@@ -6,6 +6,8 @@
#ifndef _RDMA_SIGNATURE_H_
#define _RDMA_SIGNATURE_H_
+#include <linux/types.h>
+
enum ib_signature_prot_cap {
IB_PROT_T10DIF_TYPE_1 = 1,
IB_PROT_T10DIF_TYPE_2 = 1 << 1,