aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/include/rdma
diff options
context:
space:
mode:
Diffstat (limited to 'include/rdma')
-rw-r--r--include/rdma/ib.h2
-rw-r--r--include/rdma/ib_addr.h24
-rw-r--r--include/rdma/ib_cache.h21
-rw-r--r--include/rdma/ib_cm.h13
-rw-r--r--include/rdma/ib_hdrs.h6
-rw-r--r--include/rdma/ib_mad.h32
-rw-r--r--include/rdma/ib_pack.h5
-rw-r--r--include/rdma/ib_sa.h44
-rw-r--r--include/rdma/ib_smi.h12
-rw-r--r--include/rdma/ib_sysfs.h37
-rw-r--r--include/rdma/ib_umem.h160
-rw-r--r--include/rdma/ib_umem_odp.h21
-rw-r--r--include/rdma/ib_verbs.h1073
-rw-r--r--include/rdma/iw_cm.h22
-rw-r--r--include/rdma/opa_vnic.h5
-rw-r--r--include/rdma/rdma_cm.h77
-rw-r--r--include/rdma/rdma_counter.h21
-rw-r--r--include/rdma/rdma_netlink.h2
-rw-r--r--include/rdma/rdma_vt.h20
-rw-r--r--include/rdma/rdmavt_qp.h3
-rw-r--r--include/rdma/restrack.h56
-rw-r--r--include/rdma/rw.h18
-rw-r--r--include/rdma/uverbs_ioctl.h139
-rw-r--r--include/rdma/uverbs_named_ioctl.h2
-rw-r--r--include/rdma/uverbs_types.h9
25 files changed, 1040 insertions, 784 deletions
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index 83139b9ce409..f7c185ff7a11 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -75,7 +75,7 @@ struct sockaddr_ib {
*/
static inline bool ib_safe_file_access(struct file *filp)
{
- return filp->f_cred == current_cred() && !uaccess_kernel();
+ return filp->f_cred == current_cred();
}
#endif /* _RDMA_IB_H */
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index b0e636ac6690..811a0f11d0db 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -7,6 +7,7 @@
#ifndef IB_ADDR_H
#define IB_ADDR_H
+#include <linux/ethtool.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
@@ -193,29 +194,6 @@ static inline enum ib_mtu iboe_get_mtu(int mtu)
return 0;
}
-static inline int iboe_get_rate(struct net_device *dev)
-{
- struct ethtool_link_ksettings cmd;
- int err;
-
- rtnl_lock();
- err = __ethtool_get_link_ksettings(dev, &cmd);
- rtnl_unlock();
- if (err)
- return IB_RATE_PORT_CURRENT;
-
- if (cmd.base.speed >= 40000)
- return IB_RATE_40_GBPS;
- else if (cmd.base.speed >= 30000)
- return IB_RATE_30_GBPS;
- else if (cmd.base.speed >= 20000)
- return IB_RATE_20_GBPS;
- else if (cmd.base.speed >= 10000)
- return IB_RATE_10_GBPS;
- else
- return IB_RATE_PORT_CURRENT;
-}
-
static inline int rdma_link_local_addr(struct in6_addr *addr)
{
if (addr->s6_addr32[0] == htonl(0xfe800000) &&
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 66a8f369a2fa..226ae3702d8a 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -10,7 +10,7 @@
#include <rdma/ib_verbs.h>
-int rdma_query_gid(struct ib_device *device, u8 port_num, int index,
+int rdma_query_gid(struct ib_device *device, u32 port_num, int index,
union ib_gid *gid);
void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr);
const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
@@ -20,10 +20,10 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
enum ib_gid_type gid_type,
- u8 port,
+ u32 port,
struct net_device *ndev);
const struct ib_gid_attr *rdma_find_gid_by_filter(
- struct ib_device *device, const union ib_gid *gid, u8 port_num,
+ struct ib_device *device, const union ib_gid *gid, u32 port_num,
bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
void *),
void *context);
@@ -43,7 +43,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
* the local software cache.
*/
int ib_get_cached_pkey(struct ib_device *device_handle,
- u8 port_num,
+ u32 port_num,
int index,
u16 *pkey);
@@ -59,7 +59,7 @@ int ib_get_cached_pkey(struct ib_device *device_handle,
* the local software cache.
*/
int ib_find_cached_pkey(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u16 pkey,
u16 *index);
@@ -75,7 +75,7 @@ int ib_find_cached_pkey(struct ib_device *device,
* the local software cache.
*/
int ib_find_exact_cached_pkey(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u16 pkey,
u16 *index);
@@ -89,7 +89,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
* the local software cache.
*/
int ib_get_cached_lmc(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u8 *lmc);
/**
@@ -102,13 +102,16 @@ int ib_get_cached_lmc(struct ib_device *device,
* the local software cache.
*/
int ib_get_cached_port_state(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
enum ib_port_state *port_active);
bool rdma_is_zero_gid(const union ib_gid *gid);
const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device,
- u8 port_num, int index);
+ u32 port_num, int index);
void rdma_put_gid_attr(const struct ib_gid_attr *attr);
void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
+ssize_t rdma_query_gid_table(struct ib_device *device,
+ struct ib_uverbs_gid_entry *entries,
+ size_t max_entries);
#endif /* _IB_CACHE_H */
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 382427add677..a2ac62b4a6cf 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -14,9 +14,6 @@
#include <rdma/ib_sa.h>
#include <rdma/rdma_cm.h>
-/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
-extern struct class cm_class;
-
enum ib_cm_state {
IB_CM_IDLE,
IB_CM_LISTEN,
@@ -297,7 +294,6 @@ struct ib_cm_id {
void *context;
struct ib_device *device;
__be64 service_id;
- __be64 service_mask;
enum ib_cm_state state; /* internal CM/debug use */
enum ib_cm_lap_state lap_state; /* internal CM/debug use */
__be32 local_id;
@@ -343,13 +339,8 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id);
* and service ID resolution requests. The service ID should be specified
* network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
* assign a service ID to the caller.
- * @service_mask: Mask applied to service ID used to listen across a
- * range of service IDs. If set to 0, the service ID is matched
- * exactly. This parameter is ignored if %service_id is set to
- * IB_CM_ASSIGN_SERVICE_ID.
*/
-int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
- __be64 service_mask);
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id);
struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
ib_cm_handler cm_handler,
@@ -357,6 +348,8 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
struct ib_cm_req_param {
struct sa_path_rec *primary_path;
+ struct sa_path_rec *primary_path_inbound;
+ struct sa_path_rec *primary_path_outbound;
struct sa_path_rec *alternate_path;
const struct ib_gid_attr *ppath_sgid_attr;
__be64 service_id;
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 57c1ac881d08..8ae07c0ecdf7 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -206,11 +206,6 @@ static inline u8 ib_get_lver(struct ib_header *hdr)
IB_LVER_MASK);
}
-static inline u16 ib_get_len(struct ib_header *hdr)
-{
- return (u16)(be16_to_cpu(hdr->lrh[2]));
-}
-
static inline u32 ib_get_qkey(struct ib_other_headers *ohdr)
{
return be32_to_cpu(ohdr->u.ud.deth[0]);
@@ -237,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
#define IB_BTH_SE_SHIFT 23
#define IB_BTH_TVER_MASK 0xf
#define IB_BTH_TVER_SHIFT 16
+#define IB_BTH_OPCODE_CNP 0x81
static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
{
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 8dfb1ddf345a..3f1b58d8b4bf 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -276,6 +276,9 @@ enum ib_port_capability_mask2_bits {
IB_PORT_SWITCH_PORT_STATE_TABLE_SUP = 1 << 3,
IB_PORT_LINK_WIDTH_2X_SUP = 1 << 4,
IB_PORT_LINK_SPEED_HDR_SUP = 1 << 5,
+ IB_PORT_LINK_SPEED_NDR_SUP = 1 << 10,
+ IB_PORT_EXTENDED_SPEEDS2_SUP = 1 << 11,
+ IB_PORT_LINK_SPEED_XDR_SUP = 1 << 12,
};
#define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
@@ -668,7 +671,7 @@ struct ib_mad_reg_req {
* @registration_flags: Registration flags to set for this agent
*/
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
enum ib_qp_type qp_type,
struct ib_mad_reg_req *mad_reg_req,
u8 rmpp_version,
@@ -718,27 +721,26 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc);
/**
- * ib_cancel_mad - Cancels an outstanding send MAD operation.
- * @mad_agent: Specifies the registration associated with sent MAD.
- * @send_buf: Indicates the MAD to cancel.
- *
- * MADs will be returned to the user through the corresponding
- * ib_mad_send_handler.
- */
-void ib_cancel_mad(struct ib_mad_agent *mad_agent,
- struct ib_mad_send_buf *send_buf);
-
-/**
* ib_modify_mad - Modifies an outstanding send MAD operation.
- * @mad_agent: Specifies the registration associated with sent MAD.
* @send_buf: Indicates the MAD to modify.
* @timeout_ms: New timeout value for sent MAD.
*
* This call will reset the timeout value for a sent MAD to the specified
* value.
*/
-int ib_modify_mad(struct ib_mad_agent *mad_agent,
- struct ib_mad_send_buf *send_buf, u32 timeout_ms);
+int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms);
+
+/**
+ * ib_cancel_mad - Cancels an outstanding send MAD operation.
+ * @send_buf: Indicates the MAD to cancel.
+ *
+ * MADs will be returned to the user through the corresponding
+ * ib_mad_send_handler.
+ */
+static inline void ib_cancel_mad(struct ib_mad_send_buf *send_buf)
+{
+ ib_modify_mad(send_buf, 0);
+}
/**
* ib_create_send_mad - Allocate and initialize a data buffer and work request
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index a9162f25beaf..b8c56d7dc35d 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -84,6 +84,8 @@ enum {
/* opcode 0x15 is reserved */
IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16,
IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17,
+ IB_OPCODE_FLUSH = 0x1C,
+ IB_OPCODE_ATOMIC_WRITE = 0x1D,
/* real constants follow -- see comment about above IB_OPCODE()
macro for more details */
@@ -112,6 +114,8 @@ enum {
IB_OPCODE(RC, FETCH_ADD),
IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE),
IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE),
+ IB_OPCODE(RC, FLUSH),
+ IB_OPCODE(RC, ATOMIC_WRITE),
/* UC */
IB_OPCODE(UC, SEND_FIRST),
@@ -149,6 +153,7 @@ enum {
IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
IB_OPCODE(RD, COMPARE_SWAP),
IB_OPCODE(RD, FETCH_ADD),
+ IB_OPCODE(RD, FLUSH),
/* UD */
IB_OPCODE(UD, SEND_ONLY),
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 693285e76f13..b46353fc53bf 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -186,6 +186,7 @@ struct sa_path_rec {
struct sa_path_rec_opa opa;
};
enum sa_path_rec_type rec_type;
+ u32 flags;
};
static inline enum ib_gid_type
@@ -366,20 +367,6 @@ struct ib_sa_mcmember_rec {
#define IB_DEFAULT_SERVICE_LEASE 0xFFFFFFFF
-struct ib_sa_service_rec {
- u64 id;
- union ib_gid gid;
- __be16 pkey;
- /* reserved */
- u32 lease;
- u8 key[16];
- u8 name[64];
- u8 data8[16];
- u16 data16[8];
- u32 data32[4];
- u64 data64[2];
-};
-
#define IB_SA_GUIDINFO_REC_LID IB_SA_COMP_MASK(0)
#define IB_SA_GUIDINFO_REC_BLOCK_NUM IB_SA_COMP_MASK(1)
#define IB_SA_GUIDINFO_REC_RES1 IB_SA_COMP_MASK(2)
@@ -423,23 +410,13 @@ struct ib_sa_query;
void ib_sa_cancel_query(int id, struct ib_sa_query *query);
int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device,
- u8 port_num, struct sa_path_rec *rec,
+ u32 port_num, struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
gfp_t gfp_mask,
void (*callback)(int status, struct sa_path_rec *resp,
- void *context),
+ unsigned int num_prs, void *context),
void *context, struct ib_sa_query **query);
-int ib_sa_service_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num, u8 method,
- struct ib_sa_service_rec *rec,
- ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
- gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_service_rec *resp,
- void *context),
- void *context, struct ib_sa_query **sa_query);
-
struct ib_sa_multicast {
struct ib_sa_mcmember_rec rec;
ib_sa_comp_mask comp_mask;
@@ -477,7 +454,8 @@ struct ib_sa_multicast {
* group, and the user must rejoin the group to continue using it.
*/
struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device,
+ u32 port_num,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
int (*callback)(int status,
@@ -506,20 +484,20 @@ void ib_sa_free_multicast(struct ib_sa_multicast *multicast);
* @mgid: MGID of multicast group.
* @rec: Location to copy SA multicast member record.
*/
-int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+int ib_sa_get_mcmember_rec(struct ib_device *device, u32 port_num,
union ib_gid *mgid, struct ib_sa_mcmember_rec *rec);
/**
* ib_init_ah_from_mcmember - Initialize address handle attributes based on
* an SA multicast member record.
*/
-int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+int ib_init_ah_from_mcmember(struct ib_device *device, u32 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
struct rdma_ah_attr *ah_attr);
-int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
+int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr,
const struct ib_gid_attr *sgid_attr);
@@ -538,7 +516,7 @@ void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec);
/* Support GuidInfoRecord */
int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
unsigned long timeout_ms, gfp_t gfp_mask,
@@ -547,10 +525,6 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
void *context),
void *context, struct ib_sa_query **sa_query);
-bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
- struct ib_device *device,
- u8 port_num);
-
static inline bool sa_path_is_roce(struct sa_path_rec *rec)
{
return ((rec->rec_type == SA_PATH_REC_TYPE_ROCE_V1) ||
diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h
index fdb8633cbaff..fc16b826b2c1 100644
--- a/include/rdma/ib_smi.h
+++ b/include/rdma/ib_smi.h
@@ -144,5 +144,15 @@ ib_get_smp_direction(struct ib_smp *smp)
#define IB_NOTICE_TRAP_DR_NOTICE 0x80
#define IB_NOTICE_TRAP_DR_TRUNC 0x40
-
+/**
+ * ib_init_query_mad - Initialize query MAD.
+ * @mad: MAD to initialize.
+ */
+static inline void ib_init_query_mad(struct ib_smp *mad)
+{
+ mad->base_version = IB_MGMT_BASE_VERSION;
+ mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ mad->class_version = 1;
+ mad->method = IB_MGMT_METHOD_GET;
+}
#endif /* IB_SMI_H */
diff --git a/include/rdma/ib_sysfs.h b/include/rdma/ib_sysfs.h
new file mode 100644
index 000000000000..3b77cfd74d9a
--- /dev/null
+++ b/include/rdma/ib_sysfs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright (c) 2021 Mellanox Technologies Ltd. All rights reserved.
+ */
+#ifndef DEF_RDMA_IB_SYSFS_H
+#define DEF_RDMA_IB_SYSFS_H
+
+#include <linux/sysfs.h>
+
+struct ib_device;
+
+struct ib_port_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf);
+ ssize_t (*store)(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, const char *buf,
+ size_t count);
+};
+
+#define IB_PORT_ATTR_RW(_name) \
+ struct ib_port_attribute ib_port_attr_##_name = __ATTR_RW(_name)
+
+#define IB_PORT_ATTR_ADMIN_RW(_name) \
+ struct ib_port_attribute ib_port_attr_##_name = \
+ __ATTR_RW_MODE(_name, 0600)
+
+#define IB_PORT_ATTR_RO(_name) \
+ struct ib_port_attribute ib_port_attr_##_name = __ATTR_RO(_name)
+
+#define IB_PORT_ATTR_WO(_name) \
+ struct ib_port_attribute ib_port_attr_##_name = __ATTR_WO(_name)
+
+struct ib_device *ib_port_sysfs_get_ibdev_kobj(struct kobject *kobj,
+ u32 *port_num);
+
+#endif
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 71f573a418bf..565a85044541 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2007 Cisco Systems. All rights reserved.
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
*/
#ifndef IB_UMEM_H
@@ -13,45 +14,146 @@
struct ib_ucontext;
struct ib_umem_odp;
+struct dma_buf_attach_ops;
struct ib_umem {
struct ib_device *ibdev;
struct mm_struct *owning_mm;
+ u64 iova;
size_t length;
unsigned long address;
u32 writable : 1;
u32 is_odp : 1;
- struct work_struct work;
- struct sg_table sg_head;
- int nmap;
- unsigned int sg_nents;
+ u32 is_dmabuf : 1;
+ struct sg_append_table sgt_append;
};
+struct ib_umem_dmabuf {
+ struct ib_umem umem;
+ struct dma_buf_attachment *attach;
+ struct sg_table *sgt;
+ struct scatterlist *first_sg;
+ struct scatterlist *last_sg;
+ unsigned long first_sg_offset;
+ unsigned long last_sg_trim;
+ void *private;
+ u8 pinned : 1;
+};
+
+static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem)
+{
+ return container_of(umem, struct ib_umem_dmabuf, umem);
+}
+
/* Returns the offset of the umem start relative to the first page. */
static inline int ib_umem_offset(struct ib_umem *umem)
{
return umem->address & ~PAGE_MASK;
}
+static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem,
+ unsigned long pgsz)
+{
+ return (sg_dma_address(umem->sgt_append.sgt.sgl) + ib_umem_offset(umem)) &
+ (pgsz - 1);
+}
+
+static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
+ unsigned long pgsz)
+{
+ return (size_t)((ALIGN(umem->iova + umem->length, pgsz) -
+ ALIGN_DOWN(umem->iova, pgsz))) /
+ pgsz;
+}
+
static inline size_t ib_umem_num_pages(struct ib_umem *umem)
{
- return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
- ALIGN_DOWN(umem->address, PAGE_SIZE)) >>
- PAGE_SHIFT;
+ return ib_umem_num_dma_blocks(umem, PAGE_SIZE);
+}
+
+static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
+ struct ib_umem *umem,
+ unsigned long pgsz)
+{
+ __rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl,
+ umem->sgt_append.sgt.nents, pgsz);
+ biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1);
+ biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz);
}
+static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter)
+{
+ return __rdma_block_iter_next(biter) && biter->__sg_numblocks--;
+}
+
+/**
+ * rdma_umem_for_each_dma_block - iterate over contiguous DMA blocks of the umem
+ * @umem: umem to iterate over
+ * @pgsz: Page size to split the list into
+ *
+ * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The
+ * returned DMA blocks will be aligned to pgsz and span the range:
+ * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz)
+ *
+ * Performs exactly ib_umem_num_dma_blocks() iterations.
+ */
+#define rdma_umem_for_each_dma_block(umem, biter, pgsz) \
+ for (__rdma_umem_block_iter_start(biter, umem, pgsz); \
+ __rdma_umem_block_iter_next(biter);)
+
#ifdef CONFIG_INFINIBAND_USER_MEM
struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
size_t size, int access);
void ib_umem_release(struct ib_umem *umem);
-int ib_umem_page_count(struct ib_umem *umem);
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length);
unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
unsigned long pgsz_bitmap,
unsigned long virt);
+/**
+ * ib_umem_find_best_pgoff - Find best HW page size
+ *
+ * @umem: umem struct
+ * @pgsz_bitmap bitmap of HW supported page sizes
+ * @pgoff_bitmask: Mask of bits that can be represented with an offset
+ *
+ * This is very similar to ib_umem_find_best_pgsz() except instead of accepting
+ * an IOVA it accepts a bitmask specifying what address bits can be represented
+ * with a page offset.
+ *
+ * For instance if the HW has multiple page sizes, requires 64 byte alignemnt,
+ * and can support aligned offsets up to 4032 then pgoff_bitmask would be
+ * "111111000000".
+ *
+ * If the pgoff_bitmask requires either alignment in the low bit or an
+ * unavailable page size for the high bits, this function returns 0.
+ */
+static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ u64 pgoff_bitmask)
+{
+ struct scatterlist *sg = umem->sgt_append.sgt.sgl;
+ dma_addr_t dma_addr;
+
+ dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK);
+ return ib_umem_find_best_pgsz(umem, pgsz_bitmap,
+ dma_addr & pgoff_bitmask);
+}
+
+struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device,
+ unsigned long offset, size_t size,
+ int fd, int access,
+ const struct dma_buf_attach_ops *ops);
+struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device,
+ unsigned long offset,
+ size_t size, int fd,
+ int access);
+int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf);
+void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf);
+void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf);
+
#else /* CONFIG_INFINIBAND_USER_MEM */
#include <linux/err.h>
@@ -60,20 +162,46 @@ static inline struct ib_umem *ib_umem_get(struct ib_device *device,
unsigned long addr, size_t size,
int access)
{
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-EOPNOTSUPP);
}
static inline void ib_umem_release(struct ib_umem *umem) { }
-static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length) {
- return -EINVAL;
+ return -EOPNOTSUPP;
}
-static inline int ib_umem_find_best_pgsz(struct ib_umem *umem,
- unsigned long pgsz_bitmap,
- unsigned long virt) {
- return -EINVAL;
+static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ unsigned long virt)
+{
+ return 0;
}
+static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ u64 pgoff_bitmask)
+{
+ return 0;
+}
+static inline
+struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device,
+ unsigned long offset,
+ size_t size, int fd,
+ int access,
+ struct dma_buf_attach_ops *ops)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+static inline struct ib_umem_dmabuf *
+ib_umem_dmabuf_get_pinned(struct ib_device *device, unsigned long offset,
+ size_t size, int fd, int access)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ return -EOPNOTSUPP;
+}
+static inline void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { }
+static inline void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { }
#endif /* CONFIG_INFINIBAND_USER_MEM */
-
#endif /* IB_UMEM_H */
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index d16d2c17e733..0844c1d05ac6 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -14,17 +14,13 @@ struct ib_umem_odp {
struct mmu_interval_notifier notifier;
struct pid *tgid;
+ /* An array of the pfns included in the on-demand paging umem. */
+ unsigned long *pfn_list;
+
/*
- * An array of the pages included in the on-demand paging umem.
- * Indices of pages that are currently not mapped into the device will
- * contain NULL.
- */
- struct page **page_list;
- /*
- * An array of the same size as page_list, with DMA addresses mapped
- * for pages the pages in page_list. The lower two bits designate
- * access permissions. See ODP_READ_ALLOWED_BIT and
- * ODP_WRITE_ALLOWED_BIT.
+ * An array with DMA addresses mapped for pfns in pfn_list.
+ * The lower two bits designate access permissions.
+ * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT.
*/
dma_addr_t *dma_list;
/*
@@ -97,9 +93,8 @@ ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr,
const struct mmu_interval_notifier_ops *ops);
void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
-int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
- u64 bcnt, u64 access_mask,
- unsigned long current_seq);
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 start_offset,
+ u64 bcnt, u64 access_mask, bool fault);
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bound);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c0b2fa7e9b95..477bf9dd5e71 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
- * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004, 2020 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -12,6 +12,7 @@
#ifndef IB_VERBS_H
#define IB_VERBS_H
+#include <linux/ethtool.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
@@ -49,6 +50,8 @@ struct ib_uqp_object;
struct ib_usrq_object;
struct ib_uwq_object;
struct rdma_cm_id;
+struct ib_port;
+struct hw_stats_device_data;
extern struct workqueue_struct *ib_wq;
extern struct workqueue_struct *ib_comp_wq;
@@ -138,10 +141,9 @@ union ib_gid {
extern union ib_gid zgid;
enum ib_gid_type {
- /* If link layer is Ethernet, this is RoCE V1 */
- IB_GID_TYPE_IB = 0,
- IB_GID_TYPE_ROCE = 0,
- IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
+ IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB,
+ IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1,
+ IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2,
IB_GID_TYPE_SIZE
};
@@ -152,7 +154,7 @@ struct ib_gid_attr {
union ib_gid gid;
enum ib_gid_type gid_type;
u16 index;
- u8 port_num;
+ u32 port_num;
};
enum {
@@ -180,7 +182,7 @@ rdma_node_get_transport(unsigned int node_type);
enum rdma_network_type {
RDMA_NETWORK_IB,
- RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
+ RDMA_NETWORK_ROCE_V1,
RDMA_NETWORK_IPV4,
RDMA_NETWORK_IPV6
};
@@ -190,9 +192,10 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net
if (network_type == RDMA_NETWORK_IPV4 ||
network_type == RDMA_NETWORK_IPV6)
return IB_GID_TYPE_ROCE_UDP_ENCAP;
-
- /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
- return IB_GID_TYPE_IB;
+ else if (network_type == RDMA_NETWORK_ROCE_V1)
+ return IB_GID_TYPE_ROCE;
+ else
+ return IB_GID_TYPE_IB;
}
static inline enum rdma_network_type
@@ -201,6 +204,9 @@ rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
if (attr->gid_type == IB_GID_TYPE_IB)
return RDMA_NETWORK_IB;
+ if (attr->gid_type == IB_GID_TYPE_ROCE)
+ return RDMA_NETWORK_ROCE_V1;
+
if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
return RDMA_NETWORK_IPV4;
else
@@ -214,32 +220,24 @@ enum rdma_link_layer {
};
enum ib_device_cap_flags {
- IB_DEVICE_RESIZE_MAX_WR = (1 << 0),
- IB_DEVICE_BAD_PKEY_CNTR = (1 << 1),
- IB_DEVICE_BAD_QKEY_CNTR = (1 << 2),
- IB_DEVICE_RAW_MULTI = (1 << 3),
- IB_DEVICE_AUTO_PATH_MIG = (1 << 4),
- IB_DEVICE_CHANGE_PHY_PORT = (1 << 5),
- IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6),
- IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7),
- IB_DEVICE_SHUTDOWN_PORT = (1 << 8),
- /* Not in use, former INIT_TYPE = (1 << 9),*/
- IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10),
- IB_DEVICE_SYS_IMAGE_GUID = (1 << 11),
- IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12),
- IB_DEVICE_SRQ_RESIZE = (1 << 13),
- IB_DEVICE_N_NOTIFY_CQ = (1 << 14),
-
- /*
- * This device supports a per-device lkey or stag that can be
- * used without performing a memory registration for the local
- * memory. Note that ULPs should never check this flag, but
- * instead of use the local_dma_lkey flag in the ib_pd structure,
- * which will always contain a usable lkey.
- */
- IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15),
- /* Reserved, old SEND_W_INV = (1 << 16),*/
- IB_DEVICE_MEM_WINDOW = (1 << 17),
+ IB_DEVICE_RESIZE_MAX_WR = IB_UVERBS_DEVICE_RESIZE_MAX_WR,
+ IB_DEVICE_BAD_PKEY_CNTR = IB_UVERBS_DEVICE_BAD_PKEY_CNTR,
+ IB_DEVICE_BAD_QKEY_CNTR = IB_UVERBS_DEVICE_BAD_QKEY_CNTR,
+ IB_DEVICE_RAW_MULTI = IB_UVERBS_DEVICE_RAW_MULTI,
+ IB_DEVICE_AUTO_PATH_MIG = IB_UVERBS_DEVICE_AUTO_PATH_MIG,
+ IB_DEVICE_CHANGE_PHY_PORT = IB_UVERBS_DEVICE_CHANGE_PHY_PORT,
+ IB_DEVICE_UD_AV_PORT_ENFORCE = IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE,
+ IB_DEVICE_CURR_QP_STATE_MOD = IB_UVERBS_DEVICE_CURR_QP_STATE_MOD,
+ IB_DEVICE_SHUTDOWN_PORT = IB_UVERBS_DEVICE_SHUTDOWN_PORT,
+ /* IB_DEVICE_INIT_TYPE = IB_UVERBS_DEVICE_INIT_TYPE, (not in use) */
+ IB_DEVICE_PORT_ACTIVE_EVENT = IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT,
+ IB_DEVICE_SYS_IMAGE_GUID = IB_UVERBS_DEVICE_SYS_IMAGE_GUID,
+ IB_DEVICE_RC_RNR_NAK_GEN = IB_UVERBS_DEVICE_RC_RNR_NAK_GEN,
+ IB_DEVICE_SRQ_RESIZE = IB_UVERBS_DEVICE_SRQ_RESIZE,
+ IB_DEVICE_N_NOTIFY_CQ = IB_UVERBS_DEVICE_N_NOTIFY_CQ,
+
+ /* Reserved, old SEND_W_INV = 1 << 16,*/
+ IB_DEVICE_MEM_WINDOW = IB_UVERBS_DEVICE_MEM_WINDOW,
/*
* Devices should set IB_DEVICE_UD_IP_SUM if they support
* insertion of UDP and TCP checksum on outgoing UD IPoIB
@@ -247,9 +245,8 @@ enum ib_device_cap_flags {
* incoming messages. Setting this flag implies that the
* IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
*/
- IB_DEVICE_UD_IP_CSUM = (1 << 18),
- IB_DEVICE_UD_TSO = (1 << 19),
- IB_DEVICE_XRC = (1 << 20),
+ IB_DEVICE_UD_IP_CSUM = IB_UVERBS_DEVICE_UD_IP_CSUM,
+ IB_DEVICE_XRC = IB_UVERBS_DEVICE_XRC,
/*
* This device supports the IB "base memory management extension",
@@ -260,31 +257,57 @@ enum ib_device_cap_flags {
* IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
* stag.
*/
- IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21),
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22),
- IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23),
- IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24),
- IB_DEVICE_RC_IP_CSUM = (1 << 25),
+ IB_DEVICE_MEM_MGT_EXTENSIONS = IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS,
+ IB_DEVICE_MEM_WINDOW_TYPE_2A = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2A,
+ IB_DEVICE_MEM_WINDOW_TYPE_2B = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2B,
+ IB_DEVICE_RC_IP_CSUM = IB_UVERBS_DEVICE_RC_IP_CSUM,
/* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
- IB_DEVICE_RAW_IP_CSUM = (1 << 26),
- /*
- * Devices should set IB_DEVICE_CROSS_CHANNEL if they
- * support execution of WQEs that involve synchronization
- * of I/O operations with single completion queue managed
- * by hardware.
- */
- IB_DEVICE_CROSS_CHANNEL = (1 << 27),
- IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
- IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30),
- IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
- IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
- IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
+ IB_DEVICE_RAW_IP_CSUM = IB_UVERBS_DEVICE_RAW_IP_CSUM,
+ IB_DEVICE_MANAGED_FLOW_STEERING =
+ IB_UVERBS_DEVICE_MANAGED_FLOW_STEERING,
/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
- IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
- IB_DEVICE_RDMA_NETDEV_OPA = (1ULL << 35),
+ IB_DEVICE_RAW_SCATTER_FCS = IB_UVERBS_DEVICE_RAW_SCATTER_FCS,
/* The device supports padding incoming writes to cacheline. */
- IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36),
- IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37),
+ IB_DEVICE_PCI_WRITE_END_PADDING =
+ IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING,
+ /* Placement type attributes */
+ IB_DEVICE_FLUSH_GLOBAL = IB_UVERBS_DEVICE_FLUSH_GLOBAL,
+ IB_DEVICE_FLUSH_PERSISTENT = IB_UVERBS_DEVICE_FLUSH_PERSISTENT,
+ IB_DEVICE_ATOMIC_WRITE = IB_UVERBS_DEVICE_ATOMIC_WRITE,
+};
+
+enum ib_kernel_cap_flags {
+ /*
+ * This device supports a per-device lkey or stag that can be
+ * used without performing a memory registration for the local
+ * memory. Note that ULPs should never check this flag, but
+ * instead of use the local_dma_lkey flag in the ib_pd structure,
+ * which will always contain a usable lkey.
+ */
+ IBK_LOCAL_DMA_LKEY = 1 << 0,
+ /* IB_QP_CREATE_INTEGRITY_EN is supported to implement T10-PI */
+ IBK_INTEGRITY_HANDOVER = 1 << 1,
+ /* IB_ACCESS_ON_DEMAND is supported during reg_user_mr() */
+ IBK_ON_DEMAND_PAGING = 1 << 2,
+ /* IB_MR_TYPE_SG_GAPS is supported */
+ IBK_SG_GAPS_REG = 1 << 3,
+ /* Driver supports RDMA_NLDEV_CMD_DELLINK */
+ IBK_ALLOW_USER_UNREG = 1 << 4,
+
+ /* ipoib will use IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK */
+ IBK_BLOCK_MULTICAST_LOOPBACK = 1 << 5,
+ /* iopib will use IB_QP_CREATE_IPOIB_UD_LSO for its QPs */
+ IBK_UD_TSO = 1 << 6,
+ /* iopib will use the device ops:
+ * get_vf_config
+ * get_vf_guid
+ * get_vf_stats
+ * set_vf_guid
+ * set_vf_link_state
+ */
+ IBK_VIRTUAL_FUNCTION = 1 << 7,
+ /* ipoib will use IB_QP_CREATE_NETDEV_USE for its QPs */
+ IBK_RDMA_NETDEV_OPA = 1 << 8,
};
enum ib_atomic_cap {
@@ -383,6 +406,7 @@ struct ib_device_attr {
int max_qp;
int max_qp_wr;
u64 device_cap_flags;
+ u64 kernel_cap_flags;
int max_send_sge;
int max_recv_sge;
int max_sge_rd;
@@ -535,21 +559,41 @@ enum ib_port_speed {
IB_SPEED_FDR10 = 8,
IB_SPEED_FDR = 16,
IB_SPEED_EDR = 32,
- IB_SPEED_HDR = 64
+ IB_SPEED_HDR = 64,
+ IB_SPEED_NDR = 128,
+ IB_SPEED_XDR = 256,
+};
+
+enum ib_stat_flag {
+ IB_STAT_FLAG_OPTIONAL = 1 << 0,
+};
+
+/**
+ * struct rdma_stat_desc
+ * @name - The name of the counter
+ * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
+ * @priv - Driver private information; Core code should not use
+ */
+struct rdma_stat_desc {
+ const char *name;
+ unsigned int flags;
+ const void *priv;
};
/**
* struct rdma_hw_stats
* @lock - Mutex to protect parallel write access to lifespan and values
- * of counters, which are 64bits and not guaranteeed to be written
+ * of counters, which are 64bits and not guaranteed to be written
* atomicaly on 32bits systems.
* @timestamp - Used by the core code to track when the last update was
* @lifespan - Used by the core code to determine how old the counters
* should be before being updated again. Stored in jiffies, defaults
* to 10 milliseconds, drivers can override the default be specifying
* their own value during their allocation routine.
- * @name - Array of pointers to static names used for the counters in
- * directory.
+ * @descs - Array of pointers to static descriptors used for the counters
+ * in directory.
+ * @is_disabled - A bitmap to indicate each counter is currently disabled
+ * or not.
* @num_counters - How many hardware counters there are. If name is
* shorter than this number, a kernel oops will result. Driver authors
* are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters)
@@ -561,36 +605,19 @@ struct rdma_hw_stats {
struct mutex lock; /* Protect lifespan and values[] */
unsigned long timestamp;
unsigned long lifespan;
- const char * const *names;
+ const struct rdma_stat_desc *descs;
+ unsigned long *is_disabled;
int num_counters;
- u64 value[];
+ u64 value[] __counted_by(num_counters);
};
#define RDMA_HW_STATS_DEFAULT_LIFESPAN 10
-/**
- * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
- * for drivers.
- * @names - Array of static const char *
- * @num_counters - How many elements in array
- * @lifespan - How many milliseconds between updates
- */
-static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
- const char * const *names, int num_counters,
- unsigned long lifespan)
-{
- struct rdma_hw_stats *stats;
-
- stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64),
- GFP_KERNEL);
- if (!stats)
- return NULL;
- stats->names = names;
- stats->num_counters = num_counters;
- stats->lifespan = msecs_to_jiffies(lifespan);
- return stats;
-}
+struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
+ const struct rdma_stat_desc *descs, int num_counters,
+ unsigned long lifespan);
+void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats);
/* Define bits for the various functionality this port needs to be supported by
* the core.
@@ -669,7 +696,7 @@ struct ib_port_attr {
u8 subnet_timeout;
u8 init_type_reply;
u8 active_width;
- u8 active_speed;
+ u16 active_speed;
u8 phys_state;
u16 port_cap_flags2;
};
@@ -731,7 +758,7 @@ struct ib_event {
struct ib_qp *qp;
struct ib_srq *srq;
struct ib_wq *wq;
- u8 port_num;
+ u32 port_num;
} element;
enum ib_event_type event;
};
@@ -814,6 +841,7 @@ enum ib_rate {
IB_RATE_50_GBPS = 20,
IB_RATE_400_GBPS = 21,
IB_RATE_600_GBPS = 22,
+ IB_RATE_800_GBPS = 23,
};
/**
@@ -914,7 +942,7 @@ struct rdma_ah_attr {
struct ib_global_route grh;
u8 sl;
u8 static_rate;
- u8 port_num;
+ u32 port_num;
u8 ah_flags;
enum rdma_ah_attr_type type;
union {
@@ -952,16 +980,19 @@ enum ib_wc_status {
const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
enum ib_wc_opcode {
- IB_WC_SEND,
- IB_WC_RDMA_WRITE,
- IB_WC_RDMA_READ,
- IB_WC_COMP_SWAP,
- IB_WC_FETCH_ADD,
- IB_WC_LSO,
- IB_WC_LOCAL_INV,
+ IB_WC_SEND = IB_UVERBS_WC_SEND,
+ IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE,
+ IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
+ IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
+ IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
+ IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
+ IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
+ IB_WC_LSO = IB_UVERBS_WC_TSO,
+ IB_WC_ATOMIC_WRITE = IB_UVERBS_WC_ATOMIC_WRITE,
IB_WC_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
+ IB_WC_FLUSH = IB_UVERBS_WC_FLUSH,
/*
* Set value of IB_WC_RECV so consumers can test if a completion is a
* receive by testing (opcode & IB_WC_RECV).
@@ -1000,7 +1031,7 @@ struct ib_wc {
u16 pkey_index;
u8 sl;
u8 dlid_path_bits;
- u8 port_num; /* valid only for DR SMPs on switches */
+ u32 port_num; /* valid only for DR SMPs on switches */
u8 smac[ETH_ALEN];
u16 vlan_id;
u8 network_hdr_type;
@@ -1065,7 +1096,7 @@ struct ib_qp_cap {
/*
* Maximum number of rdma_rw_ctx structures in flight at a time.
- * ib_create_qp() will calculate the right amount of neededed WRs
+ * ib_create_qp() will calculate the right amount of needed WRs
* and MRs based on this.
*/
u32 max_rdma_ctxs;
@@ -1139,7 +1170,7 @@ enum ib_qp_create_flags {
*/
struct ib_qp_init_attr {
- /* Consumer's event_handler callback must not block */
+ /* This callback occurs in workqueue context */
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
@@ -1155,7 +1186,7 @@ struct ib_qp_init_attr {
/*
* Only needed for special QP types, or when using the RW API.
*/
- u8 port_num;
+ u32 port_num;
struct ib_rwq_ind_table *rwq_ind_tbl;
u32 source_qpn;
};
@@ -1229,6 +1260,8 @@ enum ib_qp_attr_mask {
IB_QP_RESERVED3 = (1<<23),
IB_QP_RESERVED4 = (1<<24),
IB_QP_RATE_LIMIT = (1<<25),
+
+ IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0),
};
enum ib_qp_state {
@@ -1272,11 +1305,11 @@ struct ib_qp_attr {
u8 max_rd_atomic;
u8 max_dest_rd_atomic;
u8 min_rnr_timer;
- u8 port_num;
+ u32 port_num;
u8 timeout;
u8 retry_cnt;
u8 rnr_retry;
- u8 alt_port_num;
+ u32 alt_port_num;
u8 alt_timeout;
u32 rate_limit;
struct net_device *xmit_slave;
@@ -1291,6 +1324,7 @@ enum ib_wr_opcode {
IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
+ IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW,
IB_WR_LSO = IB_UVERBS_WR_TSO,
IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
@@ -1299,6 +1333,8 @@ enum ib_wr_opcode {
IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+ IB_WR_FLUSH = IB_UVERBS_WR_FLUSH,
+ IB_WR_ATOMIC_WRITE = IB_UVERBS_WR_ATOMIC_WRITE,
/* These are kernel only and can not be issued by userspace */
IB_WR_REG_MR = 0x20,
@@ -1392,7 +1428,7 @@ struct ib_ud_wr {
u32 remote_qpn;
u32 remote_qkey;
u16 pkey_index; /* valid for GSI only */
- u8 port_num; /* valid for DR SMPs on switch only */
+ u32 port_num; /* valid for DR SMPs on switch only */
};
static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr)
@@ -1432,10 +1468,12 @@ enum ib_access_flags {
IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
+ IB_ACCESS_FLUSH_GLOBAL = IB_UVERBS_ACCESS_FLUSH_GLOBAL,
+ IB_ACCESS_FLUSH_PERSISTENT = IB_UVERBS_ACCESS_FLUSH_PERSISTENT,
IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
IB_ACCESS_SUPPORTED =
- ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
+ ((IB_ACCESS_FLUSH_PERSISTENT << 1) - 1) | IB_ACCESS_OPTIONAL,
};
/*
@@ -1463,11 +1501,8 @@ enum rdma_remove_reason {
RDMA_REMOVE_DRIVER_REMOVE,
/* uobj is being cleaned-up before being committed */
RDMA_REMOVE_ABORT,
- /*
- * uobj has been fully created, with the uobj->object set, but is being
- * cleaned up before being comitted
- */
- RDMA_REMOVE_ABORT_HWOBJ,
+ /* The driver failed to destroy the uobject and is being disconnected */
+ RDMA_REMOVE_DRIVER_FAILURE,
};
struct ib_rdmacg_object {
@@ -1479,14 +1514,6 @@ struct ib_rdmacg_object {
struct ib_ucontext {
struct ib_device *device;
struct ib_uverbs_file *ufile;
- /*
- * 'closing' can be read by the driver only during a destroy callback,
- * it is set when we are closing the file descriptor and indicates
- * that mm_sem may be locked.
- */
- bool closing;
-
- bool cleanup_retryable;
struct ib_rdmacg_object cg_obj;
/*
@@ -1612,22 +1639,31 @@ struct ib_srq {
} xrc;
};
} ext;
+
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
};
enum ib_raw_packet_caps {
- /* Strip cvlan from incoming packet and report it in the matching work
+ /*
+ * Strip cvlan from incoming packet and report it in the matching work
* completion is supported.
*/
- IB_RAW_PACKET_CAP_CVLAN_STRIPPING = (1 << 0),
- /* Scatter FCS field of an incoming packet to host memory is supported.
+ IB_RAW_PACKET_CAP_CVLAN_STRIPPING =
+ IB_UVERBS_RAW_PACKET_CAP_CVLAN_STRIPPING,
+ /*
+ * Scatter FCS field of an incoming packet to host memory is supported.
*/
- IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1),
+ IB_RAW_PACKET_CAP_SCATTER_FCS = IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS,
/* Checksum offloads are supported (for both send and receive). */
- IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2),
- /* When a packet is received for an RQ with no receive WQEs, the
+ IB_RAW_PACKET_CAP_IP_CSUM = IB_UVERBS_RAW_PACKET_CAP_IP_CSUM,
+ /*
+ * When a packet is received for an RQ with no receive WQEs, the
* packet processing is delayed.
*/
- IB_RAW_PACKET_CAP_DELAY_DROP = (1 << 3),
+ IB_RAW_PACKET_CAP_DELAY_DROP = IB_UVERBS_RAW_PACKET_CAP_DELAY_DROP,
};
enum ib_wq_type {
@@ -1710,7 +1746,7 @@ struct ib_qp_security;
struct ib_port_pkey {
enum port_pkey_state state;
u16 pkey_index;
- u8 port_num;
+ u32 port_num;
struct list_head qp_list;
struct list_head to_error_list;
struct ib_qp_security *sec;
@@ -1771,7 +1807,7 @@ struct ib_qp {
enum ib_qp_type qp_type;
struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_qp_security *qp_sec;
- u8 port;
+ u32 port;
bool integrity_en;
/*
@@ -1863,17 +1899,6 @@ enum ib_flow_spec_type {
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 10
-/* Flow steering rule priority is set according to it's domain.
- * Lower domain value means higher priority.
- */
-enum ib_flow_domain {
- IB_FLOW_DOMAIN_USER,
- IB_FLOW_DOMAIN_ETHTOOL,
- IB_FLOW_DOMAIN_RFS,
- IB_FLOW_DOMAIN_NIC,
- IB_FLOW_DOMAIN_NUM /* Must be last */
-};
-
enum ib_flow_flags {
IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
@@ -1885,8 +1910,6 @@ struct ib_flow_eth_filter {
u8 src_mac[6];
__be16 ether_type;
__be16 vlan_tag;
- /* Must be last */
- u8 real_sz[];
};
struct ib_flow_spec_eth {
@@ -1899,8 +1922,6 @@ struct ib_flow_spec_eth {
struct ib_flow_ib_filter {
__be16 dlid;
__u8 sl;
- /* Must be last */
- u8 real_sz[];
};
struct ib_flow_spec_ib {
@@ -1924,8 +1945,6 @@ struct ib_flow_ipv4_filter {
u8 tos;
u8 ttl;
u8 flags;
- /* Must be last */
- u8 real_sz[];
};
struct ib_flow_spec_ipv4 {
@@ -1942,9 +1961,7 @@ struct ib_flow_ipv6_filter {
u8 next_hdr;
u8 traffic_class;
u8 hop_limit;
- /* Must be last */
- u8 real_sz[];
-};
+} __packed;
struct ib_flow_spec_ipv6 {
u32 type;
@@ -1956,8 +1973,6 @@ struct ib_flow_spec_ipv6 {
struct ib_flow_tcp_udp_filter {
__be16 dst_port;
__be16 src_port;
- /* Must be last */
- u8 real_sz[];
};
struct ib_flow_spec_tcp_udp {
@@ -1969,7 +1984,6 @@ struct ib_flow_spec_tcp_udp {
struct ib_flow_tunnel_filter {
__be32 tunnel_id;
- u8 real_sz[];
};
/* ib_flow_spec_tunnel describes the Vxlan tunnel
@@ -1985,8 +1999,6 @@ struct ib_flow_spec_tunnel {
struct ib_flow_esp_filter {
__be32 spi;
__be32 seq;
- /* Must be last */
- u8 real_sz[];
};
struct ib_flow_spec_esp {
@@ -2000,8 +2012,6 @@ struct ib_flow_gre_filter {
__be16 c_ks_res0_ver;
__be16 protocol;
__be32 key;
- /* Must be last */
- u8 real_sz[];
};
struct ib_flow_spec_gre {
@@ -2013,8 +2023,6 @@ struct ib_flow_spec_gre {
struct ib_flow_mpls_filter {
__be32 tag;
- /* Must be last */
- u8 real_sz[];
};
struct ib_flow_spec_mpls {
@@ -2078,7 +2086,7 @@ struct ib_flow_attr {
u16 priority;
u32 flags;
u8 num_of_specs;
- u8 port;
+ u32 port;
union ib_flow_spec flows[];
};
@@ -2147,7 +2155,6 @@ struct ib_flow_action {
};
struct ib_mad;
-struct ib_grh;
enum ib_process_mad_flags {
IB_MAD_IGNORE_MKEY = 1,
@@ -2183,15 +2190,18 @@ struct ib_port_data {
struct ib_port_immutable immutable;
spinlock_t pkey_list_lock;
+
+ spinlock_t netdev_lock;
+
struct list_head pkey_list;
struct ib_port_cache cache;
- spinlock_t netdev_lock;
struct net_device __rcu *netdev;
+ netdevice_tracker netdev_tracker;
struct hlist_node ndev_hash_link;
struct rdma_port_counter port_counter;
- struct rdma_hw_stats *hw_stats;
+ struct ib_port *sysfs;
};
/* rdma netdev type - specifies protocol type */
@@ -2207,7 +2217,7 @@ enum rdma_netdev_t {
struct rdma_netdev {
void *clnt_priv;
struct ib_device *hca;
- u8 port_num;
+ u32 port_num;
int mtu;
/*
@@ -2228,6 +2238,8 @@ struct rdma_netdev {
int set_qkey, u32 qkey);
int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
union ib_gid *gid, u16 mlid);
+ /* timeout */
+ void (*tx_timeout)(struct net_device *dev, unsigned int txqueue);
};
struct rdma_netdev_alloc_params {
@@ -2236,7 +2248,7 @@ struct rdma_netdev_alloc_params {
unsigned int rxqs;
void *param;
- int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num,
+ int (*initialize_rdma_netdev)(struct ib_device *device, u32 port_num,
struct net_device *netdev, void *param);
};
@@ -2271,8 +2283,13 @@ struct iw_cm_conn_param;
!__same_type(((struct drv_struct *)NULL)->member, \
struct ib_struct)))
-#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \
- ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp))
+#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \
+ ((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \
+ gfp, false))
+
+#define rdma_zalloc_drv_obj_numa(ib_dev, ib_type) \
+ ((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \
+ GFP_KERNEL, true))
#define rdma_zalloc_drv_obj(ib_dev, ib_type) \
rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
@@ -2305,6 +2322,14 @@ struct ib_device_ops {
u32 uverbs_abi_ver;
unsigned int uverbs_no_driver_id_binding:1;
+ /*
+ * NOTE: New drivers should not make use of device_group; instead new
+ * device parameter should be exposed via netlink command. This
+ * mechanism exists only for existing drivers.
+ */
+ const struct attribute_group *device_group;
+ const struct attribute_group **port_groups;
+
int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
const struct ib_send_wr **bad_send_wr);
int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
@@ -2314,12 +2339,11 @@ struct ib_device_ops {
int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
- int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
int (*post_srq_recv)(struct ib_srq *srq,
const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
int (*process_mad)(struct ib_device *device, int process_mad_flags,
- u8 port_num, const struct ib_wc *in_wc,
+ u32 port_num, const struct ib_wc *in_wc,
const struct ib_grh *in_grh,
const struct ib_mad *in_mad, struct ib_mad *out_mad,
size_t *out_mad_size, u16 *out_mad_pkey_index);
@@ -2331,9 +2355,9 @@ struct ib_device_ops {
void (*get_dev_fw_str)(struct ib_device *device, char *str);
const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
int comp_vector);
- int (*query_port)(struct ib_device *device, u8 port_num,
+ int (*query_port)(struct ib_device *device, u32 port_num,
struct ib_port_attr *port_attr);
- int (*modify_port)(struct ib_device *device, u8 port_num,
+ int (*modify_port)(struct ib_device *device, u32 port_num,
int port_modify_mask,
struct ib_port_modify *port_modify);
/**
@@ -2342,10 +2366,10 @@ struct ib_device_ops {
* structure to avoid cache line misses when accessing struct ib_device
* in fast paths.
*/
- int (*get_port_immutable)(struct ib_device *device, u8 port_num,
+ int (*get_port_immutable)(struct ib_device *device, u32 port_num,
struct ib_port_immutable *immutable);
enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
- u8 port_num);
+ u32 port_num);
/**
* When calling get_netdev, the HW vendor's driver should return the
* net device of device @device at port @port_num or NULL if such
@@ -2354,7 +2378,8 @@ struct ib_device_ops {
* that this function returns NULL before the net device has finished
* NETDEV_UNREGISTER state.
*/
- struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
+ struct net_device *(*get_netdev)(struct ib_device *device,
+ u32 port_num);
/**
* rdma netdev operation
*
@@ -2362,11 +2387,11 @@ struct ib_device_ops {
* must return -EOPNOTSUPP if it doesn't support the specified type.
*/
struct net_device *(*alloc_rdma_netdev)(
- struct ib_device *device, u8 port_num, enum rdma_netdev_t type,
+ struct ib_device *device, u32 port_num, enum rdma_netdev_t type,
const char *name, unsigned char name_assign_type,
void (*setup)(struct net_device *));
- int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
+ int (*rdma_netdev_get_params)(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params);
/**
@@ -2374,7 +2399,7 @@ struct ib_device_ops {
* link layer is either IB or iWarp. It is no-op if @port_num port
* is RoCE link layer.
*/
- int (*query_gid)(struct ib_device *device, u8 port_num, int index,
+ int (*query_gid)(struct ib_device *device, u32 port_num, int index,
union ib_gid *gid);
/**
* When calling add_gid, the HW vendor's driver should add the gid
@@ -2399,7 +2424,7 @@ struct ib_device_ops {
* This function is only called when roce_gid_table is used.
*/
int (*del_gid)(const struct ib_gid_attr *attr, void **context);
- int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
+ int (*query_pkey)(struct ib_device *device, u32 port_num, u16 index,
u16 *pkey);
int (*alloc_ucontext)(struct ib_ucontext *context,
struct ib_udata *udata);
@@ -2414,12 +2439,14 @@ struct ib_device_ops {
void (*mmap_free)(struct rdma_user_mmap_entry *entry);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
- void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+ int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
struct ib_udata *udata);
+ int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata);
int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
- void (*destroy_ah)(struct ib_ah *ah, u32 flags);
+ int (*destroy_ah)(struct ib_ah *ah, u32 flags);
int (*create_srq)(struct ib_srq *srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
@@ -2427,10 +2454,9 @@ struct ib_device_ops {
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
- void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
- struct ib_qp *(*create_qp)(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata);
+ int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
+ int (*create_qp)(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_udata *udata);
int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
@@ -2439,15 +2465,20 @@ struct ib_device_ops {
int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
- void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
+ int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
- int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct ib_pd *pd, struct ib_udata *udata);
+ struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr, int fd,
+ int mr_access_flags,
+ struct ib_udata *udata);
+ struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
@@ -2458,51 +2489,49 @@ struct ib_device_ops {
enum ib_uverbs_advise_mr_advice advice, u32 flags,
struct ib_sge *sg_list, u32 num_sge,
struct uverbs_attr_bundle *attrs);
+
+ /*
+ * Kernel users should universally support relaxed ordering (RO), as
+ * they are designed to read data only after observing the CQE and use
+ * the DMA API correctly.
+ *
+ * Some drivers implicitly enable RO if platform supports it.
+ */
int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
- struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+ int (*alloc_mw)(struct ib_mw *mw, struct ib_udata *udata);
int (*dealloc_mw)(struct ib_mw *mw);
int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
- void (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
+ int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
struct ib_flow *(*create_flow)(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain, struct ib_udata *udata);
+ struct ib_udata *udata);
int (*destroy_flow)(struct ib_flow *flow_id);
- struct ib_flow_action *(*create_flow_action_esp)(
- struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
int (*destroy_flow_action)(struct ib_flow_action *action);
- int (*modify_flow_action_esp)(
- struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
- int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
+ int (*set_vf_link_state)(struct ib_device *device, int vf, u32 port,
int state);
- int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
+ int (*get_vf_config)(struct ib_device *device, int vf, u32 port,
struct ifla_vf_info *ivf);
- int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
+ int (*get_vf_stats)(struct ib_device *device, int vf, u32 port,
struct ifla_vf_stats *stats);
- int (*get_vf_guid)(struct ib_device *device, int vf, u8 port,
+ int (*get_vf_guid)(struct ib_device *device, int vf, u32 port,
struct ifla_vf_guid *node_guid,
struct ifla_vf_guid *port_guid);
- int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
+ int (*set_vf_guid)(struct ib_device *device, int vf, u32 port, u64 guid,
int type);
struct ib_wq *(*create_wq)(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
- void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
+ int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask, struct ib_udata *udata);
- struct ib_rwq_ind_table *(*create_rwq_ind_table)(
- struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
+ int (*create_rwq_ind_table)(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
struct ib_dm *(*alloc_dm)(struct ib_device *device,
struct ib_ucontext *context,
@@ -2514,7 +2543,7 @@ struct ib_device_ops {
struct uverbs_attr_bundle *attrs);
int (*create_counters)(struct ib_counters *counters,
struct uverbs_attr_bundle *attrs);
- void (*destroy_counters)(struct ib_counters *counters);
+ int (*destroy_counters)(struct ib_counters *counters);
int (*read_counters)(struct ib_counters *counters,
struct ib_counters_read_attr *counters_read_attr,
struct uverbs_attr_bundle *attrs);
@@ -2524,13 +2553,14 @@ struct ib_device_ops {
unsigned int *meta_sg_offset);
/**
- * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
- * driver initialized data. The struct is kfree()'ed by the sysfs
- * core when the device is removed. A lifespan of -1 in the return
- * struct tells the core to set a default lifespan.
+ * alloc_hw_[device,port]_stats - Allocate a struct rdma_hw_stats and
+ * fill in the driver initialized data. The struct is kfree()'ed by
+ * the sysfs core when the device is removed. A lifespan of -1 in the
+ * return struct tells the core to set a default lifespan.
*/
- struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
- u8 port_num);
+ struct rdma_hw_stats *(*alloc_hw_device_stats)(struct ib_device *device);
+ struct rdma_hw_stats *(*alloc_hw_port_stats)(struct ib_device *device,
+ u32 port_num);
/**
* get_hw_stats - Fill in the counter value(s) in the stats struct.
* @index - The index in the value array we wish to have updated, or
@@ -2544,13 +2574,15 @@ struct ib_device_ops {
* one given in index at their option
*/
int (*get_hw_stats)(struct ib_device *device,
- struct rdma_hw_stats *stats, u8 port, int index);
- /*
- * This function is called once for each port when a ib device is
- * registered.
+ struct rdma_hw_stats *stats, u32 port, int index);
+
+ /**
+ * modify_hw_stat - Modify the counter configuration
+ * @enable: true/false when enable/disable a counter
+ * Return codes - 0 on success or error code otherwise.
*/
- int (*init_port)(struct ib_device *device, u8 port_num,
- struct kobject *port_sysfs);
+ int (*modify_hw_stat)(struct ib_device *device, u32 port,
+ unsigned int counter_index, bool enable);
/**
* Allows rdma drivers to add their own restrack attributes.
*/
@@ -2561,6 +2593,8 @@ struct ib_device_ops {
int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp);
int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp);
int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id);
+ int (*fill_res_srq_entry)(struct sk_buff *msg, struct ib_srq *ib_srq);
+ int (*fill_res_srq_entry_raw)(struct sk_buff *msg, struct ib_srq *ib_srq);
/* Device lifecycle callbacks */
/*
@@ -2621,10 +2655,19 @@ struct ib_device_ops {
int (*query_ucontext)(struct ib_ucontext *context,
struct uverbs_attr_bundle *attrs);
+ /*
+ * Provide NUMA node. This API exists for rdmavt/hfi1 only.
+ * Everyone else relies on Linux memory management model.
+ */
+ int (*get_numa_node)(struct ib_device *dev);
+
DECLARE_RDMA_OBJ_SIZE(ib_ah);
DECLARE_RDMA_OBJ_SIZE(ib_counters);
DECLARE_RDMA_OBJ_SIZE(ib_cq);
+ DECLARE_RDMA_OBJ_SIZE(ib_mw);
DECLARE_RDMA_OBJ_SIZE(ib_pd);
+ DECLARE_RDMA_OBJ_SIZE(ib_qp);
+ DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table);
DECLARE_RDMA_OBJ_SIZE(ib_srq);
DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
DECLARE_RDMA_OBJ_SIZE(ib_xrcd);
@@ -2674,14 +2717,14 @@ struct ib_device {
struct ib_core_device coredev;
};
- /* First group for device attributes,
- * Second group for driver provided attributes (optional).
- * It is NULL terminated array.
+ /* First group is for device attributes,
+ * Second group is for driver provided attributes (optional).
+ * Third group is for the hw_stats
+ * It is a NULL terminated array.
*/
- const struct attribute_group *groups[3];
+ const struct attribute_group *groups[4];
u64 uverbs_cmd_mask;
- u64 uverbs_ex_cmd_mask;
char node_desc[IB_DEVICE_NODE_DESC_MAX];
__be64 node_guid;
@@ -2692,10 +2735,9 @@ struct ib_device {
/* CQ adaptive moderation (RDMA DIM) */
u16 use_cq_dim:1;
u8 node_type;
- u8 phys_port_cnt;
+ u32 phys_port_cnt;
struct ib_device_attr attrs;
- struct attribute_group *hw_stats_ag;
- struct rdma_hw_stats *hw_stats;
+ struct hw_stats_device_data *hw_stats_data;
#ifdef CONFIG_CGROUP_RDMA
struct rdmacg_device cg_device;
@@ -2731,6 +2773,15 @@ struct ib_device {
u32 lag_flags;
};
+static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size,
+ gfp_t gfp, bool is_numa_aware)
+{
+ if (is_numa_aware && dev->ops.get_numa_node)
+ return kzalloc_node(size, gfp, dev->ops.get_numa_node(dev));
+
+ return kzalloc(size, gfp);
+}
+
struct ib_client_nl_info;
struct ib_client {
const char *name;
@@ -2758,7 +2809,7 @@ struct ib_client {
* netdev. */
struct net_device *(*get_net_dev_by_params)(
struct ib_device *dev,
- u8 port,
+ u32 port,
u16 pkey,
const union ib_gid *gid,
const struct sockaddr *addr,
@@ -2782,6 +2833,7 @@ struct ib_block_iter {
/* internal states */
struct scatterlist *__sg; /* sg holding the current aligned block */
dma_addr_t __dma_addr; /* unaligned DMA address of this block */
+ size_t __sg_numblocks; /* ib_umem_num_dma_blocks() */
unsigned int __sg_nents; /* number of SG entries */
unsigned int __sg_advance; /* number of bytes to advance in sg in next step */
unsigned int __pg_bit; /* alignment of current block */
@@ -2798,7 +2850,8 @@ void ib_dealloc_device(struct ib_device *device);
void ib_get_device_fw_str(struct ib_device *device, char *str);
-int ib_register_device(struct ib_device *device, const char *name);
+int ib_register_device(struct ib_device *device, const char *name,
+ struct device *dma_device);
void ib_unregister_device(struct ib_device *device);
void ib_unregister_driver(enum rdma_driver_id driver_id);
void ib_unregister_device_and_put(struct ib_device *device);
@@ -2870,6 +2923,15 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
size_t length, u32 min_pgoff,
u32 max_pgoff);
+static inline int
+rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length, u32 pgoff)
+{
+ return rdma_user_mmap_entry_insert_range(ucontext, entry, length, pgoff,
+ pgoff);
+}
+
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
unsigned long pgoff);
@@ -2916,46 +2978,6 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
}
/**
- * ib_is_destroy_retryable - Check whether the uobject destruction
- * is retryable.
- * @ret: The initial destruction return code
- * @why: remove reason
- * @uobj: The uobject that is destroyed
- *
- * This function is a helper function that IB layer and low-level drivers
- * can use to consider whether the destruction of the given uobject is
- * retry-able.
- * It checks the original return code, if it wasn't success the destruction
- * is retryable according to the ucontext state (i.e. cleanup_retryable) and
- * the remove reason. (i.e. why).
- * Must be called with the object locked for destroy.
- */
-static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
- struct ib_uobject *uobj)
-{
- return ret && (why == RDMA_REMOVE_DESTROY ||
- uobj->context->cleanup_retryable);
-}
-
-/**
- * ib_destroy_usecnt - Called during destruction to check the usecnt
- * @usecnt: The usecnt atomic
- * @why: remove reason
- * @uobj: The uobject that is destroyed
- *
- * Non-zero usecnts will block destruction unless destruction was triggered by
- * a ucontext cleanup.
- */
-static inline int ib_destroy_usecnt(atomic_t *usecnt,
- enum rdma_remove_reason why,
- struct ib_uobject *uobj)
-{
- if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
- return -EBUSY;
- return 0;
-}
-
-/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
* the given QP state transition.
@@ -2978,10 +3000,10 @@ void ib_unregister_event_handler(struct ib_event_handler *event_handler);
void ib_dispatch_event(const struct ib_event *event);
int ib_query_port(struct ib_device *device,
- u8 port_num, struct ib_port_attr *port_attr);
+ u32 port_num, struct ib_port_attr *port_attr);
enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
- u8 port_num);
+ u32 port_num);
/**
* rdma_cap_ib_switch - Check if the device is IB switch
@@ -3005,7 +3027,7 @@ static inline bool rdma_cap_ib_switch(const struct ib_device *device)
*
* Return start port number
*/
-static inline u8 rdma_start_port(const struct ib_device *device)
+static inline u32 rdma_start_port(const struct ib_device *device)
{
return rdma_cap_ib_switch(device) ? 0 : 1;
}
@@ -3016,9 +3038,10 @@ static inline u8 rdma_start_port(const struct ib_device *device)
* @iter - The unsigned int to store the port number
*/
#define rdma_for_each_port(device, iter) \
- for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type( \
- unsigned int, iter))); \
- iter <= rdma_end_port(device); (iter)++)
+ for (iter = rdma_start_port(device + \
+ BUILD_BUG_ON_ZERO(!__same_type(u32, \
+ iter))); \
+ iter <= rdma_end_port(device); iter++)
/**
* rdma_end_port - Return the last valid port number for the device
@@ -3028,7 +3051,7 @@ static inline u8 rdma_start_port(const struct ib_device *device)
*
* Return last port number
*/
-static inline u8 rdma_end_port(const struct ib_device *device)
+static inline u32 rdma_end_port(const struct ib_device *device)
{
return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt;
}
@@ -3041,55 +3064,63 @@ static inline int rdma_is_port_valid(const struct ib_device *device,
}
static inline bool rdma_is_grh_required(const struct ib_device *device,
- u8 port_num)
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_PORT_IB_GRH_REQUIRED;
}
-static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_ib(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_IB;
}
-static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_roce(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
(RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
}
-static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
}
-static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_ROCE;
}
-static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_iwarp(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_IWARP;
}
-static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
+static inline bool rdma_ib_or_roce(const struct ib_device *device,
+ u32 port_num)
{
return rdma_protocol_ib(device, port_num) ||
rdma_protocol_roce(device, port_num);
}
-static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_raw_packet(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_RAW_PACKET;
}
-static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_usnic(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_USNIC;
@@ -3107,7 +3138,7 @@ static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_n
*
* Return: true if the port supports sending/receiving of MAD packets.
*/
-static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_ib_mad(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_IB_MAD;
@@ -3132,7 +3163,7 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
*
* Return: true if the port supports OPA MAD packet formats.
*/
-static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_opa_mad(struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_OPA_MAD;
@@ -3158,7 +3189,7 @@ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
*
* Return: true if the port provides an SMI.
*/
-static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_ib_smi(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_IB_SMI;
@@ -3179,7 +3210,7 @@ static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
* Return: true if the port supports an IB CM (this does not guarantee that
* a CM is actually running however).
*/
-static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_ib_cm(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_IB_CM;
@@ -3197,7 +3228,7 @@ static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
* Return: true if the port supports an iWARP CM (this does not guarantee that
* a CM is actually running however).
*/
-static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_iw_cm(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_IW_CM;
@@ -3218,7 +3249,7 @@ static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
* Administration interface. This does not imply that the SA service is
* running locally.
*/
-static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_ib_sa(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_IB_SA;
@@ -3241,7 +3272,8 @@ static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
* overhead of registering/unregistering with the SM and tracking of the
* total number of queue pairs attached to the multicast group.
*/
-static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_ib_mcast(const struct ib_device *device,
+ u32 port_num)
{
return rdma_cap_ib_sa(device, port_num);
}
@@ -3259,7 +3291,7 @@ static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num
* Return: true if the port uses a GID address to identify devices on the
* network.
*/
-static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_af_ib(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_AF_IB;
@@ -3281,7 +3313,7 @@ static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
* addition of a Global Route Header built from our Ethernet Address
* Handle into our header list for connectionless packets.
*/
-static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_eth_ah(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_ETH_AH;
@@ -3296,7 +3328,7 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
* Return: true if we are running on an OPA device which supports
* the extended OPA addressing.
*/
-static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_opa_ah(struct ib_device *device, u32 port_num)
{
return (device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
@@ -3314,7 +3346,8 @@ static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
* Return the max MAD size required by the Port. Will return 0 if the port
* does not support MADs
*/
-static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
+static inline size_t rdma_max_mad_size(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.max_mad_size;
}
@@ -3333,7 +3366,7 @@ static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_n
* its GIDs.
*/
static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
- u8 port_num)
+ u32 port_num)
{
return rdma_protocol_roce(device, port_num) &&
device->ops.add_gid && device->ops.del_gid;
@@ -3352,30 +3385,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
}
/**
- * rdma_find_pg_bit - Find page bit given address and HW supported page sizes
- *
- * @addr: address
- * @pgsz_bitmap: bitmap of HW supported page sizes
- */
-static inline unsigned int rdma_find_pg_bit(unsigned long addr,
- unsigned long pgsz_bitmap)
-{
- unsigned long align;
- unsigned long pgsz;
-
- align = addr & -addr;
-
- /* Find page bit such that addr is aligned to the highest supported
- * HW page size
- */
- pgsz = pgsz_bitmap & ~(-align << 1);
- if (!pgsz)
- return __ffs(pgsz_bitmap);
-
- return __fls(pgsz);
-}
-
-/**
* rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
* @device: Device
* @port_num: 1 based Port number
@@ -3398,7 +3407,7 @@ static inline bool rdma_core_cap_opa_port(struct ib_device *device,
* Return the MTU size supported by the port as an integer value. Will return
* -1 if enum value of mtu is not supported.
*/
-static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port,
+static inline int rdma_mtu_enum_to_int(struct ib_device *device, u32 port,
int mtu)
{
if (rdma_core_cap_opa_port(device, port))
@@ -3415,7 +3424,7 @@ static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port,
*
* Return the MTU size supported by the port as an integer value.
*/
-static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port,
+static inline int rdma_mtu_from_attr(struct ib_device *device, u32 port,
struct ib_port_attr *attr)
{
if (rdma_core_cap_opa_port(device, port))
@@ -3424,34 +3433,34 @@ static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port,
return ib_mtu_enum_to_int(attr->max_mtu);
}
-int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port,
int state);
-int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_config(struct ib_device *device, int vf, u32 port,
struct ifla_vf_info *info);
-int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_stats(struct ib_device *device, int vf, u32 port,
struct ifla_vf_stats *stats);
-int ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
struct ifla_vf_guid *node_guid,
struct ifla_vf_guid *port_guid);
-int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid,
int type);
int ib_query_pkey(struct ib_device *device,
- u8 port_num, u16 index, u16 *pkey);
+ u32 port_num, u16 index, u16 *pkey);
int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify);
int ib_modify_port(struct ib_device *device,
- u8 port_num, int port_modify_mask,
+ u32 port_num, int port_modify_mask,
struct ib_port_modify *port_modify);
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- u8 *port_num, u16 *index);
+ u32 *port_num, u16 *index);
int ib_find_pkey(struct ib_device *device,
- u8 port_num, u16 pkey, u16 *index);
+ u32 port_num, u16 pkey, u16 *index);
enum ib_pd_flags {
/*
@@ -3469,15 +3478,21 @@ enum ib_pd_flags {
struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
const char *caller);
+/**
+ * ib_alloc_pd - Allocates an unused protection domain.
+ * @device: The device on which to allocate the protection domain.
+ * @flags: protection domain flags
+ *
+ * A protection domain object provides an association between QPs, shared
+ * receive queues, address handles, memory regions, and memory windows.
+ *
+ * Every PD has a local_dma_lkey which can be used as the lkey value for local
+ * memory operations.
+ */
#define ib_alloc_pd(device, flags) \
__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
-/**
- * ib_dealloc_pd_user - Deallocate kernel/user PD
- * @pd: The protection domain
- * @udata: Valid user data or NULL for kernel objects
- */
-void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
/**
* ib_dealloc_pd - Deallocate kernel PD
@@ -3487,7 +3502,9 @@ void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
*/
static inline void ib_dealloc_pd(struct ib_pd *pd)
{
- ib_dealloc_pd_user(pd, NULL);
+ int ret = ib_dealloc_pd_user(pd, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel PD shouldn't fail");
}
enum rdma_create_ah_flags {
@@ -3558,7 +3575,7 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
* attributes which are initialized using ib_init_ah_attr_from_wc().
*
*/
-int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
+int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
struct rdma_ah_attr *ah_attr);
@@ -3575,7 +3592,7 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
* in all UD QP post sends.
*/
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
- const struct ib_grh *grh, u8 port_num);
+ const struct ib_grh *grh, u32 port_num);
/**
* rdma_modify_ah - Modifies the address vector associated with an address
@@ -3615,9 +3632,11 @@ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
*
* NOTE: for user ah use rdma_destroy_ah_user with valid udata!
*/
-static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags)
+static inline void rdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
- return rdma_destroy_ah_user(ah, flags, NULL);
+ int ret = rdma_destroy_ah_user(ah, flags, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel AH shouldn't fail");
}
struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
@@ -3671,9 +3690,11 @@ int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
*
* NOTE: for user srq use ib_destroy_srq_user with valid udata!
*/
-static inline int ib_destroy_srq(struct ib_srq *srq)
+static inline void ib_destroy_srq(struct ib_srq *srq)
{
- return ib_destroy_srq_user(srq, NULL);
+ int ret = ib_destroy_srq_user(srq, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel SRQ shouldn't fail");
}
/**
@@ -3693,8 +3714,22 @@ static inline int ib_post_srq_recv(struct ib_srq *srq,
bad_recv_wr ? : &dummy);
}
-struct ib_qp *ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr);
+struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ const char *caller);
+/**
+ * ib_create_qp - Creates a kernel QP associated with the specific protection
+ * domain.
+ * @pd: The protection domain associated with the QP.
+ * @init_attr: A list of initial attributes required to create the
+ * QP. If QP creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created QP.
+ */
+static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr)
+{
+ return ib_create_qp_kernel(pd, init_attr, KBUILD_MODNAME);
+}
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
@@ -3817,46 +3852,15 @@ static inline int ib_post_recv(struct ib_qp *qp,
return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
}
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx,
- const char *caller, struct ib_udata *udata);
-
-/**
- * ib_alloc_cq_user: Allocate kernel/user CQ
- * @dev: The IB device
- * @private: Private data attached to the CQE
- * @nr_cqe: Number of CQEs in the CQ
- * @comp_vector: Completion vector used for the IRQs
- * @poll_ctx: Context used for polling the CQ
- * @udata: Valid user data or NULL for kernel objects
- */
-static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev,
- void *private, int nr_cqe,
- int comp_vector,
- enum ib_poll_context poll_ctx,
- struct ib_udata *udata)
-{
- return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- KBUILD_MODNAME, udata);
-}
-
-/**
- * ib_alloc_cq: Allocate kernel CQ
- * @dev: The IB device
- * @private: Private data attached to the CQE
- * @nr_cqe: Number of CQEs in the CQ
- * @comp_vector: Completion vector used for the IRQs
- * @poll_ctx: Context used for polling the CQ
- *
- * NOTE: for user cq use ib_alloc_cq_user with valid udata!
- */
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+ int comp_vector, enum ib_poll_context poll_ctx,
+ const char *caller);
static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
int nr_cqe, int comp_vector,
enum ib_poll_context poll_ctx)
{
- return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- NULL);
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+ KBUILD_MODNAME);
}
struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
@@ -3878,26 +3882,7 @@ static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
KBUILD_MODNAME);
}
-/**
- * ib_free_cq_user - Free kernel/user CQ
- * @cq: The CQ to free
- * @udata: Valid user data or NULL for kernel objects
- *
- * NOTE: This function shouldn't be called on shared CQs.
- */
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata);
-
-/**
- * ib_free_cq - Free kernel CQ
- * @cq: The CQ to free
- *
- * NOTE: for user cq use ib_free_cq_user with valid udata!
- */
-static inline void ib_free_cq(struct ib_cq *cq)
-{
- ib_free_cq_user(cq, NULL);
-}
-
+void ib_free_cq(struct ib_cq *cq);
int ib_process_cq_direct(struct ib_cq *cq, int budget);
/**
@@ -3955,7 +3940,9 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
*/
static inline void ib_destroy_cq(struct ib_cq *cq)
{
- ib_destroy_cq_user(cq, NULL);
+ int ret = ib_destroy_cq_user(cq, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
}
/**
@@ -4015,18 +4002,50 @@ struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe);
+/*
+ * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to
+ * NULL. This causes the ib_dma* helpers to just stash the kernel virtual
+ * address into the dma address.
+ */
+static inline bool ib_uses_virt_dma(struct ib_device *dev)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device;
+}
+
+/*
+ * Check if a IB device's underlying DMA mapping supports P2PDMA transfers.
+ */
+static inline bool ib_dma_pci_p2p_dma_supported(struct ib_device *dev)
+{
+ if (ib_uses_virt_dma(dev))
+ return false;
+
+ return dma_pci_p2pdma_supported(dev->dma_device);
+}
+
/**
- * ib_req_ncomp_notif - Request completion notification when there are
- * at least the specified number of unreaped completions on the CQ.
- * @cq: The CQ to generate an event for.
- * @wc_cnt: The number of unreaped completions that should be on the
- * CQ before an event is generated.
+ * ib_virt_dma_to_ptr - Convert a dma_addr to a kernel pointer
+ * @dma_addr: The DMA address
+ *
+ * Used by ib_uses_virt_dma() devices to get back to the kernel pointer after
+ * going through the dma_addr marshalling.
+ */
+static inline void *ib_virt_dma_to_ptr(u64 dma_addr)
+{
+ /* virt_dma mode maps the kvs's directly into the dma addr */
+ return (void *)(uintptr_t)dma_addr;
+}
+
+/**
+ * ib_virt_dma_to_page - Convert a dma_addr to a struct page
+ * @dma_addr: The DMA address
+ *
+ * Used by ib_uses_virt_dma() device to get back to the struct page after going
+ * through the dma_addr marshalling.
*/
-static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
+static inline struct page *ib_virt_dma_to_page(u64 dma_addr)
{
- return cq->device->ops.req_ncomp_notif ?
- cq->device->ops.req_ncomp_notif(cq, wc_cnt) :
- -ENOSYS;
+ return virt_to_page(ib_virt_dma_to_ptr(dma_addr));
}
/**
@@ -4036,6 +4055,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
*/
static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
+ if (ib_uses_virt_dma(dev))
+ return 0;
return dma_mapping_error(dev->dma_device, dma_addr);
}
@@ -4050,6 +4071,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev,
void *cpu_addr, size_t size,
enum dma_data_direction direction)
{
+ if (ib_uses_virt_dma(dev))
+ return (uintptr_t)cpu_addr;
return dma_map_single(dev->dma_device, cpu_addr, size, direction);
}
@@ -4064,7 +4087,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
- dma_unmap_single(dev->dma_device, addr, size, direction);
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_single(dev->dma_device, addr, size, direction);
}
/**
@@ -4081,6 +4105,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev,
size_t size,
enum dma_data_direction direction)
{
+ if (ib_uses_virt_dma(dev))
+ return (uintptr_t)(page_address(page) + offset);
return dma_map_page(dev->dma_device, page, offset, size, direction);
}
@@ -4095,7 +4121,63 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
- dma_unmap_page(dev->dma_device, addr, size, direction);
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_page(dev->dma_device, addr, size, direction);
+}
+
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
+static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (ib_uses_virt_dma(dev))
+ return ib_dma_virt_map_sg(dev, sg, nents);
+ return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
+ dma_attrs);
+}
+
+static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
+ dma_attrs);
+}
+
+/**
+ * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses
+ * @dev: The device for which the DMA addresses are to be created
+ * @sg: The sg_table object describing the buffer
+ * @direction: The direction of the DMA
+ * @attrs: Optional DMA attributes for the map operation
+ */
+static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev,
+ struct sg_table *sgt,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ int nents;
+
+ if (ib_uses_virt_dma(dev)) {
+ nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents);
+ if (!nents)
+ return -EIO;
+ sgt->nents = nents;
+ return 0;
+ }
+ return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs);
+}
+
+static inline void ib_dma_unmap_sgtable_attrs(struct ib_device *dev,
+ struct sg_table *sgt,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_sgtable(dev->dma_device, sgt, direction, dma_attrs);
}
/**
@@ -4109,7 +4191,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- return dma_map_sg(dev->dma_device, sg, nents, direction);
+ return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0);
}
/**
@@ -4123,24 +4205,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- dma_unmap_sg(dev->dma_device, sg, nents, direction);
-}
-
-static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
-{
- return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
- dma_attrs);
-}
-
-static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
-{
- dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
+ ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0);
}
/**
@@ -4151,6 +4216,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
*/
static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
{
+ if (ib_uses_virt_dma(dev))
+ return UINT_MAX;
return dma_get_max_seg_size(dev->dma_device);
}
@@ -4166,7 +4233,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
size_t size,
enum dma_data_direction dir)
{
- dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+ if (!ib_uses_virt_dma(dev))
+ dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
}
/**
@@ -4181,36 +4249,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
size_t size,
enum dma_data_direction dir)
{
- dma_sync_single_for_device(dev->dma_device, addr, size, dir);
-}
-
-/**
- * ib_dma_alloc_coherent - Allocate memory and map it for DMA
- * @dev: The device for which the DMA address is requested
- * @size: The size of the region to allocate in bytes
- * @dma_handle: A pointer for returning the DMA address of the region
- * @flag: memory allocator flags
- */
-static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
- size_t size,
- dma_addr_t *dma_handle,
- gfp_t flag)
-{
- return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag);
-}
-
-/**
- * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent()
- * @dev: The device for which the DMA addresses were allocated
- * @size: The size of the region
- * @cpu_addr: the address returned by ib_dma_alloc_coherent()
- * @dma_handle: the DMA address returned by ib_dma_alloc_coherent()
- */
-static inline void ib_dma_free_coherent(struct ib_device *dev,
- size_t size, void *cpu_addr,
- dma_addr_t dma_handle)
-{
- dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
+ if (!ib_uses_virt_dma(dev))
+ dma_sync_single_for_device(dev->dma_device, addr, size, dir);
}
/* ib_reg_user_mr - register a memory region for virtual addresses from kernel
@@ -4302,8 +4342,11 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
struct inode *inode, struct ib_udata *udata);
int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
-static inline int ib_check_mr_access(int flags)
+static inline int ib_check_mr_access(struct ib_device *ib_dev,
+ unsigned int flags)
{
+ u64 device_cap = ib_dev->attrs.device_cap_flags;
+
/*
* Local write permission is required if remote write or
* remote atomic permission is also requested.
@@ -4315,6 +4358,16 @@ static inline int ib_check_mr_access(int flags)
if (flags & ~IB_ACCESS_SUPPORTED)
return -EINVAL;
+ if (flags & IB_ACCESS_ON_DEMAND &&
+ !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING))
+ return -EOPNOTSUPP;
+
+ if ((flags & IB_ACCESS_FLUSH_GLOBAL &&
+ !(device_cap & IB_DEVICE_FLUSH_GLOBAL)) ||
+ (flags & IB_ACCESS_FLUSH_PERSISTENT &&
+ !(device_cap & IB_DEVICE_FLUSH_PERSISTENT)))
+ return -EOPNOTSUPP;
+
return 0;
}
@@ -4370,19 +4423,14 @@ struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
enum rdma_driver_id driver_id);
struct ib_device *ib_device_get_by_name(const char *name,
enum rdma_driver_id driver_id);
-struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
+struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port,
u16 pkey, const union ib_gid *gid,
const struct sockaddr *addr);
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
unsigned int port);
-struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
-
struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr);
-int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
-int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
- u32 wq_attr_mask);
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);
int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size);
@@ -4410,7 +4458,8 @@ void ib_drain_rq(struct ib_qp *qp);
void ib_drain_sq(struct ib_qp *qp);
void ib_drain_qp(struct ib_qp *qp);
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width);
+int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed,
+ u8 *width);
static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr)
{
@@ -4478,12 +4527,12 @@ static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr)
return false;
}
-static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u8 port_num)
+static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u32 port_num)
{
attr->port_num = port_num;
}
-static inline u8 rdma_ah_get_port_num(const struct rdma_ah_attr *attr)
+static inline u32 rdma_ah_get_port_num(const struct rdma_ah_attr *attr)
{
return attr->port_num;
}
@@ -4581,7 +4630,7 @@ void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
* @port_num: Port number
*/
static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
- u8 port_num)
+ u32 port_num)
{
if (rdma_protocol_roce(dev, port_num))
return RDMA_AH_ATTR_TYPE_ROCE;
@@ -4596,7 +4645,7 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
/**
* ib_lid_cpu16 - Return lid in 16bit CPU encoding.
- * In the current implementation the only way to get
+ * In the current implementation the only way to
* get the 32bit lid is from other sources for OPA.
* For IB, lids will always be 16bits so cast the
* value accordingly.
@@ -4653,40 +4702,18 @@ struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
-struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *));
-int rdma_init_netdev(struct ib_device *device, u8 port_num,
+int rdma_init_netdev(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *),
struct net_device *netdev);
/**
- * rdma_set_device_sysfs_group - Set device attributes group to have
- * driver specific sysfs entries at
- * for infiniband class.
- *
- * @device: device pointer for which attributes to be created
- * @group: Pointer to group which should be added when device
- * is registered with sysfs.
- * rdma_set_device_sysfs_group() allows existing drivers to expose one
- * group per device to have sysfs attributes.
- *
- * NOTE: New drivers should not make use of this API; instead new device
- * parameter should be exposed via netlink command. This API and mechanism
- * exist only for existing drivers.
- */
-static inline void
-rdma_set_device_sysfs_group(struct ib_device *dev,
- const struct attribute_group *group)
-{
- dev->groups[1] = group;
-}
-
-/**
* rdma_device_to_ibdev - Get ib_device pointer from device pointer
*
* @device: device pointer for which ib_device pointer to retrieve
@@ -4703,12 +4730,25 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
}
/**
+ * ibdev_to_node - return the NUMA node for a given ib_device
+ * @dev: device to get the NUMA node for.
+ */
+static inline int ibdev_to_node(struct ib_device *ibdev)
+{
+ struct device *parent = ibdev->dev.parent;
+
+ if (!parent)
+ return NUMA_NO_NODE;
+ return dev_to_node(parent);
+}
+
+/**
* rdma_device_to_drv_device - Helper macro to reach back to driver's
* ib_device holder structure from device pointer.
*
* NOTE: New drivers should not make use of this API; This API is only for
* existing drivers who have exposed sysfs entries using
- * rdma_set_device_sysfs_group().
+ * ops->device_group.
*/
#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \
container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member)
@@ -4717,6 +4757,7 @@ bool rdma_dev_access_netns(const struct ib_device *device,
const struct net *net);
#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF)
#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
/**
@@ -4759,4 +4800,24 @@ static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn)
return (u32)(v & IB_GRH_FLOWLABEL_MASK);
}
+
+/**
+ * rdma_get_udp_sport - Calculate and set UDP source port based on the flow
+ * label. If flow label is not defined in GRH then
+ * calculate it based on lqpn/rqpn.
+ *
+ * @fl: flow label from GRH
+ * @lqpn: local qp number
+ * @rqpn: remote qp number
+ */
+static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
+{
+ if (!fl)
+ fl = rdma_calc_flow_label(lqpn, rqpn);
+
+ return rdma_flow_label_to_udp_sport(fl);
+}
+
+const struct ib_port_immutable*
+ib_port_immutable_read(struct ib_device *dev, unsigned int port);
#endif /* IB_VERBS_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 91975400e1b3..2b22f153ef63 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -70,6 +70,7 @@ struct iw_cm_id {
u8 tos;
bool tos_set:1;
bool mapped:1;
+ bool afonly:1;
};
struct iw_cm_conn_param {
@@ -114,27 +115,6 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
void iw_destroy_cm_id(struct iw_cm_id *cm_id);
/**
- * iw_cm_bind_qp - Unbind the specified IW CM identifier and QP
- *
- * @cm_id: The IW CM idenfier to unbind from the QP.
- * @qp: The QP
- *
- * This is called by the provider when destroying the QP to ensure
- * that any references held by the IWCM are released. It may also
- * be called by the IWCM when destroying a CM_ID to that any
- * references held by the provider are released.
- */
-void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp);
-
-/**
- * iw_cm_get_qp - Return the ib_qp associated with a QPN
- *
- * @ib_device: The IB device
- * @qpn: The queue pair number
- */
-struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn);
-
-/**
* iw_cm_listen - Listen for incoming connection requests on the
* specified IW CM id.
*
diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h
index cbe3c2811455..d297f084001a 100644
--- a/include/rdma/opa_vnic.h
+++ b/include/rdma/opa_vnic.h
@@ -51,7 +51,7 @@ static inline void *opa_vnic_dev_priv(const struct net_device *dev)
return oparn->dev_priv;
}
-/* opa_vnic skb meta data structrue */
+/* opa_vnic skb meta data structure */
struct opa_vnic_skb_mdata {
u8 vl;
u8 entropy;
@@ -90,8 +90,7 @@ struct opa_vnic_stats {
static inline bool rdma_cap_opa_vnic(struct ib_device *device)
{
- return !!(device->attrs.device_cap_flags &
- IB_DEVICE_RDMA_NETDEV_OPA);
+ return !!(device->attrs.kernel_cap_flags & IBK_RDMA_NETDEV_OPA);
}
#endif /* _OPA_VNIC_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index cf5da2ae49bf..8a8ab2f793ab 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -52,7 +52,17 @@ struct rdma_addr {
struct rdma_route {
struct rdma_addr addr;
struct sa_path_rec *path_rec;
- int num_paths;
+
+ /* Optional path records of primary path */
+ struct sa_path_rec *path_rec_inbound;
+ struct sa_path_rec *path_rec_outbound;
+
+ /*
+ * 0 - No primary nor alternate path is available
+ * 1 - Only primary path is available
+ * 2 - Both primary and alternate path are available
+ */
+ int num_pri_alt_paths;
};
struct rdma_conn_param {
@@ -107,14 +117,18 @@ struct rdma_cm_id {
struct rdma_route route;
enum rdma_ucm_port_space ps;
enum ib_qp_type qp_type;
- u8 port_num;
+ u32 port_num;
+ struct work_struct net_work;
};
-struct rdma_cm_id *__rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_ucm_port_space ps,
- enum ib_qp_type qp_type,
- const char *caller);
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller);
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+ void *context,
+ enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type);
/**
* rdma_create_id - Create an RDMA identifier.
@@ -132,9 +146,9 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
* The event handler callback serializes on the id's mutex and is
* allowed to sleep.
*/
-#define rdma_create_id(net, event_handler, context, ps, qp_type) \
- __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
- KBUILD_MODNAME)
+#define rdma_create_id(net, event_handler, context, ps, qp_type) \
+ __rdma_create_kernel_id(net, event_handler, context, ps, qp_type, \
+ KBUILD_MODNAME)
/**
* rdma_destroy_id - Destroys an RDMA identifier.
@@ -224,19 +238,9 @@ void rdma_destroy_qp(struct rdma_cm_id *id);
int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
int *qp_attr_mask);
-/**
- * rdma_connect - Initiate an active connection request.
- * @id: Connection identifier to connect.
- * @conn_param: Connection information used for connected QPs.
- *
- * Users must have resolved a route for the rdma_cm_id to connect with
- * by having called rdma_resolve_route before calling this routine.
- *
- * This call will either connect to a remote QP or obtain remote QP
- * information for unconnected rdma_cm_id's. The actual operation is
- * based on the rdma_cm_id's port space.
- */
int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+int rdma_connect_locked(struct rdma_cm_id *id,
+ struct rdma_conn_param *conn_param);
int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
struct rdma_ucm_ece *ece);
@@ -250,29 +254,12 @@ int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
*/
int rdma_listen(struct rdma_cm_id *id, int backlog);
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller);
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
-int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller, struct rdma_ucm_ece *ece);
-
-/**
- * rdma_accept - Called to accept a connection request or response.
- * @id: Connection identifier associated with the request.
- * @conn_param: Information needed to establish the connection. This must be
- * provided if accepting a connection request. If accepting a connection
- * response, this parameter must be NULL.
- *
- * Typically, this routine is only called by the listener to accept a connection
- * request. It must also be called on the active side of a connection if the
- * user is performing their own QP transitions.
- *
- * In the case of error, a reject message is sent to the remote side and the
- * state of the qp associated with the id is modified to error, such that any
- * previously posted receive buffers would be flushed.
- */
-#define rdma_accept(id, conn_param) \
- __rdma_accept((id), (conn_param), KBUILD_MODNAME)
+void rdma_lock_handler(struct rdma_cm_id *id);
+void rdma_unlock_handler(struct rdma_cm_id *id);
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece);
/**
* rdma_notify - Notifies the RDMA CM of an asynchronous event that has
@@ -355,6 +342,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse);
int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout);
+
+int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer);
/**
* rdma_get_service_id - Return the IB service ID for a specified address.
* @id: Communication identifier associated with the address.
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
index eb99856e8b30..45d5481a7846 100644
--- a/include/rdma/rdma_counter.h
+++ b/include/rdma/rdma_counter.h
@@ -40,26 +40,29 @@ struct rdma_counter {
struct rdma_counter_mode mode;
struct mutex lock;
struct rdma_hw_stats *stats;
- u8 port;
+ u32 port;
};
void rdma_counter_init(struct ib_device *dev);
void rdma_counter_release(struct ib_device *dev);
-int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
- bool on, enum rdma_nl_counter_mask mask);
-int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port);
+int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
+ enum rdma_nl_counter_mask mask,
+ struct netlink_ext_ack *extack);
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port);
int rdma_counter_unbind_qp(struct ib_qp *qp, bool force);
int rdma_counter_query_stats(struct rdma_counter *counter);
-u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index);
-int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
+u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index);
+int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
u32 qp_num, u32 counter_id);
-int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
+int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
u32 qp_num, u32 *counter_id);
-int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
+int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
u32 qp_num, u32 counter_id);
-int rdma_counter_get_mode(struct ib_device *dev, u8 port,
+int rdma_counter_get_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mode *mode,
enum rdma_nl_counter_mask *mask);
+int rdma_counter_modify(struct ib_device *dev, u32 port,
+ unsigned int index, bool enable);
#endif /* _RDMA_COUNTER_H_ */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 2758d9df71ee..c2a79aeee113 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -30,7 +30,7 @@ enum rdma_nl_flags {
* constant as well and the compiler checks they are the same.
*/
#define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \
- static inline void __chk_##_index(void) \
+ static inline void __maybe_unused __chk_##_index(void) \
{ \
BUILD_BUG_ON(_index != _val); \
} \
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 9fd217b24916..c429d6ddb129 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -92,7 +92,7 @@ struct rvt_ibport {
/*
* The pkey table is allocated and maintained by the driver. Drivers
* need to have access to this before registering with rdmav. However
- * rdmavt will need access to it so drivers need to proviee this during
+ * rdmavt will need access to it so drivers need to provide this during
* the attach port API call.
*/
u16 *pkey_table;
@@ -230,7 +230,7 @@ struct rvt_driver_provided {
void (*do_send)(struct rvt_qp *qp);
/*
- * Returns a pointer to the undelying hardware's PCI device. This is
+ * Returns a pointer to the underlying hardware's PCI device. This is
* used to display information as to what hardware is being referenced
* in an output message
*/
@@ -245,7 +245,7 @@ struct rvt_driver_provided {
void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
/*
- * Init a struture allocated with qp_priv_alloc(). This should be
+ * Init a structure allocated with qp_priv_alloc(). This should be
* called after all qp fields have been initialized in rdmavt.
*/
int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
@@ -257,7 +257,7 @@ struct rvt_driver_provided {
void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
/*
- * Inform the driver the particular qp in quesiton has been reset so
+ * Inform the driver the particular qp in question has been reset so
* that it can clean up anything it needs to.
*/
void (*notify_qp_reset)(struct rvt_qp *qp);
@@ -281,7 +281,7 @@ struct rvt_driver_provided {
void (*stop_send_queue)(struct rvt_qp *qp);
/*
- * Have the drivr drain any in progress operations
+ * Have the driver drain any in progress operations
*/
void (*quiesce_qp)(struct rvt_qp *qp);
@@ -309,16 +309,16 @@ struct rvt_driver_provided {
/*
* Query driver for the state of the port.
*/
- int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num,
+ int (*query_port_state)(struct rvt_dev_info *rdi, u32 port_num,
struct ib_port_attr *props);
/*
* Tell driver to shutdown a port
*/
- int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num);
+ int (*shut_down_port)(struct rvt_dev_info *rdi, u32 port_num);
/* Tell driver to send a trap for changed port capabilities */
- void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num);
+ void (*cap_mask_chg)(struct rvt_dev_info *rdi, u32 port_num);
/*
* The following functions can be safely ignored completely. Any use of
@@ -338,7 +338,7 @@ struct rvt_driver_provided {
/* Let the driver pick the next queue pair number*/
int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
- enum ib_qp_type type, u8 port_num);
+ enum ib_qp_type type, u32 port_num);
/* Determine if its safe or allowed to modify the qp */
int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
@@ -445,7 +445,7 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
* to work by setting the name manually here.
*/
dev_set_name(&rdi->ibdev.dev, fmt, name, unit);
- strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
+ strscpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
}
/**
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 8275954f5ce6..d67892944193 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -11,6 +11,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/rdmavt_cq.h>
#include <rdma/rvt-abi.h>
+#include <linux/vmalloc.h>
/*
* Atomic bit definitions for r_aflags.
*/
@@ -444,7 +445,7 @@ struct rvt_qp {
/*
* This sge list MUST be last. Do not add anything below here.
*/
- struct rvt_sge r_sg_list[] /* verified SGEs */
+ struct rvt_sge *r_sg_list /* verified SGEs */
____cacheline_aligned_in_smp;
};
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 7682d1bcf789..0d69ded73bf2 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -14,6 +14,9 @@
#include <uapi/rdma/rdma_netlink.h>
#include <linux/xarray.h>
+/* Mark entry as containing driver specific details, it is used to provide QP subtype for now */
+#define RESTRACK_DD XA_MARK_1
+
struct ib_device;
struct sk_buff;
@@ -50,6 +53,10 @@ enum rdma_restrack_type {
*/
RDMA_RESTRACK_COUNTER,
/**
+ * @RDMA_RESTRACK_SRQ: Shared receive queue (SRQ)
+ */
+ RDMA_RESTRACK_SRQ,
+ /**
* @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
*/
RDMA_RESTRACK_MAX
@@ -68,6 +75,14 @@ struct rdma_restrack_entry {
* As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI
*/
bool valid;
+ /**
+ * @no_track: don't add this entry to restrack DB
+ *
+ * This field is used to mark an entry that doesn't need to be added to
+ * internal restrack DB and presented later to the users at the nldev
+ * query stage.
+ */
+ u8 no_track : 1;
/*
* @kref: Protect destroy of the resource
*/
@@ -104,24 +119,13 @@ struct rdma_restrack_entry {
u32 id;
};
-int rdma_restrack_count(struct ib_device *dev,
- enum rdma_restrack_type type);
-
-void rdma_restrack_kadd(struct rdma_restrack_entry *res);
-void rdma_restrack_uadd(struct rdma_restrack_entry *res);
-
-/**
- * rdma_restrack_del() - delete object from the reource tracking database
- * @res: resource entry
- * @type: actual type of object to operate
- */
-void rdma_restrack_del(struct rdma_restrack_entry *res);
-
+int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
+ bool show_details);
/**
* rdma_is_kernel_res() - check the owner of resource
* @res: resource entry
*/
-static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res)
+static inline bool rdma_is_kernel_res(const struct rdma_restrack_entry *res)
{
return !res->user;
}
@@ -138,14 +142,6 @@ int __must_check rdma_restrack_get(struct rdma_restrack_entry *res);
*/
int rdma_restrack_put(struct rdma_restrack_entry *res);
-/**
- * rdma_restrack_set_task() - set the task for this resource
- * @res: resource entry
- * @caller: kernel name, the current task will be used if the caller is NULL.
- */
-void rdma_restrack_set_task(struct rdma_restrack_entry *res,
- const char *caller);
-
/*
* Helper functions for rdma drivers when filling out
* nldev driver attributes.
@@ -164,4 +160,20 @@ int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev,
enum rdma_restrack_type type,
u32 id);
+
+/**
+ * rdma_restrack_no_track() - don't add resource to the DB
+ * @res: resource entry
+ *
+ * Every user of this API should be cross examined.
+ * Probably you don't need to use this function.
+ */
+static inline void rdma_restrack_no_track(struct rdma_restrack_entry *res)
+{
+ res->no_track = true;
+}
+static inline bool rdma_restrack_is_tracked(struct rdma_restrack_entry *res)
+{
+ return !res->no_track;
+}
#endif /* _RDMA_RESTRACK_H_ */
diff --git a/include/rdma/rw.h b/include/rdma/rw.h
index 6ad9dc836c10..d606cac48233 100644
--- a/include/rdma/rw.h
+++ b/include/rdma/rw.h
@@ -42,29 +42,29 @@ struct rdma_rw_ctx {
};
};
-int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
u64 remote_addr, u32 rkey, enum dma_data_direction dir);
-void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
- struct scatterlist *sg, u32 sg_cnt,
- enum dma_data_direction dir);
+void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
+ enum dma_data_direction dir);
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
struct scatterlist *prot_sg, u32 prot_sg_cnt,
struct ib_sig_attrs *sig_attrs, u64 remote_addr, u32 rkey,
enum dma_data_direction dir);
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
struct scatterlist *prot_sg, u32 prot_sg_cnt,
enum dma_data_direction dir);
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
-int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+ u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
+int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
-unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
+unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
unsigned int maxpages);
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr);
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr);
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index b00270c72740..e6c0de227fad 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -24,6 +24,7 @@ enum uverbs_attr_type {
UVERBS_ATTR_TYPE_PTR_OUT,
UVERBS_ATTR_TYPE_IDR,
UVERBS_ATTR_TYPE_FD,
+ UVERBS_ATTR_TYPE_RAW_FD,
UVERBS_ATTR_TYPE_ENUM_IN,
UVERBS_ATTR_TYPE_IDRS_ARRAY,
};
@@ -435,8 +436,10 @@ struct uapi_definition {
}, \
##__VA_ARGS__
#define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...) \
- UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum), \
- ##__VA_ARGS__)
+ UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, \
+ PTR_IF(IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS), \
+ &UVERBS_OBJECT(_object_enum)), \
+ ##__VA_ARGS__)
/*
* =======================================
@@ -521,6 +524,11 @@ struct uapi_definition {
.u.obj.access = _access, \
__VA_ARGS__ } })
+#define UVERBS_ATTR_RAW_FD(_attr_id, ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = (_attr_id), \
+ .attr = { .type = UVERBS_ATTR_TYPE_RAW_FD, __VA_ARGS__ } })
+
#define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \
(&(const struct uverbs_attr_def){ \
.id = _attr_id, \
@@ -621,12 +629,14 @@ struct uverbs_attr {
};
struct uverbs_attr_bundle {
- struct ib_udata driver_udata;
- struct ib_udata ucore;
- struct ib_uverbs_file *ufile;
- struct ib_ucontext *context;
- struct ib_uobject *uobject;
- DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
+ struct_group_tagged(uverbs_attr_bundle_hdr, hdr,
+ struct ib_udata driver_udata;
+ struct ib_udata ucore;
+ struct ib_uverbs_file *ufile;
+ struct ib_ucontext *context;
+ struct ib_uobject *uobject;
+ DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
+ );
struct uverbs_attr attrs[];
};
@@ -647,12 +657,15 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b
* 'ucontext'.
*
*/
-#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \
- (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \
- driver_udata) \
- ->context, \
- drv_dev_struct, member) : \
- (drv_dev_struct *)NULL)
+static inline struct uverbs_attr_bundle *
+rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata)
+{
+ return container_of(udata, struct uverbs_attr_bundle, driver_udata);
+}
+
+#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \
+ (udata ? container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \
+ drv_dev_struct, member) : (drv_dev_struct *)NULL)
#define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT)
@@ -862,9 +875,24 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
{
return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
}
-int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
- size_t idx, s64 lower_bound, u64 upper_bound,
- s64 *def_val);
+
+static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle,
+ size_t n, size_t size)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(n, size, &bytes)))
+ return ERR_PTR(-EOVERFLOW);
+ return uverbs_zalloc(bundle, bytes);
+}
+
+int _uverbs_get_const_signed(s64 *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val);
+int _uverbs_get_const_unsigned(u64 *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 upper_bound, u64 *def_val);
int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
size_t idx, const void *from, size_t size);
#else
@@ -908,27 +936,84 @@ uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
{
return -EINVAL;
}
+static inline int
+_uverbs_get_const_signed(s64 *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val)
+{
+ return -EINVAL;
+}
+static inline int
+_uverbs_get_const_unsigned(u64 *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 upper_bound, u64 *def_val)
+{
+ return -EINVAL;
+}
#endif
-#define uverbs_get_const(_to, _attrs_bundle, _idx) \
+#define uverbs_get_const_signed(_to, _attrs_bundle, _idx) \
({ \
s64 _val; \
- int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx, \
- type_min(typeof(*_to)), \
- type_max(typeof(*_to)), NULL); \
- (*_to) = _val; \
+ int _ret = \
+ _uverbs_get_const_signed(&_val, _attrs_bundle, _idx, \
+ type_min(typeof(*(_to))), \
+ type_max(typeof(*(_to))), NULL); \
+ (*(_to)) = _val; \
_ret; \
})
-#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \
+#define uverbs_get_const_unsigned(_to, _attrs_bundle, _idx) \
+ ({ \
+ u64 _val; \
+ int _ret = \
+ _uverbs_get_const_unsigned(&_val, _attrs_bundle, _idx, \
+ type_max(typeof(*(_to))), NULL); \
+ (*(_to)) = _val; \
+ _ret; \
+ })
+
+#define uverbs_get_const_default_signed(_to, _attrs_bundle, _idx, _default) \
({ \
s64 _val; \
s64 _def_val = _default; \
int _ret = \
- _uverbs_get_const(&_val, _attrs_bundle, _idx, \
- type_min(typeof(*_to)), \
- type_max(typeof(*_to)), &_def_val); \
- (*_to) = _val; \
+ _uverbs_get_const_signed(&_val, _attrs_bundle, _idx, \
+ type_min(typeof(*(_to))), \
+ type_max(typeof(*(_to))), &_def_val); \
+ (*(_to)) = _val; \
+ _ret; \
+ })
+
+#define uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, _default) \
+ ({ \
+ u64 _val; \
+ u64 _def_val = _default; \
+ int _ret = \
+ _uverbs_get_const_unsigned(&_val, _attrs_bundle, _idx, \
+ type_max(typeof(*(_to))), &_def_val); \
+ (*(_to)) = _val; \
_ret; \
})
+
+#define uverbs_get_const(_to, _attrs_bundle, _idx) \
+ (is_signed_type(typeof(*(_to))) ? \
+ uverbs_get_const_signed(_to, _attrs_bundle, _idx) : \
+ uverbs_get_const_unsigned(_to, _attrs_bundle, _idx)) \
+
+#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \
+ (is_signed_type(typeof(*(_to))) ? \
+ uverbs_get_const_default_signed(_to, _attrs_bundle, _idx, \
+ _default) : \
+ uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, \
+ _default))
+
+static inline int
+uverbs_get_raw_fd(int *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx)
+{
+ return uverbs_get_const_signed(to, attrs_bundle, idx);
+}
+
#endif
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index f04f5126f61e..ee7873f872c3 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -20,7 +20,7 @@
/* These are static so they do not need to be qualified */
#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
-#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id
+#define UVERBS_OBJECT_METHODS(object_id) _UVERBS_NAME(_object_methods_##object_id, __LINE__)
#define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...) \
static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 06db27e35f40..ccd11631c167 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -71,6 +71,8 @@ struct uverbs_obj_type_class {
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs);
void (*remove_handle)(struct ib_uobject *uobj);
+ void (*swap_uobjects)(struct ib_uobject *obj_old,
+ struct ib_uobject *obj_new);
};
struct uverbs_obj_type {
@@ -116,6 +118,9 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
bool hw_obj_valid);
void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
struct uverbs_attr_bundle *attrs);
+void rdma_assign_uobject(struct ib_uobject *to_uobj,
+ struct ib_uobject *new_uobj,
+ struct uverbs_attr_bundle *attrs);
/*
* uverbs_uobject_get is called in order to increase the reference count on
@@ -138,8 +143,8 @@ struct uverbs_obj_fd_type {
* because the driver is removed or the FD is closed.
*/
struct uverbs_obj_type type;
- int (*destroy_object)(struct ib_uobject *uobj,
- enum rdma_remove_reason why);
+ void (*destroy_object)(struct ib_uobject *uobj,
+ enum rdma_remove_reason why);
const struct file_operations *fops;
const char *name;
int flags;