aboutsummaryrefslogtreecommitdiffstats
path: root/include/rdma/ib_verbs.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/rdma/ib_verbs.h')
-rw-r--r--include/rdma/ib_verbs.h394
1 files changed, 376 insertions, 18 deletions
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 65994a19e840..986fddb08579 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -81,6 +81,13 @@ enum rdma_transport_type {
RDMA_TRANSPORT_USNIC_UDP
};
+enum rdma_protocol_type {
+ RDMA_PROTOCOL_IB,
+ RDMA_PROTOCOL_IBOE,
+ RDMA_PROTOCOL_IWARP,
+ RDMA_PROTOCOL_USNIC_UDP
+};
+
__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type);
@@ -166,6 +173,16 @@ struct ib_odp_caps {
} per_transport_caps;
};
+enum ib_cq_creation_flags {
+ IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
+};
+
+struct ib_cq_init_attr {
+ unsigned int cqe;
+ int comp_vector;
+ u32 flags;
+};
+
struct ib_device_attr {
u64 fw_ver;
__be64 sys_image_guid;
@@ -210,6 +227,8 @@ struct ib_device_attr {
int sig_prot_cap;
int sig_guard_cap;
struct ib_odp_caps odp_caps;
+ uint64_t timestamp_mask;
+ uint64_t hca_core_clock; /* in KHZ */
};
enum ib_mtu {
@@ -346,6 +365,42 @@ union rdma_protocol_stats {
struct iw_protocol_stats iw;
};
+/* Define bits for the various functionality this port needs to be supported by
+ * the core.
+ */
+/* Management 0x00000FFF */
+#define RDMA_CORE_CAP_IB_MAD 0x00000001
+#define RDMA_CORE_CAP_IB_SMI 0x00000002
+#define RDMA_CORE_CAP_IB_CM 0x00000004
+#define RDMA_CORE_CAP_IW_CM 0x00000008
+#define RDMA_CORE_CAP_IB_SA 0x00000010
+#define RDMA_CORE_CAP_OPA_MAD 0x00000020
+
+/* Address format 0x000FF000 */
+#define RDMA_CORE_CAP_AF_IB 0x00001000
+#define RDMA_CORE_CAP_ETH_AH 0x00002000
+
+/* Protocol 0xFFF00000 */
+#define RDMA_CORE_CAP_PROT_IB 0x00100000
+#define RDMA_CORE_CAP_PROT_ROCE 0x00200000
+#define RDMA_CORE_CAP_PROT_IWARP 0x00400000
+
+#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
+ | RDMA_CORE_CAP_IB_MAD \
+ | RDMA_CORE_CAP_IB_SMI \
+ | RDMA_CORE_CAP_IB_CM \
+ | RDMA_CORE_CAP_IB_SA \
+ | RDMA_CORE_CAP_AF_IB)
+#define RDMA_CORE_PORT_IBA_ROCE (RDMA_CORE_CAP_PROT_ROCE \
+ | RDMA_CORE_CAP_IB_MAD \
+ | RDMA_CORE_CAP_IB_CM \
+ | RDMA_CORE_CAP_AF_IB \
+ | RDMA_CORE_CAP_ETH_AH)
+#define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \
+ | RDMA_CORE_CAP_IW_CM)
+#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \
+ | RDMA_CORE_CAP_OPA_MAD)
+
struct ib_port_attr {
enum ib_port_state state;
enum ib_mtu max_mtu;
@@ -412,6 +467,8 @@ enum ib_event_type {
IB_EVENT_GID_CHANGE,
};
+__attribute_const__ const char *ib_event_msg(enum ib_event_type event);
+
struct ib_event {
struct ib_device *device;
union {
@@ -663,6 +720,8 @@ enum ib_wc_status {
IB_WC_GENERAL_ERR
};
+__attribute_const__ const char *ib_wc_status_msg(enum ib_wc_status status);
+
enum ib_wc_opcode {
IB_WC_SEND,
IB_WC_RDMA_WRITE,
@@ -1407,7 +1466,7 @@ struct ib_flow {
struct ib_uobject *uobject;
};
-struct ib_mad;
+struct ib_mad_hdr;
struct ib_grh;
enum ib_process_mad_flags {
@@ -1474,6 +1533,13 @@ struct ib_dma_mapping_ops {
struct iw_cm_verbs;
+struct ib_port_immutable {
+ int pkey_tbl_len;
+ int gid_tbl_len;
+ u32 core_cap_flags;
+ u32 max_mad_size;
+};
+
struct ib_device {
struct device *dma_device;
@@ -1487,8 +1553,10 @@ struct ib_device {
struct list_head client_data_list;
struct ib_cache cache;
- int *pkey_tbl_len;
- int *gid_tbl_len;
+ /**
+ * port_immutable is indexed by port number
+ */
+ struct ib_port_immutable *port_immutable;
int num_comp_vectors;
@@ -1497,7 +1565,8 @@ struct ib_device {
int (*get_protocol_stats)(struct ib_device *device,
union rdma_protocol_stats *stats);
int (*query_device)(struct ib_device *device,
- struct ib_device_attr *device_attr);
+ struct ib_device_attr *device_attr,
+ struct ib_udata *udata);
int (*query_port)(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr);
@@ -1561,8 +1630,8 @@ struct ib_device {
int (*post_recv)(struct ib_qp *qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr);
- struct ib_cq * (*create_cq)(struct ib_device *device, int cqe,
- int comp_vector,
+ struct ib_cq * (*create_cq)(struct ib_device *device,
+ const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count,
@@ -1637,10 +1706,13 @@ struct ib_device {
int (*process_mad)(struct ib_device *device,
int process_mad_flags,
u8 port_num,
- struct ib_wc *in_wc,
- struct ib_grh *in_grh,
- struct ib_mad *in_mad,
- struct ib_mad *out_mad);
+ const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad_hdr *in_mad,
+ size_t in_mad_size,
+ struct ib_mad_hdr *out_mad,
+ size_t *out_mad_size,
+ u16 *out_mad_pkey_index);
struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
struct ib_ucontext *ucontext,
struct ib_udata *udata);
@@ -1675,6 +1747,14 @@ struct ib_device {
u32 local_dma_lkey;
u8 node_type;
u8 phys_port_cnt;
+
+ /**
+ * The following mandatory functions are used only at device
+ * registration. Keep functions such as these at the end of this
+ * structure to avoid cache line misses when accessing struct ib_device
+ * in fast paths.
+ */
+ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *);
};
struct ib_client {
@@ -1743,6 +1823,284 @@ int ib_query_port(struct ib_device *device,
enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
u8 port_num);
+/**
+ * rdma_start_port - Return the first valid port number for the device
+ * specified
+ *
+ * @device: Device to be checked
+ *
+ * Return start port number
+ */
+static inline u8 rdma_start_port(const struct ib_device *device)
+{
+ return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
+}
+
+/**
+ * rdma_end_port - Return the last valid port number for the device
+ * specified
+ *
+ * @device: Device to be checked
+ *
+ * Return last port number
+ */
+static inline u8 rdma_end_port(const struct ib_device *device)
+{
+ return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+ 0 : device->phys_port_cnt;
+}
+
+static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
+}
+
+static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
+}
+
+static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP;
+}
+
+static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags &
+ (RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_PROT_ROCE);
+}
+
+/**
+ * rdma_cap_ib_mad - Check if the port of a device supports Infiniband
+ * Management Datagrams.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Management Datagrams (MAD) are a required part of the InfiniBand
+ * specification and are supported on all InfiniBand devices. A slightly
+ * extended version are also supported on OPA interfaces.
+ *
+ * Return: true if the port supports sending/receiving of MAD packets.
+ */
+static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD;
+}
+
+/**
+ * rdma_cap_opa_mad - Check if the port of device provides support for OPA
+ * Management Datagrams.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Intel OmniPath devices extend and/or replace the InfiniBand Management
+ * datagrams with their own versions. These OPA MADs share many but not all of
+ * the characteristics of InfiniBand MADs.
+ *
+ * OPA MADs differ in the following ways:
+ *
+ * 1) MADs are variable size up to 2K
+ * IBTA defined MADs remain fixed at 256 bytes
+ * 2) OPA SMPs must carry valid PKeys
+ * 3) OPA SMP packets are a different format
+ *
+ * Return: true if the port supports OPA MAD packet formats.
+ */
+static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
+{
+ return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD)
+ == RDMA_CORE_CAP_OPA_MAD;
+}
+
+/**
+ * rdma_cap_ib_smi - Check if the port of a device provides an Infiniband
+ * Subnet Management Agent (SMA) on the Subnet Management Interface (SMI).
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Each InfiniBand node is required to provide a Subnet Management Agent
+ * that the subnet manager can access. Prior to the fabric being fully
+ * configured by the subnet manager, the SMA is accessed via a well known
+ * interface called the Subnet Management Interface (SMI). This interface
+ * uses directed route packets to communicate with the SM to get around the
+ * chicken and egg problem of the SM needing to know what's on the fabric
+ * in order to configure the fabric, and needing to configure the fabric in
+ * order to send packets to the devices on the fabric. These directed
+ * route packets do not need the fabric fully configured in order to reach
+ * their destination. The SMI is the only method allowed to send
+ * directed route packets on an InfiniBand fabric.
+ *
+ * Return: true if the port provides an SMI.
+ */
+static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI;
+}
+
+/**
+ * rdma_cap_ib_cm - Check if the port of device has the capability Infiniband
+ * Communication Manager.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * The InfiniBand Communication Manager is one of many pre-defined General
+ * Service Agents (GSA) that are accessed via the General Service
+ * Interface (GSI). It's role is to facilitate establishment of connections
+ * between nodes as well as other management related tasks for established
+ * connections.
+ *
+ * Return: true if the port supports an IB CM (this does not guarantee that
+ * a CM is actually running however).
+ */
+static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM;
+}
+
+/**
+ * rdma_cap_iw_cm - Check if the port of device has the capability IWARP
+ * Communication Manager.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Similar to above, but specific to iWARP connections which have a different
+ * managment protocol than InfiniBand.
+ *
+ * Return: true if the port supports an iWARP CM (this does not guarantee that
+ * a CM is actually running however).
+ */
+static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM;
+}
+
+/**
+ * rdma_cap_ib_sa - Check if the port of device has the capability Infiniband
+ * Subnet Administration.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * An InfiniBand Subnet Administration (SA) service is a pre-defined General
+ * Service Agent (GSA) provided by the Subnet Manager (SM). On InfiniBand
+ * fabrics, devices should resolve routes to other hosts by contacting the
+ * SA to query the proper route.
+ *
+ * Return: true if the port should act as a client to the fabric Subnet
+ * Administration interface. This does not imply that the SA service is
+ * running locally.
+ */
+static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA;
+}
+
+/**
+ * rdma_cap_ib_mcast - Check if the port of device has the capability Infiniband
+ * Multicast.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * InfiniBand multicast registration is more complex than normal IPv4 or
+ * IPv6 multicast registration. Each Host Channel Adapter must register
+ * with the Subnet Manager when it wishes to join a multicast group. It
+ * should do so only once regardless of how many queue pairs it subscribes
+ * to this group. And it should leave the group only after all queue pairs
+ * attached to the group have been detached.
+ *
+ * Return: true if the port must undertake the additional adminstrative
+ * overhead of registering/unregistering with the SM and tracking of the
+ * total number of queue pairs attached to the multicast group.
+ */
+static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num)
+{
+ return rdma_cap_ib_sa(device, port_num);
+}
+
+/**
+ * rdma_cap_af_ib - Check if the port of device has the capability
+ * Native Infiniband Address.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * InfiniBand addressing uses a port's GUID + Subnet Prefix to make a default
+ * GID. RoCE uses a different mechanism, but still generates a GID via
+ * a prescribed mechanism and port specific data.
+ *
+ * Return: true if the port uses a GID address to identify devices on the
+ * network.
+ */
+static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB;
+}
+
+/**
+ * rdma_cap_eth_ah - Check if the port of device has the capability
+ * Ethernet Address Handle.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * RoCE is InfiniBand over Ethernet, and it uses a well defined technique
+ * to fabricate GIDs over Ethernet/IP specific addresses native to the
+ * port. Normally, packet headers are generated by the sending host
+ * adapter, but when sending connectionless datagrams, we must manually
+ * inject the proper headers for the fabric we are communicating over.
+ *
+ * Return: true if we are running as a RoCE port and must force the
+ * addition of a Global Route Header built from our Ethernet Address
+ * Handle into our header list for connectionless packets.
+ */
+static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH;
+}
+
+/**
+ * rdma_cap_read_multi_sge - Check if the port of device has the capability
+ * RDMA Read Multiple Scatter-Gather Entries.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * iWARP has a restriction that RDMA READ requests may only have a single
+ * Scatter/Gather Entry (SGE) in the work request.
+ *
+ * NOTE: although the linux kernel currently assumes all devices are either
+ * single SGE RDMA READ devices or identical SGE maximums for RDMA READs and
+ * WRITEs, according to Tom Talpey, this is not accurate. There are some
+ * devices out there that support more than a single SGE on RDMA READ
+ * requests, but do not support the same number of SGEs as they do on
+ * RDMA WRITE requests. The linux kernel would need rearchitecting to
+ * support these imbalanced READ/WRITE SGEs allowed devices. So, for now,
+ * suffice with either the device supports the same READ/WRITE SGEs, or
+ * it only gets one READ sge.
+ *
+ * Return: true for any device that allows more than one SGE in RDMA READ
+ * requests.
+ */
+static inline bool rdma_cap_read_multi_sge(struct ib_device *device,
+ u8 port_num)
+{
+ return !(device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP);
+}
+
+/**
+ * rdma_max_mad_size - Return the max MAD size required by this RDMA Port.
+ *
+ * @device: Device
+ * @port_num: Port number
+ *
+ * This MAD size includes the MAD headers and MAD payload. No other headers
+ * are included.
+ *
+ * Return the max MAD size required by the Port. Will return 0 if the port
+ * does not support MADs
+ */
+static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].max_mad_size;
+}
+
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid);
@@ -1799,8 +2157,9 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
* @ah_attr: Returned attributes that can be used when creating an address
* handle for replying to the message.
*/
-int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
- struct ib_grh *grh, struct ib_ah_attr *ah_attr);
+int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
+ const struct ib_wc *wc, const struct ib_grh *grh,
+ struct ib_ah_attr *ah_attr);
/**
* ib_create_ah_from_wc - Creates an address handle associated with the
@@ -1814,8 +2173,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
* The address handle is used to reference a local or global destination
* in all UD QP post sends.
*/
-struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
- struct ib_grh *grh, u8 port_num);
+struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
+ const struct ib_grh *grh, u8 port_num);
/**
* ib_modify_ah - Modifies the address vector associated with an address
@@ -2011,16 +2370,15 @@ static inline int ib_post_recv(struct ib_qp *qp,
* asynchronous event not associated with a completion occurs on the CQ.
* @cq_context: Context associated with the CQ returned to the user via
* the associated completion and event handlers.
- * @cqe: The minimum size of the CQ.
- * @comp_vector - Completion vector used to signal completion events.
- * Must be >= 0 and < context->num_comp_vectors.
+ * @cq_attr: The attributes the CQ should be created upon.
*
* Users can examine the cq structure to determine the actual CQ size.
*/
struct ib_cq *ib_create_cq(struct ib_device *device,
ib_comp_handler comp_handler,
void (*event_handler)(struct ib_event *, void *),
- void *cq_context, int cqe, int comp_vector);
+ void *cq_context,
+ const struct ib_cq_init_attr *cq_attr);
/**
* ib_resize_cq - Modifies the capacity of the CQ.