aboutsummaryrefslogtreecommitdiffstats
path: root/include/rdma/ib_verbs.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--include/rdma/ib_verbs.h1387
1 files changed, 724 insertions, 663 deletions
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 1f779fad3a1e..975d6e9efbcb 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1,44 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
- * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004, 2020 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
-#if !defined(IB_VERBS_H)
+#ifndef IB_VERBS_H
#define IB_VERBS_H
+#include <linux/ethtool.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
@@ -75,6 +49,9 @@ struct ib_umem_odp;
struct ib_uqp_object;
struct ib_usrq_object;
struct ib_uwq_object;
+struct rdma_cm_id;
+struct ib_port;
+struct hw_stats_device_data;
extern struct workqueue_struct *ib_wq;
extern struct workqueue_struct *ib_comp_wq;
@@ -100,7 +77,8 @@ void ibdev_notice(const struct ib_device *ibdev, const char *format, ...);
__printf(2, 3) __cold
void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
#define ibdev_dbg(__dev, format, args...) \
dynamic_ibdev_dbg(__dev, format, ##args)
#else
@@ -133,7 +111,8 @@ do { \
#define ibdev_info_ratelimited(ibdev, fmt, ...) \
ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
-#if defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
/* descriptor check is first to prevent flooding with "callbacks suppressed" */
#define ibdev_dbg_ratelimited(ibdev, fmt, ...) \
do { \
@@ -162,10 +141,9 @@ union ib_gid {
extern union ib_gid zgid;
enum ib_gid_type {
- /* If link layer is Ethernet, this is RoCE V1 */
- IB_GID_TYPE_IB = 0,
- IB_GID_TYPE_ROCE = 0,
- IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
+ IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB,
+ IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1,
+ IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2,
IB_GID_TYPE_SIZE
};
@@ -176,7 +154,7 @@ struct ib_gid_attr {
union ib_gid gid;
enum ib_gid_type gid_type;
u16 index;
- u8 port_num;
+ u32 port_num;
};
enum {
@@ -204,7 +182,7 @@ rdma_node_get_transport(unsigned int node_type);
enum rdma_network_type {
RDMA_NETWORK_IB,
- RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
+ RDMA_NETWORK_ROCE_V1,
RDMA_NETWORK_IPV4,
RDMA_NETWORK_IPV6
};
@@ -214,9 +192,10 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net
if (network_type == RDMA_NETWORK_IPV4 ||
network_type == RDMA_NETWORK_IPV6)
return IB_GID_TYPE_ROCE_UDP_ENCAP;
-
- /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
- return IB_GID_TYPE_IB;
+ else if (network_type == RDMA_NETWORK_ROCE_V1)
+ return IB_GID_TYPE_ROCE;
+ else
+ return IB_GID_TYPE_IB;
}
static inline enum rdma_network_type
@@ -225,6 +204,9 @@ rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
if (attr->gid_type == IB_GID_TYPE_IB)
return RDMA_NETWORK_IB;
+ if (attr->gid_type == IB_GID_TYPE_ROCE)
+ return RDMA_NETWORK_ROCE_V1;
+
if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
return RDMA_NETWORK_IPV4;
else
@@ -238,32 +220,24 @@ enum rdma_link_layer {
};
enum ib_device_cap_flags {
- IB_DEVICE_RESIZE_MAX_WR = (1 << 0),
- IB_DEVICE_BAD_PKEY_CNTR = (1 << 1),
- IB_DEVICE_BAD_QKEY_CNTR = (1 << 2),
- IB_DEVICE_RAW_MULTI = (1 << 3),
- IB_DEVICE_AUTO_PATH_MIG = (1 << 4),
- IB_DEVICE_CHANGE_PHY_PORT = (1 << 5),
- IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6),
- IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7),
- IB_DEVICE_SHUTDOWN_PORT = (1 << 8),
- /* Not in use, former INIT_TYPE = (1 << 9),*/
- IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10),
- IB_DEVICE_SYS_IMAGE_GUID = (1 << 11),
- IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12),
- IB_DEVICE_SRQ_RESIZE = (1 << 13),
- IB_DEVICE_N_NOTIFY_CQ = (1 << 14),
-
- /*
- * This device supports a per-device lkey or stag that can be
- * used without performing a memory registration for the local
- * memory. Note that ULPs should never check this flag, but
- * instead of use the local_dma_lkey flag in the ib_pd structure,
- * which will always contain a usable lkey.
- */
- IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15),
- /* Reserved, old SEND_W_INV = (1 << 16),*/
- IB_DEVICE_MEM_WINDOW = (1 << 17),
+ IB_DEVICE_RESIZE_MAX_WR = IB_UVERBS_DEVICE_RESIZE_MAX_WR,
+ IB_DEVICE_BAD_PKEY_CNTR = IB_UVERBS_DEVICE_BAD_PKEY_CNTR,
+ IB_DEVICE_BAD_QKEY_CNTR = IB_UVERBS_DEVICE_BAD_QKEY_CNTR,
+ IB_DEVICE_RAW_MULTI = IB_UVERBS_DEVICE_RAW_MULTI,
+ IB_DEVICE_AUTO_PATH_MIG = IB_UVERBS_DEVICE_AUTO_PATH_MIG,
+ IB_DEVICE_CHANGE_PHY_PORT = IB_UVERBS_DEVICE_CHANGE_PHY_PORT,
+ IB_DEVICE_UD_AV_PORT_ENFORCE = IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE,
+ IB_DEVICE_CURR_QP_STATE_MOD = IB_UVERBS_DEVICE_CURR_QP_STATE_MOD,
+ IB_DEVICE_SHUTDOWN_PORT = IB_UVERBS_DEVICE_SHUTDOWN_PORT,
+ /* IB_DEVICE_INIT_TYPE = IB_UVERBS_DEVICE_INIT_TYPE, (not in use) */
+ IB_DEVICE_PORT_ACTIVE_EVENT = IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT,
+ IB_DEVICE_SYS_IMAGE_GUID = IB_UVERBS_DEVICE_SYS_IMAGE_GUID,
+ IB_DEVICE_RC_RNR_NAK_GEN = IB_UVERBS_DEVICE_RC_RNR_NAK_GEN,
+ IB_DEVICE_SRQ_RESIZE = IB_UVERBS_DEVICE_SRQ_RESIZE,
+ IB_DEVICE_N_NOTIFY_CQ = IB_UVERBS_DEVICE_N_NOTIFY_CQ,
+
+ /* Reserved, old SEND_W_INV = 1 << 16,*/
+ IB_DEVICE_MEM_WINDOW = IB_UVERBS_DEVICE_MEM_WINDOW,
/*
* Devices should set IB_DEVICE_UD_IP_SUM if they support
* insertion of UDP and TCP checksum on outgoing UD IPoIB
@@ -271,9 +245,8 @@ enum ib_device_cap_flags {
* incoming messages. Setting this flag implies that the
* IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
*/
- IB_DEVICE_UD_IP_CSUM = (1 << 18),
- IB_DEVICE_UD_TSO = (1 << 19),
- IB_DEVICE_XRC = (1 << 20),
+ IB_DEVICE_UD_IP_CSUM = IB_UVERBS_DEVICE_UD_IP_CSUM,
+ IB_DEVICE_XRC = IB_UVERBS_DEVICE_XRC,
/*
* This device supports the IB "base memory management extension",
@@ -284,31 +257,53 @@ enum ib_device_cap_flags {
* IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
* stag.
*/
- IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21),
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22),
- IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23),
- IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24),
- IB_DEVICE_RC_IP_CSUM = (1 << 25),
+ IB_DEVICE_MEM_MGT_EXTENSIONS = IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS,
+ IB_DEVICE_MEM_WINDOW_TYPE_2A = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2A,
+ IB_DEVICE_MEM_WINDOW_TYPE_2B = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2B,
+ IB_DEVICE_RC_IP_CSUM = IB_UVERBS_DEVICE_RC_IP_CSUM,
/* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
- IB_DEVICE_RAW_IP_CSUM = (1 << 26),
- /*
- * Devices should set IB_DEVICE_CROSS_CHANNEL if they
- * support execution of WQEs that involve synchronization
- * of I/O operations with single completion queue managed
- * by hardware.
- */
- IB_DEVICE_CROSS_CHANNEL = (1 << 27),
- IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
- IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30),
- IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
- IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
- IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
+ IB_DEVICE_RAW_IP_CSUM = IB_UVERBS_DEVICE_RAW_IP_CSUM,
+ IB_DEVICE_MANAGED_FLOW_STEERING =
+ IB_UVERBS_DEVICE_MANAGED_FLOW_STEERING,
/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
- IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
- IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35),
+ IB_DEVICE_RAW_SCATTER_FCS = IB_UVERBS_DEVICE_RAW_SCATTER_FCS,
/* The device supports padding incoming writes to cacheline. */
- IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36),
- IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37),
+ IB_DEVICE_PCI_WRITE_END_PADDING =
+ IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING,
+};
+
+enum ib_kernel_cap_flags {
+ /*
+ * This device supports a per-device lkey or stag that can be
+ * used without performing a memory registration for the local
+ * memory. Note that ULPs should never check this flag, but
+ * instead of use the local_dma_lkey flag in the ib_pd structure,
+ * which will always contain a usable lkey.
+ */
+ IBK_LOCAL_DMA_LKEY = 1 << 0,
+ /* IB_QP_CREATE_INTEGRITY_EN is supported to implement T10-PI */
+ IBK_INTEGRITY_HANDOVER = 1 << 1,
+ /* IB_ACCESS_ON_DEMAND is supported during reg_user_mr() */
+ IBK_ON_DEMAND_PAGING = 1 << 2,
+ /* IB_MR_TYPE_SG_GAPS is supported */
+ IBK_SG_GAPS_REG = 1 << 3,
+ /* Driver supports RDMA_NLDEV_CMD_DELLINK */
+ IBK_ALLOW_USER_UNREG = 1 << 4,
+
+ /* ipoib will use IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK */
+ IBK_BLOCK_MULTICAST_LOOPBACK = 1 << 5,
+ /* iopib will use IB_QP_CREATE_IPOIB_UD_LSO for its QPs */
+ IBK_UD_TSO = 1 << 6,
+ /* iopib will use the device ops:
+ * get_vf_config
+ * get_vf_guid
+ * get_vf_stats
+ * set_vf_guid
+ * set_vf_link_state
+ */
+ IBK_VIRTUAL_FUNCTION = 1 << 7,
+ /* ipoib will use IB_QP_CREATE_NETDEV_USE for its QPs */
+ IBK_RDMA_NETDEV_OPA = 1 << 8,
};
enum ib_atomic_cap {
@@ -407,6 +402,7 @@ struct ib_device_attr {
int max_qp;
int max_qp_wr;
u64 device_cap_flags;
+ u64 kernel_cap_flags;
int max_send_sge;
int max_recv_sge;
int max_sge_rd;
@@ -430,8 +426,6 @@ struct ib_device_attr {
int max_mcast_qp_attach;
int max_total_mcast_qp_attach;
int max_ah;
- int max_fmr;
- int max_map_per_fmr;
int max_srq;
int max_srq_wr;
int max_srq_sge;
@@ -462,6 +456,11 @@ enum ib_mtu {
IB_MTU_4096 = 5
};
+enum opa_mtu {
+ OPA_MTU_8192 = 6,
+ OPA_MTU_10240 = 7
+};
+
static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
{
switch (mtu) {
@@ -488,6 +487,28 @@ static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
return IB_MTU_256;
}
+static inline int opa_mtu_enum_to_int(enum opa_mtu mtu)
+{
+ switch (mtu) {
+ case OPA_MTU_8192:
+ return 8192;
+ case OPA_MTU_10240:
+ return 10240;
+ default:
+ return(ib_mtu_enum_to_int((enum ib_mtu)mtu));
+ }
+}
+
+static inline enum opa_mtu opa_mtu_int_to_enum(int mtu)
+{
+ if (mtu >= 10240)
+ return OPA_MTU_10240;
+ else if (mtu >= 8192)
+ return OPA_MTU_8192;
+ else
+ return ((enum opa_mtu)ib_mtu_int_to_enum(mtu));
+}
+
enum ib_port_state {
IB_PORT_NOP = 0,
IB_PORT_DOWN = 1,
@@ -534,21 +555,40 @@ enum ib_port_speed {
IB_SPEED_FDR10 = 8,
IB_SPEED_FDR = 16,
IB_SPEED_EDR = 32,
- IB_SPEED_HDR = 64
+ IB_SPEED_HDR = 64,
+ IB_SPEED_NDR = 128,
+};
+
+enum ib_stat_flag {
+ IB_STAT_FLAG_OPTIONAL = 1 << 0,
+};
+
+/**
+ * struct rdma_stat_desc
+ * @name - The name of the counter
+ * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
+ * @priv - Driver private information; Core code should not use
+ */
+struct rdma_stat_desc {
+ const char *name;
+ unsigned int flags;
+ const void *priv;
};
/**
* struct rdma_hw_stats
* @lock - Mutex to protect parallel write access to lifespan and values
- * of counters, which are 64bits and not guaranteeed to be written
+ * of counters, which are 64bits and not guaranteed to be written
* atomicaly on 32bits systems.
* @timestamp - Used by the core code to track when the last update was
* @lifespan - Used by the core code to determine how old the counters
* should be before being updated again. Stored in jiffies, defaults
* to 10 milliseconds, drivers can override the default be specifying
* their own value during their allocation routine.
- * @name - Array of pointers to static names used for the counters in
- * directory.
+ * @descs - Array of pointers to static descriptors used for the counters
+ * in directory.
+ * @is_disabled - A bitmap to indicate each counter is currently disabled
+ * or not.
* @num_counters - How many hardware counters there are. If name is
* shorter than this number, a kernel oops will result. Driver authors
* are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters)
@@ -560,36 +600,19 @@ struct rdma_hw_stats {
struct mutex lock; /* Protect lifespan and values[] */
unsigned long timestamp;
unsigned long lifespan;
- const char * const *names;
+ const struct rdma_stat_desc *descs;
+ unsigned long *is_disabled;
int num_counters;
u64 value[];
};
#define RDMA_HW_STATS_DEFAULT_LIFESPAN 10
-/**
- * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
- * for drivers.
- * @names - Array of static const char *
- * @num_counters - How many elements in array
- * @lifespan - How many milliseconds between updates
- */
-static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
- const char * const *names, int num_counters,
- unsigned long lifespan)
-{
- struct rdma_hw_stats *stats;
- stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64),
- GFP_KERNEL);
- if (!stats)
- return NULL;
- stats->names = names;
- stats->num_counters = num_counters;
- stats->lifespan = msecs_to_jiffies(lifespan);
-
- return stats;
-}
+struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
+ const struct rdma_stat_desc *descs, int num_counters,
+ unsigned long lifespan);
+void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats);
/* Define bits for the various functionality this port needs to be supported by
* the core.
@@ -651,6 +674,7 @@ struct ib_port_attr {
enum ib_port_state state;
enum ib_mtu max_mtu;
enum ib_mtu active_mtu;
+ u32 phys_mtu;
int gid_tbl_len;
unsigned int ip_gids:1;
/* This is the value from PortInfo CapabilityMask, defined by IBA */
@@ -667,7 +691,7 @@ struct ib_port_attr {
u8 subnet_timeout;
u8 init_type_reply;
u8 active_width;
- u8 active_speed;
+ u16 active_speed;
u8 phys_state;
u16 port_cap_flags2;
};
@@ -729,7 +753,7 @@ struct ib_event {
struct ib_qp *qp;
struct ib_srq *srq;
struct ib_wq *wq;
- u8 port_num;
+ u32 port_num;
} element;
enum ib_event_type event;
};
@@ -880,6 +904,12 @@ struct ib_mr_status {
*/
__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
+struct rdma_ah_init_attr {
+ struct rdma_ah_attr *ah_attr;
+ u32 flags;
+ struct net_device *xmit_slave;
+};
+
enum rdma_ah_attr_type {
RDMA_AH_ATTR_TYPE_UNDEFINED,
RDMA_AH_ATTR_TYPE_IB,
@@ -906,7 +936,7 @@ struct rdma_ah_attr {
struct ib_global_route grh;
u8 sl;
u8 static_rate;
- u8 port_num;
+ u32 port_num;
u8 ah_flags;
enum rdma_ah_attr_type type;
union {
@@ -944,13 +974,14 @@ enum ib_wc_status {
const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
enum ib_wc_opcode {
- IB_WC_SEND,
- IB_WC_RDMA_WRITE,
- IB_WC_RDMA_READ,
- IB_WC_COMP_SWAP,
- IB_WC_FETCH_ADD,
- IB_WC_LSO,
- IB_WC_LOCAL_INV,
+ IB_WC_SEND = IB_UVERBS_WC_SEND,
+ IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE,
+ IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
+ IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
+ IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
+ IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
+ IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
+ IB_WC_LSO = IB_UVERBS_WC_TSO,
IB_WC_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
@@ -992,7 +1023,7 @@ struct ib_wc {
u16 pkey_index;
u8 sl;
u8 dlid_path_bits;
- u8 port_num; /* valid only for DR SMPs on switches */
+ u32 port_num; /* valid only for DR SMPs on switches */
u8 smac[ETH_ALEN];
u16 vlan_id;
u8 network_hdr_type;
@@ -1006,9 +1037,9 @@ enum ib_cq_notify_flags {
};
enum ib_srq_type {
- IB_SRQT_BASIC,
- IB_SRQT_XRC,
- IB_SRQT_TM,
+ IB_SRQT_BASIC = IB_UVERBS_SRQT_BASIC,
+ IB_SRQT_XRC = IB_UVERBS_SRQT_XRC,
+ IB_SRQT_TM = IB_UVERBS_SRQT_TM,
};
static inline bool ib_srq_has_cq(enum ib_srq_type srq_type)
@@ -1077,16 +1108,16 @@ enum ib_qp_type {
IB_QPT_SMI,
IB_QPT_GSI,
- IB_QPT_RC,
- IB_QPT_UC,
- IB_QPT_UD,
+ IB_QPT_RC = IB_UVERBS_QPT_RC,
+ IB_QPT_UC = IB_UVERBS_QPT_UC,
+ IB_QPT_UD = IB_UVERBS_QPT_UD,
IB_QPT_RAW_IPV6,
IB_QPT_RAW_ETHERTYPE,
- IB_QPT_RAW_PACKET = 8,
- IB_QPT_XRC_INI = 9,
- IB_QPT_XRC_TGT,
+ IB_QPT_RAW_PACKET = IB_UVERBS_QPT_RAW_PACKET,
+ IB_QPT_XRC_INI = IB_UVERBS_QPT_XRC_INI,
+ IB_QPT_XRC_TGT = IB_UVERBS_QPT_XRC_TGT,
IB_QPT_MAX,
- IB_QPT_DRIVER = 0xFF,
+ IB_QPT_DRIVER = IB_UVERBS_QPT_DRIVER,
/* Reserve a range for qp types internal to the low level driver.
* These qp types will not be visible at the IB core layer, so the
* IB_QPT_MAX usages should not be affected in the core layer
@@ -1105,17 +1136,21 @@ enum ib_qp_type {
enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK =
+ IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
IB_QP_CREATE_CROSS_CHANNEL = 1 << 2,
IB_QP_CREATE_MANAGED_SEND = 1 << 3,
IB_QP_CREATE_MANAGED_RECV = 1 << 4,
IB_QP_CREATE_NETIF_QP = 1 << 5,
IB_QP_CREATE_INTEGRITY_EN = 1 << 6,
- /* FREE = 1 << 7, */
- IB_QP_CREATE_SCATTER_FCS = 1 << 8,
- IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9,
+ IB_QP_CREATE_NETDEV_USE = 1 << 7,
+ IB_QP_CREATE_SCATTER_FCS =
+ IB_UVERBS_QP_CREATE_SCATTER_FCS,
+ IB_QP_CREATE_CVLAN_STRIPPING =
+ IB_UVERBS_QP_CREATE_CVLAN_STRIPPING,
IB_QP_CREATE_SOURCE_QPN = 1 << 10,
- IB_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11,
+ IB_QP_CREATE_PCI_WRITE_END_PADDING =
+ IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
@@ -1143,7 +1178,7 @@ struct ib_qp_init_attr {
/*
* Only needed for special QP types, or when using the RW API.
*/
- u8 port_num;
+ u32 port_num;
struct ib_rwq_ind_table *rwq_ind_tbl;
u32 source_qpn;
};
@@ -1217,6 +1252,8 @@ enum ib_qp_attr_mask {
IB_QP_RESERVED3 = (1<<23),
IB_QP_RESERVED4 = (1<<24),
IB_QP_RATE_LIMIT = (1<<25),
+
+ IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0),
};
enum ib_qp_state {
@@ -1260,13 +1297,14 @@ struct ib_qp_attr {
u8 max_rd_atomic;
u8 max_dest_rd_atomic;
u8 min_rnr_timer;
- u8 port_num;
+ u32 port_num;
u8 timeout;
u8 retry_cnt;
u8 rnr_retry;
- u8 alt_port_num;
+ u32 alt_port_num;
u8 alt_timeout;
u32 rate_limit;
+ struct net_device *xmit_slave;
};
enum ib_wr_opcode {
@@ -1278,6 +1316,7 @@ enum ib_wr_opcode {
IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
+ IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW,
IB_WR_LSO = IB_UVERBS_WR_TSO,
IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
@@ -1379,7 +1418,7 @@ struct ib_ud_wr {
u32 remote_qpn;
u32 remote_qkey;
u16 pkey_index; /* valid for GSI only */
- u8 port_num; /* valid for DR SMPs on switch only */
+ u32 port_num; /* valid for DR SMPs on switch only */
};
static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr)
@@ -1436,12 +1475,6 @@ enum ib_mr_rereg_flags {
IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1)
};
-struct ib_fmr_attr {
- int max_pages;
- int max_maps;
- u8 page_shift;
-};
-
struct ib_umem;
enum rdma_remove_reason {
@@ -1456,6 +1489,8 @@ enum rdma_remove_reason {
RDMA_REMOVE_DRIVER_REMOVE,
/* uobj is being cleaned-up before being committed */
RDMA_REMOVE_ABORT,
+ /* The driver failed to destroy the uobject and is being disconnected */
+ RDMA_REMOVE_DRIVER_FAILURE,
};
struct ib_rdmacg_object {
@@ -1467,14 +1502,6 @@ struct ib_rdmacg_object {
struct ib_ucontext {
struct ib_device *device;
struct ib_uverbs_file *ufile;
- /*
- * 'closing' can be read by the driver only during a destroy callback,
- * it is set when we are closing the file descriptor and indicates
- * that mm_sem may be locked.
- */
- bool closing;
-
- bool cleanup_retryable;
struct ib_rdmacg_object cg_obj;
/*
@@ -1528,9 +1555,8 @@ struct ib_xrcd {
struct ib_device *device;
atomic_t usecnt; /* count all exposed resources */
struct inode *inode;
-
- struct mutex tgt_qp_mutex;
- struct list_head tgt_qp_list;
+ struct rw_semaphore tgt_qps_rwsem;
+ struct xarray tgt_qps;
};
struct ib_ah {
@@ -1544,10 +1570,12 @@ struct ib_ah {
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
enum ib_poll_context {
- IB_POLL_DIRECT, /* caller context, no hw completions */
IB_POLL_SOFTIRQ, /* poll from softirq context */
IB_POLL_WORKQUEUE, /* poll from workqueue */
IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
+ IB_POLL_LAST_POOL_TYPE = IB_POLL_UNBOUND_WORKQUEUE,
+
+ IB_POLL_DIRECT, /* caller context, no hw completions */
};
struct ib_cq {
@@ -1557,9 +1585,11 @@ struct ib_cq {
void (*event_handler)(struct ib_event *, void *);
void *cq_context;
int cqe;
+ unsigned int cqe_used;
atomic_t usecnt; /* count number of work queues */
enum ib_poll_context poll_ctx;
struct ib_wc *wc;
+ struct list_head pool_entry;
union {
struct irq_poll iop;
struct work_struct work;
@@ -1569,7 +1599,9 @@ struct ib_cq {
/* updated only by trace points */
ktime_t timestamp;
- bool interrupt;
+ u8 interrupt:1;
+ u8 shared:1;
+ unsigned int comp_vector;
/*
* Implementation details of the RDMA core, don't use in drivers:
@@ -1595,26 +1627,35 @@ struct ib_srq {
} xrc;
};
} ext;
+
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
};
enum ib_raw_packet_caps {
- /* Strip cvlan from incoming packet and report it in the matching work
+ /*
+ * Strip cvlan from incoming packet and report it in the matching work
* completion is supported.
*/
- IB_RAW_PACKET_CAP_CVLAN_STRIPPING = (1 << 0),
- /* Scatter FCS field of an incoming packet to host memory is supported.
+ IB_RAW_PACKET_CAP_CVLAN_STRIPPING =
+ IB_UVERBS_RAW_PACKET_CAP_CVLAN_STRIPPING,
+ /*
+ * Scatter FCS field of an incoming packet to host memory is supported.
*/
- IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1),
+ IB_RAW_PACKET_CAP_SCATTER_FCS = IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS,
/* Checksum offloads are supported (for both send and receive). */
- IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2),
- /* When a packet is received for an RQ with no receive WQEs, the
+ IB_RAW_PACKET_CAP_IP_CSUM = IB_UVERBS_RAW_PACKET_CAP_IP_CSUM,
+ /*
+ * When a packet is received for an RQ with no receive WQEs, the
* packet processing is delayed.
*/
- IB_RAW_PACKET_CAP_DELAY_DROP = (1 << 3),
+ IB_RAW_PACKET_CAP_DELAY_DROP = IB_UVERBS_RAW_PACKET_CAP_DELAY_DROP,
};
enum ib_wq_type {
- IB_WQT_RQ
+ IB_WQT_RQ = IB_UVERBS_WQT_RQ,
};
enum ib_wq_state {
@@ -1637,10 +1678,11 @@ struct ib_wq {
};
enum ib_wq_flags {
- IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0,
- IB_WQ_FLAGS_SCATTER_FCS = 1 << 1,
- IB_WQ_FLAGS_DELAY_DROP = 1 << 2,
- IB_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3,
+ IB_WQ_FLAGS_CVLAN_STRIPPING = IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING,
+ IB_WQ_FLAGS_SCATTER_FCS = IB_UVERBS_WQ_FLAGS_SCATTER_FCS,
+ IB_WQ_FLAGS_DELAY_DROP = IB_UVERBS_WQ_FLAGS_DELAY_DROP,
+ IB_WQ_FLAGS_PCI_WRITE_END_PADDING =
+ IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING,
};
struct ib_wq_init_attr {
@@ -1692,7 +1734,7 @@ struct ib_qp_security;
struct ib_port_pkey {
enum port_pkey_state state;
u16 pkey_index;
- u8 port_num;
+ u32 port_num;
struct list_head qp_list;
struct list_head to_error_list;
struct ib_qp_security *sec;
@@ -1753,7 +1795,7 @@ struct ib_qp {
enum ib_qp_type qp_type;
struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_qp_security *qp_sec;
- u8 port;
+ u32 port;
bool integrity_en;
/*
@@ -1804,14 +1846,6 @@ struct ib_mw {
enum ib_mw_type type;
};
-struct ib_fmr {
- struct ib_device *device;
- struct ib_pd *pd;
- struct list_head list;
- u32 lkey;
- u32 rkey;
-};
-
/* Supported steering options */
enum ib_flow_attr_type {
/* steering according to rule specifications */
@@ -1853,17 +1887,6 @@ enum ib_flow_spec_type {
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 10
-/* Flow steering rule priority is set according to it's domain.
- * Lower domain value means higher priority.
- */
-enum ib_flow_domain {
- IB_FLOW_DOMAIN_USER,
- IB_FLOW_DOMAIN_ETHTOOL,
- IB_FLOW_DOMAIN_RFS,
- IB_FLOW_DOMAIN_NIC,
- IB_FLOW_DOMAIN_NUM /* Must be last */
-};
-
enum ib_flow_flags {
IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
@@ -1876,7 +1899,7 @@ struct ib_flow_eth_filter {
__be16 ether_type;
__be16 vlan_tag;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_eth {
@@ -1890,7 +1913,7 @@ struct ib_flow_ib_filter {
__be16 dlid;
__u8 sl;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_ib {
@@ -1915,7 +1938,7 @@ struct ib_flow_ipv4_filter {
u8 ttl;
u8 flags;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_ipv4 {
@@ -1933,7 +1956,7 @@ struct ib_flow_ipv6_filter {
u8 traffic_class;
u8 hop_limit;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_ipv6 {
@@ -1947,7 +1970,7 @@ struct ib_flow_tcp_udp_filter {
__be16 dst_port;
__be16 src_port;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_tcp_udp {
@@ -1959,7 +1982,7 @@ struct ib_flow_spec_tcp_udp {
struct ib_flow_tunnel_filter {
__be32 tunnel_id;
- u8 real_sz[0];
+ u8 real_sz[];
};
/* ib_flow_spec_tunnel describes the Vxlan tunnel
@@ -1976,7 +1999,7 @@ struct ib_flow_esp_filter {
__be32 spi;
__be32 seq;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_esp {
@@ -1991,7 +2014,7 @@ struct ib_flow_gre_filter {
__be16 protocol;
__be32 key;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_gre {
@@ -2004,7 +2027,7 @@ struct ib_flow_spec_gre {
struct ib_flow_mpls_filter {
__be32 tag;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_mpls {
@@ -2068,7 +2091,7 @@ struct ib_flow_attr {
u16 priority;
u32 flags;
u8 num_of_specs;
- u8 port;
+ u32 port;
union ib_flow_spec flows[];
};
@@ -2137,7 +2160,6 @@ struct ib_flow_action {
};
struct ib_mad;
-struct ib_grh;
enum ib_process_mad_flags {
IB_MAD_IGNORE_MKEY = 1,
@@ -2173,15 +2195,17 @@ struct ib_port_data {
struct ib_port_immutable immutable;
spinlock_t pkey_list_lock;
+
+ spinlock_t netdev_lock;
+
struct list_head pkey_list;
struct ib_port_cache cache;
- spinlock_t netdev_lock;
struct net_device __rcu *netdev;
struct hlist_node ndev_hash_link;
struct rdma_port_counter port_counter;
- struct rdma_hw_stats *hw_stats;
+ struct ib_port *sysfs;
};
/* rdma netdev type - specifies protocol type */
@@ -2197,7 +2221,8 @@ enum rdma_netdev_t {
struct rdma_netdev {
void *clnt_priv;
struct ib_device *hca;
- u8 port_num;
+ u32 port_num;
+ int mtu;
/*
* cleanup function must be specified.
@@ -2217,6 +2242,8 @@ struct rdma_netdev {
int set_qkey, u32 qkey);
int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
union ib_gid *gid, u16 mlid);
+ /* timeout */
+ void (*tx_timeout)(struct net_device *dev, unsigned int txqueue);
};
struct rdma_netdev_alloc_params {
@@ -2225,13 +2252,14 @@ struct rdma_netdev_alloc_params {
unsigned int rxqs;
void *param;
- int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num,
+ int (*initialize_rdma_netdev)(struct ib_device *device, u32 port_num,
struct net_device *netdev, void *param);
};
struct ib_odp_counters {
atomic64_t faults;
atomic64_t invalidations;
+ atomic64_t prefetch;
};
struct ib_counters {
@@ -2259,8 +2287,13 @@ struct iw_cm_conn_param;
!__same_type(((struct drv_struct *)NULL)->member, \
struct ib_struct)))
-#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \
- ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp))
+#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \
+ ((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \
+ gfp, false))
+
+#define rdma_zalloc_drv_obj_numa(ib_dev, ib_type) \
+ ((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \
+ GFP_KERNEL, true))
#define rdma_zalloc_drv_obj(ib_dev, ib_type) \
rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
@@ -2293,6 +2326,14 @@ struct ib_device_ops {
u32 uverbs_abi_ver;
unsigned int uverbs_no_driver_id_binding:1;
+ /*
+ * NOTE: New drivers should not make use of device_group; instead new
+ * device parameter should be exposed via netlink command. This
+ * mechanism exists only for existing drivers.
+ */
+ const struct attribute_group *device_group;
+ const struct attribute_group **port_groups;
+
int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
const struct ib_send_wr **bad_send_wr);
int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
@@ -2302,12 +2343,11 @@ struct ib_device_ops {
int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
- int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
int (*post_srq_recv)(struct ib_srq *srq,
const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
int (*process_mad)(struct ib_device *device, int process_mad_flags,
- u8 port_num, const struct ib_wc *in_wc,
+ u32 port_num, const struct ib_wc *in_wc,
const struct ib_grh *in_grh,
const struct ib_mad *in_mad, struct ib_mad *out_mad,
size_t *out_mad_size, u16 *out_mad_pkey_index);
@@ -2319,9 +2359,9 @@ struct ib_device_ops {
void (*get_dev_fw_str)(struct ib_device *device, char *str);
const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
int comp_vector);
- int (*query_port)(struct ib_device *device, u8 port_num,
+ int (*query_port)(struct ib_device *device, u32 port_num,
struct ib_port_attr *port_attr);
- int (*modify_port)(struct ib_device *device, u8 port_num,
+ int (*modify_port)(struct ib_device *device, u32 port_num,
int port_modify_mask,
struct ib_port_modify *port_modify);
/**
@@ -2330,10 +2370,10 @@ struct ib_device_ops {
* structure to avoid cache line misses when accessing struct ib_device
* in fast paths.
*/
- int (*get_port_immutable)(struct ib_device *device, u8 port_num,
+ int (*get_port_immutable)(struct ib_device *device, u32 port_num,
struct ib_port_immutable *immutable);
enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
- u8 port_num);
+ u32 port_num);
/**
* When calling get_netdev, the HW vendor's driver should return the
* net device of device @device at port @port_num or NULL if such
@@ -2342,7 +2382,8 @@ struct ib_device_ops {
* that this function returns NULL before the net device has finished
* NETDEV_UNREGISTER state.
*/
- struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
+ struct net_device *(*get_netdev)(struct ib_device *device,
+ u32 port_num);
/**
* rdma netdev operation
*
@@ -2350,11 +2391,11 @@ struct ib_device_ops {
* must return -EOPNOTSUPP if it doesn't support the specified type.
*/
struct net_device *(*alloc_rdma_netdev)(
- struct ib_device *device, u8 port_num, enum rdma_netdev_t type,
+ struct ib_device *device, u32 port_num, enum rdma_netdev_t type,
const char *name, unsigned char name_assign_type,
void (*setup)(struct net_device *));
- int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
+ int (*rdma_netdev_get_params)(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params);
/**
@@ -2362,7 +2403,7 @@ struct ib_device_ops {
* link layer is either IB or iWarp. It is no-op if @port_num port
* is RoCE link layer.
*/
- int (*query_gid)(struct ib_device *device, u8 port_num, int index,
+ int (*query_gid)(struct ib_device *device, u32 port_num, int index,
union ib_gid *gid);
/**
* When calling add_gid, the HW vendor's driver should add the gid
@@ -2387,7 +2428,7 @@ struct ib_device_ops {
* This function is only called when roce_gid_table is used.
*/
int (*del_gid)(const struct ib_gid_attr *attr, void **context);
- int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
+ int (*query_pkey)(struct ib_device *device, u32 port_num, u16 index,
u16 *pkey);
int (*alloc_ucontext)(struct ib_ucontext *context,
struct ib_udata *udata);
@@ -2402,12 +2443,14 @@ struct ib_device_ops {
void (*mmap_free)(struct rdma_user_mmap_entry *entry);
void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
- void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
- int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata);
+ int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+ int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata);
+ int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata);
int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
- void (*destroy_ah)(struct ib_ah *ah, u32 flags);
+ int (*destroy_ah)(struct ib_ah *ah, u32 flags);
int (*create_srq)(struct ib_srq *srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
@@ -2415,10 +2458,9 @@ struct ib_device_ops {
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
- void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
- struct ib_qp *(*create_qp)(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata);
+ int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
+ int (*create_qp)(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_udata *udata);
int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
@@ -2427,18 +2469,23 @@ struct ib_device_ops {
int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
- void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
+ int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
- int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct ib_pd *pd, struct ib_udata *udata);
+ struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr, int fd,
+ int mr_access_flags,
+ struct ib_udata *udata);
+ struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd,
u32 max_num_data_sg,
u32 max_num_meta_sg);
@@ -2446,58 +2493,49 @@ struct ib_device_ops {
enum ib_uverbs_advise_mr_advice advice, u32 flags,
struct ib_sge *sg_list, u32 num_sge,
struct uverbs_attr_bundle *attrs);
+
+ /*
+ * Kernel users should universally support relaxed ordering (RO), as
+ * they are designed to read data only after observing the CQE and use
+ * the DMA API correctly.
+ *
+ * Some drivers implicitly enable RO if platform supports it.
+ */
int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
- struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+ int (*alloc_mw)(struct ib_mw *mw, struct ib_udata *udata);
int (*dealloc_mw)(struct ib_mw *mw);
- struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags,
- struct ib_fmr_attr *fmr_attr);
- int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len,
- u64 iova);
- int (*unmap_fmr)(struct list_head *fmr_list);
- int (*dealloc_fmr)(struct ib_fmr *fmr);
int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
- struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
- struct ib_udata *udata);
+ int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
struct ib_flow *(*create_flow)(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain, struct ib_udata *udata);
+ struct ib_udata *udata);
int (*destroy_flow)(struct ib_flow *flow_id);
- struct ib_flow_action *(*create_flow_action_esp)(
- struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
int (*destroy_flow_action)(struct ib_flow_action *action);
- int (*modify_flow_action_esp)(
- struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
- int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
+ int (*set_vf_link_state)(struct ib_device *device, int vf, u32 port,
int state);
- int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
+ int (*get_vf_config)(struct ib_device *device, int vf, u32 port,
struct ifla_vf_info *ivf);
- int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
+ int (*get_vf_stats)(struct ib_device *device, int vf, u32 port,
struct ifla_vf_stats *stats);
- int (*get_vf_guid)(struct ib_device *device, int vf, u8 port,
+ int (*get_vf_guid)(struct ib_device *device, int vf, u32 port,
struct ifla_vf_guid *node_guid,
struct ifla_vf_guid *port_guid);
- int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
+ int (*set_vf_guid)(struct ib_device *device, int vf, u32 port, u64 guid,
int type);
struct ib_wq *(*create_wq)(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
- void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
+ int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask, struct ib_udata *udata);
- struct ib_rwq_ind_table *(*create_rwq_ind_table)(
- struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
+ int (*create_rwq_ind_table)(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
struct ib_dm *(*alloc_dm)(struct ib_device *device,
struct ib_ucontext *context,
@@ -2507,8 +2545,8 @@ struct ib_device_ops {
struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
struct ib_dm_mr_attr *attr,
struct uverbs_attr_bundle *attrs);
- struct ib_counters *(*create_counters)(
- struct ib_device *device, struct uverbs_attr_bundle *attrs);
+ int (*create_counters)(struct ib_counters *counters,
+ struct uverbs_attr_bundle *attrs);
int (*destroy_counters)(struct ib_counters *counters);
int (*read_counters)(struct ib_counters *counters,
struct ib_counters_read_attr *counters_read_attr,
@@ -2519,13 +2557,14 @@ struct ib_device_ops {
unsigned int *meta_sg_offset);
/**
- * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
- * driver initialized data. The struct is kfree()'ed by the sysfs
- * core when the device is removed. A lifespan of -1 in the return
- * struct tells the core to set a default lifespan.
+ * alloc_hw_[device,port]_stats - Allocate a struct rdma_hw_stats and
+ * fill in the driver initialized data. The struct is kfree()'ed by
+ * the sysfs core when the device is removed. A lifespan of -1 in the
+ * return struct tells the core to set a default lifespan.
*/
- struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
- u8 port_num);
+ struct rdma_hw_stats *(*alloc_hw_device_stats)(struct ib_device *device);
+ struct rdma_hw_stats *(*alloc_hw_port_stats)(struct ib_device *device,
+ u32 port_num);
/**
* get_hw_stats - Fill in the counter value(s) in the stats struct.
* @index - The index in the value array we wish to have updated, or
@@ -2539,18 +2578,25 @@ struct ib_device_ops {
* one given in index at their option
*/
int (*get_hw_stats)(struct ib_device *device,
- struct rdma_hw_stats *stats, u8 port, int index);
- /*
- * This function is called once for each port when a ib device is
- * registered.
+ struct rdma_hw_stats *stats, u32 port, int index);
+
+ /**
+ * modify_hw_stat - Modify the counter configuration
+ * @enable: true/false when enable/disable a counter
+ * Return codes - 0 on success or error code otherwise.
*/
- int (*init_port)(struct ib_device *device, u8 port_num,
- struct kobject *port_sysfs);
+ int (*modify_hw_stat)(struct ib_device *device, u32 port,
+ unsigned int counter_index, bool enable);
/**
* Allows rdma drivers to add their own restrack attributes.
*/
- int (*fill_res_entry)(struct sk_buff *msg,
- struct rdma_restrack_entry *entry);
+ int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
+ int (*fill_res_mr_entry_raw)(struct sk_buff *msg, struct ib_mr *ibmr);
+ int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq);
+ int (*fill_res_cq_entry_raw)(struct sk_buff *msg, struct ib_cq *ibcq);
+ int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp);
+ int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp);
+ int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id);
/* Device lifecycle callbacks */
/*
@@ -2605,14 +2651,28 @@ struct ib_device_ops {
* Allows rdma drivers to add their own restrack attributes
* dumped via 'rdma stat' iproute2 command.
*/
- int (*fill_stat_entry)(struct sk_buff *msg,
- struct rdma_restrack_entry *entry);
+ int (*fill_stat_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
+
+ /* query driver for its ucontext properties */
+ int (*query_ucontext)(struct ib_ucontext *context,
+ struct uverbs_attr_bundle *attrs);
+
+ /*
+ * Provide NUMA node. This API exists for rdmavt/hfi1 only.
+ * Everyone else relies on Linux memory management model.
+ */
+ int (*get_numa_node)(struct ib_device *dev);
DECLARE_RDMA_OBJ_SIZE(ib_ah);
+ DECLARE_RDMA_OBJ_SIZE(ib_counters);
DECLARE_RDMA_OBJ_SIZE(ib_cq);
+ DECLARE_RDMA_OBJ_SIZE(ib_mw);
DECLARE_RDMA_OBJ_SIZE(ib_pd);
+ DECLARE_RDMA_OBJ_SIZE(ib_qp);
+ DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table);
DECLARE_RDMA_OBJ_SIZE(ib_srq);
DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
+ DECLARE_RDMA_OBJ_SIZE(ib_xrcd);
};
struct ib_core_device {
@@ -2659,14 +2719,14 @@ struct ib_device {
struct ib_core_device coredev;
};
- /* First group for device attributes,
- * Second group for driver provided attributes (optional).
- * It is NULL terminated array.
+ /* First group is for device attributes,
+ * Second group is for driver provided attributes (optional).
+ * Third group is for the hw_stats
+ * It is a NULL terminated array.
*/
- const struct attribute_group *groups[3];
+ const struct attribute_group *groups[4];
u64 uverbs_cmd_mask;
- u64 uverbs_ex_cmd_mask;
char node_desc[IB_DEVICE_NODE_DESC_MAX];
__be64 node_guid;
@@ -2677,16 +2737,19 @@ struct ib_device {
/* CQ adaptive moderation (RDMA DIM) */
u16 use_cq_dim:1;
u8 node_type;
- u8 phys_port_cnt;
+ u32 phys_port_cnt;
struct ib_device_attr attrs;
- struct attribute_group *hw_stats_ag;
- struct rdma_hw_stats *hw_stats;
+ struct hw_stats_device_data *hw_stats_data;
#ifdef CONFIG_CGROUP_RDMA
struct rdmacg_device cg_device;
#endif
u32 index;
+
+ spinlock_t cq_pools_lock;
+ struct list_head cq_pools[IB_POLL_LAST_POOL_TYPE + 1];
+
struct rdma_restrack_root *res;
const struct uapi_definition *driver_def;
@@ -2709,12 +2772,22 @@ struct ib_device {
/* Used by iWarp CM */
char iw_ifname[IFNAMSIZ];
u32 iw_driver_flags;
+ u32 lag_flags;
};
+static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size,
+ gfp_t gfp, bool is_numa_aware)
+{
+ if (is_numa_aware && dev->ops.get_numa_node)
+ return kzalloc_node(size, gfp, dev->ops.get_numa_node(dev));
+
+ return kzalloc(size, gfp);
+}
+
struct ib_client_nl_info;
struct ib_client {
const char *name;
- void (*add) (struct ib_device *);
+ int (*add)(struct ib_device *ibdev);
void (*remove)(struct ib_device *, void *client_data);
void (*rename)(struct ib_device *dev, void *client_data);
int (*get_nl_info)(struct ib_device *ibdev, void *client_data,
@@ -2738,7 +2811,7 @@ struct ib_client {
* netdev. */
struct net_device *(*get_net_dev_by_params)(
struct ib_device *dev,
- u8 port,
+ u32 port,
u16 pkey,
const union ib_gid *gid,
const struct sockaddr *addr,
@@ -2778,7 +2851,8 @@ void ib_dealloc_device(struct ib_device *device);
void ib_get_device_fw_str(struct ib_device *device, char *str);
-int ib_register_device(struct ib_device *device, const char *name);
+int ib_register_device(struct ib_device *device, const char *name,
+ struct device *dma_device);
void ib_unregister_device(struct ib_device *device);
void ib_unregister_driver(enum rdma_driver_id driver_id);
void ib_unregister_device_and_put(struct ib_device *device);
@@ -2850,6 +2924,15 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
size_t length, u32 min_pgoff,
u32 max_pgoff);
+static inline int
+rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length, u32 pgoff)
+{
+ return rdma_user_mmap_entry_insert_range(ucontext, entry, length, pgoff,
+ pgoff);
+}
+
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
unsigned long pgoff);
@@ -2896,46 +2979,6 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
}
/**
- * ib_is_destroy_retryable - Check whether the uobject destruction
- * is retryable.
- * @ret: The initial destruction return code
- * @why: remove reason
- * @uobj: The uobject that is destroyed
- *
- * This function is a helper function that IB layer and low-level drivers
- * can use to consider whether the destruction of the given uobject is
- * retry-able.
- * It checks the original return code, if it wasn't success the destruction
- * is retryable according to the ucontext state (i.e. cleanup_retryable) and
- * the remove reason. (i.e. why).
- * Must be called with the object locked for destroy.
- */
-static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
- struct ib_uobject *uobj)
-{
- return ret && (why == RDMA_REMOVE_DESTROY ||
- uobj->context->cleanup_retryable);
-}
-
-/**
- * ib_destroy_usecnt - Called during destruction to check the usecnt
- * @usecnt: The usecnt atomic
- * @why: remove reason
- * @uobj: The uobject that is destroyed
- *
- * Non-zero usecnts will block destruction unless destruction was triggered by
- * a ucontext cleanup.
- */
-static inline int ib_destroy_usecnt(atomic_t *usecnt,
- enum rdma_remove_reason why,
- struct ib_uobject *uobj)
-{
- if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
- return -EBUSY;
- return 0;
-}
-
-/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
* the given QP state transition.
@@ -2958,10 +3001,10 @@ void ib_unregister_event_handler(struct ib_event_handler *event_handler);
void ib_dispatch_event(const struct ib_event *event);
int ib_query_port(struct ib_device *device,
- u8 port_num, struct ib_port_attr *port_attr);
+ u32 port_num, struct ib_port_attr *port_attr);
enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
- u8 port_num);
+ u32 port_num);
/**
* rdma_cap_ib_switch - Check if the device is IB switch
@@ -2985,7 +3028,7 @@ static inline bool rdma_cap_ib_switch(const struct ib_device *device)
*
* Return start port number
*/
-static inline u8 rdma_start_port(const struct ib_device *device)
+static inline u32 rdma_start_port(const struct ib_device *device)
{
return rdma_cap_ib_switch(device) ? 0 : 1;
}
@@ -2996,9 +3039,10 @@ static inline u8 rdma_start_port(const struct ib_device *device)
* @iter - The unsigned int to store the port number
*/
#define rdma_for_each_port(device, iter) \
- for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type( \
- unsigned int, iter))); \
- iter <= rdma_end_port(device); (iter)++)
+ for (iter = rdma_start_port(device + \
+ BUILD_BUG_ON_ZERO(!__same_type(u32, \
+ iter))); \
+ iter <= rdma_end_port(device); iter++)
/**
* rdma_end_port - Return the last valid port number for the device
@@ -3008,7 +3052,7 @@ static inline u8 rdma_start_port(const struct ib_device *device)
*
* Return last port number
*/
-static inline u8 rdma_end_port(const struct ib_device *device)
+static inline u32 rdma_end_port(const struct ib_device *device)
{
return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt;
}
@@ -3021,55 +3065,63 @@ static inline int rdma_is_port_valid(const struct ib_device *device,
}
static inline bool rdma_is_grh_required(const struct ib_device *device,
- u8 port_num)
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_PORT_IB_GRH_REQUIRED;
}
-static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_ib(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_IB;
}
-static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_roce(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
(RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
}
-static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
}
-static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_ROCE;
}
-static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_iwarp(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_IWARP;
}
-static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
+static inline bool rdma_ib_or_roce(const struct ib_device *device,
+ u32 port_num)
{
return rdma_protocol_ib(device, port_num) ||
rdma_protocol_roce(device, port_num);
}
-static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_raw_packet(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_RAW_PACKET;
}
-static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
+static inline bool rdma_protocol_usnic(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_PROT_USNIC;
@@ -3087,7 +3139,7 @@ static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_n
*
* Return: true if the port supports sending/receiving of MAD packets.
*/
-static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_ib_mad(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_IB_MAD;
@@ -3112,7 +3164,7 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
*
* Return: true if the port supports OPA MAD packet formats.
*/
-static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_opa_mad(struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_OPA_MAD;
@@ -3138,7 +3190,7 @@ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
*
* Return: true if the port provides an SMI.
*/
-static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_ib_smi(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_IB_SMI;
@@ -3159,7 +3211,7 @@ static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
* Return: true if the port supports an IB CM (this does not guarantee that
* a CM is actually running however).
*/
-static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_ib_cm(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_IB_CM;
@@ -3177,7 +3229,7 @@ static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
* Return: true if the port supports an iWARP CM (this does not guarantee that
* a CM is actually running however).
*/
-static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_iw_cm(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_IW_CM;
@@ -3198,7 +3250,7 @@ static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
* Administration interface. This does not imply that the SA service is
* running locally.
*/
-static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_ib_sa(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_IB_SA;
@@ -3221,7 +3273,8 @@ static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
* overhead of registering/unregistering with the SM and tracking of the
* total number of queue pairs attached to the multicast group.
*/
-static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_ib_mcast(const struct ib_device *device,
+ u32 port_num)
{
return rdma_cap_ib_sa(device, port_num);
}
@@ -3239,7 +3292,7 @@ static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num
* Return: true if the port uses a GID address to identify devices on the
* network.
*/
-static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_af_ib(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_AF_IB;
@@ -3261,7 +3314,7 @@ static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
* addition of a Global Route Header built from our Ethernet Address
* Handle into our header list for connectionless packets.
*/
-static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_eth_ah(const struct ib_device *device, u32 port_num)
{
return device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_ETH_AH;
@@ -3276,7 +3329,7 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
* Return: true if we are running on an OPA device which supports
* the extended OPA addressing.
*/
-static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
+static inline bool rdma_cap_opa_ah(struct ib_device *device, u32 port_num)
{
return (device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
@@ -3294,7 +3347,8 @@ static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
* Return the max MAD size required by the Port. Will return 0 if the port
* does not support MADs
*/
-static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
+static inline size_t rdma_max_mad_size(const struct ib_device *device,
+ u32 port_num)
{
return device->port_data[port_num].immutable.max_mad_size;
}
@@ -3313,7 +3367,7 @@ static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_n
* its GIDs.
*/
static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
- u8 port_num)
+ u32 port_num)
{
return rdma_protocol_roce(device, port_num) &&
device->ops.add_gid && device->ops.del_gid;
@@ -3332,57 +3386,82 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
}
/**
- * rdma_find_pg_bit - Find page bit given address and HW supported page sizes
+ * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
+ * @device: Device
+ * @port_num: 1 based Port number
*
- * @addr: address
- * @pgsz_bitmap: bitmap of HW supported page sizes
+ * Return true if port is an Intel OPA port , false if not
*/
-static inline unsigned int rdma_find_pg_bit(unsigned long addr,
- unsigned long pgsz_bitmap)
+static inline bool rdma_core_cap_opa_port(struct ib_device *device,
+ u32 port_num)
{
- unsigned long align;
- unsigned long pgsz;
-
- align = addr & -addr;
+ return (device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_PORT_INTEL_OPA) == RDMA_CORE_PORT_INTEL_OPA;
+}
- /* Find page bit such that addr is aligned to the highest supported
- * HW page size
- */
- pgsz = pgsz_bitmap & ~(-align << 1);
- if (!pgsz)
- return __ffs(pgsz_bitmap);
+/**
+ * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value.
+ * @device: Device
+ * @port_num: Port number
+ * @mtu: enum value of MTU
+ *
+ * Return the MTU size supported by the port as an integer value. Will return
+ * -1 if enum value of mtu is not supported.
+ */
+static inline int rdma_mtu_enum_to_int(struct ib_device *device, u32 port,
+ int mtu)
+{
+ if (rdma_core_cap_opa_port(device, port))
+ return opa_mtu_enum_to_int((enum opa_mtu)mtu);
+ else
+ return ib_mtu_enum_to_int((enum ib_mtu)mtu);
+}
- return __fls(pgsz);
+/**
+ * rdma_mtu_from_attr - Return the mtu of the port from the port attribute.
+ * @device: Device
+ * @port_num: Port number
+ * @attr: port attribute
+ *
+ * Return the MTU size supported by the port as an integer value.
+ */
+static inline int rdma_mtu_from_attr(struct ib_device *device, u32 port,
+ struct ib_port_attr *attr)
+{
+ if (rdma_core_cap_opa_port(device, port))
+ return attr->phys_mtu;
+ else
+ return ib_mtu_enum_to_int(attr->max_mtu);
}
-int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port,
int state);
-int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_config(struct ib_device *device, int vf, u32 port,
struct ifla_vf_info *info);
-int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_stats(struct ib_device *device, int vf, u32 port,
struct ifla_vf_stats *stats);
-int ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
struct ifla_vf_guid *node_guid,
struct ifla_vf_guid *port_guid);
-int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid,
int type);
int ib_query_pkey(struct ib_device *device,
- u8 port_num, u16 index, u16 *pkey);
+ u32 port_num, u16 index, u16 *pkey);
int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify);
int ib_modify_port(struct ib_device *device,
- u8 port_num, int port_modify_mask,
+ u32 port_num, int port_modify_mask,
struct ib_port_modify *port_modify);
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- u8 *port_num, u16 *index);
+ u32 *port_num, u16 *index);
int ib_find_pkey(struct ib_device *device,
- u8 port_num, u16 pkey, u16 *index);
+ u32 port_num, u16 pkey, u16 *index);
enum ib_pd_flags {
/*
@@ -3400,15 +3479,21 @@ enum ib_pd_flags {
struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
const char *caller);
+/**
+ * ib_alloc_pd - Allocates an unused protection domain.
+ * @device: The device on which to allocate the protection domain.
+ * @flags: protection domain flags
+ *
+ * A protection domain object provides an association between QPs, shared
+ * receive queues, address handles, memory regions, and memory windows.
+ *
+ * Every PD has a local_dma_lkey which can be used as the lkey value for local
+ * memory operations.
+ */
#define ib_alloc_pd(device, flags) \
__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
-/**
- * ib_dealloc_pd_user - Deallocate kernel/user PD
- * @pd: The protection domain
- * @udata: Valid user data or NULL for kernel objects
- */
-void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
/**
* ib_dealloc_pd - Deallocate kernel PD
@@ -3418,7 +3503,9 @@ void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
*/
static inline void ib_dealloc_pd(struct ib_pd *pd)
{
- ib_dealloc_pd_user(pd, NULL);
+ int ret = ib_dealloc_pd_user(pd, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel PD shouldn't fail");
}
enum rdma_create_ah_flags {
@@ -3489,7 +3576,7 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
* attributes which are initialized using ib_init_ah_attr_from_wc().
*
*/
-int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
+int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
struct rdma_ah_attr *ah_attr);
@@ -3506,7 +3593,7 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
* in all UD QP post sends.
*/
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
- const struct ib_grh *grh, u8 port_num);
+ const struct ib_grh *grh, u32 port_num);
/**
* rdma_modify_ah - Modifies the address vector associated with an address
@@ -3546,26 +3633,25 @@ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
*
* NOTE: for user ah use rdma_destroy_ah_user with valid udata!
*/
-static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags)
+static inline void rdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
- return rdma_destroy_ah_user(ah, flags, NULL);
+ int ret = rdma_destroy_ah_user(ah, flags, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel AH shouldn't fail");
}
-/**
- * ib_create_srq - Creates a SRQ associated with the specified protection
- * domain.
- * @pd: The protection domain associated with the SRQ.
- * @srq_init_attr: A list of initial attributes required to create the
- * SRQ. If SRQ creation succeeds, then the attributes are updated to
- * the actual capabilities of the created SRQ.
- *
- * srq_attr->max_wr and srq_attr->max_sge are read the determine the
- * requested size of the SRQ, and set to the actual values allocated
- * on return. If ib_create_srq() succeeds, then max_wr and max_sge
- * will always be at least as large as the requested values.
- */
-struct ib_srq *ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *srq_init_attr);
+struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_usrq_object *uobject,
+ struct ib_udata *udata);
+static inline struct ib_srq *
+ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr)
+{
+ if (!pd->device->ops.create_srq)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return ib_create_srq_user(pd, srq_init_attr, NULL, NULL);
+}
/**
* ib_modify_srq - Modifies the attributes for the specified SRQ.
@@ -3605,9 +3691,11 @@ int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
*
* NOTE: for user srq use ib_destroy_srq_user with valid udata!
*/
-static inline int ib_destroy_srq(struct ib_srq *srq)
+static inline void ib_destroy_srq(struct ib_srq *srq)
{
- return ib_destroy_srq_user(srq, NULL);
+ int ret = ib_destroy_srq_user(srq, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel SRQ shouldn't fail");
}
/**
@@ -3627,34 +3715,21 @@ static inline int ib_post_srq_recv(struct ib_srq *srq,
bad_recv_wr ? : &dummy);
}
+struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ const char *caller);
/**
- * ib_create_qp_user - Creates a QP associated with the specified protection
- * domain.
+ * ib_create_qp - Creates a kernel QP associated with the specific protection
+ * domain.
* @pd: The protection domain associated with the QP.
- * @qp_init_attr: A list of initial attributes required to create the
+ * @init_attr: A list of initial attributes required to create the
* QP. If QP creation succeeds, then the attributes are updated to
* the actual capabilities of the created QP.
- * @udata: Valid user data or NULL for kernel objects
- */
-struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata);
-
-/**
- * ib_create_qp - Creates a kernel QP associated with the specified protection
- * domain.
- * @pd: The protection domain associated with the QP.
- * @qp_init_attr: A list of initial attributes required to create the
- * QP. If QP creation succeeds, then the attributes are updated to
- * the actual capabilities of the created QP.
- * @udata: Valid user data or NULL for kernel objects
- *
- * NOTE: for user qp use ib_create_qp_user with valid udata!
*/
static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr)
+ struct ib_qp_init_attr *init_attr)
{
- return ib_create_qp_user(pd, qp_init_attr, NULL);
+ return ib_create_qp_kernel(pd, init_attr, KBUILD_MODNAME);
}
/**
@@ -3778,46 +3853,15 @@ static inline int ib_post_recv(struct ib_qp *qp,
return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
}
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx,
- const char *caller, struct ib_udata *udata);
-
-/**
- * ib_alloc_cq_user: Allocate kernel/user CQ
- * @dev: The IB device
- * @private: Private data attached to the CQE
- * @nr_cqe: Number of CQEs in the CQ
- * @comp_vector: Completion vector used for the IRQs
- * @poll_ctx: Context used for polling the CQ
- * @udata: Valid user data or NULL for kernel objects
- */
-static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev,
- void *private, int nr_cqe,
- int comp_vector,
- enum ib_poll_context poll_ctx,
- struct ib_udata *udata)
-{
- return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- KBUILD_MODNAME, udata);
-}
-
-/**
- * ib_alloc_cq: Allocate kernel CQ
- * @dev: The IB device
- * @private: Private data attached to the CQE
- * @nr_cqe: Number of CQEs in the CQ
- * @comp_vector: Completion vector used for the IRQs
- * @poll_ctx: Context used for polling the CQ
- *
- * NOTE: for user cq use ib_alloc_cq_user with valid udata!
- */
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+ int comp_vector, enum ib_poll_context poll_ctx,
+ const char *caller);
static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
int nr_cqe, int comp_vector,
enum ib_poll_context poll_ctx)
{
- return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- NULL);
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+ KBUILD_MODNAME);
}
struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
@@ -3839,24 +3883,7 @@ static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
KBUILD_MODNAME);
}
-/**
- * ib_free_cq_user - Free kernel/user CQ
- * @cq: The CQ to free
- * @udata: Valid user data or NULL for kernel objects
- */
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata);
-
-/**
- * ib_free_cq - Free kernel CQ
- * @cq: The CQ to free
- *
- * NOTE: for user cq use ib_free_cq_user with valid udata!
- */
-static inline void ib_free_cq(struct ib_cq *cq)
-{
- ib_free_cq_user(cq, NULL);
-}
-
+void ib_free_cq(struct ib_cq *cq);
int ib_process_cq_direct(struct ib_cq *cq, int budget);
/**
@@ -3914,7 +3941,9 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
*/
static inline void ib_destroy_cq(struct ib_cq *cq)
{
- ib_destroy_cq_user(cq, NULL);
+ int ret = ib_destroy_cq_user(cq, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
}
/**
@@ -3968,18 +3997,31 @@ static inline int ib_req_notify_cq(struct ib_cq *cq,
return cq->device->ops.req_notify_cq(cq, flags);
}
-/**
- * ib_req_ncomp_notif - Request completion notification when there are
- * at least the specified number of unreaped completions on the CQ.
- * @cq: The CQ to generate an event for.
- * @wc_cnt: The number of unreaped completions that should be on the
- * CQ before an event is generated.
+struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
+ int comp_vector_hint,
+ enum ib_poll_context poll_ctx);
+
+void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe);
+
+/*
+ * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to
+ * NULL. This causes the ib_dma* helpers to just stash the kernel virtual
+ * address into the dma address.
*/
-static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
+static inline bool ib_uses_virt_dma(struct ib_device *dev)
{
- return cq->device->ops.req_ncomp_notif ?
- cq->device->ops.req_ncomp_notif(cq, wc_cnt) :
- -ENOSYS;
+ return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device;
+}
+
+/*
+ * Check if a IB device's underlying DMA mapping supports P2PDMA transfers.
+ */
+static inline bool ib_dma_pci_p2p_dma_supported(struct ib_device *dev)
+{
+ if (ib_uses_virt_dma(dev))
+ return false;
+
+ return dma_pci_p2pdma_supported(dev->dma_device);
}
/**
@@ -3989,6 +4031,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
*/
static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
+ if (ib_uses_virt_dma(dev))
+ return 0;
return dma_mapping_error(dev->dma_device, dma_addr);
}
@@ -4003,6 +4047,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev,
void *cpu_addr, size_t size,
enum dma_data_direction direction)
{
+ if (ib_uses_virt_dma(dev))
+ return (uintptr_t)cpu_addr;
return dma_map_single(dev->dma_device, cpu_addr, size, direction);
}
@@ -4017,7 +4063,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
- dma_unmap_single(dev->dma_device, addr, size, direction);
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_single(dev->dma_device, addr, size, direction);
}
/**
@@ -4034,6 +4081,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev,
size_t size,
enum dma_data_direction direction)
{
+ if (ib_uses_virt_dma(dev))
+ return (uintptr_t)(page_address(page) + offset);
return dma_map_page(dev->dma_device, page, offset, size, direction);
}
@@ -4048,7 +4097,63 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
- dma_unmap_page(dev->dma_device, addr, size, direction);
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_page(dev->dma_device, addr, size, direction);
+}
+
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
+static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (ib_uses_virt_dma(dev))
+ return ib_dma_virt_map_sg(dev, sg, nents);
+ return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
+ dma_attrs);
+}
+
+static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
+ dma_attrs);
+}
+
+/**
+ * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses
+ * @dev: The device for which the DMA addresses are to be created
+ * @sg: The sg_table object describing the buffer
+ * @direction: The direction of the DMA
+ * @attrs: Optional DMA attributes for the map operation
+ */
+static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev,
+ struct sg_table *sgt,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ int nents;
+
+ if (ib_uses_virt_dma(dev)) {
+ nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents);
+ if (!nents)
+ return -EIO;
+ sgt->nents = nents;
+ return 0;
+ }
+ return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs);
+}
+
+static inline void ib_dma_unmap_sgtable_attrs(struct ib_device *dev,
+ struct sg_table *sgt,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_sgtable(dev->dma_device, sgt, direction, dma_attrs);
}
/**
@@ -4062,7 +4167,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- return dma_map_sg(dev->dma_device, sg, nents, direction);
+ return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0);
}
/**
@@ -4076,24 +4181,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- dma_unmap_sg(dev->dma_device, sg, nents, direction);
-}
-
-static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
-{
- return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
- dma_attrs);
-}
-
-static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
-{
- dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
+ ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0);
}
/**
@@ -4104,6 +4192,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
*/
static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
{
+ if (ib_uses_virt_dma(dev))
+ return UINT_MAX;
return dma_get_max_seg_size(dev->dma_device);
}
@@ -4119,7 +4209,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
size_t size,
enum dma_data_direction dir)
{
- dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+ if (!ib_uses_virt_dma(dev))
+ dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
}
/**
@@ -4134,36 +4225,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
size_t size,
enum dma_data_direction dir)
{
- dma_sync_single_for_device(dev->dma_device, addr, size, dir);
-}
-
-/**
- * ib_dma_alloc_coherent - Allocate memory and map it for DMA
- * @dev: The device for which the DMA address is requested
- * @size: The size of the region to allocate in bytes
- * @dma_handle: A pointer for returning the DMA address of the region
- * @flag: memory allocator flags
- */
-static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
- size_t size,
- dma_addr_t *dma_handle,
- gfp_t flag)
-{
- return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag);
-}
-
-/**
- * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent()
- * @dev: The device for which the DMA addresses were allocated
- * @size: The size of the region
- * @cpu_addr: the address returned by ib_dma_alloc_coherent()
- * @dma_handle: the DMA address returned by ib_dma_alloc_coherent()
- */
-static inline void ib_dma_free_coherent(struct ib_device *dev,
- size_t size, void *cpu_addr,
- dma_addr_t dma_handle)
-{
- dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
+ if (!ib_uses_virt_dma(dev))
+ dma_sync_single_for_device(dev->dma_device, addr, size, dir);
}
/* ib_reg_user_mr - register a memory region for virtual addresses from kernel
@@ -4199,14 +4262,8 @@ static inline int ib_dereg_mr(struct ib_mr *mr)
return ib_dereg_mr_user(mr, NULL);
}
-struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
-
-static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type, u32 max_num_sg)
-{
- return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL);
-}
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
u32 max_num_data_sg,
@@ -4236,45 +4293,6 @@ static inline u32 ib_inc_rkey(u32 rkey)
}
/**
- * ib_alloc_fmr - Allocates a unmapped fast memory region.
- * @pd: The protection domain associated with the unmapped region.
- * @mr_access_flags: Specifies the memory access rights.
- * @fmr_attr: Attributes of the unmapped region.
- *
- * A fast memory region must be mapped before it can be used as part of
- * a work request.
- */
-struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
- int mr_access_flags,
- struct ib_fmr_attr *fmr_attr);
-
-/**
- * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region.
- * @fmr: The fast memory region to associate with the pages.
- * @page_list: An array of physical pages to map to the fast memory region.
- * @list_len: The number of pages in page_list.
- * @iova: The I/O virtual address to use with the mapped region.
- */
-static inline int ib_map_phys_fmr(struct ib_fmr *fmr,
- u64 *page_list, int list_len,
- u64 iova)
-{
- return fmr->device->ops.map_phys_fmr(fmr, page_list, list_len, iova);
-}
-
-/**
- * ib_unmap_fmr - Removes the mapping from a list of fast memory regions.
- * @fmr_list: A linked list of fast memory regions to unmap.
- */
-int ib_unmap_fmr(struct list_head *fmr_list);
-
-/**
- * ib_dealloc_fmr - Deallocates a fast memory region.
- * @fmr: The fast memory region to deallocate.
- */
-int ib_dealloc_fmr(struct ib_fmr *fmr);
-
-/**
* ib_attach_mcast - Attaches the specified QP to a multicast group.
* @qp: QP to attach to the multicast group. The QP must be type
* IB_QPT_UD.
@@ -4296,23 +4314,12 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
*/
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-/**
- * ib_alloc_xrcd - Allocates an XRC domain.
- * @device: The device on which to allocate the XRC domain.
- * @caller: Module name for kernel consumers
- */
-struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
-#define ib_alloc_xrcd(device) \
- __ib_alloc_xrcd((device), KBUILD_MODNAME)
+struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
+ struct inode *inode, struct ib_udata *udata);
+int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
-/**
- * ib_dealloc_xrcd - Deallocates an XRC domain.
- * @xrcd: The XRC domain to deallocate.
- * @udata: Valid user data or NULL for kernel object
- */
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
-
-static inline int ib_check_mr_access(int flags)
+static inline int ib_check_mr_access(struct ib_device *ib_dev,
+ unsigned int flags)
{
/*
* Local write permission is required if remote write or
@@ -4325,6 +4332,9 @@ static inline int ib_check_mr_access(int flags)
if (flags & ~IB_ACCESS_SUPPORTED)
return -EINVAL;
+ if (flags & IB_ACCESS_ON_DEMAND &&
+ !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING))
+ return -EINVAL;
return 0;
}
@@ -4380,22 +4390,16 @@ struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
enum rdma_driver_id driver_id);
struct ib_device *ib_device_get_by_name(const char *name,
enum rdma_driver_id driver_id);
-struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
+struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port,
u16 pkey, const union ib_gid *gid,
const struct sockaddr *addr);
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
unsigned int port);
-struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
+struct net_device *ib_device_netdev(struct ib_device *dev, u32 port);
struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr);
-int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
-int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
- u32 wq_attr_mask);
-struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr*
- wq_ind_table_init_attr);
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);
int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size);
@@ -4423,7 +4427,8 @@ void ib_drain_rq(struct ib_qp *qp);
void ib_drain_sq(struct ib_qp *qp);
void ib_drain_qp(struct ib_qp *qp);
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width);
+int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed,
+ u8 *width);
static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr)
{
@@ -4491,12 +4496,12 @@ static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr)
return false;
}
-static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u8 port_num)
+static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u32 port_num)
{
attr->port_num = port_num;
}
-static inline u8 rdma_ah_get_port_num(const struct rdma_ah_attr *attr)
+static inline u32 rdma_ah_get_port_num(const struct rdma_ah_attr *attr)
{
return attr->port_num;
}
@@ -4594,7 +4599,7 @@ void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
* @port_num: Port number
*/
static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
- u8 port_num)
+ u32 port_num)
{
if (rdma_protocol_roce(dev, port_num))
return RDMA_AH_ATTR_TYPE_ROCE;
@@ -4609,7 +4614,7 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
/**
* ib_lid_cpu16 - Return lid in 16bit CPU encoding.
- * In the current implementation the only way to get
+ * In the current implementation the only way to
* get the 32bit lid is from other sources for OPA.
* For IB, lids will always be 16bits so cast the
* value accordingly.
@@ -4666,40 +4671,18 @@ struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
-struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *));
-int rdma_init_netdev(struct ib_device *device, u8 port_num,
+int rdma_init_netdev(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *),
struct net_device *netdev);
/**
- * rdma_set_device_sysfs_group - Set device attributes group to have
- * driver specific sysfs entries at
- * for infiniband class.
- *
- * @device: device pointer for which attributes to be created
- * @group: Pointer to group which should be added when device
- * is registered with sysfs.
- * rdma_set_device_sysfs_group() allows existing drivers to expose one
- * group per device to have sysfs attributes.
- *
- * NOTE: New drivers should not make use of this API; instead new device
- * parameter should be exposed via netlink command. This API and mechanism
- * exist only for existing drivers.
- */
-static inline void
-rdma_set_device_sysfs_group(struct ib_device *dev,
- const struct attribute_group *group)
-{
- dev->groups[1] = group;
-}
-
-/**
* rdma_device_to_ibdev - Get ib_device pointer from device pointer
*
* @device: device pointer for which ib_device pointer to retrieve
@@ -4716,16 +4699,94 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
}
/**
+ * ibdev_to_node - return the NUMA node for a given ib_device
+ * @dev: device to get the NUMA node for.
+ */
+static inline int ibdev_to_node(struct ib_device *ibdev)
+{
+ struct device *parent = ibdev->dev.parent;
+
+ if (!parent)
+ return NUMA_NO_NODE;
+ return dev_to_node(parent);
+}
+
+/**
* rdma_device_to_drv_device - Helper macro to reach back to driver's
* ib_device holder structure from device pointer.
*
* NOTE: New drivers should not make use of this API; This API is only for
* existing drivers who have exposed sysfs entries using
- * rdma_set_device_sysfs_group().
+ * ops->device_group.
*/
#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \
container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member)
bool rdma_dev_access_netns(const struct ib_device *device,
const struct net *net);
+
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF)
+#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
+
+/**
+ * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based
+ * on the flow_label
+ *
+ * This function will convert the 20 bit flow_label input to a valid RoCE v2
+ * UDP src port 14 bit value. All RoCE V2 drivers should use this same
+ * convention.
+ */
+static inline u16 rdma_flow_label_to_udp_sport(u32 fl)
+{
+ u32 fl_low = fl & 0x03fff, fl_high = fl & 0xFC000;
+
+ fl_low ^= fl_high >> 14;
+ return (u16)(fl_low | IB_ROCE_UDP_ENCAP_VALID_PORT_MIN);
+}
+
+/**
+ * rdma_calc_flow_label - generate a RDMA symmetric flow label value based on
+ * local and remote qpn values
+ *
+ * This function folded the multiplication results of two qpns, 24 bit each,
+ * fields, and converts it to a 20 bit results.
+ *
+ * This function will create symmetric flow_label value based on the local
+ * and remote qpn values. this will allow both the requester and responder
+ * to calculate the same flow_label for a given connection.
+ *
+ * This helper function should be used by driver in case the upper layer
+ * provide a zero flow_label value. This is to improve entropy of RDMA
+ * traffic in the network.
+ */
+static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn)
+{
+ u64 v = (u64)lqpn * rqpn;
+
+ v ^= v >> 20;
+ v ^= v >> 40;
+
+ return (u32)(v & IB_GRH_FLOWLABEL_MASK);
+}
+
+/**
+ * rdma_get_udp_sport - Calculate and set UDP source port based on the flow
+ * label. If flow label is not defined in GRH then
+ * calculate it based on lqpn/rqpn.
+ *
+ * @fl: flow label from GRH
+ * @lqpn: local qp number
+ * @rqpn: remote qp number
+ */
+static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
+{
+ if (!fl)
+ fl = rdma_calc_flow_label(lqpn, rqpn);
+
+ return rdma_flow_label_to_udp_sport(fl);
+}
+
+const struct ib_port_immutable*
+ib_port_immutable_read(struct ib_device *dev, unsigned int port);
#endif /* IB_VERBS_H */