Diffstat (limited to 'include/rdma/ib_verbs.h')
-rw-r--r-- | include/rdma/ib_verbs.h | 1016 |
1 file changed, 645 insertions, 371 deletions
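The largest behavioural change in this patch is the split of kernel-only capability bits (local DMA lkey, ODP, SG gaps, user unregistration, etc.) out of ib_device_cap_flags into the new enum ib_kernel_cap_flags, reported through the added ib_device_attr.kernel_cap_flags field. A minimal illustrative sketch of how an in-kernel ULP would test capabilities after this change follows; the my_ulp_* name is hypothetical and not part of the patch:

/*
 * Illustrative sketch, not part of the patch: kernel-only capabilities are
 * now IBK_* bits in attrs.kernel_cap_flags, while userspace-visible ones
 * stay in attrs.device_cap_flags.
 */
static bool my_ulp_supports_odp(struct ib_device *dev)
{
	if (!(dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING))
		return false;

	/* Userspace-visible capability bits remain in device_cap_flags */
	return dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS;
}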
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9bf6c319a670..af43a8d2a74a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2,7 +2,7 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2020 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -12,6 +12,7 @@ #ifndef IB_VERBS_H #define IB_VERBS_H +#include <linux/ethtool.h> #include <linux/types.h> #include <linux/device.h> #include <linux/dma-mapping.h> @@ -49,6 +50,8 @@ struct ib_uqp_object; struct ib_usrq_object; struct ib_uwq_object; struct rdma_cm_id; +struct ib_port; +struct hw_stats_device_data; extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; @@ -56,9 +59,6 @@ extern struct workqueue_struct *ib_comp_unbound_wq; struct ib_ucq_object; -__printf(3, 4) __cold -void ibdev_printk(const char *level, const struct ib_device *ibdev, - const char *format, ...); __printf(2, 3) __cold void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...); __printf(2, 3) __cold @@ -151,7 +151,7 @@ struct ib_gid_attr { union ib_gid gid; enum ib_gid_type gid_type; u16 index; - u8 port_num; + u32 port_num; }; enum { @@ -217,32 +217,24 @@ enum rdma_link_layer { }; enum ib_device_cap_flags { - IB_DEVICE_RESIZE_MAX_WR = (1 << 0), - IB_DEVICE_BAD_PKEY_CNTR = (1 << 1), - IB_DEVICE_BAD_QKEY_CNTR = (1 << 2), - IB_DEVICE_RAW_MULTI = (1 << 3), - IB_DEVICE_AUTO_PATH_MIG = (1 << 4), - IB_DEVICE_CHANGE_PHY_PORT = (1 << 5), - IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6), - IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7), - IB_DEVICE_SHUTDOWN_PORT = (1 << 8), - /* Not in use, former INIT_TYPE = (1 << 9),*/ - IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10), - IB_DEVICE_SYS_IMAGE_GUID = (1 << 11), - IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12), - IB_DEVICE_SRQ_RESIZE = (1 << 13), - IB_DEVICE_N_NOTIFY_CQ = (1 << 14), - - /* - * This device supports a per-device lkey or stag that can be - * used without performing a memory registration for the local - * memory. Note that ULPs should never check this flag, but - * instead of use the local_dma_lkey flag in the ib_pd structure, - * which will always contain a usable lkey. 
- */ - IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15), - /* Reserved, old SEND_W_INV = (1 << 16),*/ - IB_DEVICE_MEM_WINDOW = (1 << 17), + IB_DEVICE_RESIZE_MAX_WR = IB_UVERBS_DEVICE_RESIZE_MAX_WR, + IB_DEVICE_BAD_PKEY_CNTR = IB_UVERBS_DEVICE_BAD_PKEY_CNTR, + IB_DEVICE_BAD_QKEY_CNTR = IB_UVERBS_DEVICE_BAD_QKEY_CNTR, + IB_DEVICE_RAW_MULTI = IB_UVERBS_DEVICE_RAW_MULTI, + IB_DEVICE_AUTO_PATH_MIG = IB_UVERBS_DEVICE_AUTO_PATH_MIG, + IB_DEVICE_CHANGE_PHY_PORT = IB_UVERBS_DEVICE_CHANGE_PHY_PORT, + IB_DEVICE_UD_AV_PORT_ENFORCE = IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE, + IB_DEVICE_CURR_QP_STATE_MOD = IB_UVERBS_DEVICE_CURR_QP_STATE_MOD, + IB_DEVICE_SHUTDOWN_PORT = IB_UVERBS_DEVICE_SHUTDOWN_PORT, + /* IB_DEVICE_INIT_TYPE = IB_UVERBS_DEVICE_INIT_TYPE, (not in use) */ + IB_DEVICE_PORT_ACTIVE_EVENT = IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT, + IB_DEVICE_SYS_IMAGE_GUID = IB_UVERBS_DEVICE_SYS_IMAGE_GUID, + IB_DEVICE_RC_RNR_NAK_GEN = IB_UVERBS_DEVICE_RC_RNR_NAK_GEN, + IB_DEVICE_SRQ_RESIZE = IB_UVERBS_DEVICE_SRQ_RESIZE, + IB_DEVICE_N_NOTIFY_CQ = IB_UVERBS_DEVICE_N_NOTIFY_CQ, + + /* Reserved, old SEND_W_INV = 1 << 16,*/ + IB_DEVICE_MEM_WINDOW = IB_UVERBS_DEVICE_MEM_WINDOW, /* * Devices should set IB_DEVICE_UD_IP_SUM if they support * insertion of UDP and TCP checksum on outgoing UD IPoIB @@ -250,9 +242,8 @@ enum ib_device_cap_flags { * incoming messages. Setting this flag implies that the * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. */ - IB_DEVICE_UD_IP_CSUM = (1 << 18), - IB_DEVICE_UD_TSO = (1 << 19), - IB_DEVICE_XRC = (1 << 20), + IB_DEVICE_UD_IP_CSUM = IB_UVERBS_DEVICE_UD_IP_CSUM, + IB_DEVICE_XRC = IB_UVERBS_DEVICE_XRC, /* * This device supports the IB "base memory management extension", @@ -263,31 +254,57 @@ enum ib_device_cap_flags { * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the * stag. */ - IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21), - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22), - IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23), - IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24), - IB_DEVICE_RC_IP_CSUM = (1 << 25), + IB_DEVICE_MEM_MGT_EXTENSIONS = IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS, + IB_DEVICE_MEM_WINDOW_TYPE_2A = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2A, + IB_DEVICE_MEM_WINDOW_TYPE_2B = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2B, + IB_DEVICE_RC_IP_CSUM = IB_UVERBS_DEVICE_RC_IP_CSUM, /* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */ - IB_DEVICE_RAW_IP_CSUM = (1 << 26), - /* - * Devices should set IB_DEVICE_CROSS_CHANNEL if they - * support execution of WQEs that involve synchronization - * of I/O operations with single completion queue managed - * by hardware. - */ - IB_DEVICE_CROSS_CHANNEL = (1 << 27), - IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), - IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30), - IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), - IB_DEVICE_SG_GAPS_REG = (1ULL << 32), - IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), + IB_DEVICE_RAW_IP_CSUM = IB_UVERBS_DEVICE_RAW_IP_CSUM, + IB_DEVICE_MANAGED_FLOW_STEERING = + IB_UVERBS_DEVICE_MANAGED_FLOW_STEERING, /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */ - IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), - IB_DEVICE_RDMA_NETDEV_OPA = (1ULL << 35), + IB_DEVICE_RAW_SCATTER_FCS = IB_UVERBS_DEVICE_RAW_SCATTER_FCS, /* The device supports padding incoming writes to cacheline. 
*/ - IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), - IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37), + IB_DEVICE_PCI_WRITE_END_PADDING = + IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING, + /* Placement type attributes */ + IB_DEVICE_FLUSH_GLOBAL = IB_UVERBS_DEVICE_FLUSH_GLOBAL, + IB_DEVICE_FLUSH_PERSISTENT = IB_UVERBS_DEVICE_FLUSH_PERSISTENT, + IB_DEVICE_ATOMIC_WRITE = IB_UVERBS_DEVICE_ATOMIC_WRITE, +}; + +enum ib_kernel_cap_flags { + /* + * This device supports a per-device lkey or stag that can be + * used without performing a memory registration for the local + * memory. Note that ULPs should never check this flag, but + * instead of use the local_dma_lkey flag in the ib_pd structure, + * which will always contain a usable lkey. + */ + IBK_LOCAL_DMA_LKEY = 1 << 0, + /* IB_QP_CREATE_INTEGRITY_EN is supported to implement T10-PI */ + IBK_INTEGRITY_HANDOVER = 1 << 1, + /* IB_ACCESS_ON_DEMAND is supported during reg_user_mr() */ + IBK_ON_DEMAND_PAGING = 1 << 2, + /* IB_MR_TYPE_SG_GAPS is supported */ + IBK_SG_GAPS_REG = 1 << 3, + /* Driver supports RDMA_NLDEV_CMD_DELLINK */ + IBK_ALLOW_USER_UNREG = 1 << 4, + + /* ipoib will use IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK */ + IBK_BLOCK_MULTICAST_LOOPBACK = 1 << 5, + /* iopib will use IB_QP_CREATE_IPOIB_UD_LSO for its QPs */ + IBK_UD_TSO = 1 << 6, + /* iopib will use the device ops: + * get_vf_config + * get_vf_guid + * get_vf_stats + * set_vf_guid + * set_vf_link_state + */ + IBK_VIRTUAL_FUNCTION = 1 << 7, + /* ipoib will use IB_QP_CREATE_NETDEV_USE for its QPs */ + IBK_RDMA_NETDEV_OPA = 1 << 8, }; enum ib_atomic_cap { @@ -297,17 +314,19 @@ enum ib_atomic_cap { }; enum ib_odp_general_cap_bits { - IB_ODP_SUPPORT = 1 << 0, - IB_ODP_SUPPORT_IMPLICIT = 1 << 1, + IB_ODP_SUPPORT = IB_UVERBS_ODP_SUPPORT, + IB_ODP_SUPPORT_IMPLICIT = IB_UVERBS_ODP_SUPPORT_IMPLICIT, }; enum ib_odp_transport_cap_bits { - IB_ODP_SUPPORT_SEND = 1 << 0, - IB_ODP_SUPPORT_RECV = 1 << 1, - IB_ODP_SUPPORT_WRITE = 1 << 2, - IB_ODP_SUPPORT_READ = 1 << 3, - IB_ODP_SUPPORT_ATOMIC = 1 << 4, - IB_ODP_SUPPORT_SRQ_RECV = 1 << 5, + IB_ODP_SUPPORT_SEND = IB_UVERBS_ODP_SUPPORT_SEND, + IB_ODP_SUPPORT_RECV = IB_UVERBS_ODP_SUPPORT_RECV, + IB_ODP_SUPPORT_WRITE = IB_UVERBS_ODP_SUPPORT_WRITE, + IB_ODP_SUPPORT_READ = IB_UVERBS_ODP_SUPPORT_READ, + IB_ODP_SUPPORT_ATOMIC = IB_UVERBS_ODP_SUPPORT_ATOMIC, + IB_ODP_SUPPORT_SRQ_RECV = IB_UVERBS_ODP_SUPPORT_SRQ_RECV, + IB_ODP_SUPPORT_FLUSH = IB_UVERBS_ODP_SUPPORT_FLUSH, + IB_ODP_SUPPORT_ATOMIC_WRITE = IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE, }; struct ib_odp_caps { @@ -386,6 +405,7 @@ struct ib_device_attr { int max_qp; int max_qp_wr; u64 device_cap_flags; + u64 kernel_cap_flags; int max_send_sge; int max_recv_sge; int max_sge_rd; @@ -501,6 +521,23 @@ enum ib_port_state { IB_PORT_ACTIVE_DEFER = 5 }; +static inline const char *__attribute_const__ +ib_port_state_to_str(enum ib_port_state state) +{ + const char * const states[] = { + [IB_PORT_NOP] = "NOP", + [IB_PORT_DOWN] = "DOWN", + [IB_PORT_INIT] = "INIT", + [IB_PORT_ARMED] = "ARMED", + [IB_PORT_ACTIVE] = "ACTIVE", + [IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER", + }; + + if (state < ARRAY_SIZE(states)) + return states[state]; + return "UNKNOWN"; +} + enum ib_port_phys_state { IB_PORT_PHYS_STATE_SLEEP = 1, IB_PORT_PHYS_STATE_POLLING = 2, @@ -540,20 +577,39 @@ enum ib_port_speed { IB_SPEED_EDR = 32, IB_SPEED_HDR = 64, IB_SPEED_NDR = 128, + IB_SPEED_XDR = 256, +}; + +enum ib_stat_flag { + IB_STAT_FLAG_OPTIONAL = 1 << 0, +}; + +/** + * struct rdma_stat_desc + * @name - The name of the counter + * @flags - Flags of the counter; For 
example, IB_STAT_FLAG_OPTIONAL + * @priv - Driver private information; Core code should not use + */ +struct rdma_stat_desc { + const char *name; + unsigned int flags; + const void *priv; }; /** * struct rdma_hw_stats * @lock - Mutex to protect parallel write access to lifespan and values - * of counters, which are 64bits and not guaranteeed to be written + * of counters, which are 64bits and not guaranteed to be written * atomicaly on 32bits systems. * @timestamp - Used by the core code to track when the last update was * @lifespan - Used by the core code to determine how old the counters * should be before being updated again. Stored in jiffies, defaults * to 10 milliseconds, drivers can override the default be specifying * their own value during their allocation routine. - * @name - Array of pointers to static names used for the counters in - * directory. + * @descs - Array of pointers to static descriptors used for the counters + * in directory. + * @is_disabled - A bitmap to indicate each counter is currently disabled + * or not. * @num_counters - How many hardware counters there are. If name is * shorter than this number, a kernel oops will result. Driver authors * are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters) @@ -565,36 +621,19 @@ struct rdma_hw_stats { struct mutex lock; /* Protect lifespan and values[] */ unsigned long timestamp; unsigned long lifespan; - const char * const *names; + const struct rdma_stat_desc *descs; + unsigned long *is_disabled; int num_counters; - u64 value[]; + u64 value[] __counted_by(num_counters); }; #define RDMA_HW_STATS_DEFAULT_LIFESPAN 10 -/** - * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct - * for drivers. - * @names - Array of static const char * - * @num_counters - How many elements in array - * @lifespan - How many milliseconds between updates - */ -static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( - const char * const *names, int num_counters, - unsigned long lifespan) -{ - struct rdma_hw_stats *stats; - stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64), - GFP_KERNEL); - if (!stats) - return NULL; - stats->names = names; - stats->num_counters = num_counters; - stats->lifespan = msecs_to_jiffies(lifespan); - - return stats; -} +struct rdma_hw_stats *rdma_alloc_hw_stats_struct( + const struct rdma_stat_desc *descs, int num_counters, + unsigned long lifespan); +void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats); /* Define bits for the various functionality this port needs to be supported by * the core. @@ -735,7 +774,7 @@ struct ib_event { struct ib_qp *qp; struct ib_srq *srq; struct ib_wq *wq; - u8 port_num; + u32 port_num; } element; enum ib_event_type event; }; @@ -818,6 +857,7 @@ enum ib_rate { IB_RATE_50_GBPS = 20, IB_RATE_400_GBPS = 21, IB_RATE_600_GBPS = 22, + IB_RATE_800_GBPS = 23, }; /** @@ -918,7 +958,7 @@ struct rdma_ah_attr { struct ib_global_route grh; u8 sl; u8 static_rate; - u8 port_num; + u32 port_num; u8 ah_flags; enum rdma_ah_attr_type type; union { @@ -964,9 +1004,11 @@ enum ib_wc_opcode { IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW, IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV, IB_WC_LSO = IB_UVERBS_WC_TSO, + IB_WC_ATOMIC_WRITE = IB_UVERBS_WC_ATOMIC_WRITE, IB_WC_REG_MR, IB_WC_MASKED_COMP_SWAP, IB_WC_MASKED_FETCH_ADD, + IB_WC_FLUSH = IB_UVERBS_WC_FLUSH, /* * Set value of IB_WC_RECV so consumers can test if a completion is a * receive by testing (opcode & IB_WC_RECV). 
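The hunks above replace the flat array of counter name strings with struct rdma_stat_desc and turn rdma_alloc_hw_stats_struct() into an exported function taking a descriptor array. A hedged sketch of how a driver might build its descriptor table and allocate the stats structure, typically from the per-port allocation callback added later in this diff (alloc_hw_port_stats); all mydrv_* identifiers and counter names are hypothetical:

/* Illustrative only: hypothetical driver counter descriptors */
static const struct rdma_stat_desc mydrv_port_stat_descs[] = {
	{ .name = "rx_packets" },
	{ .name = "tx_packets" },
	{ .name = "rare_counter", .flags = IB_STAT_FLAG_OPTIONAL },
};

static struct rdma_hw_stats *
mydrv_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
	/* Core code frees the returned struct when the device is removed */
	return rdma_alloc_hw_stats_struct(mydrv_port_stat_descs,
					  ARRAY_SIZE(mydrv_port_stat_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}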
@@ -1005,7 +1047,7 @@ struct ib_wc { u16 pkey_index; u8 sl; u8 dlid_path_bits; - u8 port_num; /* valid only for DR SMPs on switches */ + u32 port_num; /* valid only for DR SMPs on switches */ u8 smac[ETH_ALEN]; u16 vlan_id; u8 network_hdr_type; @@ -1070,7 +1112,7 @@ struct ib_qp_cap { /* * Maximum number of rdma_rw_ctx structures in flight at a time. - * ib_create_qp() will calculate the right amount of neededed WRs + * ib_create_qp() will calculate the right amount of needed WRs * and MRs based on this. */ u32 max_rdma_ctxs; @@ -1144,7 +1186,7 @@ enum ib_qp_create_flags { */ struct ib_qp_init_attr { - /* Consumer's event_handler callback must not block */ + /* This callback occurs in workqueue context */ void (*event_handler)(struct ib_event *, void *); void *qp_context; @@ -1160,7 +1202,7 @@ struct ib_qp_init_attr { /* * Only needed for special QP types, or when using the RW API. */ - u8 port_num; + u32 port_num; struct ib_rwq_ind_table *rwq_ind_tbl; u32 source_qpn; }; @@ -1234,6 +1276,8 @@ enum ib_qp_attr_mask { IB_QP_RESERVED3 = (1<<23), IB_QP_RESERVED4 = (1<<24), IB_QP_RATE_LIMIT = (1<<25), + + IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0), }; enum ib_qp_state { @@ -1277,11 +1321,11 @@ struct ib_qp_attr { u8 max_rd_atomic; u8 max_dest_rd_atomic; u8 min_rnr_timer; - u8 port_num; + u32 port_num; u8 timeout; u8 retry_cnt; u8 rnr_retry; - u8 alt_port_num; + u32 alt_port_num; u8 alt_timeout; u32 rate_limit; struct net_device *xmit_slave; @@ -1305,6 +1349,8 @@ enum ib_wr_opcode { IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD, + IB_WR_FLUSH = IB_UVERBS_WR_FLUSH, + IB_WR_ATOMIC_WRITE = IB_UVERBS_WR_ATOMIC_WRITE, /* These are kernel only and can not be issued by userspace */ IB_WR_REG_MR = 0x20, @@ -1398,7 +1444,7 @@ struct ib_ud_wr { u32 remote_qpn; u32 remote_qkey; u16 pkey_index; /* valid for GSI only */ - u8 port_num; /* valid for DR SMPs on switch only */ + u32 port_num; /* valid for DR SMPs on switch only */ }; static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr) @@ -1438,10 +1484,12 @@ enum ib_access_flags { IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND, IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB, IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING, + IB_ACCESS_FLUSH_GLOBAL = IB_UVERBS_ACCESS_FLUSH_GLOBAL, + IB_ACCESS_FLUSH_PERSISTENT = IB_UVERBS_ACCESS_FLUSH_PERSISTENT, IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE, IB_ACCESS_SUPPORTED = - ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL, + ((IB_ACCESS_FLUSH_PERSISTENT << 1) - 1) | IB_ACCESS_OPTIONAL, }; /* @@ -1469,6 +1517,8 @@ enum rdma_remove_reason { RDMA_REMOVE_DRIVER_REMOVE, /* uobj is being cleaned-up before being committed */ RDMA_REMOVE_ABORT, + /* The driver failed to destroy the uobject and is being disconnected */ + RDMA_REMOVE_DRIVER_FAILURE, }; struct ib_rdmacg_object { @@ -1481,9 +1531,8 @@ struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; - bool cleanup_retryable; - struct ib_rdmacg_object cg_obj; + u64 enabled_caps; /* * Implementation details of the RDMA core, don't use in drivers: */ @@ -1607,22 +1656,31 @@ struct ib_srq { } xrc; }; } ext; + + /* + * Implementation details of the RDMA core, don't use in drivers: + */ + struct rdma_restrack_entry res; }; enum ib_raw_packet_caps { - /* Strip cvlan from incoming packet and report it in the matching work + /* + * Strip cvlan from incoming packet and report it in the matching work * completion is supported. 
*/ - IB_RAW_PACKET_CAP_CVLAN_STRIPPING = (1 << 0), - /* Scatter FCS field of an incoming packet to host memory is supported. + IB_RAW_PACKET_CAP_CVLAN_STRIPPING = + IB_UVERBS_RAW_PACKET_CAP_CVLAN_STRIPPING, + /* + * Scatter FCS field of an incoming packet to host memory is supported. */ - IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1), + IB_RAW_PACKET_CAP_SCATTER_FCS = IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS, /* Checksum offloads are supported (for both send and receive). */ - IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2), - /* When a packet is received for an RQ with no receive WQEs, the + IB_RAW_PACKET_CAP_IP_CSUM = IB_UVERBS_RAW_PACKET_CAP_IP_CSUM, + /* + * When a packet is received for an RQ with no receive WQEs, the * packet processing is delayed. */ - IB_RAW_PACKET_CAP_DELAY_DROP = (1 << 3), + IB_RAW_PACKET_CAP_DELAY_DROP = IB_UVERBS_RAW_PACKET_CAP_DELAY_DROP, }; enum ib_wq_type { @@ -1705,7 +1763,7 @@ struct ib_qp_security; struct ib_port_pkey { enum port_pkey_state state; u16 pkey_index; - u8 port_num; + u32 port_num; struct list_head qp_list; struct list_head to_error_list; struct ib_qp_security *sec; @@ -1747,6 +1805,7 @@ struct ib_qp { struct list_head rdma_mrs; struct list_head sig_mrs; struct ib_srq *srq; + struct completion srq_completion; struct ib_xrcd *xrcd; /* XRC TGT QPs only */ struct list_head xrcd_list; @@ -1756,6 +1815,7 @@ struct ib_qp { struct ib_qp *real_qp; struct ib_uqp_object *uobject; void (*event_handler)(struct ib_event *, void *); + void (*registered_event_handler)(struct ib_event *, void *); void *qp_context; /* sgid_attrs associated with the AV's */ const struct ib_gid_attr *av_sgid_attr; @@ -1766,7 +1826,7 @@ struct ib_qp { enum ib_qp_type qp_type; struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_qp_security *qp_sec; - u8 port; + u32 port; bool integrity_en; /* @@ -1869,8 +1929,6 @@ struct ib_flow_eth_filter { u8 src_mac[6]; __be16 ether_type; __be16 vlan_tag; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_eth { @@ -1883,8 +1941,6 @@ struct ib_flow_spec_eth { struct ib_flow_ib_filter { __be16 dlid; __u8 sl; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_ib { @@ -1908,8 +1964,6 @@ struct ib_flow_ipv4_filter { u8 tos; u8 ttl; u8 flags; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_ipv4 { @@ -1926,9 +1980,7 @@ struct ib_flow_ipv6_filter { u8 next_hdr; u8 traffic_class; u8 hop_limit; - /* Must be last */ - u8 real_sz[]; -}; +} __packed; struct ib_flow_spec_ipv6 { u32 type; @@ -1940,8 +1992,6 @@ struct ib_flow_spec_ipv6 { struct ib_flow_tcp_udp_filter { __be16 dst_port; __be16 src_port; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_tcp_udp { @@ -1953,7 +2003,6 @@ struct ib_flow_spec_tcp_udp { struct ib_flow_tunnel_filter { __be32 tunnel_id; - u8 real_sz[]; }; /* ib_flow_spec_tunnel describes the Vxlan tunnel @@ -1969,8 +2018,6 @@ struct ib_flow_spec_tunnel { struct ib_flow_esp_filter { __be32 spi; __be32 seq; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_esp { @@ -1984,8 +2031,6 @@ struct ib_flow_gre_filter { __be16 c_ks_res0_ver; __be16 protocol; __be32 key; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_gre { @@ -1997,8 +2042,6 @@ struct ib_flow_spec_gre { struct ib_flow_mpls_filter { __be32 tag; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_mpls { @@ -2062,7 +2105,7 @@ struct ib_flow_attr { u16 priority; u32 flags; u8 num_of_specs; - u8 port; + u32 port; union ib_flow_spec flows[]; }; @@ -2131,7 +2174,6 @@ struct ib_flow_action { }; struct ib_mad; -struct ib_grh; enum 
ib_process_mad_flags { IB_MAD_IGNORE_MKEY = 1, @@ -2152,6 +2194,7 @@ struct ib_port_cache { struct ib_gid_table *gid; u8 lmc; enum ib_port_state port_state; + enum ib_port_state last_port_state; }; struct ib_port_immutable { @@ -2167,15 +2210,18 @@ struct ib_port_data { struct ib_port_immutable immutable; spinlock_t pkey_list_lock; + + spinlock_t netdev_lock; + struct list_head pkey_list; struct ib_port_cache cache; - spinlock_t netdev_lock; struct net_device __rcu *netdev; + netdevice_tracker netdev_tracker; struct hlist_node ndev_hash_link; struct rdma_port_counter port_counter; - struct rdma_hw_stats *hw_stats; + struct ib_port *sysfs; }; /* rdma netdev type - specifies protocol type */ @@ -2191,7 +2237,7 @@ enum rdma_netdev_t { struct rdma_netdev { void *clnt_priv; struct ib_device *hca; - u8 port_num; + u32 port_num; int mtu; /* @@ -2212,6 +2258,8 @@ struct rdma_netdev { int set_qkey, u32 qkey); int (*detach_mcast)(struct net_device *dev, struct ib_device *hca, union ib_gid *gid, u16 mlid); + /* timeout */ + void (*tx_timeout)(struct net_device *dev, unsigned int txqueue); }; struct rdma_netdev_alloc_params { @@ -2220,13 +2268,15 @@ struct rdma_netdev_alloc_params { unsigned int rxqs; void *param; - int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num, + int (*initialize_rdma_netdev)(struct ib_device *device, u32 port_num, struct net_device *netdev, void *param); }; struct ib_odp_counters { atomic64_t faults; + atomic64_t faults_handled; atomic64_t invalidations; + atomic64_t invalidations_handled; atomic64_t prefetch; }; @@ -2255,8 +2305,13 @@ struct iw_cm_conn_param; !__same_type(((struct drv_struct *)NULL)->member, \ struct ib_struct))) -#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \ - ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp)) +#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \ + ((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \ + gfp, false)) + +#define rdma_zalloc_drv_obj_numa(ib_dev, ib_type) \ + ((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \ + GFP_KERNEL, true)) #define rdma_zalloc_drv_obj(ib_dev, ib_type) \ rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL) @@ -2289,6 +2344,14 @@ struct ib_device_ops { u32 uverbs_abi_ver; unsigned int uverbs_no_driver_id_binding:1; + /* + * NOTE: New drivers should not make use of device_group; instead new + * device parameter should be exposed via netlink command. This + * mechanism exists only for existing drivers. 
+ */ + const struct attribute_group *device_group; + const struct attribute_group **port_groups; + int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr, const struct ib_send_wr **bad_send_wr); int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, @@ -2298,12 +2361,11 @@ struct ib_device_ops { int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int (*peek_cq)(struct ib_cq *cq, int wc_cnt); int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags); - int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt); int (*post_srq_recv)(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); int (*process_mad)(struct ib_device *device, int process_mad_flags, - u8 port_num, const struct ib_wc *in_wc, + u32 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad *in_mad, struct ib_mad *out_mad, size_t *out_mad_size, u16 *out_mad_pkey_index); @@ -2315,9 +2377,9 @@ struct ib_device_ops { void (*get_dev_fw_str)(struct ib_device *device, char *str); const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, int comp_vector); - int (*query_port)(struct ib_device *device, u8 port_num, + int (*query_port)(struct ib_device *device, u32 port_num, struct ib_port_attr *port_attr); - int (*modify_port)(struct ib_device *device, u8 port_num, + int (*modify_port)(struct ib_device *device, u32 port_num, int port_modify_mask, struct ib_port_modify *port_modify); /** @@ -2326,10 +2388,10 @@ struct ib_device_ops { * structure to avoid cache line misses when accessing struct ib_device * in fast paths. */ - int (*get_port_immutable)(struct ib_device *device, u8 port_num, + int (*get_port_immutable)(struct ib_device *device, u32 port_num, struct ib_port_immutable *immutable); enum rdma_link_layer (*get_link_layer)(struct ib_device *device, - u8 port_num); + u32 port_num); /** * When calling get_netdev, the HW vendor's driver should return the * net device of device @device at port @port_num or NULL if such @@ -2338,7 +2400,8 @@ struct ib_device_ops { * that this function returns NULL before the net device has finished * NETDEV_UNREGISTER state. */ - struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num); + struct net_device *(*get_netdev)(struct ib_device *device, + u32 port_num); /** * rdma netdev operation * @@ -2346,11 +2409,11 @@ struct ib_device_ops { * must return -EOPNOTSUPP if it doesn't support the specified type. */ struct net_device *(*alloc_rdma_netdev)( - struct ib_device *device, u8 port_num, enum rdma_netdev_t type, + struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *)); - int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num, + int (*rdma_netdev_get_params)(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, struct rdma_netdev_alloc_params *params); /** @@ -2358,7 +2421,7 @@ struct ib_device_ops { * link layer is either IB or iWarp. It is no-op if @port_num port * is RoCE link layer. */ - int (*query_gid)(struct ib_device *device, u8 port_num, int index, + int (*query_gid)(struct ib_device *device, u32 port_num, int index, union ib_gid *gid); /** * When calling add_gid, the HW vendor's driver should add the gid @@ -2383,7 +2446,7 @@ struct ib_device_ops { * This function is only called when roce_gid_table is used. 
*/ int (*del_gid)(const struct ib_gid_attr *attr, void **context); - int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, + int (*query_pkey)(struct ib_device *device, u32 port_num, u16 index, u16 *pkey); int (*alloc_ucontext)(struct ib_ucontext *context, struct ib_udata *udata); @@ -2401,6 +2464,8 @@ struct ib_device_ops { int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, struct ib_udata *udata); + int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah, u32 flags); @@ -2412,16 +2477,15 @@ struct ib_device_ops { struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata); - struct ib_qp *(*create_qp)(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr, - struct ib_udata *udata); + int (*create_qp)(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata); int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata); int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata); + struct uverbs_attr_bundle *attrs); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); @@ -2429,9 +2493,14 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); - int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_pd *pd, struct ib_udata *udata); + struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset, + u64 length, u64 virt_addr, int fd, + int mr_access_flags, + struct uverbs_attr_bundle *attrs); + struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); @@ -2442,6 +2511,14 @@ struct ib_device_ops { enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge, struct uverbs_attr_bundle *attrs); + + /* + * Kernel users should universally support relaxed ordering (RO), as + * they are designed to read data only after observing the CQE and use + * the DMA API correctly. + * + * Some drivers implicitly enable RO if platform supports it. 
+ */ int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, @@ -2456,25 +2533,17 @@ struct ib_device_ops { struct ib_flow_attr *flow_attr, struct ib_udata *udata); int (*destroy_flow)(struct ib_flow *flow_id); - struct ib_flow_action *(*create_flow_action_esp)( - struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs); int (*destroy_flow_action)(struct ib_flow_action *action); - int (*modify_flow_action_esp)( - struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs); - int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, + int (*set_vf_link_state)(struct ib_device *device, int vf, u32 port, int state); - int (*get_vf_config)(struct ib_device *device, int vf, u8 port, + int (*get_vf_config)(struct ib_device *device, int vf, u32 port, struct ifla_vf_info *ivf); - int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, + int (*get_vf_stats)(struct ib_device *device, int vf, u32 port, struct ifla_vf_stats *stats); - int (*get_vf_guid)(struct ib_device *device, int vf, u8 port, + int (*get_vf_guid)(struct ib_device *device, int vf, u32 port, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid); - int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, + int (*set_vf_guid)(struct ib_device *device, int vf, u32 port, u64 guid, int type); struct ib_wq *(*create_wq)(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, @@ -2506,13 +2575,14 @@ struct ib_device_ops { unsigned int *meta_sg_offset); /** - * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the - * driver initialized data. The struct is kfree()'ed by the sysfs - * core when the device is removed. A lifespan of -1 in the return - * struct tells the core to set a default lifespan. + * alloc_hw_[device,port]_stats - Allocate a struct rdma_hw_stats and + * fill in the driver initialized data. The struct is kfree()'ed by + * the sysfs core when the device is removed. A lifespan of -1 in the + * return struct tells the core to set a default lifespan. */ - struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device, - u8 port_num); + struct rdma_hw_stats *(*alloc_hw_device_stats)(struct ib_device *device); + struct rdma_hw_stats *(*alloc_hw_port_stats)(struct ib_device *device, + u32 port_num); /** * get_hw_stats - Fill in the counter value(s) in the stats struct. * @index - The index in the value array we wish to have updated, or @@ -2526,13 +2596,15 @@ struct ib_device_ops { * one given in index at their option */ int (*get_hw_stats)(struct ib_device *device, - struct rdma_hw_stats *stats, u8 port, int index); - /* - * This function is called once for each port when a ib device is - * registered. + struct rdma_hw_stats *stats, u32 port, int index); + + /** + * modify_hw_stat - Modify the counter configuration + * @enable: true/false when enable/disable a counter + * Return codes - 0 on success or error code otherwise. */ - int (*init_port)(struct ib_device *device, u8 port_num, - struct kobject *port_sysfs); + int (*modify_hw_stat)(struct ib_device *device, u32 port, + unsigned int counter_index, bool enable); /** * Allows rdma drivers to add their own restrack attributes. 
*/ @@ -2543,6 +2615,8 @@ struct ib_device_ops { int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp); int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp); int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id); + int (*fill_res_srq_entry)(struct sk_buff *msg, struct ib_srq *ib_srq); + int (*fill_res_srq_entry_raw)(struct sk_buff *msg, struct ib_srq *ib_srq); /* Device lifecycle callbacks */ /* @@ -2572,12 +2646,13 @@ struct ib_device_ops { * @counter - The counter to be bound. If counter->id is zero then * the driver needs to allocate a new counter and set counter->id */ - int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp); + int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp, + u32 port); /** * counter_unbind_qp - Unbind the qp from the dynamically-allocated * counter and bind it onto the default one */ - int (*counter_unbind_qp)(struct ib_qp *qp); + int (*counter_unbind_qp)(struct ib_qp *qp, u32 port); /** * counter_dealloc -De-allocate the hw counter */ @@ -2594,6 +2669,11 @@ struct ib_device_ops { int (*counter_update_stats)(struct rdma_counter *counter); /** + * counter_init - Initialize the driver specific rdma counter struct. + */ + void (*counter_init)(struct rdma_counter *counter); + + /** * Allows rdma drivers to add their own restrack attributes * dumped via 'rdma stat' iproute2 command. */ @@ -2603,15 +2683,48 @@ struct ib_device_ops { int (*query_ucontext)(struct ib_ucontext *context, struct uverbs_attr_bundle *attrs); + /* + * Provide NUMA node. This API exists for rdmavt/hfi1 only. + * Everyone else relies on Linux memory management model. + */ + int (*get_numa_node)(struct ib_device *dev); + + /** + * add_sub_dev - Add a sub IB device + */ + struct ib_device *(*add_sub_dev)(struct ib_device *parent, + enum rdma_nl_dev_type type, + const char *name); + + /** + * del_sub_dev - Delete a sub IB device + */ + void (*del_sub_dev)(struct ib_device *sub_dev); + + /** + * ufile_cleanup - Attempt to cleanup ubojects HW resources inside + * the ufile. + */ + void (*ufile_hw_cleanup)(struct ib_uverbs_file *ufile); + + /** + * report_port_event - Drivers need to implement this if they have + * some private stuff to handle when link status changes. + */ + void (*report_port_event)(struct ib_device *ibdev, + struct net_device *ndev, unsigned long event); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_counters); DECLARE_RDMA_OBJ_SIZE(ib_cq); DECLARE_RDMA_OBJ_SIZE(ib_mw); DECLARE_RDMA_OBJ_SIZE(ib_pd); + DECLARE_RDMA_OBJ_SIZE(ib_qp); DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table); DECLARE_RDMA_OBJ_SIZE(ib_srq); DECLARE_RDMA_OBJ_SIZE(ib_ucontext); DECLARE_RDMA_OBJ_SIZE(ib_xrcd); + DECLARE_RDMA_OBJ_SIZE(rdma_counter); }; struct ib_core_device { @@ -2658,14 +2771,15 @@ struct ib_device { struct ib_core_device coredev; }; - /* First group for device attributes, - * Second group for driver provided attributes (optional). - * It is NULL terminated array. + /* First group is for device attributes, + * Second group is for driver provided attributes (optional). + * Third group is for the hw_stats + * It is a NULL terminated array. 
*/ - const struct attribute_group *groups[3]; + const struct attribute_group *groups[4]; + u8 hw_stats_attr_index; u64 uverbs_cmd_mask; - u64 uverbs_ex_cmd_mask; char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; @@ -2676,10 +2790,9 @@ struct ib_device { /* CQ adaptive moderation (RDMA DIM) */ u16 use_cq_dim:1; u8 node_type; - u8 phys_port_cnt; + u32 phys_port_cnt; struct ib_device_attr attrs; - struct attribute_group *hw_stats_ag; - struct rdma_hw_stats *hw_stats; + struct hw_stats_device_data *hw_stats_data; #ifdef CONFIG_CGROUP_RDMA struct rdmacg_device cg_device; @@ -2713,8 +2826,28 @@ struct ib_device { char iw_ifname[IFNAMSIZ]; u32 iw_driver_flags; u32 lag_flags; + + /* A parent device has a list of sub-devices */ + struct mutex subdev_lock; + struct list_head subdev_list_head; + + /* A sub device has a type and a parent */ + enum rdma_nl_dev_type type; + struct ib_device *parent; + struct list_head subdev_list; + + enum rdma_nl_name_assign_type name_assign_type; }; +static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size, + gfp_t gfp, bool is_numa_aware) +{ + if (is_numa_aware && dev->ops.get_numa_node) + return kzalloc_node(size, gfp, dev->ops.get_numa_node(dev)); + + return kzalloc(size, gfp); +} + struct ib_client_nl_info; struct ib_client { const char *name; @@ -2742,7 +2875,7 @@ struct ib_client { * netdev. */ struct net_device *(*get_net_dev_by_params)( struct ib_device *dev, - u8 port, + u32 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr, @@ -2766,6 +2899,7 @@ struct ib_block_iter { /* internal states */ struct scatterlist *__sg; /* sg holding the current aligned block */ dma_addr_t __dma_addr; /* unaligned DMA address of this block */ + size_t __sg_numblocks; /* ib_umem_num_dma_blocks() */ unsigned int __sg_nents; /* number of SG entries */ unsigned int __sg_advance; /* number of bytes to advance in sg in next step */ unsigned int __pg_bit; /* alignment of current block */ @@ -2855,6 +2989,23 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, size_t length, u32 min_pgoff, u32 max_pgoff); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +void rdma_user_mmap_disassociate(struct ib_device *device); +#else +static inline void rdma_user_mmap_disassociate(struct ib_device *device) +{ +} +#endif + +static inline int +rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length, u32 pgoff) +{ + return rdma_user_mmap_entry_insert_range(ucontext, entry, length, pgoff, + pgoff); +} + struct rdma_user_mmap_entry * rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, unsigned long pgoff); @@ -2901,46 +3052,6 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, } /** - * ib_is_destroy_retryable - Check whether the uobject destruction - * is retryable. - * @ret: The initial destruction return code - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * This function is a helper function that IB layer and low-level drivers - * can use to consider whether the destruction of the given uobject is - * retry-able. - * It checks the original return code, if it wasn't success the destruction - * is retryable according to the ucontext state (i.e. cleanup_retryable) and - * the remove reason. (i.e. why). - * Must be called with the object locked for destroy. 
- */ -static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - return ret && (why == RDMA_REMOVE_DESTROY || - uobj->context->cleanup_retryable); -} - -/** - * ib_destroy_usecnt - Called during destruction to check the usecnt - * @usecnt: The usecnt atomic - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * Non-zero usecnts will block destruction unless destruction was triggered by - * a ucontext cleanup. - */ -static inline int ib_destroy_usecnt(atomic_t *usecnt, - enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) - return -EBUSY; - return 0; -} - -/** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for * the given QP state transition. @@ -2963,10 +3074,10 @@ void ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(const struct ib_event *event); int ib_query_port(struct ib_device *device, - u8 port_num, struct ib_port_attr *port_attr); + u32 port_num, struct ib_port_attr *port_attr); enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, - u8 port_num); + u32 port_num); /** * rdma_cap_ib_switch - Check if the device is IB switch @@ -2990,7 +3101,7 @@ static inline bool rdma_cap_ib_switch(const struct ib_device *device) * * Return start port number */ -static inline u8 rdma_start_port(const struct ib_device *device) +static inline u32 rdma_start_port(const struct ib_device *device) { return rdma_cap_ib_switch(device) ? 0 : 1; } @@ -3001,9 +3112,10 @@ static inline u8 rdma_start_port(const struct ib_device *device) * @iter - The unsigned int to store the port number */ #define rdma_for_each_port(device, iter) \ - for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type( \ - unsigned int, iter))); \ - iter <= rdma_end_port(device); (iter)++) + for (iter = rdma_start_port(device + \ + BUILD_BUG_ON_ZERO(!__same_type(u32, \ + iter))); \ + iter <= rdma_end_port(device); iter++) /** * rdma_end_port - Return the last valid port number for the device @@ -3013,7 +3125,7 @@ static inline u8 rdma_start_port(const struct ib_device *device) * * Return last port number */ -static inline u8 rdma_end_port(const struct ib_device *device) +static inline u32 rdma_end_port(const struct ib_device *device) { return rdma_cap_ib_switch(device) ? 
0 : device->phys_port_cnt; } @@ -3026,55 +3138,63 @@ static inline int rdma_is_port_valid(const struct ib_device *device, } static inline bool rdma_is_grh_required(const struct ib_device *device, - u8 port_num) + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_PORT_IB_GRH_REQUIRED; } -static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_ib(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_IB; } -static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_roce(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP); } -static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; } -static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_ROCE; } -static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_iwarp(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_IWARP; } -static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num) +static inline bool rdma_ib_or_roce(const struct ib_device *device, + u32 port_num) { return rdma_protocol_ib(device, port_num) || rdma_protocol_roce(device, port_num); } -static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_raw_packet(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET; } -static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_usnic(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_USNIC; @@ -3092,7 +3212,7 @@ static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_n * * Return: true if the port supports sending/receiving of MAD packets. */ -static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_mad(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_MAD; @@ -3117,7 +3237,7 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) * * Return: true if the port supports OPA MAD packet formats. */ -static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) +static inline bool rdma_cap_opa_mad(struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_OPA_MAD; @@ -3143,7 +3263,7 @@ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) * * Return: true if the port provides an SMI. 
*/ -static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_smi(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_SMI; @@ -3164,7 +3284,7 @@ static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num) * Return: true if the port supports an IB CM (this does not guarantee that * a CM is actually running however). */ -static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_cm(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_CM; @@ -3182,7 +3302,7 @@ static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num) * Return: true if the port supports an iWARP CM (this does not guarantee that * a CM is actually running however). */ -static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_iw_cm(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IW_CM; @@ -3203,7 +3323,7 @@ static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num) * Administration interface. This does not imply that the SA service is * running locally. */ -static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_sa(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_SA; @@ -3226,7 +3346,8 @@ static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num) * overhead of registering/unregistering with the SM and tracking of the * total number of queue pairs attached to the multicast group. */ -static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_mcast(const struct ib_device *device, + u32 port_num) { return rdma_cap_ib_sa(device, port_num); } @@ -3244,7 +3365,7 @@ static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num * Return: true if the port uses a GID address to identify devices on the * network. */ -static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_af_ib(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_AF_IB; @@ -3266,7 +3387,7 @@ static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num) * addition of a Global Route Header built from our Ethernet Address * Handle into our header list for connectionless packets. */ -static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_eth_ah(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_ETH_AH; @@ -3281,7 +3402,7 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) * Return: true if we are running on an OPA device which supports * the extended OPA addressing. 
*/ -static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num) +static inline bool rdma_cap_opa_ah(struct ib_device *device, u32 port_num) { return (device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH; @@ -3299,7 +3420,8 @@ static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num) * Return the max MAD size required by the Port. Will return 0 if the port * does not support MADs */ -static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num) +static inline size_t rdma_max_mad_size(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.max_mad_size; } @@ -3318,7 +3440,7 @@ static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_n * its GIDs. */ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device, - u8 port_num) + u32 port_num) { return rdma_protocol_roce(device, port_num) && device->ops.add_gid && device->ops.del_gid; @@ -3359,7 +3481,7 @@ static inline bool rdma_core_cap_opa_port(struct ib_device *device, * Return the MTU size supported by the port as an integer value. Will return * -1 if enum value of mtu is not supported. */ -static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port, +static inline int rdma_mtu_enum_to_int(struct ib_device *device, u32 port, int mtu) { if (rdma_core_cap_opa_port(device, port)) @@ -3376,7 +3498,7 @@ static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port, * * Return the MTU size supported by the port as an integer value. */ -static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port, +static inline int rdma_mtu_from_attr(struct ib_device *device, u32 port, struct ib_port_attr *attr) { if (rdma_core_cap_opa_port(device, port)) @@ -3385,34 +3507,34 @@ static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port, return ib_mtu_enum_to_int(attr->max_mtu); } -int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, +int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port, int state); -int ib_get_vf_config(struct ib_device *device, int vf, u8 port, +int ib_get_vf_config(struct ib_device *device, int vf, u32 port, struct ifla_vf_info *info); -int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, +int ib_get_vf_stats(struct ib_device *device, int vf, u32 port, struct ifla_vf_stats *stats); -int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, +int ib_get_vf_guid(struct ib_device *device, int vf, u32 port, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid); -int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, +int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid, int type); int ib_query_pkey(struct ib_device *device, - u8 port_num, u16 index, u16 *pkey); + u32 port_num, u16 index, u16 *pkey); int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify); int ib_modify_port(struct ib_device *device, - u8 port_num, int port_modify_mask, + u32 port_num, int port_modify_mask, struct ib_port_modify *port_modify); int ib_find_gid(struct ib_device *device, union ib_gid *gid, - u8 *port_num, u16 *index); + u32 *port_num, u16 *index); int ib_find_pkey(struct ib_device *device, - u8 port_num, u16 pkey, u16 *index); + u32 port_num, u16 pkey, u16 *index); enum ib_pd_flags { /* @@ -3430,6 +3552,17 @@ enum ib_pd_flags { struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const 
char *caller); +/** + * ib_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * @flags: protection domain flags + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + * + * Every PD has a local_dma_lkey which can be used as the lkey value for local + * memory operations. + */ #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) @@ -3516,7 +3649,7 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); * attributes which are initialized using ib_init_ah_attr_from_wc(). * */ -int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, +int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr); @@ -3533,7 +3666,7 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, * in all UD QP post sends. */ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, - const struct ib_grh *grh, u8 port_num); + const struct ib_grh *grh, u32 port_num); /** * rdma_modify_ah - Modifies the address vector associated with an address @@ -3655,8 +3788,22 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, bad_recv_wr ? : &dummy); } -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); +struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller); +/** + * ib_create_qp - Creates a kernel QP associated with the specific protection + * domain. + * @pd: The protection domain associated with the QP. + * @init_attr: A list of initial attributes required to create the + * QP. If QP creation succeeds, then the attributes are updated to + * the actual capabilities of the created QP. + */ +static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr) +{ + return ib_create_qp_kernel(pd, init_attr, KBUILD_MODNAME); +} /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. @@ -3929,18 +4076,50 @@ struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe, void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe); +/* + * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to + * NULL. This causes the ib_dma* helpers to just stash the kernel virtual + * address into the dma address. + */ +static inline bool ib_uses_virt_dma(struct ib_device *dev) +{ + return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; +} + +/* + * Check if a IB device's underlying DMA mapping supports P2PDMA transfers. + */ +static inline bool ib_dma_pci_p2p_dma_supported(struct ib_device *dev) +{ + if (ib_uses_virt_dma(dev)) + return false; + + return dma_pci_p2pdma_supported(dev->dma_device); +} + /** - * ib_req_ncomp_notif - Request completion notification when there are - * at least the specified number of unreaped completions on the CQ. - * @cq: The CQ to generate an event for. - * @wc_cnt: The number of unreaped completions that should be on the - * CQ before an event is generated. + * ib_virt_dma_to_ptr - Convert a dma_addr to a kernel pointer + * @dma_addr: The DMA address + * + * Used by ib_uses_virt_dma() devices to get back to the kernel pointer after + * going through the dma_addr marshalling. 
*/ -static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) +static inline void *ib_virt_dma_to_ptr(u64 dma_addr) { - return cq->device->ops.req_ncomp_notif ? - cq->device->ops.req_ncomp_notif(cq, wc_cnt) : - -ENOSYS; + /* virt_dma mode maps the kvs's directly into the dma addr */ + return (void *)(uintptr_t)dma_addr; +} + +/** + * ib_virt_dma_to_page - Convert a dma_addr to a struct page + * @dma_addr: The DMA address + * + * Used by ib_uses_virt_dma() device to get back to the struct page after going + * through the dma_addr marshalling. + */ +static inline struct page *ib_virt_dma_to_page(u64 dma_addr) +{ + return virt_to_page(ib_virt_dma_to_ptr(dma_addr)); } /** @@ -3950,6 +4129,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { + if (ib_uses_virt_dma(dev)) + return 0; return dma_mapping_error(dev->dma_device, dma_addr); } @@ -3964,6 +4145,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)cpu_addr; return dma_map_single(dev->dma_device, cpu_addr, size, direction); } @@ -3978,7 +4161,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3995,6 +4179,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)(page_address(page) + offset); return dma_map_page(dev->dma_device, page, offset, size, direction); } @@ -4009,7 +4195,63 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_page(dev->dma_device, addr, size, direction); +} + +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents); +static inline int ib_dma_map_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (ib_uses_virt_dma(dev)) + return ib_dma_virt_map_sg(dev, sg, nents); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); +} + +static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (!ib_uses_virt_dma(dev)) + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); +} + +/** + * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses + * @dev: The device for which the DMA addresses are to be created + * @sg: The sg_table object describing the buffer + * @direction: The direction of the DMA + * @attrs: Optional DMA attributes for the map operation + */ +static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev, + struct sg_table *sgt, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + int nents; + + if (ib_uses_virt_dma(dev)) { + nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); + if (!nents) + return -EIO; + sgt->nents = nents; + return 0; + } + return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs); +} + +static inline void ib_dma_unmap_sgtable_attrs(struct 
ib_device *dev, + struct sg_table *sgt, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (!ib_uses_virt_dma(dev)) + dma_unmap_sgtable(dev->dma_device, sgt, direction, dma_attrs); } /** @@ -4023,7 +4265,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(dev->dma_device, sg, nents, direction); + return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4037,24 +4279,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(dev->dma_device, sg, nents, direction); -} - -static inline int ib_dma_map_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, - dma_attrs); -} - -static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); + ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4065,6 +4290,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, */ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { + if (ib_uses_virt_dma(dev)) + return UINT_MAX; return dma_get_max_seg_size(dev->dma_device); } @@ -4080,7 +4307,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -4095,36 +4323,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(dev->dma_device, addr, size, dir); -} - -/** - * ib_dma_alloc_coherent - Allocate memory and map it for DMA - * @dev: The device for which the DMA address is requested - * @size: The size of the region to allocate in bytes - * @dma_handle: A pointer for returning the DMA address of the region - * @flag: memory allocator flags - */ -static inline void *ib_dma_alloc_coherent(struct ib_device *dev, - size_t size, - dma_addr_t *dma_handle, - gfp_t flag) -{ - return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); -} - -/** - * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() - * @dev: The device for which the DMA addresses were allocated - * @size: The size of the region - * @cpu_addr: the address returned by ib_dma_alloc_coherent() - * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() - */ -static inline void ib_dma_free_coherent(struct ib_device *dev, - size_t size, void *cpu_addr, - dma_addr_t dma_handle) -{ - dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /* ib_reg_user_mr - register a memory region for virtual addresses from kernel @@ -4216,8 +4416,11 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, struct inode *inode, struct ib_udata *udata); int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); -static inline int ib_check_mr_access(int flags) +static inline int ib_check_mr_access(struct ib_device *ib_dev, + unsigned int flags) { + u64 device_cap = 
ib_dev->attrs.device_cap_flags; + /* * Local write permission is required if remote write or * remote atomic permission is also requested. @@ -4229,6 +4432,16 @@ static inline int ib_check_mr_access(int flags) if (flags & ~IB_ACCESS_SUPPORTED) return -EINVAL; + if (flags & IB_ACCESS_ON_DEMAND && + !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING)) + return -EOPNOTSUPP; + + if ((flags & IB_ACCESS_FLUSH_GLOBAL && + !(device_cap & IB_DEVICE_FLUSH_GLOBAL)) || + (flags & IB_ACCESS_FLUSH_PERSISTENT && + !(device_cap & IB_DEVICE_FLUSH_PERSISTENT))) + return -EOPNOTSUPP; + return 0; } @@ -4284,18 +4497,27 @@ struct ib_device *ib_device_get_by_netdev(struct net_device *ndev, enum rdma_driver_id driver_id); struct ib_device *ib_device_get_by_name(const char *name, enum rdma_driver_id driver_id); -struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, +struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, unsigned int port); -struct net_device *ib_device_netdev(struct ib_device *dev, u8 port); +struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, + u32 port); +int ib_query_netdev_port(struct ib_device *ibdev, struct net_device *ndev, + u32 *port); + +static inline enum ib_port_state ib_get_curr_port_state(struct net_device *net_dev) +{ + return (netif_running(net_dev) && netif_carrier_ok(net_dev)) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; +} +void ib_dispatch_port_state_event(struct ib_device *ibdev, + struct net_device *ndev); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata); -int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, - u32 wq_attr_mask); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size); @@ -4323,7 +4545,8 @@ void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); -int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width); +int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, + u8 *width); static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) { @@ -4391,12 +4614,12 @@ static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr) return false; } -static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u8 port_num) +static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u32 port_num) { attr->port_num = port_num; } -static inline u8 rdma_ah_get_port_num(const struct rdma_ah_attr *attr) +static inline u32 rdma_ah_get_port_num(const struct rdma_ah_attr *attr) { return attr->port_num; } @@ -4494,7 +4717,7 @@ void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src); * @port_num: Port number */ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, - u8 port_num) + u32 port_num) { if (rdma_protocol_roce(dev, port_num)) return RDMA_AH_ATTR_TYPE_ROCE; @@ -4503,13 +4726,15 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, return RDMA_AH_ATTR_TYPE_OPA; return RDMA_AH_ATTR_TYPE_IB; } + if (dev->type == RDMA_DEVICE_TYPE_SMI) + return RDMA_AH_ATTR_TYPE_IB; return RDMA_AH_ATTR_TYPE_UNDEFINED; } /** * ib_lid_cpu16 - Return lid in 16bit CPU encoding. 
- * In the current implementation the only way to get + * In the current implementation the only way to * get the 32bit lid is from other sources for OPA. * For IB, lids will always be 16bits so cast the * value accordingly. @@ -4561,45 +4786,33 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) * @device: the rdma device */ void rdma_roce_rescan_device(struct ib_device *ibdev); +void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port); +void roce_del_all_netdev_gids(struct ib_device *ib_dev, + u32 port, struct net_device *ndev); struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); +#else +static inline int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) +{ + return 0; +} +#endif -struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, +struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *)); -int rdma_init_netdev(struct ib_device *device, u8 port_num, +int rdma_init_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), struct net_device *netdev); /** - * rdma_set_device_sysfs_group - Set device attributes group to have - * driver specific sysfs entries at - * for infiniband class. - * - * @device: device pointer for which attributes to be created - * @group: Pointer to group which should be added when device - * is registered with sysfs. - * rdma_set_device_sysfs_group() allows existing drivers to expose one - * group per device to have sysfs attributes. - * - * NOTE: New drivers should not make use of this API; instead new device - * parameter should be exposed via netlink command. This API and mechanism - * exist only for existing drivers. - */ -static inline void -rdma_set_device_sysfs_group(struct ib_device *dev, - const struct attribute_group *group) -{ - dev->groups[1] = group; -} - -/** * rdma_device_to_ibdev - Get ib_device pointer from device pointer * * @device: device pointer for which ib_device pointer to retrieve @@ -4616,12 +4829,25 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) } /** + * ibdev_to_node - return the NUMA node for a given ib_device + * @dev: device to get the NUMA node for. + */ +static inline int ibdev_to_node(struct ib_device *ibdev) +{ + struct device *parent = ibdev->dev.parent; + + if (!parent) + return NUMA_NO_NODE; + return dev_to_node(parent); +} + +/** * rdma_device_to_drv_device - Helper macro to reach back to driver's * ib_device holder structure from device pointer. * * NOTE: New drivers should not make use of this API; This API is only for * existing drivers who have exposed sysfs entries using - * rdma_set_device_sysfs_group(). + * ops->device_group. */ #define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \ container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member) @@ -4673,4 +4899,52 @@ static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn) return (u32)(v & IB_GRH_FLOWLABEL_MASK); } + +/** + * rdma_get_udp_sport - Calculate and set UDP source port based on the flow + * label. If flow label is not defined in GRH then + * calculate it based on lqpn/rqpn. 
+ *
+ * @fl: flow label from GRH
+ * @lqpn: local qp number
+ * @rqpn: remote qp number
+ */
+static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
+{
+ if (!fl)
+ fl = rdma_calc_flow_label(lqpn, rqpn);
+
+ return rdma_flow_label_to_udp_sport(fl);
+}
+
+const struct ib_port_immutable*
+ib_port_immutable_read(struct ib_device *dev, unsigned int port);
+
+/** ib_add_sub_device - Add a sub IB device on an existing one
+ *
+ * @parent: The IB device that needs to add a sub device
+ * @type: The type of the new sub device
+ * @name: The name of the new sub device
+ *
+ *
+ * Return 0 on success, an error code otherwise
+ */
+int ib_add_sub_device(struct ib_device *parent,
+ enum rdma_nl_dev_type type,
+ const char *name);
+
+
+/** ib_del_sub_device_and_put - Delete an IB sub device while holding a 'get'
+ *
+ * @sub: The sub device that is going to be deleted
+ *
+ * Return 0 on success, an error code otherwise
+ */
+int ib_del_sub_device_and_put(struct ib_device *sub);
+
+static inline void ib_mark_name_assigned_by_user(struct ib_device *ibdev)
+{
+ ibdev->name_assign_type = RDMA_NAME_ASSIGN_TYPE_USER;
+}
+
 #endif /* IB_VERBS_H */
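Editor's note: the sketch below is an illustrative addition, not part of the
patch above. It shows how a kernel ULP might use the ib_dma_map_sgtable_attrs()
and ib_dma_unmap_sgtable_attrs() wrappers introduced in this change; the helper
name ulp_map_sgtable() and the variables dev and sgt are hypothetical.

	/* Hypothetical ULP helper: map an sg_table, use it, then unmap it. */
	static int ulp_map_sgtable(struct ib_device *dev, struct sg_table *sgt)
	{
		int ret;

		ret = ib_dma_map_sgtable_attrs(dev, sgt, DMA_TO_DEVICE, 0);
		if (ret)
			return ret;	/* -EIO if a virt-DMA mapping came back empty */

		/* ... build WRs from sg_dma_address()/sg_dma_len() and post them ... */

		ib_dma_unmap_sgtable_attrs(dev, sgt, DMA_TO_DEVICE, 0);
		return 0;
	}

On a virt-DMA device (ib_uses_virt_dma() true) the map call merely stashes
kernel virtual addresses, so the unmap and sync wrappers become no-ops, which
is why ULP code written against these helpers needs no special casing.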