diff options
Diffstat (limited to 'include/rdma')
29 files changed, 1161 insertions, 639 deletions
diff --git a/include/rdma/ib.h b/include/rdma/ib.h index 83139b9ce409..f7c185ff7a11 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -75,7 +75,7 @@ struct sockaddr_ib { */ static inline bool ib_safe_file_access(struct file *filp) { - return filp->f_cred == current_cred() && !uaccess_kernel(); + return filp->f_cred == current_cred(); } #endif /* _RDMA_IB_H */ diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index b0e636ac6690..811a0f11d0db 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -7,6 +7,7 @@ #ifndef IB_ADDR_H #define IB_ADDR_H +#include <linux/ethtool.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/if_arp.h> @@ -193,29 +194,6 @@ static inline enum ib_mtu iboe_get_mtu(int mtu) return 0; } -static inline int iboe_get_rate(struct net_device *dev) -{ - struct ethtool_link_ksettings cmd; - int err; - - rtnl_lock(); - err = __ethtool_get_link_ksettings(dev, &cmd); - rtnl_unlock(); - if (err) - return IB_RATE_PORT_CURRENT; - - if (cmd.base.speed >= 40000) - return IB_RATE_40_GBPS; - else if (cmd.base.speed >= 30000) - return IB_RATE_30_GBPS; - else if (cmd.base.speed >= 20000) - return IB_RATE_20_GBPS; - else if (cmd.base.speed >= 10000) - return IB_RATE_10_GBPS; - else - return IB_RATE_PORT_CURRENT; -} - static inline int rdma_link_local_addr(struct in6_addr *addr) { if (addr->s6_addr32[0] == htonl(0xfe800000) && diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index bae29f50adff..2bf09b594d10 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -10,7 +10,7 @@ #include <rdma/ib_verbs.h> -int rdma_query_gid(struct ib_device *device, u8 port_num, int index, +int rdma_query_gid(struct ib_device *device, u32 port_num, int index, union ib_gid *gid); void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr); const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, @@ -20,10 +20,10 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device, const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev, const union ib_gid *gid, enum ib_gid_type gid_type, - u8 port, + u32 port, struct net_device *ndev); const struct ib_gid_attr *rdma_find_gid_by_filter( - struct ib_device *device, const union ib_gid *gid, u8 port_num, + struct ib_device *device, const union ib_gid *gid, u32 port_num, bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, void *), void *context); @@ -43,7 +43,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr); * the local software cache. */ int ib_get_cached_pkey(struct ib_device *device_handle, - u8 port_num, + u32 port_num, int index, u16 *pkey); @@ -59,27 +59,11 @@ int ib_get_cached_pkey(struct ib_device *device_handle, * the local software cache. */ int ib_find_cached_pkey(struct ib_device *device, - u8 port_num, + u32 port_num, u16 pkey, u16 *index); /** - * ib_find_exact_cached_pkey - Returns the PKey table index where a specified - * PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit) - * @device: The device to query. - * @port_num: The port number of the device to search for the PKey. - * @pkey: The PKey value to search for. - * @index: The index into the cached PKey table where the PKey was found. - * - * ib_find_exact_cached_pkey() searches the specified PKey table in - * the local software cache. - */ -int ib_find_exact_cached_pkey(struct ib_device *device, - u8 port_num, - u16 pkey, - u16 *index); - -/** * ib_get_cached_lmc - Returns a cached lmc table entry * @device: The device to query. * @port_num: The port number of the device to query. @@ -89,7 +73,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device, * the local software cache. */ int ib_get_cached_lmc(struct ib_device *device, - u8 port_num, + u32 port_num, u8 *lmc); /** @@ -102,12 +86,12 @@ int ib_get_cached_lmc(struct ib_device *device, * the local software cache. */ int ib_get_cached_port_state(struct ib_device *device, - u8 port_num, + u32 port_num, enum ib_port_state *port_active); bool rdma_is_zero_gid(const union ib_gid *gid); const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device, - u8 port_num, int index); + u32 port_num, int index); void rdma_put_gid_attr(const struct ib_gid_attr *attr); void rdma_hold_gid_attr(const struct ib_gid_attr *attr); ssize_t rdma_query_gid_table(struct ib_device *device, diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index e23eb357b761..1fa3786f82f4 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -294,7 +294,6 @@ struct ib_cm_id { void *context; struct ib_device *device; __be64 service_id; - __be64 service_mask; enum ib_cm_state state; /* internal CM/debug use */ enum ib_cm_lap_state lap_state; /* internal CM/debug use */ __be32 local_id; @@ -340,13 +339,8 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id); * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. - * @service_mask: Mask applied to service ID used to listen across a - * range of service IDs. If set to 0, the service ID is matched - * exactly. This parameter is ignored if %service_id is set to - * IB_CM_ASSIGN_SERVICE_ID. */ -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, - __be64 service_mask); +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id); struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, ib_cm_handler cm_handler, @@ -354,6 +348,8 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, struct ib_cm_req_param { struct sa_path_rec *primary_path; + struct sa_path_rec *primary_path_inbound; + struct sa_path_rec *primary_path_outbound; struct sa_path_rec *alternate_path; const struct ib_gid_attr *ppath_sgid_attr; __be64 service_id; @@ -484,23 +480,12 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len); -#define IB_CM_MRA_FLAG_DELAY 0x80 /* Send MRA only after a duplicate msg */ - /** - * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection - * message. + * ib_prepare_cm_mra - Prepares to send a message receipt acknowledgment to a + connection message in case duplicates are received. * @cm_id: Connection identifier associated with the connection message. - * @service_timeout: The lower 5-bits specify the maximum time required for - * the sender to reply to the connection message. The upper 3-bits - * specify additional control flags. - * @private_data: Optional user-defined private data sent with the - * message receipt acknowledgement. - * @private_data_len: Size of the private data buffer, in bytes. */ -int ib_send_cm_mra(struct ib_cm_id *cm_id, - u8 service_timeout, - const void *private_data, - u8 private_data_len); +int ib_prepare_cm_mra(struct ib_cm_id *cm_id); /** * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 57c1ac881d08..1c4c1a69937a 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -7,7 +7,7 @@ #define IB_HDRS_H #include <linux/types.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <rdma/ib_verbs.h> #define IB_SEQ_NAK (3 << 29) @@ -206,11 +206,6 @@ static inline u8 ib_get_lver(struct ib_header *hdr) IB_LVER_MASK); } -static inline u16 ib_get_len(struct ib_header *hdr) -{ - return (u16)(be16_to_cpu(hdr->lrh[2])); -} - static inline u32 ib_get_qkey(struct ib_other_headers *ohdr) { return be32_to_cpu(ohdr->u.ud.deth[0]); @@ -237,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr) #define IB_BTH_SE_SHIFT 23 #define IB_BTH_TVER_MASK 0xf #define IB_BTH_TVER_SHIFT 16 +#define IB_BTH_OPCODE_CNP 0x81 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) { diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 8dfb1ddf345a..3f1b58d8b4bf 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -276,6 +276,9 @@ enum ib_port_capability_mask2_bits { IB_PORT_SWITCH_PORT_STATE_TABLE_SUP = 1 << 3, IB_PORT_LINK_WIDTH_2X_SUP = 1 << 4, IB_PORT_LINK_SPEED_HDR_SUP = 1 << 5, + IB_PORT_LINK_SPEED_NDR_SUP = 1 << 10, + IB_PORT_EXTENDED_SPEEDS2_SUP = 1 << 11, + IB_PORT_LINK_SPEED_XDR_SUP = 1 << 12, }; #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26) @@ -668,7 +671,7 @@ struct ib_mad_reg_req { * @registration_flags: Registration flags to set for this agent */ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, - u8 port_num, + u32 port_num, enum ib_qp_type qp_type, struct ib_mad_reg_req *mad_reg_req, u8 rmpp_version, @@ -718,27 +721,26 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); /** - * ib_cancel_mad - Cancels an outstanding send MAD operation. - * @mad_agent: Specifies the registration associated with sent MAD. - * @send_buf: Indicates the MAD to cancel. - * - * MADs will be returned to the user through the corresponding - * ib_mad_send_handler. - */ -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf); - -/** * ib_modify_mad - Modifies an outstanding send MAD operation. - * @mad_agent: Specifies the registration associated with sent MAD. * @send_buf: Indicates the MAD to modify. * @timeout_ms: New timeout value for sent MAD. * * This call will reset the timeout value for a sent MAD to the specified * value. */ -int ib_modify_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, u32 timeout_ms); +int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms); + +/** + * ib_cancel_mad - Cancels an outstanding send MAD operation. + * @send_buf: Indicates the MAD to cancel. + * + * MADs will be returned to the user through the corresponding + * ib_mad_send_handler. + */ +static inline void ib_cancel_mad(struct ib_mad_send_buf *send_buf) +{ + ib_modify_mad(send_buf, 0); +} /** * ib_create_send_mad - Allocate and initialize a data buffer and work request diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h index 1838869aad28..b179e464e3d1 100644 --- a/include/rdma/ib_marshall.h +++ b/include/rdma/ib_marshall.h @@ -22,7 +22,4 @@ void ib_copy_ah_attr_to_user(struct ib_device *device, void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, struct sa_path_rec *src); -void ib_copy_path_rec_from_user(struct sa_path_rec *dst, - struct ib_user_path_rec *src); - #endif /* IB_USER_MARSHALL_H */ diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index a9162f25beaf..8266fab826a7 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -84,6 +84,8 @@ enum { /* opcode 0x15 is reserved */ IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16, IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17, + IB_OPCODE_FLUSH = 0x1C, + IB_OPCODE_ATOMIC_WRITE = 0x1D, /* real constants follow -- see comment about above IB_OPCODE() macro for more details */ @@ -112,6 +114,8 @@ enum { IB_OPCODE(RC, FETCH_ADD), IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE), IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE), + IB_OPCODE(RC, FLUSH), + IB_OPCODE(RC, ATOMIC_WRITE), /* UC */ IB_OPCODE(UC, SEND_FIRST), @@ -149,6 +153,7 @@ enum { IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE), IB_OPCODE(RD, COMPARE_SWAP), IB_OPCODE(RD, FETCH_ADD), + IB_OPCODE(RD, FLUSH), /* UD */ IB_OPCODE(UD, SEND_ONLY), @@ -278,7 +283,4 @@ int ib_ud_header_init(int payload_bytes, int ib_ud_header_pack(struct ib_ud_header *header, void *buf); -int ib_ud_header_unpack(void *buf, - struct ib_ud_header *header); - #endif /* IB_PACK_H */ diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 693285e76f13..b46353fc53bf 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -186,6 +186,7 @@ struct sa_path_rec { struct sa_path_rec_opa opa; }; enum sa_path_rec_type rec_type; + u32 flags; }; static inline enum ib_gid_type @@ -366,20 +367,6 @@ struct ib_sa_mcmember_rec { #define IB_DEFAULT_SERVICE_LEASE 0xFFFFFFFF -struct ib_sa_service_rec { - u64 id; - union ib_gid gid; - __be16 pkey; - /* reserved */ - u32 lease; - u8 key[16]; - u8 name[64]; - u8 data8[16]; - u16 data16[8]; - u32 data32[4]; - u64 data64[2]; -}; - #define IB_SA_GUIDINFO_REC_LID IB_SA_COMP_MASK(0) #define IB_SA_GUIDINFO_REC_BLOCK_NUM IB_SA_COMP_MASK(1) #define IB_SA_GUIDINFO_REC_RES1 IB_SA_COMP_MASK(2) @@ -423,23 +410,13 @@ struct ib_sa_query; void ib_sa_cancel_query(int id, struct ib_sa_query *query); int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, - u8 port_num, struct sa_path_rec *rec, + u32 port_num, struct sa_path_rec *rec, ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, - void *context), + unsigned int num_prs, void *context), void *context, struct ib_sa_query **query); -int ib_sa_service_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, u8 method, - struct ib_sa_service_rec *rec, - ib_sa_comp_mask comp_mask, unsigned long timeout_ms, - gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_service_rec *resp, - void *context), - void *context, struct ib_sa_query **sa_query); - struct ib_sa_multicast { struct ib_sa_mcmember_rec rec; ib_sa_comp_mask comp_mask; @@ -477,7 +454,8 @@ struct ib_sa_multicast { * group, and the user must rejoin the group to continue using it. */ struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, + struct ib_device *device, + u32 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, gfp_t gfp_mask, int (*callback)(int status, @@ -506,20 +484,20 @@ void ib_sa_free_multicast(struct ib_sa_multicast *multicast); * @mgid: MGID of multicast group. * @rec: Location to copy SA multicast member record. */ -int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, +int ib_sa_get_mcmember_rec(struct ib_device *device, u32 port_num, union ib_gid *mgid, struct ib_sa_mcmember_rec *rec); /** * ib_init_ah_from_mcmember - Initialize address handle attributes based on * an SA multicast member record. */ -int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, +int ib_init_ah_from_mcmember(struct ib_device *device, u32 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, struct rdma_ah_attr *ah_attr); -int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, +int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num, struct sa_path_rec *rec, struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *sgid_attr); @@ -538,7 +516,7 @@ void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec); /* Support GuidInfoRecord */ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, + struct ib_device *device, u32 port_num, struct ib_sa_guidinfo_rec *rec, ib_sa_comp_mask comp_mask, u8 method, unsigned long timeout_ms, gfp_t gfp_mask, @@ -547,10 +525,6 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, void *context), void *context, struct ib_sa_query **sa_query); -bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, - struct ib_device *device, - u8 port_num); - static inline bool sa_path_is_roce(struct sa_path_rec *rec) { return ((rec->rec_type == SA_PATH_REC_TYPE_ROCE_V1) || diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h index fdb8633cbaff..fc16b826b2c1 100644 --- a/include/rdma/ib_smi.h +++ b/include/rdma/ib_smi.h @@ -144,5 +144,15 @@ ib_get_smp_direction(struct ib_smp *smp) #define IB_NOTICE_TRAP_DR_NOTICE 0x80 #define IB_NOTICE_TRAP_DR_TRUNC 0x40 - +/** + * ib_init_query_mad - Initialize query MAD. + * @mad: MAD to initialize. + */ +static inline void ib_init_query_mad(struct ib_smp *mad) +{ + mad->base_version = IB_MGMT_BASE_VERSION; + mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->class_version = 1; + mad->method = IB_MGMT_METHOD_GET; +} #endif /* IB_SMI_H */ diff --git a/include/rdma/ib_sysfs.h b/include/rdma/ib_sysfs.h new file mode 100644 index 000000000000..3b77cfd74d9a --- /dev/null +++ b/include/rdma/ib_sysfs.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* + * Copyright (c) 2021 Mellanox Technologies Ltd. All rights reserved. + */ +#ifndef DEF_RDMA_IB_SYSFS_H +#define DEF_RDMA_IB_SYSFS_H + +#include <linux/sysfs.h> + +struct ib_device; + +struct ib_port_attribute { + struct attribute attr; + ssize_t (*show)(struct ib_device *ibdev, u32 port_num, + struct ib_port_attribute *attr, char *buf); + ssize_t (*store)(struct ib_device *ibdev, u32 port_num, + struct ib_port_attribute *attr, const char *buf, + size_t count); +}; + +#define IB_PORT_ATTR_RW(_name) \ + struct ib_port_attribute ib_port_attr_##_name = __ATTR_RW(_name) + +#define IB_PORT_ATTR_ADMIN_RW(_name) \ + struct ib_port_attribute ib_port_attr_##_name = \ + __ATTR_RW_MODE(_name, 0600) + +#define IB_PORT_ATTR_RO(_name) \ + struct ib_port_attribute ib_port_attr_##_name = __ATTR_RO(_name) + +#define IB_PORT_ATTR_WO(_name) \ + struct ib_port_attribute ib_port_attr_##_name = __ATTR_WO(_name) + +struct ib_device *ib_port_sysfs_get_ibdev_kobj(struct kobject *kobj, + u32 *port_num); + +#endif diff --git a/include/rdma/ib_ucaps.h b/include/rdma/ib_ucaps.h new file mode 100644 index 000000000000..d9f96be3a553 --- /dev/null +++ b/include/rdma/ib_ucaps.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ + +#ifndef _IB_UCAPS_H_ +#define _IB_UCAPS_H_ + +#define UCAP_ENABLED(ucaps, type) (!!((ucaps) & (1U << (type)))) + +enum rdma_user_cap { + RDMA_UCAP_MLX5_CTRL_LOCAL, + RDMA_UCAP_MLX5_CTRL_OTHER_VHCA, + RDMA_UCAP_MAX +}; + +void ib_cleanup_ucaps(void); +int ib_get_ucaps(int *fds, int fd_count, uint64_t *idx_mask); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int ib_create_ucap(enum rdma_user_cap type); +void ib_remove_ucap(enum rdma_user_cap type); +#else +static inline int ib_create_ucap(enum rdma_user_cap type) +{ + return -EOPNOTSUPP; +} +static inline void ib_remove_ucap(enum rdma_user_cap type) {} +#endif /* CONFIG_INFINIBAND_USER_ACCESS */ + +#endif /* _IB_UCAPS_H_ */ diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 70597508c765..7dc7b1cc71b5 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. */ #ifndef IB_UMEM_H @@ -13,6 +14,7 @@ struct ib_ucontext; struct ib_umem_odp; +struct dma_buf_attach_ops; struct ib_umem { struct ib_device *ibdev; @@ -22,18 +24,41 @@ struct ib_umem { unsigned long address; u32 writable : 1; u32 is_odp : 1; - struct work_struct work; - struct sg_table sg_head; - int nmap; - unsigned int sg_nents; + u32 is_dmabuf : 1; + struct sg_append_table sgt_append; }; +struct ib_umem_dmabuf { + struct ib_umem umem; + struct dma_buf_attachment *attach; + struct sg_table *sgt; + struct scatterlist *first_sg; + struct scatterlist *last_sg; + unsigned long first_sg_offset; + unsigned long last_sg_trim; + void *private; + u8 pinned : 1; + u8 revoked : 1; +}; + +static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem) +{ + return container_of(umem, struct ib_umem_dmabuf, umem); +} + /* Returns the offset of the umem start relative to the first page. */ static inline int ib_umem_offset(struct ib_umem *umem) { return umem->address & ~PAGE_MASK; } +static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem, + unsigned long pgsz) +{ + return (sg_dma_address(umem->sgt_append.sgt.sgl) + ib_umem_offset(umem)) & + (pgsz - 1); +} + static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem, unsigned long pgsz) { @@ -51,7 +76,15 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, struct ib_umem *umem, unsigned long pgsz) { - __rdma_block_iter_start(biter, umem->sg_head.sgl, umem->nmap, pgsz); + __rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl, + umem->sgt_append.sgt.nents, pgsz); + biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1); + biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz); +} + +static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter) +{ + return __rdma_block_iter_next(biter) && biter->__sg_numblocks--; } /** @@ -67,7 +100,7 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, */ #define rdma_umem_for_each_dma_block(umem, biter, pgsz) \ for (__rdma_umem_block_iter_start(biter, umem, pgsz); \ - __rdma_block_iter_next(biter);) + __rdma_umem_block_iter_next(biter);) #ifdef CONFIG_INFINIBAND_USER_MEM @@ -80,6 +113,54 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt); +/** + * ib_umem_find_best_pgoff - Find best HW page size + * + * @umem: umem struct + * @pgsz_bitmap bitmap of HW supported page sizes + * @pgoff_bitmask: Mask of bits that can be represented with an offset + * + * This is very similar to ib_umem_find_best_pgsz() except instead of accepting + * an IOVA it accepts a bitmask specifying what address bits can be represented + * with a page offset. + * + * For instance if the HW has multiple page sizes, requires 64 byte alignemnt, + * and can support aligned offsets up to 4032 then pgoff_bitmask would be + * "111111000000". + * + * If the pgoff_bitmask requires either alignment in the low bit or an + * unavailable page size for the high bits, this function returns 0. + */ +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + struct scatterlist *sg = umem->sgt_append.sgt.sgl; + dma_addr_t dma_addr; + + dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK); + return ib_umem_find_best_pgsz(umem, pgsz_bitmap, + dma_addr & pgoff_bitmask); +} + +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, size_t size, + int fd, int access, + const struct dma_buf_attach_ops *ops); +struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, + unsigned long offset, + size_t size, int fd, + int access); +struct ib_umem_dmabuf * +ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, + struct device *dma_device, + unsigned long offset, size_t size, + int fd, int access); +int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); +void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); +void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); +void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf); + #else /* CONFIG_INFINIBAND_USER_MEM */ #include <linux/err.h> @@ -88,12 +169,12 @@ static inline struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, size_t size, int access) { - return ERR_PTR(-EINVAL); + return ERR_PTR(-EOPNOTSUPP); } static inline void ib_umem_release(struct ib_umem *umem) { } static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length) { - return -EINVAL; + return -EOPNOTSUPP; } static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, @@ -101,7 +182,44 @@ static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, { return 0; } +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + return 0; +} +static inline +struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, + unsigned long offset, + size_t size, int fd, + int access, + struct dma_buf_attach_ops *ops) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline struct ib_umem_dmabuf * +ib_umem_dmabuf_get_pinned(struct ib_device *device, unsigned long offset, + size_t size, int fd, int access) +{ + return ERR_PTR(-EOPNOTSUPP); +} -#endif /* CONFIG_INFINIBAND_USER_MEM */ +static inline struct ib_umem_dmabuf * +ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device, + struct device *dma_device, + unsigned long offset, size_t size, + int fd, int access) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) +{ + return -EOPNOTSUPP; +} +static inline void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf) { } +static inline void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { } +static inline void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf) {} + +#endif /* CONFIG_INFINIBAND_USER_MEM */ #endif /* IB_UMEM_H */ diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 0844c1d05ac6..2a24bf791c10 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -8,23 +8,17 @@ #include <rdma/ib_umem.h> #include <rdma/ib_verbs.h> +#include <linux/hmm-dma.h> struct ib_umem_odp { struct ib_umem umem; struct mmu_interval_notifier notifier; struct pid *tgid; - /* An array of the pfns included in the on-demand paging umem. */ - unsigned long *pfn_list; + struct hmm_dma_map map; /* - * An array with DMA addresses mapped for pfns in pfn_list. - * The lower two bits designate access permissions. - * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT. - */ - dma_addr_t *dma_list; - /* - * The umem_mutex protects the page_list and dma_list fields of an ODP + * The umem_mutex protects the page_list field of an ODP * umem, allowing only a single thread to map/unmap pages. The mutex * also protects access to the mmu notifier counters. */ @@ -67,19 +61,6 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) umem_odp->page_shift; } -/* - * The lower 2 bits of the DMA address signal the R/W permissions for - * the entry. To upgrade the permissions, provide the appropriate - * bitmask to the map_dma_pages function. - * - * Be aware that upgrading a mapped address might result in change of - * the DMA address for the page. - */ -#define ODP_READ_ALLOWED_BIT (1<<0ULL) -#define ODP_WRITE_ALLOWED_BIT (1<<1ULL) - -#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) - #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_umem_odp * diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9bf6c319a670..af43a8d2a74a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2,7 +2,7 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2020 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -12,6 +12,7 @@ #ifndef IB_VERBS_H #define IB_VERBS_H +#include <linux/ethtool.h> #include <linux/types.h> #include <linux/device.h> #include <linux/dma-mapping.h> @@ -49,6 +50,8 @@ struct ib_uqp_object; struct ib_usrq_object; struct ib_uwq_object; struct rdma_cm_id; +struct ib_port; +struct hw_stats_device_data; extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; @@ -56,9 +59,6 @@ extern struct workqueue_struct *ib_comp_unbound_wq; struct ib_ucq_object; -__printf(3, 4) __cold -void ibdev_printk(const char *level, const struct ib_device *ibdev, - const char *format, ...); __printf(2, 3) __cold void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...); __printf(2, 3) __cold @@ -151,7 +151,7 @@ struct ib_gid_attr { union ib_gid gid; enum ib_gid_type gid_type; u16 index; - u8 port_num; + u32 port_num; }; enum { @@ -217,32 +217,24 @@ enum rdma_link_layer { }; enum ib_device_cap_flags { - IB_DEVICE_RESIZE_MAX_WR = (1 << 0), - IB_DEVICE_BAD_PKEY_CNTR = (1 << 1), - IB_DEVICE_BAD_QKEY_CNTR = (1 << 2), - IB_DEVICE_RAW_MULTI = (1 << 3), - IB_DEVICE_AUTO_PATH_MIG = (1 << 4), - IB_DEVICE_CHANGE_PHY_PORT = (1 << 5), - IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6), - IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7), - IB_DEVICE_SHUTDOWN_PORT = (1 << 8), - /* Not in use, former INIT_TYPE = (1 << 9),*/ - IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10), - IB_DEVICE_SYS_IMAGE_GUID = (1 << 11), - IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12), - IB_DEVICE_SRQ_RESIZE = (1 << 13), - IB_DEVICE_N_NOTIFY_CQ = (1 << 14), - - /* - * This device supports a per-device lkey or stag that can be - * used without performing a memory registration for the local - * memory. Note that ULPs should never check this flag, but - * instead of use the local_dma_lkey flag in the ib_pd structure, - * which will always contain a usable lkey. - */ - IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15), - /* Reserved, old SEND_W_INV = (1 << 16),*/ - IB_DEVICE_MEM_WINDOW = (1 << 17), + IB_DEVICE_RESIZE_MAX_WR = IB_UVERBS_DEVICE_RESIZE_MAX_WR, + IB_DEVICE_BAD_PKEY_CNTR = IB_UVERBS_DEVICE_BAD_PKEY_CNTR, + IB_DEVICE_BAD_QKEY_CNTR = IB_UVERBS_DEVICE_BAD_QKEY_CNTR, + IB_DEVICE_RAW_MULTI = IB_UVERBS_DEVICE_RAW_MULTI, + IB_DEVICE_AUTO_PATH_MIG = IB_UVERBS_DEVICE_AUTO_PATH_MIG, + IB_DEVICE_CHANGE_PHY_PORT = IB_UVERBS_DEVICE_CHANGE_PHY_PORT, + IB_DEVICE_UD_AV_PORT_ENFORCE = IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE, + IB_DEVICE_CURR_QP_STATE_MOD = IB_UVERBS_DEVICE_CURR_QP_STATE_MOD, + IB_DEVICE_SHUTDOWN_PORT = IB_UVERBS_DEVICE_SHUTDOWN_PORT, + /* IB_DEVICE_INIT_TYPE = IB_UVERBS_DEVICE_INIT_TYPE, (not in use) */ + IB_DEVICE_PORT_ACTIVE_EVENT = IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT, + IB_DEVICE_SYS_IMAGE_GUID = IB_UVERBS_DEVICE_SYS_IMAGE_GUID, + IB_DEVICE_RC_RNR_NAK_GEN = IB_UVERBS_DEVICE_RC_RNR_NAK_GEN, + IB_DEVICE_SRQ_RESIZE = IB_UVERBS_DEVICE_SRQ_RESIZE, + IB_DEVICE_N_NOTIFY_CQ = IB_UVERBS_DEVICE_N_NOTIFY_CQ, + + /* Reserved, old SEND_W_INV = 1 << 16,*/ + IB_DEVICE_MEM_WINDOW = IB_UVERBS_DEVICE_MEM_WINDOW, /* * Devices should set IB_DEVICE_UD_IP_SUM if they support * insertion of UDP and TCP checksum on outgoing UD IPoIB @@ -250,9 +242,8 @@ enum ib_device_cap_flags { * incoming messages. Setting this flag implies that the * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. */ - IB_DEVICE_UD_IP_CSUM = (1 << 18), - IB_DEVICE_UD_TSO = (1 << 19), - IB_DEVICE_XRC = (1 << 20), + IB_DEVICE_UD_IP_CSUM = IB_UVERBS_DEVICE_UD_IP_CSUM, + IB_DEVICE_XRC = IB_UVERBS_DEVICE_XRC, /* * This device supports the IB "base memory management extension", @@ -263,31 +254,57 @@ enum ib_device_cap_flags { * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the * stag. */ - IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21), - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22), - IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23), - IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24), - IB_DEVICE_RC_IP_CSUM = (1 << 25), + IB_DEVICE_MEM_MGT_EXTENSIONS = IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS, + IB_DEVICE_MEM_WINDOW_TYPE_2A = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2A, + IB_DEVICE_MEM_WINDOW_TYPE_2B = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2B, + IB_DEVICE_RC_IP_CSUM = IB_UVERBS_DEVICE_RC_IP_CSUM, /* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */ - IB_DEVICE_RAW_IP_CSUM = (1 << 26), - /* - * Devices should set IB_DEVICE_CROSS_CHANNEL if they - * support execution of WQEs that involve synchronization - * of I/O operations with single completion queue managed - * by hardware. - */ - IB_DEVICE_CROSS_CHANNEL = (1 << 27), - IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), - IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30), - IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), - IB_DEVICE_SG_GAPS_REG = (1ULL << 32), - IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), + IB_DEVICE_RAW_IP_CSUM = IB_UVERBS_DEVICE_RAW_IP_CSUM, + IB_DEVICE_MANAGED_FLOW_STEERING = + IB_UVERBS_DEVICE_MANAGED_FLOW_STEERING, /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */ - IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), - IB_DEVICE_RDMA_NETDEV_OPA = (1ULL << 35), + IB_DEVICE_RAW_SCATTER_FCS = IB_UVERBS_DEVICE_RAW_SCATTER_FCS, /* The device supports padding incoming writes to cacheline. */ - IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), - IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37), + IB_DEVICE_PCI_WRITE_END_PADDING = + IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING, + /* Placement type attributes */ + IB_DEVICE_FLUSH_GLOBAL = IB_UVERBS_DEVICE_FLUSH_GLOBAL, + IB_DEVICE_FLUSH_PERSISTENT = IB_UVERBS_DEVICE_FLUSH_PERSISTENT, + IB_DEVICE_ATOMIC_WRITE = IB_UVERBS_DEVICE_ATOMIC_WRITE, +}; + +enum ib_kernel_cap_flags { + /* + * This device supports a per-device lkey or stag that can be + * used without performing a memory registration for the local + * memory. Note that ULPs should never check this flag, but + * instead of use the local_dma_lkey flag in the ib_pd structure, + * which will always contain a usable lkey. + */ + IBK_LOCAL_DMA_LKEY = 1 << 0, + /* IB_QP_CREATE_INTEGRITY_EN is supported to implement T10-PI */ + IBK_INTEGRITY_HANDOVER = 1 << 1, + /* IB_ACCESS_ON_DEMAND is supported during reg_user_mr() */ + IBK_ON_DEMAND_PAGING = 1 << 2, + /* IB_MR_TYPE_SG_GAPS is supported */ + IBK_SG_GAPS_REG = 1 << 3, + /* Driver supports RDMA_NLDEV_CMD_DELLINK */ + IBK_ALLOW_USER_UNREG = 1 << 4, + + /* ipoib will use IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK */ + IBK_BLOCK_MULTICAST_LOOPBACK = 1 << 5, + /* iopib will use IB_QP_CREATE_IPOIB_UD_LSO for its QPs */ + IBK_UD_TSO = 1 << 6, + /* iopib will use the device ops: + * get_vf_config + * get_vf_guid + * get_vf_stats + * set_vf_guid + * set_vf_link_state + */ + IBK_VIRTUAL_FUNCTION = 1 << 7, + /* ipoib will use IB_QP_CREATE_NETDEV_USE for its QPs */ + IBK_RDMA_NETDEV_OPA = 1 << 8, }; enum ib_atomic_cap { @@ -297,17 +314,19 @@ enum ib_atomic_cap { }; enum ib_odp_general_cap_bits { - IB_ODP_SUPPORT = 1 << 0, - IB_ODP_SUPPORT_IMPLICIT = 1 << 1, + IB_ODP_SUPPORT = IB_UVERBS_ODP_SUPPORT, + IB_ODP_SUPPORT_IMPLICIT = IB_UVERBS_ODP_SUPPORT_IMPLICIT, }; enum ib_odp_transport_cap_bits { - IB_ODP_SUPPORT_SEND = 1 << 0, - IB_ODP_SUPPORT_RECV = 1 << 1, - IB_ODP_SUPPORT_WRITE = 1 << 2, - IB_ODP_SUPPORT_READ = 1 << 3, - IB_ODP_SUPPORT_ATOMIC = 1 << 4, - IB_ODP_SUPPORT_SRQ_RECV = 1 << 5, + IB_ODP_SUPPORT_SEND = IB_UVERBS_ODP_SUPPORT_SEND, + IB_ODP_SUPPORT_RECV = IB_UVERBS_ODP_SUPPORT_RECV, + IB_ODP_SUPPORT_WRITE = IB_UVERBS_ODP_SUPPORT_WRITE, + IB_ODP_SUPPORT_READ = IB_UVERBS_ODP_SUPPORT_READ, + IB_ODP_SUPPORT_ATOMIC = IB_UVERBS_ODP_SUPPORT_ATOMIC, + IB_ODP_SUPPORT_SRQ_RECV = IB_UVERBS_ODP_SUPPORT_SRQ_RECV, + IB_ODP_SUPPORT_FLUSH = IB_UVERBS_ODP_SUPPORT_FLUSH, + IB_ODP_SUPPORT_ATOMIC_WRITE = IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE, }; struct ib_odp_caps { @@ -386,6 +405,7 @@ struct ib_device_attr { int max_qp; int max_qp_wr; u64 device_cap_flags; + u64 kernel_cap_flags; int max_send_sge; int max_recv_sge; int max_sge_rd; @@ -501,6 +521,23 @@ enum ib_port_state { IB_PORT_ACTIVE_DEFER = 5 }; +static inline const char *__attribute_const__ +ib_port_state_to_str(enum ib_port_state state) +{ + const char * const states[] = { + [IB_PORT_NOP] = "NOP", + [IB_PORT_DOWN] = "DOWN", + [IB_PORT_INIT] = "INIT", + [IB_PORT_ARMED] = "ARMED", + [IB_PORT_ACTIVE] = "ACTIVE", + [IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER", + }; + + if (state < ARRAY_SIZE(states)) + return states[state]; + return "UNKNOWN"; +} + enum ib_port_phys_state { IB_PORT_PHYS_STATE_SLEEP = 1, IB_PORT_PHYS_STATE_POLLING = 2, @@ -540,20 +577,39 @@ enum ib_port_speed { IB_SPEED_EDR = 32, IB_SPEED_HDR = 64, IB_SPEED_NDR = 128, + IB_SPEED_XDR = 256, +}; + +enum ib_stat_flag { + IB_STAT_FLAG_OPTIONAL = 1 << 0, +}; + +/** + * struct rdma_stat_desc + * @name - The name of the counter + * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL + * @priv - Driver private information; Core code should not use + */ +struct rdma_stat_desc { + const char *name; + unsigned int flags; + const void *priv; }; /** * struct rdma_hw_stats * @lock - Mutex to protect parallel write access to lifespan and values - * of counters, which are 64bits and not guaranteeed to be written + * of counters, which are 64bits and not guaranteed to be written * atomicaly on 32bits systems. * @timestamp - Used by the core code to track when the last update was * @lifespan - Used by the core code to determine how old the counters * should be before being updated again. Stored in jiffies, defaults * to 10 milliseconds, drivers can override the default be specifying * their own value during their allocation routine. - * @name - Array of pointers to static names used for the counters in - * directory. + * @descs - Array of pointers to static descriptors used for the counters + * in directory. + * @is_disabled - A bitmap to indicate each counter is currently disabled + * or not. * @num_counters - How many hardware counters there are. If name is * shorter than this number, a kernel oops will result. Driver authors * are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters) @@ -565,36 +621,19 @@ struct rdma_hw_stats { struct mutex lock; /* Protect lifespan and values[] */ unsigned long timestamp; unsigned long lifespan; - const char * const *names; + const struct rdma_stat_desc *descs; + unsigned long *is_disabled; int num_counters; - u64 value[]; + u64 value[] __counted_by(num_counters); }; #define RDMA_HW_STATS_DEFAULT_LIFESPAN 10 -/** - * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct - * for drivers. - * @names - Array of static const char * - * @num_counters - How many elements in array - * @lifespan - How many milliseconds between updates - */ -static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( - const char * const *names, int num_counters, - unsigned long lifespan) -{ - struct rdma_hw_stats *stats; - stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64), - GFP_KERNEL); - if (!stats) - return NULL; - stats->names = names; - stats->num_counters = num_counters; - stats->lifespan = msecs_to_jiffies(lifespan); - - return stats; -} +struct rdma_hw_stats *rdma_alloc_hw_stats_struct( + const struct rdma_stat_desc *descs, int num_counters, + unsigned long lifespan); +void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats); /* Define bits for the various functionality this port needs to be supported by * the core. @@ -735,7 +774,7 @@ struct ib_event { struct ib_qp *qp; struct ib_srq *srq; struct ib_wq *wq; - u8 port_num; + u32 port_num; } element; enum ib_event_type event; }; @@ -818,6 +857,7 @@ enum ib_rate { IB_RATE_50_GBPS = 20, IB_RATE_400_GBPS = 21, IB_RATE_600_GBPS = 22, + IB_RATE_800_GBPS = 23, }; /** @@ -918,7 +958,7 @@ struct rdma_ah_attr { struct ib_global_route grh; u8 sl; u8 static_rate; - u8 port_num; + u32 port_num; u8 ah_flags; enum rdma_ah_attr_type type; union { @@ -964,9 +1004,11 @@ enum ib_wc_opcode { IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW, IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV, IB_WC_LSO = IB_UVERBS_WC_TSO, + IB_WC_ATOMIC_WRITE = IB_UVERBS_WC_ATOMIC_WRITE, IB_WC_REG_MR, IB_WC_MASKED_COMP_SWAP, IB_WC_MASKED_FETCH_ADD, + IB_WC_FLUSH = IB_UVERBS_WC_FLUSH, /* * Set value of IB_WC_RECV so consumers can test if a completion is a * receive by testing (opcode & IB_WC_RECV). @@ -1005,7 +1047,7 @@ struct ib_wc { u16 pkey_index; u8 sl; u8 dlid_path_bits; - u8 port_num; /* valid only for DR SMPs on switches */ + u32 port_num; /* valid only for DR SMPs on switches */ u8 smac[ETH_ALEN]; u16 vlan_id; u8 network_hdr_type; @@ -1070,7 +1112,7 @@ struct ib_qp_cap { /* * Maximum number of rdma_rw_ctx structures in flight at a time. - * ib_create_qp() will calculate the right amount of neededed WRs + * ib_create_qp() will calculate the right amount of needed WRs * and MRs based on this. */ u32 max_rdma_ctxs; @@ -1144,7 +1186,7 @@ enum ib_qp_create_flags { */ struct ib_qp_init_attr { - /* Consumer's event_handler callback must not block */ + /* This callback occurs in workqueue context */ void (*event_handler)(struct ib_event *, void *); void *qp_context; @@ -1160,7 +1202,7 @@ struct ib_qp_init_attr { /* * Only needed for special QP types, or when using the RW API. */ - u8 port_num; + u32 port_num; struct ib_rwq_ind_table *rwq_ind_tbl; u32 source_qpn; }; @@ -1234,6 +1276,8 @@ enum ib_qp_attr_mask { IB_QP_RESERVED3 = (1<<23), IB_QP_RESERVED4 = (1<<24), IB_QP_RATE_LIMIT = (1<<25), + + IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0), }; enum ib_qp_state { @@ -1277,11 +1321,11 @@ struct ib_qp_attr { u8 max_rd_atomic; u8 max_dest_rd_atomic; u8 min_rnr_timer; - u8 port_num; + u32 port_num; u8 timeout; u8 retry_cnt; u8 rnr_retry; - u8 alt_port_num; + u32 alt_port_num; u8 alt_timeout; u32 rate_limit; struct net_device *xmit_slave; @@ -1305,6 +1349,8 @@ enum ib_wr_opcode { IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD, + IB_WR_FLUSH = IB_UVERBS_WR_FLUSH, + IB_WR_ATOMIC_WRITE = IB_UVERBS_WR_ATOMIC_WRITE, /* These are kernel only and can not be issued by userspace */ IB_WR_REG_MR = 0x20, @@ -1398,7 +1444,7 @@ struct ib_ud_wr { u32 remote_qpn; u32 remote_qkey; u16 pkey_index; /* valid for GSI only */ - u8 port_num; /* valid for DR SMPs on switch only */ + u32 port_num; /* valid for DR SMPs on switch only */ }; static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr) @@ -1438,10 +1484,12 @@ enum ib_access_flags { IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND, IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB, IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING, + IB_ACCESS_FLUSH_GLOBAL = IB_UVERBS_ACCESS_FLUSH_GLOBAL, + IB_ACCESS_FLUSH_PERSISTENT = IB_UVERBS_ACCESS_FLUSH_PERSISTENT, IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE, IB_ACCESS_SUPPORTED = - ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL, + ((IB_ACCESS_FLUSH_PERSISTENT << 1) - 1) | IB_ACCESS_OPTIONAL, }; /* @@ -1469,6 +1517,8 @@ enum rdma_remove_reason { RDMA_REMOVE_DRIVER_REMOVE, /* uobj is being cleaned-up before being committed */ RDMA_REMOVE_ABORT, + /* The driver failed to destroy the uobject and is being disconnected */ + RDMA_REMOVE_DRIVER_FAILURE, }; struct ib_rdmacg_object { @@ -1481,9 +1531,8 @@ struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; - bool cleanup_retryable; - struct ib_rdmacg_object cg_obj; + u64 enabled_caps; /* * Implementation details of the RDMA core, don't use in drivers: */ @@ -1607,22 +1656,31 @@ struct ib_srq { } xrc; }; } ext; + + /* + * Implementation details of the RDMA core, don't use in drivers: + */ + struct rdma_restrack_entry res; }; enum ib_raw_packet_caps { - /* Strip cvlan from incoming packet and report it in the matching work + /* + * Strip cvlan from incoming packet and report it in the matching work * completion is supported. */ - IB_RAW_PACKET_CAP_CVLAN_STRIPPING = (1 << 0), - /* Scatter FCS field of an incoming packet to host memory is supported. + IB_RAW_PACKET_CAP_CVLAN_STRIPPING = + IB_UVERBS_RAW_PACKET_CAP_CVLAN_STRIPPING, + /* + * Scatter FCS field of an incoming packet to host memory is supported. */ - IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1), + IB_RAW_PACKET_CAP_SCATTER_FCS = IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS, /* Checksum offloads are supported (for both send and receive). */ - IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2), - /* When a packet is received for an RQ with no receive WQEs, the + IB_RAW_PACKET_CAP_IP_CSUM = IB_UVERBS_RAW_PACKET_CAP_IP_CSUM, + /* + * When a packet is received for an RQ with no receive WQEs, the * packet processing is delayed. */ - IB_RAW_PACKET_CAP_DELAY_DROP = (1 << 3), + IB_RAW_PACKET_CAP_DELAY_DROP = IB_UVERBS_RAW_PACKET_CAP_DELAY_DROP, }; enum ib_wq_type { @@ -1705,7 +1763,7 @@ struct ib_qp_security; struct ib_port_pkey { enum port_pkey_state state; u16 pkey_index; - u8 port_num; + u32 port_num; struct list_head qp_list; struct list_head to_error_list; struct ib_qp_security *sec; @@ -1747,6 +1805,7 @@ struct ib_qp { struct list_head rdma_mrs; struct list_head sig_mrs; struct ib_srq *srq; + struct completion srq_completion; struct ib_xrcd *xrcd; /* XRC TGT QPs only */ struct list_head xrcd_list; @@ -1756,6 +1815,7 @@ struct ib_qp { struct ib_qp *real_qp; struct ib_uqp_object *uobject; void (*event_handler)(struct ib_event *, void *); + void (*registered_event_handler)(struct ib_event *, void *); void *qp_context; /* sgid_attrs associated with the AV's */ const struct ib_gid_attr *av_sgid_attr; @@ -1766,7 +1826,7 @@ struct ib_qp { enum ib_qp_type qp_type; struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_qp_security *qp_sec; - u8 port; + u32 port; bool integrity_en; /* @@ -1869,8 +1929,6 @@ struct ib_flow_eth_filter { u8 src_mac[6]; __be16 ether_type; __be16 vlan_tag; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_eth { @@ -1883,8 +1941,6 @@ struct ib_flow_spec_eth { struct ib_flow_ib_filter { __be16 dlid; __u8 sl; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_ib { @@ -1908,8 +1964,6 @@ struct ib_flow_ipv4_filter { u8 tos; u8 ttl; u8 flags; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_ipv4 { @@ -1926,9 +1980,7 @@ struct ib_flow_ipv6_filter { u8 next_hdr; u8 traffic_class; u8 hop_limit; - /* Must be last */ - u8 real_sz[]; -}; +} __packed; struct ib_flow_spec_ipv6 { u32 type; @@ -1940,8 +1992,6 @@ struct ib_flow_spec_ipv6 { struct ib_flow_tcp_udp_filter { __be16 dst_port; __be16 src_port; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_tcp_udp { @@ -1953,7 +2003,6 @@ struct ib_flow_spec_tcp_udp { struct ib_flow_tunnel_filter { __be32 tunnel_id; - u8 real_sz[]; }; /* ib_flow_spec_tunnel describes the Vxlan tunnel @@ -1969,8 +2018,6 @@ struct ib_flow_spec_tunnel { struct ib_flow_esp_filter { __be32 spi; __be32 seq; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_esp { @@ -1984,8 +2031,6 @@ struct ib_flow_gre_filter { __be16 c_ks_res0_ver; __be16 protocol; __be32 key; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_gre { @@ -1997,8 +2042,6 @@ struct ib_flow_spec_gre { struct ib_flow_mpls_filter { __be32 tag; - /* Must be last */ - u8 real_sz[]; }; struct ib_flow_spec_mpls { @@ -2062,7 +2105,7 @@ struct ib_flow_attr { u16 priority; u32 flags; u8 num_of_specs; - u8 port; + u32 port; union ib_flow_spec flows[]; }; @@ -2131,7 +2174,6 @@ struct ib_flow_action { }; struct ib_mad; -struct ib_grh; enum ib_process_mad_flags { IB_MAD_IGNORE_MKEY = 1, @@ -2152,6 +2194,7 @@ struct ib_port_cache { struct ib_gid_table *gid; u8 lmc; enum ib_port_state port_state; + enum ib_port_state last_port_state; }; struct ib_port_immutable { @@ -2167,15 +2210,18 @@ struct ib_port_data { struct ib_port_immutable immutable; spinlock_t pkey_list_lock; + + spinlock_t netdev_lock; + struct list_head pkey_list; struct ib_port_cache cache; - spinlock_t netdev_lock; struct net_device __rcu *netdev; + netdevice_tracker netdev_tracker; struct hlist_node ndev_hash_link; struct rdma_port_counter port_counter; - struct rdma_hw_stats *hw_stats; + struct ib_port *sysfs; }; /* rdma netdev type - specifies protocol type */ @@ -2191,7 +2237,7 @@ enum rdma_netdev_t { struct rdma_netdev { void *clnt_priv; struct ib_device *hca; - u8 port_num; + u32 port_num; int mtu; /* @@ -2212,6 +2258,8 @@ struct rdma_netdev { int set_qkey, u32 qkey); int (*detach_mcast)(struct net_device *dev, struct ib_device *hca, union ib_gid *gid, u16 mlid); + /* timeout */ + void (*tx_timeout)(struct net_device *dev, unsigned int txqueue); }; struct rdma_netdev_alloc_params { @@ -2220,13 +2268,15 @@ struct rdma_netdev_alloc_params { unsigned int rxqs; void *param; - int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num, + int (*initialize_rdma_netdev)(struct ib_device *device, u32 port_num, struct net_device *netdev, void *param); }; struct ib_odp_counters { atomic64_t faults; + atomic64_t faults_handled; atomic64_t invalidations; + atomic64_t invalidations_handled; atomic64_t prefetch; }; @@ -2255,8 +2305,13 @@ struct iw_cm_conn_param; !__same_type(((struct drv_struct *)NULL)->member, \ struct ib_struct))) -#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \ - ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp)) +#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \ + ((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \ + gfp, false)) + +#define rdma_zalloc_drv_obj_numa(ib_dev, ib_type) \ + ((struct ib_type *)rdma_zalloc_obj(ib_dev, ib_dev->ops.size_##ib_type, \ + GFP_KERNEL, true)) #define rdma_zalloc_drv_obj(ib_dev, ib_type) \ rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL) @@ -2289,6 +2344,14 @@ struct ib_device_ops { u32 uverbs_abi_ver; unsigned int uverbs_no_driver_id_binding:1; + /* + * NOTE: New drivers should not make use of device_group; instead new + * device parameter should be exposed via netlink command. This + * mechanism exists only for existing drivers. + */ + const struct attribute_group *device_group; + const struct attribute_group **port_groups; + int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr, const struct ib_send_wr **bad_send_wr); int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, @@ -2298,12 +2361,11 @@ struct ib_device_ops { int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int (*peek_cq)(struct ib_cq *cq, int wc_cnt); int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags); - int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt); int (*post_srq_recv)(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); int (*process_mad)(struct ib_device *device, int process_mad_flags, - u8 port_num, const struct ib_wc *in_wc, + u32 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad *in_mad, struct ib_mad *out_mad, size_t *out_mad_size, u16 *out_mad_pkey_index); @@ -2315,9 +2377,9 @@ struct ib_device_ops { void (*get_dev_fw_str)(struct ib_device *device, char *str); const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, int comp_vector); - int (*query_port)(struct ib_device *device, u8 port_num, + int (*query_port)(struct ib_device *device, u32 port_num, struct ib_port_attr *port_attr); - int (*modify_port)(struct ib_device *device, u8 port_num, + int (*modify_port)(struct ib_device *device, u32 port_num, int port_modify_mask, struct ib_port_modify *port_modify); /** @@ -2326,10 +2388,10 @@ struct ib_device_ops { * structure to avoid cache line misses when accessing struct ib_device * in fast paths. */ - int (*get_port_immutable)(struct ib_device *device, u8 port_num, + int (*get_port_immutable)(struct ib_device *device, u32 port_num, struct ib_port_immutable *immutable); enum rdma_link_layer (*get_link_layer)(struct ib_device *device, - u8 port_num); + u32 port_num); /** * When calling get_netdev, the HW vendor's driver should return the * net device of device @device at port @port_num or NULL if such @@ -2338,7 +2400,8 @@ struct ib_device_ops { * that this function returns NULL before the net device has finished * NETDEV_UNREGISTER state. */ - struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num); + struct net_device *(*get_netdev)(struct ib_device *device, + u32 port_num); /** * rdma netdev operation * @@ -2346,11 +2409,11 @@ struct ib_device_ops { * must return -EOPNOTSUPP if it doesn't support the specified type. */ struct net_device *(*alloc_rdma_netdev)( - struct ib_device *device, u8 port_num, enum rdma_netdev_t type, + struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *)); - int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num, + int (*rdma_netdev_get_params)(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, struct rdma_netdev_alloc_params *params); /** @@ -2358,7 +2421,7 @@ struct ib_device_ops { * link layer is either IB or iWarp. It is no-op if @port_num port * is RoCE link layer. */ - int (*query_gid)(struct ib_device *device, u8 port_num, int index, + int (*query_gid)(struct ib_device *device, u32 port_num, int index, union ib_gid *gid); /** * When calling add_gid, the HW vendor's driver should add the gid @@ -2383,7 +2446,7 @@ struct ib_device_ops { * This function is only called when roce_gid_table is used. */ int (*del_gid)(const struct ib_gid_attr *attr, void **context); - int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, + int (*query_pkey)(struct ib_device *device, u32 port_num, u16 index, u16 *pkey); int (*alloc_ucontext)(struct ib_ucontext *context, struct ib_udata *udata); @@ -2401,6 +2464,8 @@ struct ib_device_ops { int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, struct ib_udata *udata); + int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah, u32 flags); @@ -2412,16 +2477,15 @@ struct ib_device_ops { struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata); - struct ib_qp *(*create_qp)(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr, - struct ib_udata *udata); + int (*create_qp)(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata); int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata); int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata); + struct uverbs_attr_bundle *attrs); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); @@ -2429,9 +2493,14 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); - int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_pd *pd, struct ib_udata *udata); + struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset, + u64 length, u64 virt_addr, int fd, + int mr_access_flags, + struct uverbs_attr_bundle *attrs); + struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); @@ -2442,6 +2511,14 @@ struct ib_device_ops { enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge, struct uverbs_attr_bundle *attrs); + + /* + * Kernel users should universally support relaxed ordering (RO), as + * they are designed to read data only after observing the CQE and use + * the DMA API correctly. + * + * Some drivers implicitly enable RO if platform supports it. + */ int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, @@ -2456,25 +2533,17 @@ struct ib_device_ops { struct ib_flow_attr *flow_attr, struct ib_udata *udata); int (*destroy_flow)(struct ib_flow *flow_id); - struct ib_flow_action *(*create_flow_action_esp)( - struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs); int (*destroy_flow_action)(struct ib_flow_action *action); - int (*modify_flow_action_esp)( - struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs); - int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, + int (*set_vf_link_state)(struct ib_device *device, int vf, u32 port, int state); - int (*get_vf_config)(struct ib_device *device, int vf, u8 port, + int (*get_vf_config)(struct ib_device *device, int vf, u32 port, struct ifla_vf_info *ivf); - int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, + int (*get_vf_stats)(struct ib_device *device, int vf, u32 port, struct ifla_vf_stats *stats); - int (*get_vf_guid)(struct ib_device *device, int vf, u8 port, + int (*get_vf_guid)(struct ib_device *device, int vf, u32 port, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid); - int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, + int (*set_vf_guid)(struct ib_device *device, int vf, u32 port, u64 guid, int type); struct ib_wq *(*create_wq)(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, @@ -2506,13 +2575,14 @@ struct ib_device_ops { unsigned int *meta_sg_offset); /** - * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the - * driver initialized data. The struct is kfree()'ed by the sysfs - * core when the device is removed. A lifespan of -1 in the return - * struct tells the core to set a default lifespan. + * alloc_hw_[device,port]_stats - Allocate a struct rdma_hw_stats and + * fill in the driver initialized data. The struct is kfree()'ed by + * the sysfs core when the device is removed. A lifespan of -1 in the + * return struct tells the core to set a default lifespan. */ - struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device, - u8 port_num); + struct rdma_hw_stats *(*alloc_hw_device_stats)(struct ib_device *device); + struct rdma_hw_stats *(*alloc_hw_port_stats)(struct ib_device *device, + u32 port_num); /** * get_hw_stats - Fill in the counter value(s) in the stats struct. * @index - The index in the value array we wish to have updated, or @@ -2526,13 +2596,15 @@ struct ib_device_ops { * one given in index at their option */ int (*get_hw_stats)(struct ib_device *device, - struct rdma_hw_stats *stats, u8 port, int index); - /* - * This function is called once for each port when a ib device is - * registered. + struct rdma_hw_stats *stats, u32 port, int index); + + /** + * modify_hw_stat - Modify the counter configuration + * @enable: true/false when enable/disable a counter + * Return codes - 0 on success or error code otherwise. */ - int (*init_port)(struct ib_device *device, u8 port_num, - struct kobject *port_sysfs); + int (*modify_hw_stat)(struct ib_device *device, u32 port, + unsigned int counter_index, bool enable); /** * Allows rdma drivers to add their own restrack attributes. */ @@ -2543,6 +2615,8 @@ struct ib_device_ops { int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp); int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp); int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id); + int (*fill_res_srq_entry)(struct sk_buff *msg, struct ib_srq *ib_srq); + int (*fill_res_srq_entry_raw)(struct sk_buff *msg, struct ib_srq *ib_srq); /* Device lifecycle callbacks */ /* @@ -2572,12 +2646,13 @@ struct ib_device_ops { * @counter - The counter to be bound. If counter->id is zero then * the driver needs to allocate a new counter and set counter->id */ - int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp); + int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp, + u32 port); /** * counter_unbind_qp - Unbind the qp from the dynamically-allocated * counter and bind it onto the default one */ - int (*counter_unbind_qp)(struct ib_qp *qp); + int (*counter_unbind_qp)(struct ib_qp *qp, u32 port); /** * counter_dealloc -De-allocate the hw counter */ @@ -2594,6 +2669,11 @@ struct ib_device_ops { int (*counter_update_stats)(struct rdma_counter *counter); /** + * counter_init - Initialize the driver specific rdma counter struct. + */ + void (*counter_init)(struct rdma_counter *counter); + + /** * Allows rdma drivers to add their own restrack attributes * dumped via 'rdma stat' iproute2 command. */ @@ -2603,15 +2683,48 @@ struct ib_device_ops { int (*query_ucontext)(struct ib_ucontext *context, struct uverbs_attr_bundle *attrs); + /* + * Provide NUMA node. This API exists for rdmavt/hfi1 only. + * Everyone else relies on Linux memory management model. + */ + int (*get_numa_node)(struct ib_device *dev); + + /** + * add_sub_dev - Add a sub IB device + */ + struct ib_device *(*add_sub_dev)(struct ib_device *parent, + enum rdma_nl_dev_type type, + const char *name); + + /** + * del_sub_dev - Delete a sub IB device + */ + void (*del_sub_dev)(struct ib_device *sub_dev); + + /** + * ufile_cleanup - Attempt to cleanup ubojects HW resources inside + * the ufile. + */ + void (*ufile_hw_cleanup)(struct ib_uverbs_file *ufile); + + /** + * report_port_event - Drivers need to implement this if they have + * some private stuff to handle when link status changes. + */ + void (*report_port_event)(struct ib_device *ibdev, + struct net_device *ndev, unsigned long event); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_counters); DECLARE_RDMA_OBJ_SIZE(ib_cq); DECLARE_RDMA_OBJ_SIZE(ib_mw); DECLARE_RDMA_OBJ_SIZE(ib_pd); + DECLARE_RDMA_OBJ_SIZE(ib_qp); DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table); DECLARE_RDMA_OBJ_SIZE(ib_srq); DECLARE_RDMA_OBJ_SIZE(ib_ucontext); DECLARE_RDMA_OBJ_SIZE(ib_xrcd); + DECLARE_RDMA_OBJ_SIZE(rdma_counter); }; struct ib_core_device { @@ -2658,14 +2771,15 @@ struct ib_device { struct ib_core_device coredev; }; - /* First group for device attributes, - * Second group for driver provided attributes (optional). - * It is NULL terminated array. + /* First group is for device attributes, + * Second group is for driver provided attributes (optional). + * Third group is for the hw_stats + * It is a NULL terminated array. */ - const struct attribute_group *groups[3]; + const struct attribute_group *groups[4]; + u8 hw_stats_attr_index; u64 uverbs_cmd_mask; - u64 uverbs_ex_cmd_mask; char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; @@ -2676,10 +2790,9 @@ struct ib_device { /* CQ adaptive moderation (RDMA DIM) */ u16 use_cq_dim:1; u8 node_type; - u8 phys_port_cnt; + u32 phys_port_cnt; struct ib_device_attr attrs; - struct attribute_group *hw_stats_ag; - struct rdma_hw_stats *hw_stats; + struct hw_stats_device_data *hw_stats_data; #ifdef CONFIG_CGROUP_RDMA struct rdmacg_device cg_device; @@ -2713,8 +2826,28 @@ struct ib_device { char iw_ifname[IFNAMSIZ]; u32 iw_driver_flags; u32 lag_flags; + + /* A parent device has a list of sub-devices */ + struct mutex subdev_lock; + struct list_head subdev_list_head; + + /* A sub device has a type and a parent */ + enum rdma_nl_dev_type type; + struct ib_device *parent; + struct list_head subdev_list; + + enum rdma_nl_name_assign_type name_assign_type; }; +static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size, + gfp_t gfp, bool is_numa_aware) +{ + if (is_numa_aware && dev->ops.get_numa_node) + return kzalloc_node(size, gfp, dev->ops.get_numa_node(dev)); + + return kzalloc(size, gfp); +} + struct ib_client_nl_info; struct ib_client { const char *name; @@ -2742,7 +2875,7 @@ struct ib_client { * netdev. */ struct net_device *(*get_net_dev_by_params)( struct ib_device *dev, - u8 port, + u32 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr, @@ -2766,6 +2899,7 @@ struct ib_block_iter { /* internal states */ struct scatterlist *__sg; /* sg holding the current aligned block */ dma_addr_t __dma_addr; /* unaligned DMA address of this block */ + size_t __sg_numblocks; /* ib_umem_num_dma_blocks() */ unsigned int __sg_nents; /* number of SG entries */ unsigned int __sg_advance; /* number of bytes to advance in sg in next step */ unsigned int __pg_bit; /* alignment of current block */ @@ -2855,6 +2989,23 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, size_t length, u32 min_pgoff, u32 max_pgoff); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +void rdma_user_mmap_disassociate(struct ib_device *device); +#else +static inline void rdma_user_mmap_disassociate(struct ib_device *device) +{ +} +#endif + +static inline int +rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length, u32 pgoff) +{ + return rdma_user_mmap_entry_insert_range(ucontext, entry, length, pgoff, + pgoff); +} + struct rdma_user_mmap_entry * rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, unsigned long pgoff); @@ -2901,46 +3052,6 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, } /** - * ib_is_destroy_retryable - Check whether the uobject destruction - * is retryable. - * @ret: The initial destruction return code - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * This function is a helper function that IB layer and low-level drivers - * can use to consider whether the destruction of the given uobject is - * retry-able. - * It checks the original return code, if it wasn't success the destruction - * is retryable according to the ucontext state (i.e. cleanup_retryable) and - * the remove reason. (i.e. why). - * Must be called with the object locked for destroy. - */ -static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - return ret && (why == RDMA_REMOVE_DESTROY || - uobj->context->cleanup_retryable); -} - -/** - * ib_destroy_usecnt - Called during destruction to check the usecnt - * @usecnt: The usecnt atomic - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * Non-zero usecnts will block destruction unless destruction was triggered by - * a ucontext cleanup. - */ -static inline int ib_destroy_usecnt(atomic_t *usecnt, - enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) - return -EBUSY; - return 0; -} - -/** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for * the given QP state transition. @@ -2963,10 +3074,10 @@ void ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(const struct ib_event *event); int ib_query_port(struct ib_device *device, - u8 port_num, struct ib_port_attr *port_attr); + u32 port_num, struct ib_port_attr *port_attr); enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, - u8 port_num); + u32 port_num); /** * rdma_cap_ib_switch - Check if the device is IB switch @@ -2990,7 +3101,7 @@ static inline bool rdma_cap_ib_switch(const struct ib_device *device) * * Return start port number */ -static inline u8 rdma_start_port(const struct ib_device *device) +static inline u32 rdma_start_port(const struct ib_device *device) { return rdma_cap_ib_switch(device) ? 0 : 1; } @@ -3001,9 +3112,10 @@ static inline u8 rdma_start_port(const struct ib_device *device) * @iter - The unsigned int to store the port number */ #define rdma_for_each_port(device, iter) \ - for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type( \ - unsigned int, iter))); \ - iter <= rdma_end_port(device); (iter)++) + for (iter = rdma_start_port(device + \ + BUILD_BUG_ON_ZERO(!__same_type(u32, \ + iter))); \ + iter <= rdma_end_port(device); iter++) /** * rdma_end_port - Return the last valid port number for the device @@ -3013,7 +3125,7 @@ static inline u8 rdma_start_port(const struct ib_device *device) * * Return last port number */ -static inline u8 rdma_end_port(const struct ib_device *device) +static inline u32 rdma_end_port(const struct ib_device *device) { return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt; } @@ -3026,55 +3138,63 @@ static inline int rdma_is_port_valid(const struct ib_device *device, } static inline bool rdma_is_grh_required(const struct ib_device *device, - u8 port_num) + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_PORT_IB_GRH_REQUIRED; } -static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_ib(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_IB; } -static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_roce(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP); } -static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; } -static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_ROCE; } -static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_iwarp(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_IWARP; } -static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num) +static inline bool rdma_ib_or_roce(const struct ib_device *device, + u32 port_num) { return rdma_protocol_ib(device, port_num) || rdma_protocol_roce(device, port_num); } -static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_raw_packet(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET; } -static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num) +static inline bool rdma_protocol_usnic(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_PROT_USNIC; @@ -3092,7 +3212,7 @@ static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_n * * Return: true if the port supports sending/receiving of MAD packets. */ -static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_mad(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_MAD; @@ -3117,7 +3237,7 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) * * Return: true if the port supports OPA MAD packet formats. */ -static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) +static inline bool rdma_cap_opa_mad(struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_OPA_MAD; @@ -3143,7 +3263,7 @@ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) * * Return: true if the port provides an SMI. */ -static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_smi(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_SMI; @@ -3164,7 +3284,7 @@ static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num) * Return: true if the port supports an IB CM (this does not guarantee that * a CM is actually running however). */ -static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_cm(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_CM; @@ -3182,7 +3302,7 @@ static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num) * Return: true if the port supports an iWARP CM (this does not guarantee that * a CM is actually running however). */ -static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_iw_cm(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IW_CM; @@ -3203,7 +3323,7 @@ static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num) * Administration interface. This does not imply that the SA service is * running locally. */ -static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_sa(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_IB_SA; @@ -3226,7 +3346,8 @@ static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num) * overhead of registering/unregistering with the SM and tracking of the * total number of queue pairs attached to the multicast group. */ -static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_ib_mcast(const struct ib_device *device, + u32 port_num) { return rdma_cap_ib_sa(device, port_num); } @@ -3244,7 +3365,7 @@ static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num * Return: true if the port uses a GID address to identify devices on the * network. */ -static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_af_ib(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_AF_IB; @@ -3266,7 +3387,7 @@ static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num) * addition of a Global Route Header built from our Ethernet Address * Handle into our header list for connectionless packets. */ -static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) +static inline bool rdma_cap_eth_ah(const struct ib_device *device, u32 port_num) { return device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_ETH_AH; @@ -3281,7 +3402,7 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) * Return: true if we are running on an OPA device which supports * the extended OPA addressing. */ -static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num) +static inline bool rdma_cap_opa_ah(struct ib_device *device, u32 port_num) { return (device->port_data[port_num].immutable.core_cap_flags & RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH; @@ -3299,7 +3420,8 @@ static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num) * Return the max MAD size required by the Port. Will return 0 if the port * does not support MADs */ -static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num) +static inline size_t rdma_max_mad_size(const struct ib_device *device, + u32 port_num) { return device->port_data[port_num].immutable.max_mad_size; } @@ -3318,7 +3440,7 @@ static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_n * its GIDs. */ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device, - u8 port_num) + u32 port_num) { return rdma_protocol_roce(device, port_num) && device->ops.add_gid && device->ops.del_gid; @@ -3359,7 +3481,7 @@ static inline bool rdma_core_cap_opa_port(struct ib_device *device, * Return the MTU size supported by the port as an integer value. Will return * -1 if enum value of mtu is not supported. */ -static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port, +static inline int rdma_mtu_enum_to_int(struct ib_device *device, u32 port, int mtu) { if (rdma_core_cap_opa_port(device, port)) @@ -3376,7 +3498,7 @@ static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port, * * Return the MTU size supported by the port as an integer value. */ -static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port, +static inline int rdma_mtu_from_attr(struct ib_device *device, u32 port, struct ib_port_attr *attr) { if (rdma_core_cap_opa_port(device, port)) @@ -3385,34 +3507,34 @@ static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port, return ib_mtu_enum_to_int(attr->max_mtu); } -int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, +int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port, int state); -int ib_get_vf_config(struct ib_device *device, int vf, u8 port, +int ib_get_vf_config(struct ib_device *device, int vf, u32 port, struct ifla_vf_info *info); -int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, +int ib_get_vf_stats(struct ib_device *device, int vf, u32 port, struct ifla_vf_stats *stats); -int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, +int ib_get_vf_guid(struct ib_device *device, int vf, u32 port, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid); -int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, +int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid, int type); int ib_query_pkey(struct ib_device *device, - u8 port_num, u16 index, u16 *pkey); + u32 port_num, u16 index, u16 *pkey); int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify); int ib_modify_port(struct ib_device *device, - u8 port_num, int port_modify_mask, + u32 port_num, int port_modify_mask, struct ib_port_modify *port_modify); int ib_find_gid(struct ib_device *device, union ib_gid *gid, - u8 *port_num, u16 *index); + u32 *port_num, u16 *index); int ib_find_pkey(struct ib_device *device, - u8 port_num, u16 pkey, u16 *index); + u32 port_num, u16 pkey, u16 *index); enum ib_pd_flags { /* @@ -3430,6 +3552,17 @@ enum ib_pd_flags { struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller); +/** + * ib_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * @flags: protection domain flags + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + * + * Every PD has a local_dma_lkey which can be used as the lkey value for local + * memory operations. + */ #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) @@ -3516,7 +3649,7 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); * attributes which are initialized using ib_init_ah_attr_from_wc(). * */ -int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, +int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct rdma_ah_attr *ah_attr); @@ -3533,7 +3666,7 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, * in all UD QP post sends. */ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, - const struct ib_grh *grh, u8 port_num); + const struct ib_grh *grh, u32 port_num); /** * rdma_modify_ah - Modifies the address vector associated with an address @@ -3655,8 +3788,22 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, bad_recv_wr ? : &dummy); } -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); +struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller); +/** + * ib_create_qp - Creates a kernel QP associated with the specific protection + * domain. + * @pd: The protection domain associated with the QP. + * @init_attr: A list of initial attributes required to create the + * QP. If QP creation succeeds, then the attributes are updated to + * the actual capabilities of the created QP. + */ +static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr) +{ + return ib_create_qp_kernel(pd, init_attr, KBUILD_MODNAME); +} /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. @@ -3929,18 +4076,50 @@ struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe, void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe); +/* + * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to + * NULL. This causes the ib_dma* helpers to just stash the kernel virtual + * address into the dma address. + */ +static inline bool ib_uses_virt_dma(struct ib_device *dev) +{ + return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; +} + +/* + * Check if a IB device's underlying DMA mapping supports P2PDMA transfers. + */ +static inline bool ib_dma_pci_p2p_dma_supported(struct ib_device *dev) +{ + if (ib_uses_virt_dma(dev)) + return false; + + return dma_pci_p2pdma_supported(dev->dma_device); +} + /** - * ib_req_ncomp_notif - Request completion notification when there are - * at least the specified number of unreaped completions on the CQ. - * @cq: The CQ to generate an event for. - * @wc_cnt: The number of unreaped completions that should be on the - * CQ before an event is generated. + * ib_virt_dma_to_ptr - Convert a dma_addr to a kernel pointer + * @dma_addr: The DMA address + * + * Used by ib_uses_virt_dma() devices to get back to the kernel pointer after + * going through the dma_addr marshalling. */ -static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) +static inline void *ib_virt_dma_to_ptr(u64 dma_addr) { - return cq->device->ops.req_ncomp_notif ? - cq->device->ops.req_ncomp_notif(cq, wc_cnt) : - -ENOSYS; + /* virt_dma mode maps the kvs's directly into the dma addr */ + return (void *)(uintptr_t)dma_addr; +} + +/** + * ib_virt_dma_to_page - Convert a dma_addr to a struct page + * @dma_addr: The DMA address + * + * Used by ib_uses_virt_dma() device to get back to the struct page after going + * through the dma_addr marshalling. + */ +static inline struct page *ib_virt_dma_to_page(u64 dma_addr) +{ + return virt_to_page(ib_virt_dma_to_ptr(dma_addr)); } /** @@ -3950,6 +4129,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { + if (ib_uses_virt_dma(dev)) + return 0; return dma_mapping_error(dev->dma_device, dma_addr); } @@ -3964,6 +4145,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)cpu_addr; return dma_map_single(dev->dma_device, cpu_addr, size, direction); } @@ -3978,7 +4161,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3995,6 +4179,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)(page_address(page) + offset); return dma_map_page(dev->dma_device, page, offset, size, direction); } @@ -4009,7 +4195,63 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_page(dev->dma_device, addr, size, direction); +} + +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents); +static inline int ib_dma_map_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (ib_uses_virt_dma(dev)) + return ib_dma_virt_map_sg(dev, sg, nents); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); +} + +static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (!ib_uses_virt_dma(dev)) + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); +} + +/** + * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses + * @dev: The device for which the DMA addresses are to be created + * @sg: The sg_table object describing the buffer + * @direction: The direction of the DMA + * @attrs: Optional DMA attributes for the map operation + */ +static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev, + struct sg_table *sgt, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + int nents; + + if (ib_uses_virt_dma(dev)) { + nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); + if (!nents) + return -EIO; + sgt->nents = nents; + return 0; + } + return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs); +} + +static inline void ib_dma_unmap_sgtable_attrs(struct ib_device *dev, + struct sg_table *sgt, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (!ib_uses_virt_dma(dev)) + dma_unmap_sgtable(dev->dma_device, sgt, direction, dma_attrs); } /** @@ -4023,7 +4265,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(dev->dma_device, sg, nents, direction); + return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4037,24 +4279,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(dev->dma_device, sg, nents, direction); -} - -static inline int ib_dma_map_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, - dma_attrs); -} - -static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); + ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4065,6 +4290,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, */ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { + if (ib_uses_virt_dma(dev)) + return UINT_MAX; return dma_get_max_seg_size(dev->dma_device); } @@ -4080,7 +4307,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -4095,36 +4323,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(dev->dma_device, addr, size, dir); -} - -/** - * ib_dma_alloc_coherent - Allocate memory and map it for DMA - * @dev: The device for which the DMA address is requested - * @size: The size of the region to allocate in bytes - * @dma_handle: A pointer for returning the DMA address of the region - * @flag: memory allocator flags - */ -static inline void *ib_dma_alloc_coherent(struct ib_device *dev, - size_t size, - dma_addr_t *dma_handle, - gfp_t flag) -{ - return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); -} - -/** - * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() - * @dev: The device for which the DMA addresses were allocated - * @size: The size of the region - * @cpu_addr: the address returned by ib_dma_alloc_coherent() - * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() - */ -static inline void ib_dma_free_coherent(struct ib_device *dev, - size_t size, void *cpu_addr, - dma_addr_t dma_handle) -{ - dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /* ib_reg_user_mr - register a memory region for virtual addresses from kernel @@ -4216,8 +4416,11 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, struct inode *inode, struct ib_udata *udata); int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); -static inline int ib_check_mr_access(int flags) +static inline int ib_check_mr_access(struct ib_device *ib_dev, + unsigned int flags) { + u64 device_cap = ib_dev->attrs.device_cap_flags; + /* * Local write permission is required if remote write or * remote atomic permission is also requested. @@ -4229,6 +4432,16 @@ static inline int ib_check_mr_access(int flags) if (flags & ~IB_ACCESS_SUPPORTED) return -EINVAL; + if (flags & IB_ACCESS_ON_DEMAND && + !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING)) + return -EOPNOTSUPP; + + if ((flags & IB_ACCESS_FLUSH_GLOBAL && + !(device_cap & IB_DEVICE_FLUSH_GLOBAL)) || + (flags & IB_ACCESS_FLUSH_PERSISTENT && + !(device_cap & IB_DEVICE_FLUSH_PERSISTENT))) + return -EOPNOTSUPP; + return 0; } @@ -4284,18 +4497,27 @@ struct ib_device *ib_device_get_by_netdev(struct net_device *ndev, enum rdma_driver_id driver_id); struct ib_device *ib_device_get_by_name(const char *name, enum rdma_driver_id driver_id); -struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, +struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, unsigned int port); -struct net_device *ib_device_netdev(struct ib_device *dev, u8 port); +struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, + u32 port); +int ib_query_netdev_port(struct ib_device *ibdev, struct net_device *ndev, + u32 *port); + +static inline enum ib_port_state ib_get_curr_port_state(struct net_device *net_dev) +{ + return (netif_running(net_dev) && netif_carrier_ok(net_dev)) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; +} +void ib_dispatch_port_state_event(struct ib_device *ibdev, + struct net_device *ndev); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata); -int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, - u32 wq_attr_mask); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size); @@ -4323,7 +4545,8 @@ void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); -int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width); +int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, + u8 *width); static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) { @@ -4391,12 +4614,12 @@ static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr) return false; } -static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u8 port_num) +static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u32 port_num) { attr->port_num = port_num; } -static inline u8 rdma_ah_get_port_num(const struct rdma_ah_attr *attr) +static inline u32 rdma_ah_get_port_num(const struct rdma_ah_attr *attr) { return attr->port_num; } @@ -4494,7 +4717,7 @@ void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src); * @port_num: Port number */ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, - u8 port_num) + u32 port_num) { if (rdma_protocol_roce(dev, port_num)) return RDMA_AH_ATTR_TYPE_ROCE; @@ -4503,13 +4726,15 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, return RDMA_AH_ATTR_TYPE_OPA; return RDMA_AH_ATTR_TYPE_IB; } + if (dev->type == RDMA_DEVICE_TYPE_SMI) + return RDMA_AH_ATTR_TYPE_IB; return RDMA_AH_ATTR_TYPE_UNDEFINED; } /** * ib_lid_cpu16 - Return lid in 16bit CPU encoding. - * In the current implementation the only way to get + * In the current implementation the only way to * get the 32bit lid is from other sources for OPA. * For IB, lids will always be 16bits so cast the * value accordingly. @@ -4561,45 +4786,33 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) * @device: the rdma device */ void rdma_roce_rescan_device(struct ib_device *ibdev); +void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port); +void roce_del_all_netdev_gids(struct ib_device *ib_dev, + u32 port, struct net_device *ndev); struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); +#else +static inline int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) +{ + return 0; +} +#endif -struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, +struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *)); -int rdma_init_netdev(struct ib_device *device, u8 port_num, +int rdma_init_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), struct net_device *netdev); /** - * rdma_set_device_sysfs_group - Set device attributes group to have - * driver specific sysfs entries at - * for infiniband class. - * - * @device: device pointer for which attributes to be created - * @group: Pointer to group which should be added when device - * is registered with sysfs. - * rdma_set_device_sysfs_group() allows existing drivers to expose one - * group per device to have sysfs attributes. - * - * NOTE: New drivers should not make use of this API; instead new device - * parameter should be exposed via netlink command. This API and mechanism - * exist only for existing drivers. - */ -static inline void -rdma_set_device_sysfs_group(struct ib_device *dev, - const struct attribute_group *group) -{ - dev->groups[1] = group; -} - -/** * rdma_device_to_ibdev - Get ib_device pointer from device pointer * * @device: device pointer for which ib_device pointer to retrieve @@ -4616,12 +4829,25 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) } /** + * ibdev_to_node - return the NUMA node for a given ib_device + * @dev: device to get the NUMA node for. + */ +static inline int ibdev_to_node(struct ib_device *ibdev) +{ + struct device *parent = ibdev->dev.parent; + + if (!parent) + return NUMA_NO_NODE; + return dev_to_node(parent); +} + +/** * rdma_device_to_drv_device - Helper macro to reach back to driver's * ib_device holder structure from device pointer. * * NOTE: New drivers should not make use of this API; This API is only for * existing drivers who have exposed sysfs entries using - * rdma_set_device_sysfs_group(). + * ops->device_group. */ #define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \ container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member) @@ -4673,4 +4899,52 @@ static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn) return (u32)(v & IB_GRH_FLOWLABEL_MASK); } + +/** + * rdma_get_udp_sport - Calculate and set UDP source port based on the flow + * label. If flow label is not defined in GRH then + * calculate it based on lqpn/rqpn. + * + * @fl: flow label from GRH + * @lqpn: local qp number + * @rqpn: remote qp number + */ +static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) +{ + if (!fl) + fl = rdma_calc_flow_label(lqpn, rqpn); + + return rdma_flow_label_to_udp_sport(fl); +} + +const struct ib_port_immutable* +ib_port_immutable_read(struct ib_device *dev, unsigned int port); + +/** ib_add_sub_device - Add a sub IB device on an existing one + * + * @parent: The IB device that needs to add a sub device + * @type: The type of the new sub device + * @name: The name of the new sub device + * + * + * Return 0 on success, an error code otherwise + */ +int ib_add_sub_device(struct ib_device *parent, + enum rdma_nl_dev_type type, + const char *name); + + +/** ib_del_sub_device_and_put - Delect an IB sub device while holding a 'get' + * + * @sub: The sub device that is going to be deleted + * + * Return 0 on success, an error code otherwise + */ +int ib_del_sub_device_and_put(struct ib_device *sub); + +static inline void ib_mark_name_assigned_by_user(struct ib_device *ibdev) +{ + ibdev->name_assign_type = RDMA_NAME_ASSIGN_TYPE_USER; +} + #endif /* IB_VERBS_H */ diff --git a/include/rdma/iba.h b/include/rdma/iba.h index 6a1115b02a0d..dcae154edc26 100644 --- a/include/rdma/iba.h +++ b/include/rdma/iba.h @@ -7,7 +7,7 @@ #include <linux/kernel.h> #include <linux/bitfield.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> static inline u32 _iba_get8(const u8 *ptr) { diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 91975400e1b3..2b22f153ef63 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -70,6 +70,7 @@ struct iw_cm_id { u8 tos; bool tos_set:1; bool mapped:1; + bool afonly:1; }; struct iw_cm_conn_param { @@ -114,27 +115,6 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device, void iw_destroy_cm_id(struct iw_cm_id *cm_id); /** - * iw_cm_bind_qp - Unbind the specified IW CM identifier and QP - * - * @cm_id: The IW CM idenfier to unbind from the QP. - * @qp: The QP - * - * This is called by the provider when destroying the QP to ensure - * that any references held by the IWCM are released. It may also - * be called by the IWCM when destroying a CM_ID to that any - * references held by the provider are released. - */ -void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp); - -/** - * iw_cm_get_qp - Return the ib_qp associated with a QPN - * - * @ib_device: The IB device - * @qpn: The queue pair number - */ -struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn); - -/** * iw_cm_listen - Listen for incoming connection requests on the * specified IW CM id. * diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h index cbe3c2811455..d297f084001a 100644 --- a/include/rdma/opa_vnic.h +++ b/include/rdma/opa_vnic.h @@ -51,7 +51,7 @@ static inline void *opa_vnic_dev_priv(const struct net_device *dev) return oparn->dev_priv; } -/* opa_vnic skb meta data structrue */ +/* opa_vnic skb meta data structure */ struct opa_vnic_skb_mdata { u8 vl; u8 entropy; @@ -90,8 +90,7 @@ struct opa_vnic_stats { static inline bool rdma_cap_opa_vnic(struct ib_device *device) { - return !!(device->attrs.device_cap_flags & - IB_DEVICE_RDMA_NETDEV_OPA); + return !!(device->attrs.kernel_cap_flags & IBK_RDMA_NETDEV_OPA); } #endif /* _OPA_VNIC_H */ diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index c672ae1da26b..d1593ad47e28 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -52,7 +52,17 @@ struct rdma_addr { struct rdma_route { struct rdma_addr addr; struct sa_path_rec *path_rec; - int num_paths; + + /* Optional path records of primary path */ + struct sa_path_rec *path_rec_inbound; + struct sa_path_rec *path_rec_outbound; + + /* + * 0 - No primary nor alternate path is available + * 1 - Only primary path is available + * 2 - Both primary and alternate path are available + */ + int num_pri_alt_paths; }; struct rdma_conn_param { @@ -107,7 +117,8 @@ struct rdma_cm_id { struct rdma_route route; enum rdma_ucm_port_space ps; enum ib_qp_type qp_type; - u8 port_num; + u32 port_num; + struct work_struct net_work; }; struct rdma_cm_id * @@ -227,19 +238,9 @@ void rdma_destroy_qp(struct rdma_cm_id *id); int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int *qp_attr_mask); -/** - * rdma_connect - Initiate an active connection request. - * @id: Connection identifier to connect. - * @conn_param: Connection information used for connected QPs. - * - * Users must have resolved a route for the rdma_cm_id to connect with - * by having called rdma_resolve_route before calling this routine. - * - * This call will either connect to a remote QP or obtain remote QP - * information for unconnected rdma_cm_id's. The actual operation is - * based on the rdma_cm_id's port space. - */ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); +int rdma_connect_locked(struct rdma_cm_id *id, + struct rdma_conn_param *conn_param); int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, struct rdma_ucm_ece *ece); @@ -341,6 +342,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse); int rdma_set_afonly(struct rdma_cm_id *id, int afonly); int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout); + +int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer); /** * rdma_get_service_id - Return the IB service ID for a specified address. * @id: Communication identifier associated with the address. @@ -385,6 +388,5 @@ void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid, union ib_gid *dgid); struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *cm_id); -struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res); #endif /* RDMA_CM_H */ diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h index eb99856e8b30..4204d08a010a 100644 --- a/include/rdma/rdma_counter.h +++ b/include/rdma/rdma_counter.h @@ -23,6 +23,7 @@ struct rdma_counter_mode { enum rdma_nl_counter_mode mode; enum rdma_nl_counter_mask mask; struct auto_mode_param param; + bool bind_opcnt; }; struct rdma_port_counter { @@ -40,26 +41,31 @@ struct rdma_counter { struct rdma_counter_mode mode; struct mutex lock; struct rdma_hw_stats *stats; - u8 port; + u32 port; }; void rdma_counter_init(struct ib_device *dev); void rdma_counter_release(struct ib_device *dev); -int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, - bool on, enum rdma_nl_counter_mask mask); -int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port); -int rdma_counter_unbind_qp(struct ib_qp *qp, bool force); +int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port, + enum rdma_nl_counter_mask mask, + bool bind_opcnt, + struct netlink_ext_ack *extack); +int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port); +int rdma_counter_unbind_qp(struct ib_qp *qp, u32 port, bool force); int rdma_counter_query_stats(struct rdma_counter *counter); -u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index); -int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, +u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index); +int rdma_counter_bind_qpn(struct ib_device *dev, u32 port, u32 qp_num, u32 counter_id); -int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, +int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port, u32 qp_num, u32 *counter_id); -int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port, +int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port, u32 qp_num, u32 counter_id); -int rdma_counter_get_mode(struct ib_device *dev, u8 port, +int rdma_counter_get_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mode *mode, - enum rdma_nl_counter_mask *mask); + enum rdma_nl_counter_mask *mask, + bool *opcnt); +int rdma_counter_modify(struct ib_device *dev, u32 port, + unsigned int index, bool enable); #endif /* _RDMA_COUNTER_H_ */ diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 2758d9df71ee..326deaf56d5d 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -6,6 +6,8 @@ #include <linux/netlink.h> #include <uapi/rdma/rdma_netlink.h> +struct ib_device; + enum { RDMA_NLDEV_ATTR_EMPTY_STRING = 1, RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16, @@ -30,7 +32,7 @@ enum rdma_nl_flags { * constant as well and the compiler checks they are the same. */ #define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \ - static inline void __chk_##_index(void) \ + static inline void __maybe_unused __chk_##_index(void) \ { \ BUILD_BUG_ON(_index != _val); \ } \ @@ -110,6 +112,16 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb, */ bool rdma_nl_chk_listeners(unsigned int group); +/** + * Prepare and send an event message + * @ib: the IB device which triggered the event + * @port_num: the port number which triggered the event - 0 if unused + * @type: the event type + * Returns 0 on success or a negative error code + */ +int rdma_nl_notify_event(struct ib_device *ib, u32 port_num, + enum rdma_nl_notify_event_type type); + struct rdma_link_ops { struct list_head list; const char *type; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 9fd217b24916..c429d6ddb129 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -92,7 +92,7 @@ struct rvt_ibport { /* * The pkey table is allocated and maintained by the driver. Drivers * need to have access to this before registering with rdmav. However - * rdmavt will need access to it so drivers need to proviee this during + * rdmavt will need access to it so drivers need to provide this during * the attach port API call. */ u16 *pkey_table; @@ -230,7 +230,7 @@ struct rvt_driver_provided { void (*do_send)(struct rvt_qp *qp); /* - * Returns a pointer to the undelying hardware's PCI device. This is + * Returns a pointer to the underlying hardware's PCI device. This is * used to display information as to what hardware is being referenced * in an output message */ @@ -245,7 +245,7 @@ struct rvt_driver_provided { void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); /* - * Init a struture allocated with qp_priv_alloc(). This should be + * Init a structure allocated with qp_priv_alloc(). This should be * called after all qp fields have been initialized in rdmavt. */ int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp, @@ -257,7 +257,7 @@ struct rvt_driver_provided { void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); /* - * Inform the driver the particular qp in quesiton has been reset so + * Inform the driver the particular qp in question has been reset so * that it can clean up anything it needs to. */ void (*notify_qp_reset)(struct rvt_qp *qp); @@ -281,7 +281,7 @@ struct rvt_driver_provided { void (*stop_send_queue)(struct rvt_qp *qp); /* - * Have the drivr drain any in progress operations + * Have the driver drain any in progress operations */ void (*quiesce_qp)(struct rvt_qp *qp); @@ -309,16 +309,16 @@ struct rvt_driver_provided { /* * Query driver for the state of the port. */ - int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num, + int (*query_port_state)(struct rvt_dev_info *rdi, u32 port_num, struct ib_port_attr *props); /* * Tell driver to shutdown a port */ - int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num); + int (*shut_down_port)(struct rvt_dev_info *rdi, u32 port_num); /* Tell driver to send a trap for changed port capabilities */ - void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num); + void (*cap_mask_chg)(struct rvt_dev_info *rdi, u32 port_num); /* * The following functions can be safely ignored completely. Any use of @@ -338,7 +338,7 @@ struct rvt_driver_provided { /* Let the driver pick the next queue pair number*/ int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port_num); + enum ib_qp_type type, u32 port_num); /* Determine if its safe or allowed to modify the qp */ int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, @@ -445,7 +445,7 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi, * to work by setting the name manually here. */ dev_set_name(&rdi->ibdev.dev, fmt, name, unit); - strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX); + strscpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX); } /** diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 8275954f5ce6..d67892944193 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -11,6 +11,7 @@ #include <rdma/ib_verbs.h> #include <rdma/rdmavt_cq.h> #include <rdma/rvt-abi.h> +#include <linux/vmalloc.h> /* * Atomic bit definitions for r_aflags. */ @@ -444,7 +445,7 @@ struct rvt_qp { /* * This sge list MUST be last. Do not add anything below here. */ - struct rvt_sge r_sg_list[] /* verified SGEs */ + struct rvt_sge *r_sg_list /* verified SGEs */ ____cacheline_aligned_in_smp; }; diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index d3a1cc5be7bc..0d69ded73bf2 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -14,6 +14,9 @@ #include <uapi/rdma/rdma_netlink.h> #include <linux/xarray.h> +/* Mark entry as containing driver specific details, it is used to provide QP subtype for now */ +#define RESTRACK_DD XA_MARK_1 + struct ib_device; struct sk_buff; @@ -50,6 +53,10 @@ enum rdma_restrack_type { */ RDMA_RESTRACK_COUNTER, /** + * @RDMA_RESTRACK_SRQ: Shared receive queue (SRQ) + */ + RDMA_RESTRACK_SRQ, + /** * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations */ RDMA_RESTRACK_MAX @@ -68,6 +75,14 @@ struct rdma_restrack_entry { * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI */ bool valid; + /** + * @no_track: don't add this entry to restrack DB + * + * This field is used to mark an entry that doesn't need to be added to + * internal restrack DB and presented later to the users at the nldev + * query stage. + */ + u8 no_track : 1; /* * @kref: Protect destroy of the resource */ @@ -104,8 +119,8 @@ struct rdma_restrack_entry { u32 id; }; -int rdma_restrack_count(struct ib_device *dev, - enum rdma_restrack_type type); +int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type, + bool show_details); /** * rdma_is_kernel_res() - check the owner of resource * @res: resource entry @@ -145,4 +160,20 @@ int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id); + +/** + * rdma_restrack_no_track() - don't add resource to the DB + * @res: resource entry + * + * Every user of this API should be cross examined. + * Probably you don't need to use this function. + */ +static inline void rdma_restrack_no_track(struct rdma_restrack_entry *res) +{ + res->no_track = true; +} +static inline bool rdma_restrack_is_tracked(struct rdma_restrack_entry *res) +{ + return !res->no_track; +} #endif /* _RDMA_RESTRACK_H_ */ diff --git a/include/rdma/rw.h b/include/rdma/rw.h index 6ad9dc836c10..d606cac48233 100644 --- a/include/rdma/rw.h +++ b/include/rdma/rw.h @@ -42,29 +42,29 @@ struct rdma_rw_ctx { }; }; -int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, +int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, struct scatterlist *sg, u32 sg_cnt, u32 sg_offset, u64 remote_addr, u32 rkey, enum dma_data_direction dir); -void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, - struct scatterlist *sg, u32 sg_cnt, - enum dma_data_direction dir); +void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, + u32 port_num, struct scatterlist *sg, u32 sg_cnt, + enum dma_data_direction dir); int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, - u8 port_num, struct scatterlist *sg, u32 sg_cnt, + u32 port_num, struct scatterlist *sg, u32 sg_cnt, struct scatterlist *prot_sg, u32 prot_sg_cnt, struct ib_sig_attrs *sig_attrs, u64 remote_addr, u32 rkey, enum dma_data_direction dir); void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, - u8 port_num, struct scatterlist *sg, u32 sg_cnt, + u32 port_num, struct scatterlist *sg, u32 sg_cnt, struct scatterlist *prot_sg, u32 prot_sg_cnt, enum dma_data_direction dir); struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, - u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); -int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, + u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); +int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); -unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, +unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num, unsigned int maxpages); void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr); int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr); diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index b00270c72740..e6c0de227fad 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -24,6 +24,7 @@ enum uverbs_attr_type { UVERBS_ATTR_TYPE_PTR_OUT, UVERBS_ATTR_TYPE_IDR, UVERBS_ATTR_TYPE_FD, + UVERBS_ATTR_TYPE_RAW_FD, UVERBS_ATTR_TYPE_ENUM_IN, UVERBS_ATTR_TYPE_IDRS_ARRAY, }; @@ -435,8 +436,10 @@ struct uapi_definition { }, \ ##__VA_ARGS__ #define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...) \ - UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum), \ - ##__VA_ARGS__) + UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, \ + PTR_IF(IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS), \ + &UVERBS_OBJECT(_object_enum)), \ + ##__VA_ARGS__) /* * ======================================= @@ -521,6 +524,11 @@ struct uapi_definition { .u.obj.access = _access, \ __VA_ARGS__ } }) +#define UVERBS_ATTR_RAW_FD(_attr_id, ...) \ + (&(const struct uverbs_attr_def){ \ + .id = (_attr_id), \ + .attr = { .type = UVERBS_ATTR_TYPE_RAW_FD, __VA_ARGS__ } }) + #define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ @@ -621,12 +629,14 @@ struct uverbs_attr { }; struct uverbs_attr_bundle { - struct ib_udata driver_udata; - struct ib_udata ucore; - struct ib_uverbs_file *ufile; - struct ib_ucontext *context; - struct ib_uobject *uobject; - DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN); + struct_group_tagged(uverbs_attr_bundle_hdr, hdr, + struct ib_udata driver_udata; + struct ib_udata ucore; + struct ib_uverbs_file *ufile; + struct ib_ucontext *context; + struct ib_uobject *uobject; + DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN); + ); struct uverbs_attr attrs[]; }; @@ -647,12 +657,15 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b * 'ucontext'. * */ -#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ - (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ - driver_udata) \ - ->context, \ - drv_dev_struct, member) : \ - (drv_dev_struct *)NULL) +static inline struct uverbs_attr_bundle * +rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata) +{ + return container_of(udata, struct uverbs_attr_bundle, driver_udata); +} + +#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ + (udata ? container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \ + drv_dev_struct, member) : (drv_dev_struct *)NULL) #define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) @@ -862,9 +875,24 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, { return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); } -int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, - size_t idx, s64 lower_bound, u64 upper_bound, - s64 *def_val); + +static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle, + size_t n, size_t size) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return ERR_PTR(-EOVERFLOW); + return uverbs_zalloc(bundle, bytes); +} + +int _uverbs_get_const_signed(s64 *to, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, s64 lower_bound, u64 upper_bound, + s64 *def_val); +int _uverbs_get_const_unsigned(u64 *to, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 upper_bound, u64 *def_val); int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, size_t idx, const void *from, size_t size); #else @@ -908,27 +936,84 @@ uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, { return -EINVAL; } +static inline int +_uverbs_get_const_signed(s64 *to, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, s64 lower_bound, u64 upper_bound, + s64 *def_val) +{ + return -EINVAL; +} +static inline int +_uverbs_get_const_unsigned(u64 *to, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, u64 upper_bound, u64 *def_val) +{ + return -EINVAL; +} #endif -#define uverbs_get_const(_to, _attrs_bundle, _idx) \ +#define uverbs_get_const_signed(_to, _attrs_bundle, _idx) \ ({ \ s64 _val; \ - int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx, \ - type_min(typeof(*_to)), \ - type_max(typeof(*_to)), NULL); \ - (*_to) = _val; \ + int _ret = \ + _uverbs_get_const_signed(&_val, _attrs_bundle, _idx, \ + type_min(typeof(*(_to))), \ + type_max(typeof(*(_to))), NULL); \ + (*(_to)) = _val; \ _ret; \ }) -#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \ +#define uverbs_get_const_unsigned(_to, _attrs_bundle, _idx) \ + ({ \ + u64 _val; \ + int _ret = \ + _uverbs_get_const_unsigned(&_val, _attrs_bundle, _idx, \ + type_max(typeof(*(_to))), NULL); \ + (*(_to)) = _val; \ + _ret; \ + }) + +#define uverbs_get_const_default_signed(_to, _attrs_bundle, _idx, _default) \ ({ \ s64 _val; \ s64 _def_val = _default; \ int _ret = \ - _uverbs_get_const(&_val, _attrs_bundle, _idx, \ - type_min(typeof(*_to)), \ - type_max(typeof(*_to)), &_def_val); \ - (*_to) = _val; \ + _uverbs_get_const_signed(&_val, _attrs_bundle, _idx, \ + type_min(typeof(*(_to))), \ + type_max(typeof(*(_to))), &_def_val); \ + (*(_to)) = _val; \ + _ret; \ + }) + +#define uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, _default) \ + ({ \ + u64 _val; \ + u64 _def_val = _default; \ + int _ret = \ + _uverbs_get_const_unsigned(&_val, _attrs_bundle, _idx, \ + type_max(typeof(*(_to))), &_def_val); \ + (*(_to)) = _val; \ _ret; \ }) + +#define uverbs_get_const(_to, _attrs_bundle, _idx) \ + (is_signed_type(typeof(*(_to))) ? \ + uverbs_get_const_signed(_to, _attrs_bundle, _idx) : \ + uverbs_get_const_unsigned(_to, _attrs_bundle, _idx)) \ + +#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \ + (is_signed_type(typeof(*(_to))) ? \ + uverbs_get_const_default_signed(_to, _attrs_bundle, _idx, \ + _default) : \ + uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, \ + _default)) + +static inline int +uverbs_get_raw_fd(int *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx) +{ + return uverbs_get_const_signed(to, attrs_bundle, idx); +} + #endif diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index f04f5126f61e..ee7873f872c3 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -20,7 +20,7 @@ /* These are static so they do not need to be qualified */ #define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id -#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id +#define UVERBS_OBJECT_METHODS(object_id) _UVERBS_NAME(_object_methods_##object_id, __LINE__) #define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...) \ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \ diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index fe0512116958..555ea3d142a4 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -34,7 +34,7 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) { if (IS_ERR(uobj)) - return NULL; + return ERR_CAST(uobj); return uobj->object; } #define uobj_get_obj_read(_object, _type, _id, _attrs) \ diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 06db27e35f40..26ba919ac245 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -71,6 +71,8 @@ struct uverbs_obj_type_class { enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); + void (*swap_uobjects)(struct ib_uobject *obj_old, + struct ib_uobject *obj_new); }; struct uverbs_obj_type { @@ -116,6 +118,9 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, bool hw_obj_valid); void rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); +void rdma_assign_uobject(struct ib_uobject *to_uobj, + struct ib_uobject *new_uobj, + struct uverbs_attr_bundle *attrs); /* * uverbs_uobject_get is called in order to increase the reference count on @@ -129,6 +134,8 @@ static inline void uverbs_uobject_get(struct ib_uobject *uobject) } void uverbs_uobject_put(struct ib_uobject *uobject); +int uverbs_try_lock_object(struct ib_uobject *uobj, enum rdma_lookup_mode mode); + struct uverbs_obj_fd_type { /* * In fd based objects, uverbs_obj_type_ops points to generic @@ -138,13 +145,44 @@ struct uverbs_obj_fd_type { * because the driver is removed or the FD is closed. */ struct uverbs_obj_type type; - int (*destroy_object)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + void (*destroy_object)(struct ib_uobject *uobj, + enum rdma_remove_reason why); const struct file_operations *fops; const char *name; int flags; }; +struct ib_uverbs_file { + struct kref ref; + struct ib_uverbs_device *device; + struct mutex ucontext_lock; + /* + * ucontext must be accessed via ib_uverbs_get_ucontext() or with + * ucontext_lock held + */ + struct ib_ucontext *ucontext; + struct ib_uverbs_async_event_file *default_async_file; + struct list_head list; + + /* + * To access the uobjects list hw_destroy_rwsem must be held for write + * OR hw_destroy_rwsem held for read AND uobjects_lock held. + * hw_destroy_rwsem should be called across any destruction of the HW + * object of an associated uobject. + */ + struct rw_semaphore hw_destroy_rwsem; + spinlock_t uobjects_lock; + struct list_head uobjects; + + struct mutex umap_lock; + struct list_head umaps; + struct page *disassociate_page; + + struct xarray idr; + + struct mutex disassociation_lock; +}; + extern const struct uverbs_obj_type_class uverbs_idr_class; extern const struct uverbs_obj_type_class uverbs_fd_class; int uverbs_uobject_fd_release(struct inode *inode, struct file *filp); |