Diffstat (limited to 'drivers/infiniband/hw/efa')
-rw-r--r-- | drivers/infiniband/hw/efa/efa.h                 |  64
-rw-r--r-- | drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 281
-rw-r--r-- | drivers/infiniband/hw/efa/efa_admin_defs.h      |  47
-rw-r--r-- | drivers/infiniband/hw/efa/efa_com.c             | 358
-rw-r--r-- | drivers/infiniband/hw/efa/efa_com.h              |  39
-rw-r--r-- | drivers/infiniband/hw/efa/efa_com_cmd.c         | 111
-rw-r--r-- | drivers/infiniband/hw/efa/efa_com_cmd.h          |  40
-rw-r--r-- | drivers/infiniband/hw/efa/efa_common_defs.h      |  13
-rw-r--r-- | drivers/infiniband/hw/efa/efa_io_defs.h          | 289
-rw-r--r-- | drivers/infiniband/hw/efa/efa_main.c             | 296
-rw-r--r-- | drivers/infiniband/hw/efa/efa_regs_defs.h        |  30
-rw-r--r-- | drivers/infiniband/hw/efa/efa_verbs.c            | 695
12 files changed, 1785 insertions, 478 deletions
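
A recurring theme in this change is the replacement of open-coded shift/mask register arithmetic with the EFA_GET()/EFA_SET() helpers added to efa_common_defs.h, which wrap FIELD_GET()/FIELD_PREP() and take the field name without its _MASK suffix. The sketch below shows the pattern as it is used when packing the admin queue caps register (e.g. in efa_com_admin_init_sq()); the mask values here are illustrative stand-ins, not copied from efa_regs_defs.h.

    #include <linux/bitfield.h>
    #include <linux/bits.h>

    /* Illustrative field definitions; the real ones live in efa_regs_defs.h */
    #define EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK       GENMASK(15, 0)
    #define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK  GENMASK(31, 16)

    /* Helpers as introduced in efa_common_defs.h by this patch */
    #define EFA_GET(ptr, mask) FIELD_GET(mask##_MASK, *(ptr))
    #define EFA_SET(ptr, mask, value)                              \
            ({                                                     \
                    typeof(ptr) _ptr = ptr;                        \
                    *_ptr = (*_ptr & ~(mask##_MASK)) |             \
                            FIELD_PREP(mask##_MASK, value);        \
            })

    static u32 build_aq_caps(u16 depth, size_t entry_size)
    {
            u32 aq_caps = 0;

            /* Pack both fields; each EFA_SET() touches only its own bits */
            EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_DEPTH, depth);
            EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE, entry_size);

            return aq_caps;
    }

EFA_GET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_DEPTH) then recovers the depth field, which is how the diff below reads back values such as EFA_REGS_CAPS_ADMIN_CMD_TO and EFA_REGS_DEV_STS_READY.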
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index aa7396a1588a..7352a1f5d811 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_H_ @@ -20,31 +20,27 @@ #define EFA_IRQNAME_SIZE 40 -/* 1 for AENQ + ADMIN */ -#define EFA_NUM_MSIX_VEC 1 #define EFA_MGMNT_MSIX_VEC_IDX 0 +#define EFA_COMP_EQS_VEC_BASE 1 struct efa_irq { irq_handler_t handler; void *data; - int cpu; + u32 irqn; u32 vector; cpumask_t affinity_hint_mask; char name[EFA_IRQNAME_SIZE]; }; -struct efa_sw_stats { +/* Don't use anything other than atomic64 */ +struct efa_stats { atomic64_t alloc_pd_err; atomic64_t create_qp_err; atomic64_t create_cq_err; atomic64_t reg_mr_err; atomic64_t alloc_ucontext_err; atomic64_t create_ah_err; -}; - -/* Don't use anything other than atomic64 */ -struct efa_stats { - struct efa_sw_stats sw_stats; + atomic64_t mmap_err; atomic64_t keep_alive_rcvd; }; @@ -65,6 +61,13 @@ struct efa_dev { struct efa_irq admin_irq; struct efa_stats stats; + + /* Array of completion EQs */ + struct efa_eq *eqs; + unsigned int neqs; + + /* Only stores CQs with interrupts enabled */ + struct xarray cqs_xa; }; struct efa_ucontext { @@ -88,8 +91,11 @@ struct efa_cq { dma_addr_t dma_addr; void *cpu_addr; struct rdma_user_mmap_entry *mmap_entry; + struct rdma_user_mmap_entry *db_mmap_entry; size_t size; u16 cq_idx; + /* NULL when no interrupts requested */ + struct efa_eq *eq; }; struct efa_qp { @@ -120,32 +126,40 @@ struct efa_ah { u8 id[EFA_GID_SIZE]; }; +struct efa_eq { + struct efa_com_eq eeq; + struct efa_irq irq; +}; + int efa_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata); -int efa_query_port(struct ib_device *ibdev, u8 port, +int efa_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props); int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int efa_query_gid(struct ib_device *ibdev, u8 port, int index, +int efa_query_gid(struct ib_device *ibdev, u32 port, int index, union ib_gid *gid); -int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index, +int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey); int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); -void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); -struct ib_qp *efa_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); -void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata); int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); -int 
efa_get_port_immutable(struct ib_device *ibdev, u8 port_num, +int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable); int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata); void efa_dealloc_ucontext(struct ib_ucontext *ibucontext); @@ -153,16 +167,16 @@ int efa_mmap(struct ib_ucontext *ibucontext, struct vm_area_struct *vma); void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, - struct rdma_ah_attr *ah_attr, - u32 flags, + struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void efa_destroy_ah(struct ib_ah *ibah, u32 flags); +int efa_destroy_ah(struct ib_ah *ibah, u32 flags); int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata); enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, - u8 port_num); -struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num); + u32 port_num); +struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num); +struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev); int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, - u8 port_num, int index); + u32 port_num, int index); #endif /* _EFA_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 74b787a90660..d4b9226088bd 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_CMDS_H_ @@ -28,7 +28,9 @@ enum efa_admin_aq_opcode { EFA_ADMIN_DEALLOC_PD = 15, EFA_ADMIN_ALLOC_UAR = 16, EFA_ADMIN_DEALLOC_UAR = 17, - EFA_ADMIN_MAX_OPCODE = 17, + EFA_ADMIN_CREATE_EQ = 18, + EFA_ADMIN_DESTROY_EQ = 19, + EFA_ADMIN_MAX_OPCODE = 19, }; enum efa_admin_aq_feature_id { @@ -37,7 +39,8 @@ enum efa_admin_aq_feature_id { EFA_ADMIN_NETWORK_ATTR = 3, EFA_ADMIN_QUEUE_ATTR = 4, EFA_ADMIN_HW_HINTS = 5, - EFA_ADMIN_FEATURES_OPCODE_NUM = 8, + EFA_ADMIN_HOST_INFO = 6, + EFA_ADMIN_EVENT_QUEUE_ATTR = 7, }; /* QP transport type */ @@ -61,6 +64,8 @@ enum efa_admin_qp_state { enum efa_admin_get_stats_type { EFA_ADMIN_GET_STATS_TYPE_BASIC = 0, + EFA_ADMIN_GET_STATS_TYPE_MESSAGES = 1, + EFA_ADMIN_GET_STATS_TYPE_RDMA_READ = 2, }; enum efa_admin_get_stats_scope { @@ -68,14 +73,6 @@ enum efa_admin_get_stats_scope { EFA_ADMIN_GET_STATS_SCOPE_QUEUE = 1, }; -enum efa_admin_modify_qp_mask_bits { - EFA_ADMIN_QP_STATE_BIT = 0, - EFA_ADMIN_CUR_QP_STATE_BIT = 1, - EFA_ADMIN_QKEY_BIT = 2, - EFA_ADMIN_SQ_PSN_BIT = 3, - EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT = 4, -}; - /* * QP allocation sizes, converted by fabric QueuePair (QP) create command * from QP capabilities. @@ -167,8 +164,8 @@ struct efa_admin_create_qp_resp { u32 qp_handle; /* - * QP number in the given EFA virtual device. Least-significant bits - * (as needed according to max_qp) carry unique QP ID + * QP number in the given EFA virtual device. 
Least-significant bits (as + * needed according to max_qp) carry unique QP ID */ u16 qp_num; @@ -199,8 +196,14 @@ struct efa_admin_modify_qp_cmd { struct efa_admin_aq_common_desc aq_common_desc; /* - * Mask indicating which fields should be updated see enum - * efa_admin_modify_qp_mask_bits + * Mask indicating which fields should be updated + * 0 : qp_state + * 1 : cur_qp_state + * 2 : qkey + * 3 : sq_psn + * 4 : sq_drained_async_notify + * 5 : rnr_retry + * 31:6 : reserved */ u32 modify_mask; @@ -222,8 +225,8 @@ struct efa_admin_modify_qp_cmd { /* Enable async notification when SQ is drained */ u8 sq_drained_async_notify; - /* MBZ */ - u8 reserved1; + /* Number of RNR retries (valid only for SRD QPs) */ + u8 rnr_retry; /* MBZ */ u16 reserved2; @@ -258,8 +261,8 @@ struct efa_admin_query_qp_resp { /* Indicates that draining is in progress */ u8 sq_draining; - /* MBZ */ - u8 reserved1; + /* Number of RNR retries (valid only for SRD QPs) */ + u8 rnr_retry; /* MBZ */ u16 reserved2; @@ -430,8 +433,8 @@ struct efa_admin_create_cq_cmd { /* * 4:0 : reserved5 - MBZ * 5 : interrupt_mode_enabled - if set, cq operates - * in interrupt mode (i.e. CQ events and MSI-X are - * generated), otherwise - polling + * in interrupt mode (i.e. CQ events and EQ elements + * are generated), otherwise - polling * 6 : virt - If set, ring base address is virtual * (IOVA returned by MR registration) * 7 : reserved6 - MBZ @@ -441,15 +444,21 @@ struct efa_admin_create_cq_cmd { /* * 4:0 : cq_entry_size_words - size of CQ entry in * 32-bit words, valid values: 4, 8. - * 7:5 : reserved7 - MBZ + * 5 : set_src_addr - If set, source address will be + * filled on RX completions from unknown senders. + * Requires 8 words CQ entry size. + * 7:6 : reserved7 - MBZ */ u8 cq_caps_2; /* completion queue depth in # of entries. must be power of 2 */ u16 cq_depth; - /* msix vector assigned to this cq */ - u32 msix_vector_idx; + /* EQ number assigned to this cq */ + u16 eqn; + + /* MBZ */ + u16 reserved; /* * CQ ring base address, virtual or physical depending on 'virt' @@ -465,7 +474,7 @@ struct efa_admin_create_cq_cmd { /* * number of sub cqs - must be equal to sub_cqs_per_cq of queue - * attributes. + * attributes. */ u16 num_sub_cqs; @@ -480,6 +489,15 @@ struct efa_admin_create_cq_resp { /* actual cq depth in number of entries */ u16 cq_actual_depth; + + /* CQ doorbell address, as offset to PCIe DB BAR */ + u32 db_offset; + + /* + * 0 : db_valid - If set, doorbell offset is valid. + * Always set when interrupts are requested. 
+ */ + u32 flags; }; struct efa_admin_destroy_cq_cmd { @@ -530,19 +548,41 @@ struct efa_admin_basic_stats { u64 rx_drops; }; +struct efa_admin_messages_stats { + u64 send_bytes; + + u64 send_wrs; + + u64 recv_bytes; + + u64 recv_wrs; +}; + +struct efa_admin_rdma_read_stats { + u64 read_wrs; + + u64 read_bytes; + + u64 read_wr_err; + + u64 read_resp_bytes; +}; + struct efa_admin_acq_get_stats_resp { struct efa_admin_acq_common_desc acq_common_desc; - struct efa_admin_basic_stats basic_stats; + union { + struct efa_admin_basic_stats basic_stats; + + struct efa_admin_messages_stats messages_stats; + + struct efa_admin_rdma_read_stats rdma_read_stats; + } u; }; struct efa_admin_get_set_feature_common_desc { - /* - * 1:0 : select - 0x1 - current value; 0x3 - default - * value - * 7:3 : reserved3 - MBZ - */ - u8 flags; + /* MBZ */ + u8 reserved0; /* as appears in efa_admin_aq_feature_id */ u8 feature_id; @@ -576,7 +616,9 @@ struct efa_admin_feature_device_attr_desc { /* * 0 : rdma_read - If set, RDMA Read is supported on * TX queues - * 31:1 : reserved - MBZ + * 1 : rnr_retry - If set, RNR retry is supported on + * modify QP command + * 31:2 : reserved - MBZ */ u32 device_caps; @@ -606,8 +648,8 @@ struct efa_admin_feature_queue_attr_desc { /* Number of sub-CQs to be created for each CQ */ u16 sub_cqs_per_cq; - /* MBZ */ - u16 reserved; + /* Minimum number of WQEs per SQ */ + u16 min_sq_depth; /* Maximum number of SGEs (buffers) allowed for a single send WQE */ u16 max_wr_send_sges; @@ -632,6 +674,28 @@ struct efa_admin_feature_queue_attr_desc { /* Maximum number of SGEs for a single RDMA read WQE */ u16 max_wr_rdma_sges; + + /* + * Maximum number of bytes that can be written to SQ between two + * consecutive doorbells (in units of 64B). Driver must ensure that only + * complete WQEs are written to queue before issuing a doorbell. + * Examples: max_tx_batch=16 and WQE size = 64B, means up to 16 WQEs can + * be written to SQ between two consecutive doorbells. max_tx_batch=11 + * and WQE size = 128B, means up to 5 WQEs can be written to SQ between + * two consecutive doorbells. Zero means unlimited. 
+ */ + u16 max_tx_batch; +}; + +struct efa_admin_event_queue_attr_desc { + /* The maximum number of event queues supported */ + u32 max_eq; + + /* Maximum number of EQEs per Event Queue */ + u32 max_eq_depth; + + /* Supported events bitmask */ + u32 event_bitmask; }; struct efa_admin_feature_aenq_desc { @@ -692,6 +756,8 @@ struct efa_admin_get_feature_resp { struct efa_admin_feature_queue_attr_desc queue_attr; + struct efa_admin_event_queue_attr_desc event_queue_attr; + struct efa_admin_hw_hints hw_hints; } u; }; @@ -775,6 +841,60 @@ struct efa_admin_dealloc_uar_resp { struct efa_admin_acq_common_desc acq_common_desc; }; +struct efa_admin_create_eq_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* Size of the EQ in entries, must be power of 2 */ + u16 depth; + + /* MSI-X table entry index */ + u8 msix_vec; + + /* + * 4:0 : entry_size_words - size of EQ entry in + * 32-bit words + * 7:5 : reserved - MBZ + */ + u8 caps; + + /* EQ ring base address */ + struct efa_common_mem_addr ba; + + /* + * Enabled events on this EQ + * 0 : completion_events - Enable completion events + * 31:1 : reserved - MBZ + */ + u32 event_bitmask; + + /* MBZ */ + u32 reserved; +}; + +struct efa_admin_create_eq_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + /* EQ number */ + u16 eqn; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_destroy_eq_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* EQ number */ + u16 eqn; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_destroy_eq_resp { + struct efa_admin_acq_common_desc acq_common_desc; +}; + /* asynchronous event notification groups */ enum efa_admin_aenq_group { EFA_ADMIN_FATAL_ERROR = 1, @@ -784,12 +904,6 @@ enum efa_admin_aenq_group { EFA_ADMIN_AENQ_GROUPS_NUM = 5, }; -enum efa_admin_aenq_notification_syndrom { - EFA_ADMIN_SUSPEND = 0, - EFA_ADMIN_RESUME = 1, - EFA_ADMIN_UPDATE_HINTS = 2, -}; - struct efa_admin_mmio_req_read_less_resp { u16 req_id; @@ -799,30 +913,101 @@ struct efa_admin_mmio_req_read_less_resp { u32 reg_val; }; +enum efa_admin_os_type { + EFA_ADMIN_OS_LINUX = 0, +}; + +struct efa_admin_host_info { + /* OS distribution string format */ + u8 os_dist_str[128]; + + /* Defined in enum efa_admin_os_type */ + u32 os_type; + + /* Kernel version string format */ + u8 kernel_ver_str[32]; + + /* Kernel version numeric format */ + u32 kernel_ver; + + /* + * 7:0 : driver_module_type + * 15:8 : driver_sub_minor + * 23:16 : driver_minor + * 31:24 : driver_major + */ + u32 driver_ver; + + /* + * Device's Bus, Device and Function + * 2:0 : function + * 7:3 : device + * 15:8 : bus + */ + u16 bdf; + + /* + * Spec version + * 7:0 : spec_minor + * 15:8 : spec_major + */ + u16 spec_ver; + + /* + * 0 : intree - Intree driver + * 1 : gdr - GPUDirect RDMA supported + * 31:2 : reserved2 + */ + u32 flags; +}; + /* create_qp_cmd */ #define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0) -#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_SHIFT 1 #define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1) +/* modify_qp_cmd */ +#define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK BIT(0) +#define EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE_MASK BIT(1) +#define EFA_ADMIN_MODIFY_QP_CMD_QKEY_MASK BIT(2) +#define EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN_MASK BIT(3) +#define EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY_MASK BIT(4) +#define EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY_MASK BIT(5) + /* reg_mr_cmd */ #define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0) -#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT 7 #define 
EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) #define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0) -#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_SHIFT 2 #define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2) /* create_cq_cmd */ -#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) -#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_SHIFT 6 #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) +#define EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR_MASK BIT(5) -/* get_set_feature_common_desc */ -#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) +/* create_cq_resp */ +#define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0) /* feature_device_attr_desc */ #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) +#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) + +/* create_eq_cmd */ +#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) +#define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK BIT(6) +#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0) + +/* host_info */ +#define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0) +#define EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR_MASK GENMASK(15, 8) +#define EFA_ADMIN_HOST_INFO_DRIVER_MINOR_MASK GENMASK(23, 16) +#define EFA_ADMIN_HOST_INFO_DRIVER_MAJOR_MASK GENMASK(31, 24) +#define EFA_ADMIN_HOST_INFO_FUNCTION_MASK GENMASK(2, 0) +#define EFA_ADMIN_HOST_INFO_DEVICE_MASK GENMASK(7, 3) +#define EFA_ADMIN_HOST_INFO_BUS_MASK GENMASK(15, 8) +#define EFA_ADMIN_HOST_INFO_SPEC_MINOR_MASK GENMASK(7, 0) +#define EFA_ADMIN_HOST_INFO_SPEC_MAJOR_MASK GENMASK(15, 8) +#define EFA_ADMIN_HOST_INFO_INTREE_MASK BIT(0) +#define EFA_ADMIN_HOST_INFO_GDR_MASK BIT(1) #endif /* _EFA_ADMIN_CMDS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h index c8e0c8b905be..83f20c38a840 100644 --- a/drivers/infiniband/hw/efa/efa_admin_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef _EFA_ADMIN_H_ @@ -82,7 +82,7 @@ struct efa_admin_acq_common_desc { /* * indicates to the driver which AQ entry has been consumed by the - * device and could be reused + * device and could be reused */ u16 sq_head_indx; }; @@ -118,12 +118,47 @@ struct efa_admin_aenq_entry { u32 inline_data_w4[12]; }; +enum efa_admin_eqe_event_type { + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION = 0, +}; + +/* Completion event */ +struct efa_admin_comp_event { + /* CQ number */ + u16 cqn; + + /* MBZ */ + u16 reserved; + + /* MBZ */ + u32 reserved2; +}; + +/* Event Queue Element */ +struct efa_admin_eqe { + /* + * 0 : phase + * 8:1 : event_type - Event type + * 31:9 : reserved - MBZ + */ + u32 common; + + /* MBZ */ + u32 reserved; + + union { + /* Event data */ + u32 event_data[2]; + + /* Completion Event */ + struct efa_admin_comp_event comp_event; + } u; +}; + /* aq_common_desc */ #define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) #define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) -#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1 #define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1) -#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2 #define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2) /* acq_common_desc */ @@ -133,4 +168,8 @@ struct efa_admin_aenq_entry { /* aenq_common_desc */ #define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) +/* eqe */ +#define EFA_ADMIN_EQE_PHASE_MASK BIT(0) +#define EFA_ADMIN_EQE_EVENT_TYPE_MASK GENMASK(8, 1) + #endif /* _EFA_ADMIN_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 0778f4f7dccd..16a24a05fc2a 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include "efa_com.h" @@ -16,26 +16,10 @@ #define EFA_ASYNC_QUEUE_DEPTH 16 #define EFA_ADMIN_QUEUE_DEPTH 32 -#define MIN_EFA_VER\ - ((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \ - (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK)) - #define EFA_CTRL_MAJOR 0 #define EFA_CTRL_MINOR 0 #define EFA_CTRL_SUB_MINOR 1 -#define MIN_EFA_CTRL_VER \ - (((EFA_CTRL_MAJOR) << \ - (EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \ - ((EFA_CTRL_MINOR) << \ - (EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \ - (EFA_CTRL_SUB_MINOR)) - -#define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) -#define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) - -#define EFA_REGS_ADMIN_INTR_MASK 1 - enum efa_cmd_status { EFA_CMD_SUBMITTED, EFA_CMD_COMPLETED, @@ -46,8 +30,6 @@ struct efa_comp_ctx { struct efa_admin_acq_entry *user_cqe; u32 comp_size; enum efa_cmd_status status; - /* status from the device */ - u8 comp_status; u8 cmd_opcode; u8 occupied; }; @@ -74,17 +56,25 @@ static const char *efa_com_cmd_str(u8 cmd) EFA_CMD_STR_CASE(DEALLOC_PD); EFA_CMD_STR_CASE(ALLOC_UAR); EFA_CMD_STR_CASE(DEALLOC_UAR); + EFA_CMD_STR_CASE(CREATE_EQ); + EFA_CMD_STR_CASE(DESTROY_EQ); default: return "unknown command opcode"; } #undef EFA_CMD_STR_CASE } +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) +{ + *addr_low = lower_32_bits(addr); + *addr_high = upper_32_bits(addr); +} + static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) { struct efa_com_mmio_read *mmio_read = &edev->mmio_read; struct efa_admin_mmio_req_read_less_resp *read_resp; unsigned long exp_time; - u32 mmio_read_reg; + u32 mmio_read_reg = 0; u32 err; read_resp = mmio_read->read_resp; @@ -94,10 +84,9 @@ static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) /* trash DMA req_id to identify when hardware is done */ read_resp->req_id = mmio_read->seq_num + 0x9aL; - mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) & - EFA_REGS_MMIO_REG_READ_REG_OFF_MASK; - mmio_read_reg |= mmio_read->seq_num & - EFA_REGS_MMIO_REG_READ_REQ_ID_MASK; + EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REG_OFF, offset); + EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REQ_ID, + mmio_read->seq_num); writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF); @@ -137,9 +126,9 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev) struct efa_com_admin_queue *aq = &edev->aq; struct efa_com_admin_sq *sq = &aq->sq; u16 size = aq->depth * sizeof(*sq->entries); + u32 aq_caps = 0; u32 addr_high; u32 addr_low; - u32 aq_caps; sq->entries = dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL); @@ -154,16 +143,15 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev) sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF); - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr); - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr); + addr_high = upper_32_bits(sq->dma_addr); + addr_low = lower_32_bits(sq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF); - aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK; - aq_caps |= (sizeof(struct efa_admin_aq_entry) << - EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) & - EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK; + EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_DEPTH, aq->depth); + EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE, + sizeof(struct efa_admin_aq_entry)); writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF); @@ -175,9 +163,9 
@@ static int efa_com_admin_init_cq(struct efa_com_dev *edev) struct efa_com_admin_queue *aq = &edev->aq; struct efa_com_admin_cq *cq = &aq->cq; u16 size = aq->depth * sizeof(*cq->entries); + u32 acq_caps = 0; u32 addr_high; u32 addr_low; - u32 acq_caps; cq->entries = dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL); @@ -189,19 +177,17 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev) cq->cc = 0; cq->phase = 1; - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr); - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr); + addr_high = upper_32_bits(cq->dma_addr); + addr_low = lower_32_bits(cq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF); - acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK; - acq_caps |= (sizeof(struct efa_admin_acq_entry) << - EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) & - EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK; - acq_caps |= (aq->msix_vector_idx << - EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) & - EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK; + EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_DEPTH, aq->depth); + EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE, + sizeof(struct efa_admin_acq_entry)); + EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR, + aq->msix_vector_idx); writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF); @@ -212,7 +198,8 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev, struct efa_aenq_handlers *aenq_handlers) { struct efa_com_aenq *aenq = &edev->aenq; - u32 addr_low, addr_high, aenq_caps; + u32 addr_low, addr_high; + u32 aenq_caps = 0; u16 size; if (!aenq_handlers) { @@ -231,19 +218,17 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev, aenq->cc = 0; aenq->phase = 1; - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + addr_low = lower_32_bits(aenq->dma_addr); + addr_high = upper_32_bits(aenq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF); - aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; - aenq_caps |= (sizeof(struct efa_admin_aenq_entry) << - EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & - EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; - aenq_caps |= (aenq->msix_vector_idx - << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) & - EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK; + EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_DEPTH, aenq->depth); + EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE, + sizeof(struct efa_admin_aenq_entry)); + EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR, + aenq->msix_vector_idx); writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF); /* @@ -280,8 +265,8 @@ static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq, static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq, struct efa_comp_ctx *comp_ctx) { - u16 cmd_id = comp_ctx->user_cqe->acq_common_descriptor.command & - EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + u16 cmd_id = EFA_GET(&comp_ctx->user_cqe->acq_common_descriptor.command, + EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID); u16 ctx_id = cmd_id & (aq->depth - 1); ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id); @@ -335,8 +320,8 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK; cmd->aq_common_descriptor.command_id = cmd_id; - cmd->aq_common_descriptor.flags |= aq->sq.phase & - 
EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK; + EFA_SET(&cmd->aq_common_descriptor.flags, + EFA_ADMIN_AQ_COMMON_DESC_PHASE, aq->sq.phase); comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true); if (!comp_ctx) { @@ -427,8 +412,8 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a struct efa_comp_ctx *comp_ctx; u16 cmd_id; - cmd_id = cqe->acq_common_descriptor.command & - EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + cmd_id = EFA_GET(&cqe->acq_common_descriptor.command, + EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID); comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false); if (!comp_ctx) { @@ -439,9 +424,7 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a } comp_ctx->status = EFA_CMD_COMPLETED; - comp_ctx->comp_status = cqe->acq_common_descriptor.status; - if (comp_ctx->user_cqe) - memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); + memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state)) complete(&comp_ctx->wait_event); @@ -539,7 +522,7 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c msleep(aq->poll_interval); } - err = efa_com_comp_status_to_errno(comp_ctx->comp_status); + err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); out: efa_com_put_comp_ctx(aq, comp_ctx); return err; @@ -587,7 +570,7 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com goto out; } - err = efa_com_comp_status_to_errno(comp_ctx->comp_status); + err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); out: efa_com_put_comp_ctx(aq, comp_ctx); return err; @@ -649,17 +632,20 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx)); up(&aq->avail_cmds); + atomic64_inc(&aq->stats.cmd_err); return PTR_ERR(comp_ctx); } err = efa_com_wait_and_process_admin_cq(comp_ctx, aq); - if (err) + if (err) { ibdev_err_ratelimited( aq->efa_dev, "Failed to process command %s (opcode %u) comp_status %d err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, comp_ctx->comp_status, - err); + cmd->aq_common_descriptor.opcode, + comp_ctx->user_cqe->acq_common_descriptor.status, err); + atomic64_inc(&aq->stats.cmd_err); + } up(&aq->avail_cmds); @@ -705,7 +691,7 @@ void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling) u32 mask_value = 0; if (polling) - mask_value = EFA_REGS_ADMIN_INTR_MASK; + EFA_SET(&mask_value, EFA_REGS_INTR_MASK_EN, 1); writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF); if (polling) @@ -743,7 +729,7 @@ int efa_com_admin_init(struct efa_com_dev *edev, int err; dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); - if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) { + if (!EFA_GET(&dev_sts, EFA_REGS_DEV_STS_READY)) { ibdev_err(edev->efa_dev, "Device isn't ready, abort com init %#x\n", dev_sts); return -ENODEV; @@ -778,8 +764,7 @@ int efa_com_admin_init(struct efa_com_dev *edev, goto err_destroy_cq; cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); - timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >> - EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT; + timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO); if (timeout) /* the resolution of timeout reg is 100ms */ aq->completion_timeout = timeout * 100000; @@ -811,7 +796,7 @@ err_destroy_comp_ctxt: * This method goes over the admin completion queue and wakes up * all the pending threads that wait on the commands wait event. 
* - * @note: Should be called after MSI-X interrupt. + * Note: Should be called after MSI-X interrupt. */ void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev) { @@ -940,7 +925,9 @@ void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev) int efa_com_validate_version(struct efa_com_dev *edev) { + u32 min_ctrl_ver = 0; u32 ctrl_ver_masked; + u32 min_ver = 0; u32 ctrl_ver; u32 ver; @@ -953,33 +940,42 @@ int efa_com_validate_version(struct efa_com_dev *edev) EFA_REGS_CONTROLLER_VERSION_OFF); ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n", - (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >> - EFA_REGS_VERSION_MAJOR_VERSION_SHIFT, - ver & EFA_REGS_VERSION_MINOR_VERSION_MASK); - - if (ver < MIN_EFA_VER) { + EFA_GET(&ver, EFA_REGS_VERSION_MAJOR_VERSION), + EFA_GET(&ver, EFA_REGS_VERSION_MINOR_VERSION)); + + EFA_SET(&min_ver, EFA_REGS_VERSION_MAJOR_VERSION, + EFA_ADMIN_API_VERSION_MAJOR); + EFA_SET(&min_ver, EFA_REGS_VERSION_MINOR_VERSION, + EFA_ADMIN_API_VERSION_MINOR); + if (ver < min_ver) { ibdev_err(edev->efa_dev, "EFA version is lower than the minimal version the driver supports\n"); return -EOPNOTSUPP; } - ibdev_dbg(edev->efa_dev, - "efa controller version: %d.%d.%d implementation version %d\n", - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> - EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> - EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT, - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK), - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >> - EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT); + ibdev_dbg( + edev->efa_dev, + "efa controller version: %d.%d.%d implementation version %d\n", + EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION), + EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION), + EFA_GET(&ctrl_ver, + EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION), + EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_IMPL_ID)); ctrl_ver_masked = - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) | - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) | - (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK); - + EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION) | + EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION) | + EFA_GET(&ctrl_ver, + EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION); + + EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION, + EFA_CTRL_MAJOR); + EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION, + EFA_CTRL_MINOR); + EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION, + EFA_CTRL_SUB_MINOR); /* Validate the ctrl version without the implementation ID */ - if (ctrl_ver_masked < MIN_EFA_CTRL_VER) { + if (ctrl_ver_masked < min_ctrl_ver) { ibdev_err(edev->efa_dev, "EFA ctrl version is lower than the minimal ctrl version the driver supports\n"); return -EOPNOTSUPP; @@ -1002,8 +998,7 @@ int efa_com_get_dma_width(struct efa_com_dev *edev) u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); int width; - width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >> - EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT; + width = EFA_GET(&caps, EFA_REGS_CAPS_DMA_ADDR_WIDTH); ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width); @@ -1017,16 +1012,14 @@ int efa_com_get_dma_width(struct efa_com_dev *edev) return width; } -static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, - u16 exp_state) +static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, int on) 
{ u32 val, i; for (i = 0; i < timeout; i++) { val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); - if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) == - exp_state) + if (EFA_GET(&val, EFA_REGS_DEV_STS_RESET_IN_PROGRESS) == on) return 0; ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val); @@ -1046,36 +1039,34 @@ static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, int efa_com_dev_reset(struct efa_com_dev *edev, enum efa_regs_reset_reason_types reset_reason) { - u32 stat, timeout, cap, reset_val; + u32 stat, timeout, cap; + u32 reset_val = 0; int err; stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); - if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) { + if (!EFA_GET(&stat, EFA_REGS_DEV_STS_READY)) { ibdev_err(edev->efa_dev, "Device isn't ready, can't reset device\n"); return -EINVAL; } - timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >> - EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT; + timeout = EFA_GET(&cap, EFA_REGS_CAPS_RESET_TIMEOUT); if (!timeout) { ibdev_err(edev->efa_dev, "Invalid timeout value\n"); return -EINVAL; } /* start reset */ - reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK; - reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) & - EFA_REGS_DEV_CTL_RESET_REASON_MASK; + EFA_SET(&reset_val, EFA_REGS_DEV_CTL_DEV_RESET, 1); + EFA_SET(&reset_val, EFA_REGS_DEV_CTL_RESET_REASON, reset_reason); writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF); /* reset clears the mmio readless address, restore it */ efa_com_mmio_reg_read_resp_addr_init(edev); - err = wait_for_reset_state(edev, timeout, - EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK); + err = wait_for_reset_state(edev, timeout, 1); if (err) { ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n"); return err; @@ -1089,8 +1080,7 @@ int efa_com_dev_reset(struct efa_com_dev *edev, return err; } - timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >> - EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT; + timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO); if (timeout) /* the resolution of timeout reg is 100ms */ edev->aq.completion_timeout = timeout * 100000; @@ -1099,3 +1089,159 @@ int efa_com_dev_reset(struct efa_com_dev *edev, return 0; } + +static int efa_com_create_eq(struct efa_com_dev *edev, + struct efa_com_create_eq_params *params, + struct efa_com_create_eq_result *result) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_create_eq_resp resp = {}; + struct efa_admin_create_eq_cmd cmd = {}; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_CREATE_EQ; + EFA_SET(&cmd.caps, EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS, + params->entry_size_in_bytes / 4); + cmd.depth = params->depth; + cmd.event_bitmask = params->event_bitmask; + cmd.msix_vec = params->msix_vec; + + efa_com_set_dma_addr(params->dma_addr, &cmd.ba.mem_addr_high, + &cmd.ba.mem_addr_low); + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err_ratelimited(edev->efa_dev, + "Failed to create eq[%d]\n", err); + return err; + } + + result->eqn = resp.eqn; + + return 0; +} + +static void efa_com_destroy_eq(struct efa_com_dev *edev, + struct efa_com_destroy_eq_params *params) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_destroy_eq_resp resp = {}; + struct efa_admin_destroy_eq_cmd cmd = {}; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_DESTROY_EQ; + cmd.eqn = params->eqn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry 
*)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) + ibdev_err_ratelimited(edev->efa_dev, + "Failed to destroy EQ-%u [%d]\n", cmd.eqn, + err); +} + +static void efa_com_arm_eq(struct efa_com_dev *edev, struct efa_com_eq *eeq) +{ + u32 val = 0; + + EFA_SET(&val, EFA_REGS_EQ_DB_EQN, eeq->eqn); + EFA_SET(&val, EFA_REGS_EQ_DB_ARM, 1); + + writel(val, edev->reg_bar + EFA_REGS_EQ_DB_OFF); +} + +void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, + struct efa_com_eq *eeq) +{ + struct efa_admin_eqe *eqe; + u32 processed = 0; + u8 phase; + u32 ci; + + ci = eeq->cc & (eeq->depth - 1); + phase = eeq->phase; + eqe = &eeq->eqes[ci]; + + /* Go over all the events */ + while ((READ_ONCE(eqe->common) & EFA_ADMIN_EQE_PHASE_MASK) == phase) { + /* + * Do not read the rest of the completion entry before the + * phase bit was validated + */ + dma_rmb(); + + eeq->cb(eeq, eqe); + + /* Get next event entry */ + ci++; + processed++; + + if (ci == eeq->depth) { + ci = 0; + phase = !phase; + } + + eqe = &eeq->eqes[ci]; + } + + eeq->cc += processed; + eeq->phase = phase; + efa_com_arm_eq(eeq->edev, eeq); +} + +void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq) +{ + struct efa_com_destroy_eq_params params = { + .eqn = eeq->eqn, + }; + + efa_com_destroy_eq(edev, ¶ms); + dma_free_coherent(edev->dmadev, eeq->depth * sizeof(*eeq->eqes), + eeq->eqes, eeq->dma_addr); +} + +int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, + efa_eqe_handler cb, u16 depth, u8 msix_vec) +{ + struct efa_com_create_eq_params params = {}; + struct efa_com_create_eq_result result = {}; + int err; + + params.depth = depth; + params.entry_size_in_bytes = sizeof(*eeq->eqes); + EFA_SET(¶ms.event_bitmask, + EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS, 1); + params.msix_vec = msix_vec; + + eeq->eqes = dma_alloc_coherent(edev->dmadev, + params.depth * sizeof(*eeq->eqes), + ¶ms.dma_addr, GFP_KERNEL); + if (!eeq->eqes) + return -ENOMEM; + + err = efa_com_create_eq(edev, ¶ms, &result); + if (err) + goto err_free_coherent; + + eeq->eqn = result.eqn; + eeq->edev = edev; + eeq->dma_addr = params.dma_addr; + eeq->phase = 1; + eeq->depth = params.depth; + eeq->cb = cb; + efa_com_arm_eq(edev, eeq); + + return 0; + +err_free_coherent: + dma_free_coherent(edev->dmadev, params.depth * sizeof(*eeq->eqes), + eeq->eqes, params.dma_addr); + return err; +} diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h index c67dd8109d1c..77282234ce68 100644 --- a/drivers/infiniband/hw/efa/efa_com.h +++ b/drivers/infiniband/hw/efa/efa_com.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef _EFA_COM_H_ @@ -47,6 +47,7 @@ struct efa_com_admin_sq { struct efa_com_stats_admin { atomic64_t submitted_cmd; atomic64_t completed_cmd; + atomic64_t cmd_err; atomic64_t no_completion; }; @@ -79,6 +80,9 @@ struct efa_com_admin_queue { }; struct efa_aenq_handlers; +struct efa_com_eq; +typedef void (*efa_eqe_handler)(struct efa_com_eq *eeq, + struct efa_admin_eqe *eqe); struct efa_com_aenq { struct efa_admin_aenq_entry *entries; @@ -111,6 +115,33 @@ struct efa_com_dev { struct efa_com_mmio_read mmio_read; }; +struct efa_com_eq { + struct efa_com_dev *edev; + struct efa_admin_eqe *eqes; + dma_addr_t dma_addr; + u32 cc; /* Consumer counter */ + u16 eqn; + u16 depth; + u8 phase; + efa_eqe_handler cb; +}; + +struct efa_com_create_eq_params { + dma_addr_t dma_addr; + u32 event_bitmask; + u16 depth; + u8 entry_size_in_bytes; + u8 msix_vec; +}; + +struct efa_com_create_eq_result { + u16 eqn; +}; + +struct efa_com_destroy_eq_params { + u16 eqn; +}; + typedef void (*efa_aenq_handler)(void *data, struct efa_admin_aenq_entry *aenq_e); @@ -120,9 +151,13 @@ struct efa_aenq_handlers { efa_aenq_handler unimplemented_handler; }; +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); int efa_com_admin_init(struct efa_com_dev *edev, struct efa_aenq_handlers *aenq_handlers); void efa_com_admin_destroy(struct efa_com_dev *edev); +int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, + efa_eqe_handler cb, u16 depth, u8 msix_vec); +void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq); int efa_com_dev_reset(struct efa_com_dev *edev, enum efa_regs_reset_reason_types reset_reason); void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling); @@ -139,5 +174,7 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, struct efa_admin_acq_entry *comp, size_t comp_size); void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data); +void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, + struct efa_com_eq *eeq); #endif /* _EFA_COM_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index e20bd84a1014..8f8885e002ba 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -1,17 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include "efa_com.h" #include "efa_com_cmd.h" -void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) -{ - *addr_low = lower_32_bits(addr); - *addr_high = upper_32_bits(addr); -} - int efa_com_create_qp(struct efa_com_dev *edev, struct efa_com_create_qp_params *params, struct efa_com_create_qp_result *res) @@ -76,6 +70,7 @@ int efa_com_modify_qp(struct efa_com_dev *edev, cmd.qkey = params->qkey; cmd.sq_psn = params->sq_psn; cmd.sq_drained_async_notify = params->sq_drained_async_notify; + cmd.rnr_retry = params->rnr_retry; err = efa_com_cmd_exec(aq, (struct efa_admin_aq_entry *)&cmd, @@ -121,6 +116,7 @@ int efa_com_query_qp(struct efa_com_dev *edev, result->qkey = resp.qkey; result->sq_draining = resp.sq_draining; result->sq_psn = resp.sq_psn; + result->rnr_retry = resp.rnr_retry; return 0; } @@ -155,18 +151,27 @@ int efa_com_create_cq(struct efa_com_dev *edev, struct efa_com_create_cq_params *params, struct efa_com_create_cq_result *result) { - struct efa_admin_create_cq_resp cmd_completion; + struct efa_admin_create_cq_resp cmd_completion = {}; struct efa_admin_create_cq_cmd create_cmd = {}; struct efa_com_admin_queue *aq = &edev->aq; int err; create_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_CQ; - create_cmd.cq_caps_2 = (params->entry_size_in_bytes / 4) & - EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK; + EFA_SET(&create_cmd.cq_caps_2, + EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS, + params->entry_size_in_bytes / 4); create_cmd.cq_depth = params->cq_depth; create_cmd.num_sub_cqs = params->num_sub_cqs; create_cmd.uar = params->uarn; - + if (params->interrupt_mode_enabled) { + EFA_SET(&create_cmd.cq_caps_1, + EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1); + create_cmd.eqn = params->eqn; + } + if (params->set_src_addr) { + EFA_SET(&create_cmd.cq_caps_2, + EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR, 1); + } efa_com_set_dma_addr(params->dma_addr, &create_cmd.cq_ba.mem_addr_high, &create_cmd.cq_ba.mem_addr_low); @@ -184,6 +189,9 @@ int efa_com_create_cq(struct efa_com_dev *edev, result->cq_idx = cmd_completion.cq_idx; result->actual_depth = params->cq_depth; + result->db_off = cmd_completion.db_offset; + result->db_valid = EFA_GET(&cmd_completion.flags, + EFA_ADMIN_CREATE_CQ_RESP_DB_VALID); return 0; } @@ -227,8 +235,8 @@ int efa_com_register_mr(struct efa_com_dev *edev, mr_cmd.aq_common_desc.opcode = EFA_ADMIN_REG_MR; mr_cmd.pd = params->pd; mr_cmd.mr_length = params->mr_length_in_bytes; - mr_cmd.flags |= params->page_shift & - EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK; + EFA_SET(&mr_cmd.flags, EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT, + params->page_shift); mr_cmd.iova = params->iova; mr_cmd.permissions = params->permissions; @@ -242,11 +250,11 @@ int efa_com_register_mr(struct efa_com_dev *edev, params->pbl.pbl.address.mem_addr_low; mr_cmd.pbl.pbl.address.mem_addr_high = params->pbl.pbl.address.mem_addr_high; - mr_cmd.aq_common_desc.flags |= - EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK; + EFA_SET(&mr_cmd.aq_common_desc.flags, + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1); if (params->indirect) - mr_cmd.aq_common_desc.flags |= - EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + EFA_SET(&mr_cmd.aq_common_desc.flags, + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT, 1); } err = efa_com_cmd_exec(aq, @@ -350,7 +358,7 @@ int efa_com_destroy_ah(struct efa_com_dev *edev, return 0; } -static bool +bool efa_com_check_supported_feature_id(struct efa_com_dev *edev, enum efa_admin_aq_feature_id feature_id) { @@ -386,9 +394,8 @@ static int efa_com_get_feature_ex(struct efa_com_dev 
*edev, get_cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_FEATURE; if (control_buff_size) - get_cmd.aq_common_descriptor.flags = - EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; - + EFA_SET(&get_cmd.aq_common_descriptor.flags, + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1); efa_com_set_dma_addr(control_buf_dma_addr, &get_cmd.control_buffer.address.mem_addr_high, @@ -480,6 +487,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->max_llq_size = resp.u.queue_attr.max_llq_size; result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges; + result->max_tx_batch = resp.u.queue_attr.max_tx_batch; + result->min_sq_depth = resp.u.queue_attr.min_sq_depth; err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); if (err) { @@ -493,6 +502,23 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, sizeof(resp.u.network_attr.addr)); result->mtu = resp.u.network_attr.mtu; + if (efa_com_check_supported_feature_id(edev, + EFA_ADMIN_EVENT_QUEUE_ATTR)) { + err = efa_com_get_feature(edev, &resp, + EFA_ADMIN_EVENT_QUEUE_ATTR); + if (err) { + ibdev_err_ratelimited( + edev->efa_dev, + "Failed to get event queue attributes %d\n", + err); + return err; + } + + result->max_eq = resp.u.event_queue_attr.max_eq; + result->max_eq_depth = resp.u.event_queue_attr.max_eq_depth; + result->event_bitmask = resp.u.event_queue_attr.event_bitmask; + } + return 0; } @@ -517,12 +543,12 @@ int efa_com_get_hw_hints(struct efa_com_dev *edev, return 0; } -static int efa_com_set_feature_ex(struct efa_com_dev *edev, - struct efa_admin_set_feature_resp *set_resp, - struct efa_admin_set_feature_cmd *set_cmd, - enum efa_admin_aq_feature_id feature_id, - dma_addr_t control_buf_dma_addr, - u32 control_buff_size) +int efa_com_set_feature_ex(struct efa_com_dev *edev, + struct efa_admin_set_feature_resp *set_resp, + struct efa_admin_set_feature_cmd *set_cmd, + enum efa_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size) { struct efa_com_admin_queue *aq; int err; @@ -538,8 +564,9 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev, set_cmd->aq_common_descriptor.opcode = EFA_ADMIN_SET_FEATURE; if (control_buff_size) { - set_cmd->aq_common_descriptor.flags = - EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + set_cmd->aq_common_descriptor.flags = 0; + EFA_SET(&set_cmd->aq_common_descriptor.flags, + EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1); efa_com_set_dma_addr(control_buf_dma_addr, &set_cmd->control_buffer.address.mem_addr_high, &set_cmd->control_buffer.address.mem_addr_low); @@ -747,11 +774,27 @@ int efa_com_get_stats(struct efa_com_dev *edev, return err; } - result->basic_stats.tx_bytes = resp.basic_stats.tx_bytes; - result->basic_stats.tx_pkts = resp.basic_stats.tx_pkts; - result->basic_stats.rx_bytes = resp.basic_stats.rx_bytes; - result->basic_stats.rx_pkts = resp.basic_stats.rx_pkts; - result->basic_stats.rx_drops = resp.basic_stats.rx_drops; + switch (cmd.type) { + case EFA_ADMIN_GET_STATS_TYPE_BASIC: + result->basic_stats.tx_bytes = resp.u.basic_stats.tx_bytes; + result->basic_stats.tx_pkts = resp.u.basic_stats.tx_pkts; + result->basic_stats.rx_bytes = resp.u.basic_stats.rx_bytes; + result->basic_stats.rx_pkts = resp.u.basic_stats.rx_pkts; + result->basic_stats.rx_drops = resp.u.basic_stats.rx_drops; + break; + case EFA_ADMIN_GET_STATS_TYPE_MESSAGES: + result->messages_stats.send_bytes = resp.u.messages_stats.send_bytes; + result->messages_stats.send_wrs = resp.u.messages_stats.send_wrs; + 
result->messages_stats.recv_bytes = resp.u.messages_stats.recv_bytes; + result->messages_stats.recv_wrs = resp.u.messages_stats.recv_wrs; + break; + case EFA_ADMIN_GET_STATS_TYPE_RDMA_READ: + result->rdma_read_stats.read_wrs = resp.u.rdma_read_stats.read_wrs; + result->rdma_read_stats.read_bytes = resp.u.rdma_read_stats.read_bytes; + result->rdma_read_stats.read_wr_err = resp.u.rdma_read_stats.read_wr_err; + result->rdma_read_stats.read_resp_bytes = resp.u.rdma_read_stats.read_resp_bytes; + break; + } return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 31db5a0cbd5b..0898ad5bc340 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COM_CMD_H_ @@ -47,6 +47,7 @@ struct efa_com_modify_qp_params { u32 qkey; u32 sq_psn; u8 sq_drained_async_notify; + u8 rnr_retry; }; struct efa_com_query_qp_params { @@ -58,6 +59,7 @@ struct efa_com_query_qp_result { u32 qkey; u32 sq_draining; u32 sq_psn; + u8 rnr_retry; }; struct efa_com_destroy_qp_params { @@ -71,7 +73,10 @@ struct efa_com_create_cq_params { u16 cq_depth; u16 num_sub_cqs; u16 uarn; + u16 eqn; u8 entry_size_in_bytes; + u8 interrupt_mode_enabled : 1; + u8 set_src_addr : 1; }; struct efa_com_create_cq_result { @@ -79,6 +84,8 @@ struct efa_com_create_cq_result { u16 cq_idx; /* actual cq depth in # of entries */ u16 actual_depth; + u32 db_off; + bool db_valid; }; struct efa_com_destroy_cq_params { @@ -123,10 +130,15 @@ struct efa_com_get_device_attr_result { u32 max_llq_size; u32 max_rdma_size; u32 device_caps; + u32 max_eq; + u32 max_eq_depth; + u32 event_bitmask; /* EQ events bitmask */ u16 sub_cqs_per_cq; u16 max_sq_sge; u16 max_rq_sge; u16 max_wr_rdma_sge; + u16 max_tx_batch; + u16 min_sq_depth; u8 db_bar; }; @@ -236,11 +248,26 @@ struct efa_com_basic_stats { u64 rx_drops; }; +struct efa_com_messages_stats { + u64 send_bytes; + u64 send_wrs; + u64 recv_bytes; + u64 recv_wrs; +}; + +struct efa_com_rdma_read_stats { + u64 read_wrs; + u64 read_bytes; + u64 read_wr_err; + u64 read_resp_bytes; +}; + union efa_com_get_stats_result { struct efa_com_basic_stats basic_stats; + struct efa_com_messages_stats messages_stats; + struct efa_com_rdma_read_stats rdma_read_stats; }; -void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); int efa_com_create_qp(struct efa_com_dev *edev, struct efa_com_create_qp_params *params, struct efa_com_create_qp_result *res); @@ -270,6 +297,15 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, struct efa_com_get_device_attr_result *result); int efa_com_get_hw_hints(struct efa_com_dev *edev, struct efa_com_get_hw_hints_result *result); +bool +efa_com_check_supported_feature_id(struct efa_com_dev *edev, + enum efa_admin_aq_feature_id feature_id); +int efa_com_set_feature_ex(struct efa_com_dev *edev, + struct efa_admin_set_feature_resp *set_resp, + struct efa_admin_set_feature_cmd *set_cmd, + enum efa_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size); int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups); int efa_com_alloc_pd(struct efa_com_dev *edev, struct efa_com_alloc_pd_result *result); diff --git a/drivers/infiniband/hw/efa/efa_common_defs.h b/drivers/infiniband/hw/efa/efa_common_defs.h index 
c559ec08898e..90af1c82c9c6 100644 --- a/drivers/infiniband/hw/efa/efa_common_defs.h +++ b/drivers/infiniband/hw/efa/efa_common_defs.h @@ -1,14 +1,25 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COMMON_H_ #define _EFA_COMMON_H_ +#include <linux/bitfield.h> + #define EFA_COMMON_SPEC_VERSION_MAJOR 2 #define EFA_COMMON_SPEC_VERSION_MINOR 0 +#define EFA_GET(ptr, mask) FIELD_GET(mask##_MASK, *(ptr)) + +#define EFA_SET(ptr, mask, value) \ + ({ \ + typeof(ptr) _ptr = ptr; \ + *_ptr = (*_ptr & ~(mask##_MASK)) | \ + FIELD_PREP(mask##_MASK, value); \ + }) + struct efa_common_mem_addr { u32 mem_addr_low; diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h new file mode 100644 index 000000000000..17ba8984b11e --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_io_defs.h @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef _EFA_IO_H_ +#define _EFA_IO_H_ + +#define EFA_IO_TX_DESC_NUM_BUFS 2 +#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1 +#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32 +#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4 + +enum efa_io_queue_type { + /* send queue (of a QP) */ + EFA_IO_SEND_QUEUE = 1, + /* recv queue (of a QP) */ + EFA_IO_RECV_QUEUE = 2, +}; + +enum efa_io_send_op_type { + /* send message */ + EFA_IO_SEND = 0, + /* RDMA read */ + EFA_IO_RDMA_READ = 1, +}; + +enum efa_io_comp_status { + /* Successful completion */ + EFA_IO_COMP_STATUS_OK = 0, + /* Flushed during QP destroy */ + EFA_IO_COMP_STATUS_FLUSHED = 1, + /* Internal QP error */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2, + /* Bad operation type */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_OP_TYPE = 3, + /* Bad AH */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4, + /* LKEY not registered or does not match IOVA */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5, + /* Message too long */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6, + /* Destination ENI is down or does not run EFA */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7, + /* Connection was reset by remote side */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8, + /* Bad dest QP number (QP does not exist or is in error state) */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_DEST_QPN = 9, + /* Destination resource not ready (no WQEs posted on RQ) */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_RNR = 10, + /* Receiver SGL too short */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11, + /* Unexpected status returned by responder */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12, + /* Unresponsive remote - detected locally */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13, +}; + +struct efa_io_tx_meta_desc { + /* Verbs-generated Request ID */ + u16 req_id; + + /* + * control flags + * 3:0 : op_type - operation type: send/rdma/fast mem + * ops/etc + * 4 : has_imm - immediate_data field carries valid + * data. + * 5 : inline_msg - inline mode - inline message data + * follows this descriptor (no buffer descriptors). + * Note that it is different from immediate data + * 6 : meta_extension - Extended metadata. MBZ + * 7 : meta_desc - Indicates metadata descriptor. + * Must be set. + */ + u8 ctrl1; + + /* + * control flags + * 0 : phase + * 1 : reserved25 - MBZ + * 2 : first - Indicates first descriptor in + * transaction. Must be set. 
+ * 3 : last - Indicates last descriptor in + * transaction. Must be set. + * 4 : comp_req - Indicates whether completion should + * be posted, after packet is transmitted. Valid only + * for the first descriptor + * 7:5 : reserved29 - MBZ + */ + u8 ctrl2; + + u16 dest_qp_num; + + /* + * If inline_msg bit is set, length of inline message in bytes, + * otherwise length of SGL (number of buffers). + */ + u16 length; + + /* + * immediate data: if has_imm is set, then this field is included + * within Tx message and reported in remote Rx completion. + */ + u32 immediate_data; + + u16 ah; + + u16 reserved; + + /* Queue key */ + u32 qkey; + + u8 reserved2[12]; +}; + +/* + * Tx queue buffer descriptor, for any transport type. Preceded by metadata + * descriptor. + */ +struct efa_io_tx_buf_desc { + /* length in bytes */ + u32 length; + + /* + * 23:0 : lkey - local memory translation key + * 31:24 : reserved - MBZ + */ + u32 lkey; + + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer address bits[63:32] */ + u32 buf_addr_hi; +}; + +struct efa_io_remote_mem_addr { + /* length in bytes */ + u32 length; + + /* remote memory translation key */ + u32 rkey; + + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer address bits[63:32] */ + u32 buf_addr_hi; +}; + +struct efa_io_rdma_req { + /* Remote memory address */ + struct efa_io_remote_mem_addr remote_mem; + + /* Local memory address */ + struct efa_io_tx_buf_desc local_mem[1]; +}; + +/* + * Tx WQE, composed of tx meta descriptors followed by either tx buffer + * descriptors or inline data + */ +struct efa_io_tx_wqe { + /* TX meta */ + struct efa_io_tx_meta_desc meta; + + union { + /* Send buffer descriptors */ + struct efa_io_tx_buf_desc sgl[2]; + + u8 inline_data[32]; + + /* RDMA local and remote memory addresses */ + struct efa_io_rdma_req rdma_req; + } data; +}; + +/* + * Rx buffer descriptor; RX WQE is composed of one or more RX buffer + * descriptors. + */ +struct efa_io_rx_desc { + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer Pointer[63:32] */ + u32 buf_addr_hi; + + /* Verbs-generated request id. */ + u16 req_id; + + /* Length in bytes. */ + u16 length; + + /* + * LKey and control flags + * 23:0 : lkey + * 29:24 : reserved - MBZ + * 30 : first - Indicates first descriptor in WQE + * 31 : last - Indicates last descriptor in WQE + */ + u32 lkey_ctrl; +}; + +/* Common IO completion descriptor */ +struct efa_io_cdesc_common { + /* + * verbs-generated request ID, as provided in the completed tx or rx + * descriptor. + */ + u16 req_id; + + u8 status; + + /* + * flags + * 0 : phase - Phase bit + * 2:1 : q_type - enum efa_io_queue_type: send/recv + * 3 : has_imm - indicates that immediate data is + * present - for RX completions only + * 7:4 : reserved28 - MBZ + */ + u8 flags; + + /* local QP number */ + u16 qp_num; + + /* Transferred length */ + u16 length; +}; + +/* Tx completion descriptor */ +struct efa_io_tx_cdesc { + /* Common completion info */ + struct efa_io_cdesc_common common; +}; + +/* Rx Completion Descriptor */ +struct efa_io_rx_cdesc { + /* Common completion info */ + struct efa_io_cdesc_common common; + + /* Remote Address Handle FW index, 0xFFFF indicates invalid ah */ + u16 ah; + + u16 src_qp_num; + + /* Immediate data */ + u32 imm; +}; + +/* Extended Rx Completion Descriptor */ +struct efa_io_rx_cdesc_ex { + /* Base RX completion info */ + struct efa_io_rx_cdesc rx_cdesc_base; + + /* + * Valid only in case of unknown AH (0xFFFF) and CQ set_src_addr is + * enabled. 
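[Editor's note] struct efa_io_tx_wqe fixes the on-device layout of a send: one metadata descriptor followed by either buffer descriptors, inline payload, or an RDMA read request. A hedged sketch of how a submitter (normally the user-space provider, not this kernel driver) might fill an inline send WQE using the field descriptions above; build_inline_send_wqe is an invented name and phase-bit/req_id bookkeeping is omitted:

        #include <linux/string.h>
        #include "efa_common_defs.h"
        #include "efa_io_defs.h"

        static void build_inline_send_wqe(struct efa_io_tx_wqe *wqe, u16 dest_qpn,
                                          u16 ah, u32 qkey, const void *buf, u16 len)
        {
                struct efa_io_tx_meta_desc *meta = &wqe->meta;

                if (len > EFA_IO_TX_DESC_INLINE_MAX_SIZE)
                        len = EFA_IO_TX_DESC_INLINE_MAX_SIZE;

                memset(wqe, 0, sizeof(*wqe));

                EFA_SET(&meta->ctrl1, EFA_IO_TX_META_DESC_OP_TYPE, EFA_IO_SEND);
                EFA_SET(&meta->ctrl1, EFA_IO_TX_META_DESC_INLINE_MSG, 1);
                EFA_SET(&meta->ctrl1, EFA_IO_TX_META_DESC_META_DESC, 1);
                /* A single-descriptor WQE is both first and last in the transaction */
                EFA_SET(&meta->ctrl2, EFA_IO_TX_META_DESC_FIRST, 1);
                EFA_SET(&meta->ctrl2, EFA_IO_TX_META_DESC_LAST, 1);
                EFA_SET(&meta->ctrl2, EFA_IO_TX_META_DESC_COMP_REQ, 1);

                meta->dest_qp_num = dest_qpn;
                meta->ah = ah;
                meta->qkey = qkey;
                /* inline_msg is set, so length is the payload size in bytes,
                 * not the number of SGL entries.
                 */
                meta->length = len;
                memcpy(wqe->data.inline_data, buf, len);
        }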
+ */ + u8 src_addr[16]; +}; + +/* tx_meta_desc */ +#define EFA_IO_TX_META_DESC_OP_TYPE_MASK GENMASK(3, 0) +#define EFA_IO_TX_META_DESC_HAS_IMM_MASK BIT(4) +#define EFA_IO_TX_META_DESC_INLINE_MSG_MASK BIT(5) +#define EFA_IO_TX_META_DESC_META_EXTENSION_MASK BIT(6) +#define EFA_IO_TX_META_DESC_META_DESC_MASK BIT(7) +#define EFA_IO_TX_META_DESC_PHASE_MASK BIT(0) +#define EFA_IO_TX_META_DESC_FIRST_MASK BIT(2) +#define EFA_IO_TX_META_DESC_LAST_MASK BIT(3) +#define EFA_IO_TX_META_DESC_COMP_REQ_MASK BIT(4) + +/* tx_buf_desc */ +#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0) + +/* rx_desc */ +#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0) +#define EFA_IO_RX_DESC_FIRST_MASK BIT(30) +#define EFA_IO_RX_DESC_LAST_MASK BIT(31) + +/* cdesc_common */ +#define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0) +#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1) +#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3) + +#endif /* _EFA_IO_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index faf3ff1bca2a..15ee92081118 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -1,19 +1,25 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include <linux/module.h> #include <linux/pci.h> +#include <linux/utsname.h> +#include <linux/version.h> #include <rdma/ib_user_verbs.h> #include "efa.h" -#define PCI_DEV_ID_EFA_VF 0xefa0 +#define PCI_DEV_ID_EFA0_VF 0xefa0 +#define PCI_DEV_ID_EFA1_VF 0xefa1 +#define PCI_DEV_ID_EFA2_VF 0xefa2 static const struct pci_device_id efa_pci_tbl[] = { - { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA2_VF) }, { } }; @@ -63,6 +69,47 @@ static void efa_release_bars(struct efa_dev *dev, int bars_mask) pci_release_selected_regions(pdev, release_bars); } +static void efa_process_comp_eqe(struct efa_dev *dev, struct efa_admin_eqe *eqe) +{ + u16 cqn = eqe->u.comp_event.cqn; + struct efa_cq *cq; + + /* Safe to load as we're in irq and removal calls synchronize_irq() */ + cq = xa_load(&dev->cqs_xa, cqn); + if (unlikely(!cq)) { + ibdev_err_ratelimited(&dev->ibdev, + "Completion event on non-existent CQ[%u]", + cqn); + return; + } + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +static void efa_process_eqe(struct efa_com_eq *eeq, struct efa_admin_eqe *eqe) +{ + struct efa_dev *dev = container_of(eeq->edev, struct efa_dev, edev); + + if (likely(EFA_GET(&eqe->common, EFA_ADMIN_EQE_EVENT_TYPE) == + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION)) + efa_process_comp_eqe(dev, eqe); + else + ibdev_err_ratelimited(&dev->ibdev, + "Unknown event type received %lu", + EFA_GET(&eqe->common, + EFA_ADMIN_EQE_EVENT_TYPE)); +} + +static irqreturn_t efa_intr_msix_comp(int irq, void *data) +{ + struct efa_eq *eq = data; + struct efa_com_dev *edev = eq->eeq.edev; + + efa_com_eq_comp_intr_handler(edev, &eq->eeq); + + return IRQ_HANDLED; +} + static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) { struct efa_dev *dev = data; @@ -73,27 +120,43 @@ static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) return IRQ_HANDLED; } -static int efa_request_mgmnt_irq(struct efa_dev *dev) +static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq) { - struct efa_irq *irq; int err; - irq = &dev->admin_irq; - err = 
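[Editor's note] The completion descriptors above carry a phase bit in flags (EFA_IO_CDESC_COMMON_PHASE) rather than a producer index. A hedged consumer-side sketch, assuming the usual phase-bit ownership convention (entry is device-owned until its phase matches the consumer's expected phase, which flips on every wrap-around); it ignores sub-CQs and the larger rx/tx descriptor sizes, and poll_one_cdesc is an invented name:

        #include "efa_common_defs.h"
        #include "efa_io_defs.h"

        static struct efa_io_cdesc_common *
        poll_one_cdesc(struct efa_io_cdesc_common *cq_buf, u32 depth, u32 *ci, u8 *phase)
        {
                /* depth is assumed to be a power of two in this sketch */
                struct efa_io_cdesc_common *cdesc = &cq_buf[*ci & (depth - 1)];

                /* Entry is owned by the device until its phase bit matches ours */
                if (EFA_GET(&cdesc->flags, EFA_IO_CDESC_COMMON_PHASE) != *phase)
                        return NULL;

                dma_rmb(); /* read the rest of the entry only after the phase check */

                (*ci)++;
                if (!(*ci & (depth - 1)))
                        *phase ^= 1; /* expected phase flips on wrap-around */

                /* cdesc->status holds an enum efa_io_comp_status; anything other
                 * than EFA_IO_COMP_STATUS_OK becomes an error completion.
                 */
                return cdesc;
        }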
request_irq(irq->vector, irq->handler, 0, irq->name, - irq->data); + err = request_irq(irq->irqn, irq->handler, 0, irq->name, irq->data); if (err) { - dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n", - err); + dev_err(&dev->pdev->dev, "Failed to request irq %s (%d)\n", + irq->name, err); return err; } - dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n", - nr_cpumask_bits, &irq->affinity_hint_mask, irq->vector); - irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + irq_set_affinity_hint(irq->irqn, &irq->affinity_hint_mask); return 0; } +static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, + int vector) +{ + u32 cpu; + + cpu = vector - EFA_COMP_EQS_VEC_BASE; + snprintf(eq->irq.name, EFA_IRQNAME_SIZE, "efa-comp%d@pci:%s", cpu, + pci_name(dev->pdev)); + eq->irq.handler = efa_intr_msix_comp; + eq->irq.data = eq; + eq->irq.vector = vector; + eq->irq.irqn = pci_irq_vector(dev->pdev, vector); + cpumask_set_cpu(cpu, &eq->irq.affinity_hint_mask); +} + +static void efa_free_irq(struct efa_dev *dev, struct efa_irq *irq) +{ + irq_set_affinity_hint(irq->irqn, NULL); + free_irq(irq->irqn, irq->data); +} + static void efa_setup_mgmnt_irq(struct efa_dev *dev) { u32 cpu; @@ -102,32 +165,22 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev) "efa-mgmnt@pci:%s", pci_name(dev->pdev)); dev->admin_irq.handler = efa_intr_msix_mgmnt; dev->admin_irq.data = dev; - dev->admin_irq.vector = - pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx); + dev->admin_irq.vector = dev->admin_msix_vector_idx; + dev->admin_irq.irqn = pci_irq_vector(dev->pdev, + dev->admin_msix_vector_idx); cpu = cpumask_first(cpu_online_mask); - dev->admin_irq.cpu = cpu; cpumask_set_cpu(cpu, &dev->admin_irq.affinity_hint_mask); - dev_info(&dev->pdev->dev, "Setup irq:0x%p vector:%d name:%s\n", - &dev->admin_irq, - dev->admin_irq.vector, + dev_info(&dev->pdev->dev, "Setup irq:%d name:%s\n", + dev->admin_irq.irqn, dev->admin_irq.name); } -static void efa_free_mgmnt_irq(struct efa_dev *dev) -{ - struct efa_irq *irq; - - irq = &dev->admin_irq; - irq_set_affinity_hint(irq->vector, NULL); - free_irq(irq->vector, irq->data); -} - static int efa_set_mgmnt_irq(struct efa_dev *dev) { efa_setup_mgmnt_irq(dev); - return efa_request_mgmnt_irq(dev); + return efa_request_irq(dev, &dev->admin_irq); } static int efa_request_doorbell_bar(struct efa_dev *dev) @@ -187,17 +240,130 @@ static void efa_stats_init(struct efa_dev *dev) atomic64_set(s, 0); } +static void efa_set_host_info(struct efa_dev *dev) +{ + struct efa_admin_set_feature_resp resp = {}; + struct efa_admin_set_feature_cmd cmd = {}; + struct efa_admin_host_info *hinf; + u32 bufsz = sizeof(*hinf); + dma_addr_t hinf_dma; + + if (!efa_com_check_supported_feature_id(&dev->edev, + EFA_ADMIN_HOST_INFO)) + return; + + /* Failures in host info set shall not disturb probe */ + hinf = dma_alloc_coherent(&dev->pdev->dev, bufsz, &hinf_dma, + GFP_KERNEL); + if (!hinf) + return; + + strscpy(hinf->os_dist_str, utsname()->release, + sizeof(hinf->os_dist_str)); + hinf->os_type = EFA_ADMIN_OS_LINUX; + strscpy(hinf->kernel_ver_str, utsname()->version, + sizeof(hinf->kernel_ver_str)); + hinf->kernel_ver = LINUX_VERSION_CODE; + EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MAJOR, 0); + EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MINOR, 0); + EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR, 0); + EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE, 0); + EFA_SET(&hinf->bdf, 
EFA_ADMIN_HOST_INFO_BUS, dev->pdev->bus->number); + EFA_SET(&hinf->bdf, EFA_ADMIN_HOST_INFO_DEVICE, + PCI_SLOT(dev->pdev->devfn)); + EFA_SET(&hinf->bdf, EFA_ADMIN_HOST_INFO_FUNCTION, + PCI_FUNC(dev->pdev->devfn)); + EFA_SET(&hinf->spec_ver, EFA_ADMIN_HOST_INFO_SPEC_MAJOR, + EFA_COMMON_SPEC_VERSION_MAJOR); + EFA_SET(&hinf->spec_ver, EFA_ADMIN_HOST_INFO_SPEC_MINOR, + EFA_COMMON_SPEC_VERSION_MINOR); + EFA_SET(&hinf->flags, EFA_ADMIN_HOST_INFO_INTREE, 1); + EFA_SET(&hinf->flags, EFA_ADMIN_HOST_INFO_GDR, 0); + + efa_com_set_feature_ex(&dev->edev, &resp, &cmd, EFA_ADMIN_HOST_INFO, + hinf_dma, bufsz); + + dma_free_coherent(&dev->pdev->dev, bufsz, hinf, hinf_dma); +} + +static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq) +{ + efa_com_eq_destroy(&dev->edev, &eq->eeq); + efa_free_irq(dev, &eq->irq); +} + +static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u8 msix_vec) +{ + int err; + + efa_setup_comp_irq(dev, eq, msix_vec); + err = efa_request_irq(dev, &eq->irq); + if (err) + return err; + + err = efa_com_eq_init(&dev->edev, &eq->eeq, efa_process_eqe, + dev->dev_attr.max_eq_depth, msix_vec); + if (err) + goto err_free_comp_irq; + + return 0; + +err_free_comp_irq: + efa_free_irq(dev, &eq->irq); + return err; +} + +static int efa_create_eqs(struct efa_dev *dev) +{ + unsigned int neqs = dev->dev_attr.max_eq; + int err; + int i; + + neqs = min_t(unsigned int, neqs, num_online_cpus()); + dev->neqs = neqs; + dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL); + if (!dev->eqs) + return -ENOMEM; + + for (i = 0; i < neqs; i++) { + err = efa_create_eq(dev, &dev->eqs[i], + i + EFA_COMP_EQS_VEC_BASE); + if (err) + goto err_destroy_eqs; + } + + return 0; + +err_destroy_eqs: + for (i--; i >= 0; i--) + efa_destroy_eq(dev, &dev->eqs[i]); + kfree(dev->eqs); + + return err; +} + +static void efa_destroy_eqs(struct efa_dev *dev) +{ + int i; + + for (i = 0; i < dev->neqs; i++) + efa_destroy_eq(dev, &dev->eqs[i]); + + kfree(dev->eqs); +} + static const struct ib_device_ops efa_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_EFA, .uverbs_abi_ver = EFA_UVERBS_ABI_VERSION, - .alloc_hw_stats = efa_alloc_hw_stats, + .alloc_hw_port_stats = efa_alloc_hw_port_stats, + .alloc_hw_device_stats = efa_alloc_hw_device_stats, .alloc_pd = efa_alloc_pd, .alloc_ucontext = efa_alloc_ucontext, - .create_ah = efa_create_ah, .create_cq = efa_create_cq, .create_qp = efa_create_qp, + .create_user_ah = efa_create_ah, .dealloc_pd = efa_dealloc_pd, .dealloc_ucontext = efa_dealloc_ucontext, .dereg_mr = efa_dereg_mr, @@ -216,10 +382,12 @@ static const struct ib_device_ops efa_dev_ops = { .query_port = efa_query_port, .query_qp = efa_query_qp, .reg_user_mr = efa_reg_mr, + .reg_user_mr_dmabuf = efa_reg_user_mr_dmabuf, INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_qp, efa_qp, ibqp), INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext), }; @@ -251,42 +419,29 @@ static int efa_ib_device_add(struct efa_dev *dev) if (err) goto err_release_doorbell_bar; + err = efa_create_eqs(dev); + if (err) + goto err_release_doorbell_bar; + + efa_set_host_info(dev); + dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED; dev->ibdev.phys_port_cnt = 1; - dev->ibdev.num_comp_vectors = 1; + dev->ibdev.num_comp_vectors = dev->neqs ?: 1; dev->ibdev.dev.parent = &pdev->dev; - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - 
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_CREATE_AH) | - (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - - dev->ibdev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); - ib_set_device_ops(&dev->ibdev, &efa_dev_ops); - err = ib_register_device(&dev->ibdev, "efa_%d"); + err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); if (err) - goto err_release_doorbell_bar; + goto err_destroy_eqs; ibdev_info(&dev->ibdev, "IB device registered\n"); return 0; +err_destroy_eqs: + efa_destroy_eqs(dev); err_release_doorbell_bar: efa_release_doorbell_bar(dev); return err; @@ -294,9 +449,10 @@ err_release_doorbell_bar: static void efa_ib_device_remove(struct efa_dev *dev) { - efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); ibdev_info(&dev->ibdev, "Unregister ib device\n"); ib_unregister_device(&dev->ibdev); + efa_destroy_eqs(dev); + efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); efa_release_doorbell_bar(dev); } @@ -309,8 +465,12 @@ static int efa_enable_msix(struct efa_dev *dev) { int msix_vecs, irq_num; - /* Reserve the max msix vectors we might need */ - msix_vecs = EFA_NUM_MSIX_VEC; + /* + * Reserve the max msix vectors we might need, one vector is reserved + * for admin. + */ + msix_vecs = min_t(int, pci_msix_vec_count(dev->pdev), + num_online_cpus() + 1); dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n", msix_vecs); @@ -325,6 +485,7 @@ static int efa_enable_msix(struct efa_dev *dev) } if (irq_num != msix_vecs) { + efa_disable_msix(dev); dev_err(&dev->pdev->dev, "Allocated %d MSI-X (out of %d requested)\n", irq_num, msix_vecs); @@ -353,20 +514,13 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev) return err; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_width)); if (err) { - dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err); - return err; - } - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); - if (err) { - dev_err(&pdev->dev, - "err_pci_set_consistent_dma_mask failed %d\n", - err); + dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", err); return err; } + dma_set_max_seg_size(&pdev->dev, UINT_MAX); return 0; } @@ -397,6 +551,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) edev->efa_dev = dev; edev->dmadev = &pdev->dev; dev->pdev = pdev; + xa_init(&dev->cqs_xa); bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK; err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); @@ -452,7 +607,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) return dev; err_free_mgmnt_irq: - efa_free_mgmnt_irq(dev); + efa_free_irq(dev, &dev->admin_irq); err_disable_msix: efa_disable_msix(dev); err_reg_read_destroy: @@ -475,11 +630,12 @@ static void efa_remove_device(struct pci_dev *pdev) edev = &dev->edev; efa_com_admin_destroy(edev); - efa_free_mgmnt_irq(dev); + efa_free_irq(dev, &dev->admin_irq); efa_disable_msix(dev); efa_com_mmio_reg_read_destroy(edev); devm_iounmap(&pdev->dev, edev->reg_bar); efa_release_bars(dev, EFA_BASE_BAR_MASK); + 
xa_destroy(&dev->cqs_xa); ib_dealloc_device(&dev->ibdev); pci_disable_device(pdev); } diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h index bb9cad3d6a15..714ae6258800 100644 --- a/drivers/infiniband/hw/efa/efa_regs_defs.h +++ b/drivers/infiniband/hw/efa/efa_regs_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_REGS_H_ @@ -42,72 +42,60 @@ enum efa_regs_reset_reason_types { #define EFA_REGS_MMIO_REG_READ_OFF 0x5c #define EFA_REGS_MMIO_RESP_LO_OFF 0x60 #define EFA_REGS_MMIO_RESP_HI_OFF 0x64 +#define EFA_REGS_EQ_DB_OFF 0x68 /* version register */ #define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff -#define EFA_REGS_VERSION_MAJOR_VERSION_SHIFT 8 #define EFA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00 /* controller_version register */ #define EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff -#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8 #define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00 -#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16 #define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000 -#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24 #define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000 /* caps register */ #define EFA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1 -#define EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1 #define EFA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e -#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8 #define EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00 -#define EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16 #define EFA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000 /* aq_caps register */ #define EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff -#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16 #define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000 /* acq_caps register */ #define EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff -#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16 #define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xff0000 -#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT 24 #define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK 0xff000000 /* aenq_caps register */ #define EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff -#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16 #define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xff0000 -#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT 24 #define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK 0xff000000 +/* intr_mask register */ +#define EFA_REGS_INTR_MASK_EN_MASK 0x1 + /* dev_ctl register */ #define EFA_REGS_DEV_CTL_DEV_RESET_MASK 0x1 -#define EFA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1 #define EFA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2 -#define EFA_REGS_DEV_CTL_RESET_REASON_SHIFT 28 #define EFA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000 /* dev_sts register */ #define EFA_REGS_DEV_STS_READY_MASK 0x1 -#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1 #define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2 -#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2 #define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4 -#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3 #define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8 -#define EFA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4 #define EFA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10 -#define EFA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5 #define EFA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20 /* mmio_reg_read register */ #define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff -#define 
EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16 #define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 +/* eq_db register */ +#define EFA_REGS_EQ_DB_EQN_MASK 0xffff +#define EFA_REGS_EQ_DB_ARM_MASK 0x80000000 + #endif /* _EFA_REGS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index ec5545870554..31454643f8c5 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1,9 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. */ +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> #include <linux/vmalloc.h> +#include <linux/log2.h> #include <rdma/ib_addr.h> #include <rdma/ib_umem.h> @@ -12,6 +15,7 @@ #include <rdma/uverbs_ioctl.h> #include "efa.h" +#include "efa_io_defs.h" enum { EFA_MMAP_DMA_PAGE = 0, @@ -29,31 +33,53 @@ struct efa_user_mmap_entry { u8 mmap_flag; }; -#define EFA_DEFINE_STATS(op) \ - op(EFA_TX_BYTES, "tx_bytes") \ - op(EFA_TX_PKTS, "tx_pkts") \ - op(EFA_RX_BYTES, "rx_bytes") \ - op(EFA_RX_PKTS, "rx_pkts") \ - op(EFA_RX_DROPS, "rx_drops") \ +#define EFA_DEFINE_DEVICE_STATS(op) \ op(EFA_SUBMITTED_CMDS, "submitted_cmds") \ op(EFA_COMPLETED_CMDS, "completed_cmds") \ + op(EFA_CMDS_ERR, "cmds_err") \ op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \ op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \ op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \ op(EFA_CREATE_QP_ERR, "create_qp_err") \ + op(EFA_CREATE_CQ_ERR, "create_cq_err") \ op(EFA_REG_MR_ERR, "reg_mr_err") \ op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \ - op(EFA_CREATE_AH_ERR, "create_ah_err") + op(EFA_CREATE_AH_ERR, "create_ah_err") \ + op(EFA_MMAP_ERR, "mmap_err") + +#define EFA_DEFINE_PORT_STATS(op) \ + op(EFA_TX_BYTES, "tx_bytes") \ + op(EFA_TX_PKTS, "tx_pkts") \ + op(EFA_RX_BYTES, "rx_bytes") \ + op(EFA_RX_PKTS, "rx_pkts") \ + op(EFA_RX_DROPS, "rx_drops") \ + op(EFA_SEND_BYTES, "send_bytes") \ + op(EFA_SEND_WRS, "send_wrs") \ + op(EFA_RECV_BYTES, "recv_bytes") \ + op(EFA_RECV_WRS, "recv_wrs") \ + op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \ + op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \ + op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \ + op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \ #define EFA_STATS_ENUM(ename, name) ename, -#define EFA_STATS_STR(ename, name) [ename] = name, +#define EFA_STATS_STR(ename, nam) \ + [ename].name = nam, + +enum efa_hw_device_stats { + EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM) +}; + +static const struct rdma_stat_desc efa_device_stats_descs[] = { + EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR) +}; -enum efa_hw_stats { - EFA_DEFINE_STATS(EFA_STATS_ENUM) +enum efa_hw_port_stats { + EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM) }; -static const char *const efa_stats_names[] = { - EFA_DEFINE_STATS(EFA_STATS_STR) +static const struct rdma_stat_desc efa_port_stats_descs[] = { + EFA_DEFINE_PORT_STATS(EFA_STATS_STR) }; #define EFA_CHUNK_PAYLOAD_SHIFT 12 @@ -139,13 +165,9 @@ to_emmap(struct rdma_user_mmap_entry *rdma_entry) return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); } -static inline bool is_rdma_read_cap(struct efa_dev *dev) -{ - return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK; -} - -#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ - sizeof_field(typeof(x), fld) <= (sz)) +#define EFA_DEV_CAP(dev, cap) \ + ((dev)->dev_attr.device_caps & \ + EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK) 
#define is_reserved_cleared(reserved) \ !memchr_inv(reserved, 0, sizeof(reserved)) @@ -169,6 +191,14 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr, return addr; } +static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr, + dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir) +{ + dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir); + free_pages_exact(cpu_addr, size); +} + int efa_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata) @@ -204,6 +234,7 @@ int efa_query_device(struct ib_device *ibdev, props->max_send_sge = dev_attr->max_sq_sge; props->max_recv_sge = dev_attr->max_rq_sge; props->max_sge_rd = dev_attr->max_wr_rdma_sge; + props->max_pkeys = 1; if (udata && udata->outlen) { resp.max_sq_sge = dev_attr->max_sq_sge; @@ -212,9 +243,16 @@ int efa_query_device(struct ib_device *ibdev, resp.max_rq_wr = dev_attr->max_rq_depth; resp.max_rdma_size = dev_attr->max_rdma_size; - if (is_rdma_read_cap(dev)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID; + if (EFA_DEV_CAP(dev, RDMA_READ)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; + if (EFA_DEV_CAP(dev, RNR_RETRY)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY; + + if (dev->neqs) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS; + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { @@ -227,7 +265,7 @@ int efa_query_device(struct ib_device *ibdev, return 0; } -int efa_query_port(struct ib_device *ibdev, u8 port, +int efa_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props) { struct efa_dev *dev = to_edev(ibdev); @@ -260,7 +298,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, #define EFA_QUERY_QP_SUPP_MASK \ (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \ - IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP) + IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY) if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) { ibdev_dbg(&dev->ibdev, @@ -282,6 +320,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->sq_psn = result.sq_psn; qp_attr->sq_draining = result.sq_draining; qp_attr->port_num = 1; + qp_attr->rnr_retry = result.rnr_retry; qp_attr->cap.max_send_wr = qp->max_send_wr; qp_attr->cap.max_recv_wr = qp->max_recv_wr; @@ -298,7 +337,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, return 0; } -int efa_query_gid(struct ib_device *ibdev, u8 port, int index, +int efa_query_gid(struct ib_device *ibdev, u32 port, int index, union ib_gid *gid) { struct efa_dev *dev = to_edev(ibdev); @@ -308,7 +347,7 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index, return 0; } -int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index, +int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey) { if (index > 0) @@ -367,17 +406,18 @@ int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) err_dealloc_pd: efa_pd_dealloc(dev, result.pdn); err_out: - atomic64_inc(&dev->stats.sw_stats.alloc_pd_err); + atomic64_inc(&dev->stats.alloc_pd_err); return err; } -void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibpd->device); struct efa_pd *pd = to_epd(ibpd); ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn); efa_pd_dealloc(dev, pd->pdn); + return 0; } static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) @@ -402,6 +442,9 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata 
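[Editor's note] efa_query_device() forwards the new capability bits (RDMA read, RNR retry, CQ notifications, CQ with source GID) to user space through resp.device_caps. On the user side these are surfaced by the EFA provider in rdma-core; a hedged sketch, assuming the efadv.h interface and flag names shipped by recent rdma-core releases (not part of this kernel patch):

        #include <stdio.h>
        #include <infiniband/verbs.h>
        #include <infiniband/efadv.h>

        static void print_efa_caps(struct ibv_context *ctx)
        {
                struct efadv_device_attr attr = {};

                if (efadv_query_device(ctx, &attr, sizeof(attr)))
                        return;

                if (attr.device_caps & EFADV_DEVICE_ATTR_CAPS_RDMA_READ)
                        printf("RDMA read supported, max_rdma_size=%u\n",
                               attr.max_rdma_size);
                if (attr.device_caps & EFADV_DEVICE_ATTR_CAPS_RNR_RETRY)
                        printf("RNR retry configurable on SRD QPs\n");
        }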
*udata) int err; ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num); + + efa_qp_user_mmap_entries_remove(qp); + err = efa_destroy_qp_handle(dev, qp->qp_handle); if (err) return err; @@ -411,12 +454,10 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n", qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr); - dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, - DMA_TO_DEVICE); + efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr, + qp->rq_size, DMA_TO_DEVICE); } - efa_qp_user_mmap_entries_remove(qp); - kfree(qp); return 0; } @@ -575,17 +616,16 @@ static int efa_qp_validate_attr(struct efa_dev *dev, return 0; } -struct ib_qp *efa_create_qp(struct ib_pd *ibpd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { struct efa_com_create_qp_params create_qp_params = {}; struct efa_com_create_qp_result create_qp_resp; - struct efa_dev *dev = to_edev(ibpd->device); + struct efa_dev *dev = to_edev(ibqp->device); struct efa_ibv_create_qp_resp resp = {}; struct efa_ibv_create_qp cmd = {}; + struct efa_qp *qp = to_eqp(ibqp); struct efa_ucontext *ucontext; - struct efa_qp *qp; int err; ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext, @@ -599,7 +639,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, if (err) goto err_out; - if (!field_avail(cmd, driver_qp_type, udata->inlen)) { + if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, no input udata\n"); err = -EINVAL; @@ -630,14 +670,8 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, goto err_out; } - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) { - err = -ENOMEM; - goto err_out; - } - create_qp_params.uarn = ucontext->uarn; - create_qp_params.pd = to_epd(ibpd)->pdn; + create_qp_params.pd = to_epd(ibqp->pd)->pdn; if (init_attr->qp_type == IB_QPT_UD) { create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD; @@ -648,7 +682,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, "Unsupported qp type %d driver qp type %d\n", init_attr->qp_type, cmd.driver_qp_type); err = -EOPNOTSUPP; - goto err_free_qp; + goto err_out; } ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n", @@ -666,7 +700,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, qp->rq_size, DMA_TO_DEVICE); if (!qp->rq_cpu_addr) { err = -ENOMEM; - goto err_free_qp; + goto err_out; } ibdev_dbg(&dev->ibdev, @@ -693,7 +727,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, qp->qp_handle = create_qp_resp.qp_handle; qp->ibqp.qp_num = create_qp_resp.qp_num; - qp->ibqp.qp_type = init_attr->qp_type; qp->max_send_wr = init_attr->cap.max_send_wr; qp->max_recv_wr = init_attr->cap.max_recv_wr; qp->max_send_sge = init_attr->cap.max_send_sge; @@ -713,25 +746,128 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num); - return &qp->ibqp; + return 0; err_remove_mmap_entries: efa_qp_user_mmap_entries_remove(qp); err_destroy_qp: efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); err_free_mapped: - if (qp->rq_size) { - dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, - DMA_TO_DEVICE); + if (qp->rq_size) + efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr, + qp->rq_size, DMA_TO_DEVICE); +err_out: + atomic64_inc(&dev->stats.create_qp_err); + return err; +} - if (!qp->rq_mmap_entry) - free_pages_exact(qp->rq_cpu_addr, qp->rq_size); +static const struct { + int 
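[Editor's note] efa_create_qp() now operates on a core-allocated struct efa_qp (note the INIT_RDMA_OBJ_SIZE(ib_qp, efa_qp, ibqp) entry added to efa_dev_ops; the driver no longer kzallocs/kfrees the QP itself) and, besides UD, accepts IB_QPT_DRIVER QPs whose cmd.driver_qp_type selects SRD. From user space this path is normally reached through the EFA provider; a hedged sketch assuming rdma-core's efadv_create_driver_qp():

        #include <infiniband/verbs.h>
        #include <infiniband/efadv.h>

        static struct ibv_qp *create_srd_qp(struct ibv_pd *pd, struct ibv_cq *cq)
        {
                struct ibv_qp_init_attr attr = {
                        .send_cq = cq,
                        .recv_cq = cq,
                        .cap = {
                                .max_send_wr = 256,
                                .max_recv_wr = 256,
                                .max_send_sge = 1,
                                .max_recv_sge = 1,
                        },
                        .qp_type = IBV_QPT_DRIVER, /* IB_QPT_DRIVER in the kernel */
                };

                /* driver_qp_type = SRD maps to EFA_ADMIN_QP_TYPE_SRD in efa_create_qp() */
                return efadv_create_driver_qp(pd, &attr, EFADV_QP_DRIVER_TYPE_SRD);
        }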
valid; + enum ib_qp_attr_mask req_param; + enum ib_qp_attr_mask opt_param; +} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .req_param = IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY, + }, + }, + [IB_QPS_INIT] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY, + }, + [IB_QPS_RTR] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_QKEY, + }, + }, + [IB_QPS_RTR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .req_param = IB_QP_SQ_PSN, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY | + IB_QP_RNR_RETRY, + + } + }, + [IB_QPS_RTS] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY, + }, + }, + [IB_QPS_SQD] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_QKEY, + } + }, + [IB_QPS_SQE] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + } + }, + [IB_QPS_ERR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, } -err_free_qp: - kfree(qp); -err_out: - atomic64_inc(&dev->stats.sw_stats.create_qp_err); - return ERR_PTR(err); +}; + +static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state, + enum ib_qp_state next_state, + enum ib_qp_attr_mask mask) +{ + enum ib_qp_attr_mask req_param, opt_param; + + if (mask & IB_QP_CUR_STATE && + cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && + cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) + return false; + + if (!srd_qp_state_table[cur_state][next_state].valid) + return false; + + req_param = srd_qp_state_table[cur_state][next_state].req_param; + opt_param = srd_qp_state_table[cur_state][next_state].opt_param; + + if ((mask & req_param) != req_param) + return false; + + if (mask & ~(req_param | opt_param | IB_QP_STATE)) + return false; + + return true; } static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, @@ -739,9 +875,12 @@ static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, enum ib_qp_state cur_state, enum ib_qp_state new_state) { + int err; + #define EFA_MODIFY_QP_SUPP_MASK \ (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \ - IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN) + IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \ + IB_QP_RNR_RETRY) if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) { ibdev_dbg(&dev->ibdev, @@ -750,8 +889,14 @@ static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, return -EOPNOTSUPP; } - if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, - qp_attr_mask)) { + if (qp->ibqp.qp_type == IB_QPT_DRIVER) + err = !efa_modify_srd_qp_is_ok(cur_state, new_state, + qp_attr_mask); + else + err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, + qp_attr_mask); + + if (err) { ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n"); return -EINVAL; } @@ -779,6 +924,9 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, enum ib_qp_state new_state; int err; 
+ if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + if (udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen)) { ibdev_dbg(&dev->ibdev, @@ -798,28 +946,36 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, params.qp_handle = qp->qp_handle; if (qp_attr_mask & IB_QP_STATE) { - params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) | - BIT(EFA_ADMIN_CUR_QP_STATE_BIT); - params.cur_qp_state = qp_attr->cur_qp_state; - params.qp_state = qp_attr->qp_state; + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE, + 1); + EFA_SET(¶ms.modify_mask, + EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1); + params.cur_qp_state = cur_state; + params.qp_state = new_state; } if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { - params.modify_mask |= - BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT); + EFA_SET(¶ms.modify_mask, + EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1); params.sq_drained_async_notify = qp_attr->en_sqd_async_notify; } if (qp_attr_mask & IB_QP_QKEY) { - params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT); + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1); params.qkey = qp_attr->qkey; } if (qp_attr_mask & IB_QP_SQ_PSN) { - params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT); + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1); params.sq_psn = qp_attr->sq_psn; } + if (qp_attr_mask & IB_QP_RNR_RETRY) { + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY, + 1); + params.rnr_retry = qp_attr->rnr_retry; + } + err = efa_com_modify_qp(&dev->edev, ¶ms); if (err) return err; @@ -836,7 +992,13 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) return efa_com_destroy_cq(&dev->edev, ¶ms); } -void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq) +{ + rdma_user_mmap_entry_remove(cq->db_mmap_entry); + rdma_user_mmap_entry_remove(cq->mmap_entry); +} + +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibcq->device); struct efa_cq *cq = to_ecq(ibcq); @@ -845,14 +1007,25 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); + efa_cq_user_mmap_entries_remove(cq); efa_destroy_cq_idx(dev, cq->cq_idx); - dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, - DMA_FROM_DEVICE); - rdma_user_mmap_entry_remove(cq->mmap_entry); + if (cq->eq) { + xa_erase(&dev->cqs_xa, cq->cq_idx); + synchronize_irq(cq->eq->irq.irqn); + } + efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, + DMA_FROM_DEVICE); + return 0; +} + +static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec) +{ + return &dev->eqs[vec]; } static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, - struct efa_ibv_create_cq_resp *resp) + struct efa_ibv_create_cq_resp *resp, + bool db_valid) { resp->q_mmap_size = cq->size; cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, @@ -862,6 +1035,21 @@ static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, if (!cq->mmap_entry) return -ENOMEM; + if (db_valid) { + cq->db_mmap_entry = + efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, + dev->db_bar_addr + resp->db_off, + PAGE_SIZE, EFA_MMAP_IO_NC, + &resp->db_mmap_key); + if (!cq->db_mmap_entry) { + rdma_user_mmap_entry_remove(cq->mmap_entry); + return -ENOMEM; + } + + resp->db_off &= ~PAGE_MASK; + resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF; + } + return 0; } @@ -870,18 +1058,22 @@ int 
efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, { struct efa_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct efa_ucontext, ibucontext); + struct efa_com_create_cq_params params = {}; struct efa_ibv_create_cq_resp resp = {}; - struct efa_com_create_cq_params params; struct efa_com_create_cq_result result; struct ib_device *ibdev = ibcq->device; struct efa_dev *dev = to_edev(ibdev); struct efa_ibv_create_cq cmd = {}; struct efa_cq *cq = to_ecq(ibcq); int entries = attr->cqe; + bool set_src_addr; int err; ibdev_dbg(ibdev, "create_cq entries %d\n", entries); + if (attr->flags) + return -EOPNOTSUPP; + if (entries < 1 || entries > dev->dev_attr.max_cq_depth) { ibdev_dbg(ibdev, "cq: requested entries[%u] non-positive or greater than max[%u]\n", @@ -890,7 +1082,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_out; } - if (!field_avail(cmd, num_sub_cqs, udata->inlen)) { + if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) { ibdev_dbg(ibdev, "Incompatible ABI params, no input udata\n"); err = -EINVAL; @@ -913,14 +1105,17 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_out; } - if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) { + if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) { ibdev_dbg(ibdev, "Incompatible ABI params, unknown fields in udata\n"); err = -EINVAL; goto err_out; } - if (!cmd.cq_entry_size) { + set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID); + if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) && + (set_src_addr || + cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) { ibdev_dbg(ibdev, "Invalid entry size [%u]\n", cmd.cq_entry_size); err = -EINVAL; @@ -949,29 +1144,46 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, params.dma_addr = cq->dma_addr; params.entry_size_in_bytes = cmd.cq_entry_size; params.num_sub_cqs = cmd.num_sub_cqs; + params.set_src_addr = set_src_addr; + if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) { + cq->eq = efa_vec2eq(dev, attr->comp_vector); + params.eqn = cq->eq->eeq.eqn; + params.interrupt_mode_enabled = true; + } + err = efa_com_create_cq(&dev->edev, ¶ms, &result); if (err) goto err_free_mapped; + resp.db_off = result.db_off; resp.cq_idx = result.cq_idx; cq->cq_idx = result.cq_idx; cq->ibcq.cqe = result.actual_depth; WARN_ON_ONCE(entries != result.actual_depth); - err = cq_mmap_entries_setup(dev, cq, &resp); + err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid); if (err) { ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n", cq->cq_idx); goto err_destroy_cq; } + if (cq->eq) { + err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL)); + if (err) { + ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n", + cq->cq_idx); + goto err_remove_mmap; + } + } + if (udata->outlen) { err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { ibdev_dbg(ibdev, "Failed to copy udata for create_cq\n"); - goto err_remove_mmap; + goto err_xa_erase; } } @@ -980,18 +1192,19 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; +err_xa_erase: + if (cq->eq) + xa_erase(&dev->cqs_xa, cq->cq_idx); err_remove_mmap: - rdma_user_mmap_entry_remove(cq->mmap_entry); + efa_cq_user_mmap_entries_remove(cq); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); err_free_mapped: - dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, - DMA_FROM_DEVICE); - if (!cq->mmap_entry) - free_pages_exact(cq->cpu_addr, 
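[Editor's note] When user space passes EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL, efa_create_cq() binds the CQ to the completion EQ selected by attr->comp_vector (efa_vec2eq) and stores it in dev->cqs_xa so the EQ interrupt handler can invoke its comp_handler. From the verbs API this corresponds to creating the CQ with a completion channel and a completion vector; a short sketch (CQ depth and vector handling are illustrative):

        #include <infiniband/verbs.h>

        static struct ibv_cq *create_evented_cq(struct ibv_context *ctx, int vec)
        {
                struct ibv_comp_channel *ch = ibv_create_comp_channel(ctx);

                if (!ch)
                        return NULL;

                /* comp_vector selects one of the driver's completion EQs
                 * (dev->eqs[vec]); num_comp_vectors is dev->neqs, or 1 when the
                 * device exposes no EQs.
                 */
                return ibv_create_cq(ctx, 256, NULL, ch, vec % ctx->num_comp_vectors);
        }

The consumer then arms the CQ with ibv_req_notify_cq(), blocks in ibv_get_cq_event() until the EQ interrupt fires, and acknowledges the event with ibv_ack_cq_events() before re-arming and polling.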
cq->size); + efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, + DMA_FROM_DEVICE); err_out: - atomic64_inc(&dev->stats.sw_stats.create_cq_err); + atomic64_inc(&dev->stats.create_cq_err); return err; } @@ -1008,8 +1221,7 @@ static int umem_to_page_list(struct efa_dev *dev, ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n", hp_cnt, pages_in_hp); - rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, - BIT(hp_shift)) + rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift)) page_list[hp_idx++] = rdma_block_iter_dma_address(&biter); return 0; @@ -1021,7 +1233,7 @@ static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt) struct page *pg; int i; - sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL); + sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL); if (!sglist) return NULL; sg_init_table(sglist, page_cnt); @@ -1341,54 +1553,49 @@ static int efa_create_pbl(struct efa_dev *dev, return 0; } -struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, - u64 virt_addr, int access_flags, - struct ib_udata *udata) +static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags, + struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibpd->device); - struct efa_com_reg_mr_params params = {}; - struct efa_com_reg_mr_result result = {}; - struct pbl_context pbl; int supp_access_flags; - unsigned int pg_sz; struct efa_mr *mr; - int inline_size; - int err; if (udata && udata->inlen && !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, udata not cleared\n"); - err = -EINVAL; - goto err_out; + return ERR_PTR(-EINVAL); } supp_access_flags = IB_ACCESS_LOCAL_WRITE | - (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0); + (EFA_DEV_CAP(dev, RDMA_READ) ? 
IB_ACCESS_REMOTE_READ : 0); access_flags &= ~IB_ACCESS_OPTIONAL; if (access_flags & ~supp_access_flags) { ibdev_dbg(&dev->ibdev, "Unsupported access flags[%#x], supported[%#x]\n", access_flags, supp_access_flags); - err = -EOPNOTSUPP; - goto err_out; + return ERR_PTR(-EOPNOTSUPP); } mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) { - err = -ENOMEM; - goto err_out; - } + if (!mr) + return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); - if (IS_ERR(mr->umem)) { - err = PTR_ERR(mr->umem); - ibdev_dbg(&dev->ibdev, - "Failed to pin and map user space memory[%d]\n", err); - goto err_free; - } + return mr; +} + +static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start, + u64 length, u64 virt_addr, int access_flags) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_com_reg_mr_params params = {}; + struct efa_com_reg_mr_result result = {}; + struct pbl_context pbl; + unsigned int pg_sz; + int inline_size; + int err; params.pd = to_epd(ibpd)->pdn; params.iova = virt_addr; @@ -1399,15 +1606,13 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, dev->dev_attr.page_size_cap, virt_addr); if (!pg_sz) { - err = -EOPNOTSUPP; ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n", dev->dev_attr.page_size_cap); - goto err_unmap; + return -EOPNOTSUPP; } - params.page_shift = __ffs(pg_sz); - params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)), - pg_sz); + params.page_shift = order_base_2(pg_sz); + params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz); ibdev_dbg(&dev->ibdev, "start %#llx length %#llx params.page_shift %u params.page_num %u\n", @@ -1417,21 +1622,21 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, if (params.page_num <= inline_size) { err = efa_create_inline_pbl(dev, mr, ¶ms); if (err) - goto err_unmap; + return err; err = efa_com_register_mr(&dev->edev, ¶ms, &result); if (err) - goto err_unmap; + return err; } else { err = efa_create_pbl(dev, &pbl, mr, ¶ms); if (err) - goto err_unmap; + return err; err = efa_com_register_mr(&dev->edev, ¶ms, &result); pbl_destroy(dev, &pbl); if (err) - goto err_unmap; + return err; } mr->ibmr.lkey = result.l_key; @@ -1439,14 +1644,83 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, mr->ibmr.length = length; ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey); + return 0; +} + +struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct ib_umem_dmabuf *umem_dmabuf; + struct efa_mr *mr; + int err; + + mr = efa_alloc_mr(ibpd, access_flags, udata); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto err_out; + } + + umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd, + access_flags); + if (IS_ERR(umem_dmabuf)) { + err = PTR_ERR(umem_dmabuf); + ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err); + goto err_free; + } + + mr->umem = &umem_dmabuf->umem; + err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); + if (err) + goto err_release; + return &mr->ibmr; -err_unmap: +err_release: ib_umem_release(mr->umem); err_free: kfree(mr); err_out: - atomic64_inc(&dev->stats.sw_stats.reg_mr_err); + atomic64_inc(&dev->stats.reg_mr_err); + return ERR_PTR(err); +} + +struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + 
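[Editor's note] efa_reg_user_mr_dmabuf() pins the dma-buf through ib_umem_dmabuf_get_pinned() and then reuses the same efa_register_mr() path as ordinary MRs. The user-space entry point is the generic verbs dma-buf registration call; a brief sketch, assuming the fd comes from an exporter such as a GPU driver and that remote read is supported on this device:

        #include <infiniband/verbs.h>

        static struct ibv_mr *reg_dmabuf(struct ibv_pd *pd, int dmabuf_fd,
                                         size_t len, uint64_t iova)
        {
                /* offset 0 into the dma-buf; EFA accepts LOCAL_WRITE and, when
                 * the RDMA_READ capability is set, REMOTE_READ for read targets.
                 */
                return ibv_reg_dmabuf_mr(pd, 0, len, iova, dmabuf_fd,
                                         IBV_ACCESS_LOCAL_WRITE |
                                         IBV_ACCESS_REMOTE_READ);
        }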
struct efa_dev *dev = to_edev(ibpd->device); + struct efa_mr *mr; + int err; + + mr = efa_alloc_mr(ibpd, access_flags, udata); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto err_out; + } + + mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + ibdev_dbg(&dev->ibdev, + "Failed to pin and map user space memory[%d]\n", err); + goto err_free; + } + + err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); + if (err) + goto err_release; + + return &mr->ibmr; + +err_release: + ib_umem_release(mr->umem); +err_free: + kfree(mr); +err_out: + atomic64_inc(&dev->stats.reg_mr_err); return ERR_PTR(err); } @@ -1470,7 +1744,7 @@ int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) return 0; } -int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num, +int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; @@ -1497,11 +1771,39 @@ static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn) return efa_com_dealloc_uar(&dev->edev, ¶ms); } +#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \ + (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \ + NULL : #_attr) + +static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext, + const struct efa_ibv_alloc_ucontext_cmd *cmd) +{ + struct efa_dev *dev = to_edev(ibucontext->device); + char *attr_str; + + if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch, + EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str)) + goto err; + + if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth, + EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR, + attr_str)) + goto err; + + return 0; + +err: + ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n", + attr_str); + return -EOPNOTSUPP; +} + int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) { struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); struct efa_ibv_alloc_ucontext_resp resp = {}; + struct efa_ibv_alloc_ucontext_cmd cmd = {}; struct efa_com_alloc_uar_result result; int err; @@ -1510,6 +1812,18 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) * we will ack input fields in our response. 
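[Editor's note] The EFA_CHECK_USER_COMP() macro used by efa_user_comp_handshake() is terse; its intent is that alloc_ucontext fails only when the device reports a nonzero attribute that the user-space provider did not acknowledge in cmd->comp_mask, so old providers keep working unless the device actually depends on the new attribute. Roughly what it expands to for max_tx_batch, written out as an illustrative helper (not code from this patch):

        static const char *check_tx_batch(struct efa_dev *dev,
                                          const struct efa_ibv_alloc_ucontext_cmd *cmd)
        {
                /* Pass if the device does not report max_tx_batch, or if user
                 * space acknowledged it via the comp_mask bit.
                 */
                if (!dev->dev_attr.max_tx_batch ||
                    (cmd->comp_mask & EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH))
                        return NULL;

                return "max_tx_batch"; /* handshake fails, -EOPNOTSUPP */
        }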
*/ + err = ib_copy_from_udata(&cmd, udata, + min(sizeof(cmd), udata->inlen)); + if (err) { + ibdev_dbg(&dev->ibdev, + "Cannot copy udata for alloc_ucontext\n"); + goto err_out; + } + + err = efa_user_comp_handshake(ibucontext, &cmd); + if (err) + goto err_out; + err = efa_com_alloc_uar(&dev->edev, &result); if (err) goto err_out; @@ -1521,20 +1835,20 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq; resp.inline_buf_size = dev->dev_attr.inline_buf_size; resp.max_llq_size = dev->dev_attr.max_llq_size; + resp.max_tx_batch = dev->dev_attr.max_tx_batch; + resp.min_sq_wr = dev->dev_attr.min_sq_depth; - if (udata && udata->outlen) { - err = ib_copy_to_udata(udata, &resp, - min(sizeof(resp), udata->outlen)); - if (err) - goto err_dealloc_uar; - } + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) + goto err_dealloc_uar; return 0; err_dealloc_uar: efa_dealloc_uar(dev, result.uarn); err_out: - atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err); + atomic64_inc(&dev->stats.alloc_ucontext_err); return err; } @@ -1550,10 +1864,6 @@ void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { struct efa_user_mmap_entry *entry = to_emmap(rdma_entry); - /* DMA mapping is already gone, now free the pages */ - if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) - free_pages_exact(phys_to_virt(entry->address), - entry->rdma_entry.npages * PAGE_SIZE); kfree(entry); } @@ -1571,6 +1881,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, ibdev_dbg(&dev->ibdev, "pgoff[%#lx] does not have valid entry\n", vma->vm_pgoff); + atomic64_inc(&dev->stats.mmap_err); return -EINVAL; } entry = to_emmap(rdma_entry); @@ -1606,12 +1917,14 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, err = -EINVAL; } - if (err) + if (err) { ibdev_dbg( &dev->ibdev, "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", entry->address, rdma_entry->npages * PAGE_SIZE, entry->mmap_flag, err); + atomic64_inc(&dev->stats.mmap_err); + } rdma_user_mmap_entry_put(rdma_entry); return err; @@ -1642,10 +1955,10 @@ static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) } int efa_create_ah(struct ib_ah *ibah, - struct rdma_ah_attr *ah_attr, - u32 flags, + struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct efa_dev *dev = to_edev(ibah->device); struct efa_com_create_ah_params params = {}; struct efa_ibv_create_ah_resp resp = {}; @@ -1653,7 +1966,7 @@ int efa_create_ah(struct ib_ah *ibah, struct efa_ah *ah = to_eah(ibah); int err; - if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) { + if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) { ibdev_dbg(&dev->ibdev, "Create address handle is not supported in atomic context\n"); err = -EOPNOTSUPP; @@ -1695,11 +2008,11 @@ int efa_create_ah(struct ib_ah *ibah, err_destroy_ah: efa_ah_destroy(dev, ah); err_out: - atomic64_inc(&dev->stats.sw_stats.create_ah_err); + atomic64_inc(&dev->stats.create_ah_err); return err; } -void efa_destroy_ah(struct ib_ah *ibah, u32 flags) +int efa_destroy_ah(struct ib_ah *ibah, u32 flags) { struct efa_dev *dev = to_edev(ibah->pd->device); struct efa_ah *ah = to_eah(ibah); @@ -1709,32 +2022,64 @@ void efa_destroy_ah(struct ib_ah *ibah, u32 flags) if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) { ibdev_dbg(&dev->ibdev, "Destroy address handle is not supported in atomic context\n"); - return; + return -EOPNOTSUPP; } efa_ah_destroy(dev, ah); + 
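[Editor's note] efa_create_ah() requires a sleepable context (RDMA_CREATE_AH_SLEEPABLE) and resolves the destination from the GRH's DGID; EFA addressing is GID-based and has no LID. A standard verbs sketch of creating an AH for a remote EFA endpoint whose GID was exchanged out of band:

        #include <string.h>
        #include <infiniband/verbs.h>

        static struct ibv_ah *create_efa_ah(struct ibv_pd *pd,
                                            const union ibv_gid *remote_gid)
        {
                struct ibv_ah_attr attr = {
                        .is_global = 1, /* EFA addressing is GID based */
                        .port_num = 1,
                };

                memcpy(&attr.grh.dgid, remote_gid, sizeof(attr.grh.dgid));
                /* reaches efa_create_ah() through the create_user_ah op */
                return ibv_create_ah(pd, &attr);
        }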
return 0; } -struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num) +struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev, + u32 port_num) { - return rdma_alloc_hw_stats_struct(efa_stats_names, - ARRAY_SIZE(efa_stats_names), + return rdma_alloc_hw_stats_struct(efa_port_stats_descs, + ARRAY_SIZE(efa_port_stats_descs), RDMA_HW_STATS_DEFAULT_LIFESPAN); } -int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, - u8 port_num, int index) +struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev) +{ + return rdma_alloc_hw_stats_struct(efa_device_stats_descs, + ARRAY_SIZE(efa_device_stats_descs), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int efa_fill_device_stats(struct efa_dev *dev, + struct rdma_hw_stats *stats) +{ + struct efa_com_stats_admin *as = &dev->edev.aq.stats; + struct efa_stats *s = &dev->stats; + + stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd); + stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd); + stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err); + stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion); + + stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd); + stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err); + stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err); + stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err); + stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err); + stats->value[EFA_ALLOC_UCONTEXT_ERR] = + atomic64_read(&s->alloc_ucontext_err); + stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err); + stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err); + + return ARRAY_SIZE(efa_device_stats_descs); +} + +static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats, + u32 port_num) { struct efa_com_get_stats_params params = {}; union efa_com_get_stats_result result; - struct efa_dev *dev = to_edev(ibdev); + struct efa_com_rdma_read_stats *rrs; + struct efa_com_messages_stats *ms; struct efa_com_basic_stats *bs; - struct efa_com_stats_admin *as; - struct efa_stats *s; int err; - params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC; params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL; + params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC; err = efa_com_get_stats(&dev->edev, ¶ms, &result); if (err) @@ -1747,24 +2092,42 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, stats->value[EFA_RX_PKTS] = bs->rx_pkts; stats->value[EFA_RX_DROPS] = bs->rx_drops; - as = &dev->edev.aq.stats; - stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd); - stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd); - stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion); + params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES; + err = efa_com_get_stats(&dev->edev, ¶ms, &result); + if (err) + return err; - s = &dev->stats; - stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd); - stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err); - stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err); - stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err); - stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err); - stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err); + ms = &result.messages_stats; + stats->value[EFA_SEND_BYTES] = ms->send_bytes; + stats->value[EFA_SEND_WRS] = 
ms->send_wrs; + stats->value[EFA_RECV_BYTES] = ms->recv_bytes; + stats->value[EFA_RECV_WRS] = ms->recv_wrs; - return ARRAY_SIZE(efa_stats_names); + params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ; + err = efa_com_get_stats(&dev->edev, ¶ms, &result); + if (err) + return err; + + rrs = &result.rdma_read_stats; + stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs; + stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes; + stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err; + stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes; + + return ARRAY_SIZE(efa_port_stats_descs); +} + +int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + if (port_num) + return efa_fill_port_stats(to_edev(ibdev), stats, port_num); + else + return efa_fill_device_stats(to_edev(ibdev), stats); } enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, - u8 port_num) + u32 port_num) { return IB_LINK_LAYER_UNSPECIFIED; } |
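[Editor's note] efa_get_hw_stats() now dispatches on port_num: port 0 returns the device-wide counter set (admin-command and software error counters such as cmds_err and mmap_err), while port 1 returns the hardware port counters, which this patch extends with message and RDMA-read statistics. These surface through the usual RDMA sysfs hw_counters files; a small sketch for reading one, where the efa_0 device name is just an example of whatever ib_register_device() assigned on a given system:

        #include <stdio.h>

        /* Port counter, e.g. the new rdma_read_bytes:
         *   /sys/class/infiniband/efa_0/ports/1/hw_counters/rdma_read_bytes
         * Device-wide counter, e.g. the new cmds_err:
         *   /sys/class/infiniband/efa_0/hw_counters/cmds_err
         */
        static long long read_counter(const char *path)
        {
                long long val = -1;
                FILE *f = fopen(path, "r");

                if (f) {
                        if (fscanf(f, "%lld", &val) != 1)
                                val = -1;
                        fclose(f);
                }
                return val;
        }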