From 7fc7a7cffab6b94cb5e47148e6852ba633078ea1 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 25 Jul 2018 21:22:13 +0530 Subject: rdma/cxgb4: Add support for srq functions & structs This patch adds kernel mode t4_srq structures and support functions, uapi structures and defines, as well as firmware work request structures. Signed-off-by: Raju Rangoju Reviewed-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 38 ++++++++++ drivers/infiniband/hw/cxgb4/t4.h | 117 +++++++++++++++++++++++++++++- drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 19 +++++ include/uapi/rdma/cxgb4-abi.h | 17 +++++ 4 files changed, 190 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 8866bf992316..1d567aaf88e3 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -97,6 +97,7 @@ struct c4iw_resource { struct c4iw_id_table tpt_table; struct c4iw_id_table qid_table; struct c4iw_id_table pdid_table; + struct c4iw_id_table srq_table; }; struct c4iw_qid_list { @@ -130,6 +131,8 @@ struct c4iw_stats { struct c4iw_stat stag; struct c4iw_stat pbl; struct c4iw_stat rqt; + struct c4iw_stat srqt; + struct c4iw_stat srq; struct c4iw_stat ocqp; u64 db_full; u64 db_empty; @@ -549,6 +552,7 @@ struct c4iw_qp { struct kref kref; wait_queue_head_t wait; int sq_sig_all; + struct c4iw_srq *srq; struct work_struct free_work; struct c4iw_ucontext *ucontext; struct c4iw_wr_wait *wr_waitp; @@ -559,6 +563,26 @@ static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) return container_of(ibqp, struct c4iw_qp, ibqp); } +struct c4iw_srq { + struct ib_srq ibsrq; + struct list_head db_fc_entry; + struct c4iw_dev *rhp; + struct t4_srq wq; + struct sk_buff *destroy_skb; + u32 srq_limit; + u32 pdid; + int idx; + u32 flags; + spinlock_t lock; /* protects srq */ + struct c4iw_wr_wait *wr_waitp; + bool armed; +}; + +static inline struct c4iw_srq *to_c4iw_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct c4iw_srq, ibsrq); +} + struct c4iw_ucontext { struct ib_ucontext ibucontext; struct c4iw_dev_ucontext uctx; @@ -1040,6 +1064,13 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct ib_udata *udata); int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); +int c4iw_destroy_srq(struct ib_srq *ib_srq); +struct ib_srq *c4iw_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *attrs, + struct ib_udata *udata); int c4iw_destroy_qp(struct ib_qp *ib_qp); struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, @@ -1076,12 +1107,19 @@ extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, enum cxgb4_bar2_qtype qtype, unsigned int *pbar2_qid, u64 *pbar2_pa); +int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev); +void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx); extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe); extern int c4iw_wr_log; extern int db_fc_threshold; extern int db_coalescing_threshold; extern int use_dsgl; void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); +void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq); +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16); +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx); +int c4iw_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); typedef int c4iw_restrack_func(struct sk_buff *msg, diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 838a7dee48bd..29a4dd5053f2 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -52,12 +52,16 @@ struct t4_status_page { __be16 pidx; u8 qp_err; /* flit 1 - sw owns */ u8 db_off; - u8 pad; + u8 pad[2]; u16 host_wq_pidx; u16 host_cidx; u16 host_pidx; + u16 pad2; + u32 srqidx; }; +#define T4_RQT_ENTRY_SHIFT 6 +#define T4_RQT_ENTRY_SIZE BIT(T4_RQT_ENTRY_SHIFT) #define T4_EQ_ENTRY_SIZE 64 #define T4_SQ_NUM_SLOTS 5 @@ -248,6 +252,7 @@ struct t4_cqe { /* used for RQ completion processing */ #define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag)) #define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn)) +#define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx)) /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) @@ -331,6 +336,7 @@ struct t4_swrqe { u64 wr_id; ktime_t host_time; u64 sge_ts; + int valid; }; struct t4_rq { @@ -360,8 +366,98 @@ struct t4_wq { void __iomem *db; struct c4iw_rdev *rdev; int flushed; + u8 *qp_errp; + u32 *srqidxp; +}; + +struct t4_srq_pending_wr { + u64 wr_id; + union t4_recv_wr wqe; + u8 len16; +}; + +struct t4_srq { + union t4_recv_wr *queue; + dma_addr_t dma_addr; + DECLARE_PCI_UNMAP_ADDR(mapping); + struct t4_swrqe *sw_rq; + void __iomem *bar2_va; + u64 bar2_pa; + size_t memsize; + u32 bar2_qid; + u32 qid; + u32 msn; + u32 rqt_hwaddr; + u32 rqt_abs_idx; + u16 rqt_size; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + u16 wq_pidx_inc; + u16 in_use; + struct t4_srq_pending_wr *pending_wrs; + u16 pending_cidx; + u16 pending_pidx; + u16 pending_in_use; + u16 ooo_count; }; +static inline u32 t4_srq_avail(struct t4_srq *srq) +{ + return srq->size - 1 - srq->in_use; +} + +static inline void t4_srq_produce(struct t4_srq *srq, u8 len16) +{ + srq->in_use++; + if (++srq->pidx == srq->size) + srq->pidx = 0; + srq->wq_pidx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + if (srq->wq_pidx >= srq->size * T4_RQ_NUM_SLOTS) + srq->wq_pidx %= srq->size * T4_RQ_NUM_SLOTS; + srq->queue[srq->size].status.host_pidx = srq->pidx; +} + +static inline void t4_srq_produce_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use++; + srq->in_use++; + if (++srq->pending_pidx == srq->size) + srq->pending_pidx = 0; +} + +static inline void t4_srq_consume_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use--; + srq->in_use--; + if (++srq->pending_cidx == srq->size) + srq->pending_cidx = 0; +} + +static inline void t4_srq_produce_ooo(struct t4_srq *srq) +{ + srq->in_use--; + srq->ooo_count++; +} + +static inline void t4_srq_consume_ooo(struct t4_srq *srq) +{ + srq->cidx++; + if (srq->cidx == srq->size) + srq->cidx = 0; + srq->queue[srq->size].status.host_cidx = srq->cidx; + srq->ooo_count--; +} + +static inline void t4_srq_consume(struct t4_srq *srq) +{ + srq->in_use--; + if (++srq->cidx == srq->size) + srq->cidx = 0; + srq->queue[srq->size].status.host_cidx = srq->cidx; +} + static inline int t4_rqes_posted(struct t4_wq *wq) { return wq->rq.in_use; @@ -475,6 +571,25 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src) } } +static inline void t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16, + union t4_recv_wr *wqe) +{ + /* Flush host queue memory writes. */ + wmb(); + if (inc == 1 && srq->bar2_qid == 0 && wqe) { + pr_debug("%s : WC srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + pio_copy(srq->bar2_va + SGE_UDB_WCDOORBELL, (u64 *)wqe); + } else { + pr_debug("%s: DB srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + writel(PIDX_T5_V(inc) | QID_V(srq->bar2_qid), + srq->bar2_va + SGE_UDB_KDOORBELL); + } + /* Flush user doorbell area writes. */ + wmb(); +} + static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe) { diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 58c531db4f4a..0f4f86b004d6 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -263,6 +263,7 @@ enum fw_ri_res_type { FW_RI_RES_TYPE_SQ, FW_RI_RES_TYPE_RQ, FW_RI_RES_TYPE_CQ, + FW_RI_RES_TYPE_SRQ, }; enum fw_ri_res_op { @@ -296,6 +297,20 @@ struct fw_ri_res { __be32 r6_lo; __be64 r7; } cq; + struct fw_ri_res_srq { + __u8 restype; + __u8 op; + __be16 r3; + __be32 eqid; + __be32 r4[2]; + __be32 fetchszm_to_iqid; + __be32 dcaen_to_eqsize; + __be64 eqaddr; + __be32 srqid; + __be32 pdid; + __be32 hwsrqsize; + __be32 hwsrqaddr; + } srq; } u; }; @@ -707,6 +722,10 @@ enum fw_ri_init_p2ptype { FW_RI_INIT_P2PTYPE_DISABLED = 0xf, }; +enum fw_ri_init_rqeqid_srq { + FW_RI_INIT_RQEQID_SRQ = 1 << 31, +}; + struct fw_ri_wr { __be32 op_compl; __be32 flowid_len16; diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h index 65c9eacd3ffb..d0b2d829471a 100644 --- a/include/uapi/rdma/cxgb4-abi.h +++ b/include/uapi/rdma/cxgb4-abi.h @@ -84,6 +84,23 @@ struct c4iw_create_qp_resp { __u32 flags; }; +struct c4iw_create_srq_resp { + __aligned_u64 srq_key; + __aligned_u64 srq_db_gts_key; + __aligned_u64 srq_memsize; + __u32 srqid; + __u32 srq_size; + __u32 rqt_abs_idx; + __u32 qid_mask; + __u32 flags; + __u32 reserved; /* explicit padding */ +}; + +enum { + /* HW supports SRQ_LIMIT_REACHED event */ + T4_SRQ_LIMIT_SUPPORT = 1 << 0, +}; + struct c4iw_alloc_ucontext_resp { __aligned_u64 status_page_key; __u32 status_page_size; -- cgit v1.2.3-59-g8ed1b