From ceb26569af5bc33a8f47b755fd64f12c81801ce8 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 12 May 2017 09:19:36 -0700 Subject: IB/hfi1: Remove unnecessary initialization from tx request The tx request is unnecessarily initialized with memset() in the hot code path; there is no need for this, as most fields are initialized later on. This initialization shows up as costly in profiles. Remove the unnecessary initialization from the tx request and make sure all fields are initialized properly. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index d55339f5d73b..16fd519216dc 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -607,12 +607,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); req = pq->reqs + info.comp_idx; - memset(req, 0, sizeof(*req)); req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ + req->data_len = 0; req->pq = pq; req->cq = cq; req->status = -1; req->ahg_idx = -1; + req->iov_idx = 0; + req->sent = 0; + req->seqnum = 0; + req->seqcomp = 0; + req->seqsubmitted = 0; + req->flags = 0; + req->tids = NULL; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -701,12 +708,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, /* Save all the IO vector structures */ for (i = 0; i < req->data_iovs; i++) { + req->iovs[i].offset = 0; INIT_LIST_HEAD(&req->iovs[i].list); memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(req->iovs[i].iov)); ret = pin_vector_pages(req, &req->iovs[i]); if (ret) { + req->data_iovs = i; req->status = ret; goto free_req; } @@ -749,6 +758,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, } req->tids = tmp; req->n_tids = ntids; + req->tididx = 0; idx++; } -- cgit v1.2.3-59-g8ed1b
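For illustration, a minimal standalone sketch of the optimization in the patch above: replace a hot-path memset() of a large, mostly-overwritten request structure with explicit stores to only the fields that are not assigned later. The struct and field names below are toy stand-ins, not the driver's actual definitions.

#include <stddef.h>
#include <string.h>

/* Toy stand-in for struct user_sdma_request; real fields differ. */
struct toy_req {
	int data_iovs;
	unsigned int data_len;
	int status;
	int ahg_idx;
	unsigned int sent, seqnum, seqcomp, seqsubmitted, flags;
	void *tids;
	char payload_state[512]; /* fully rewritten later by the data path */
};

/* Before: clears every byte, including the large area that the
 * data path overwrites anyway. */
static void toy_init_slow(struct toy_req *req)
{
	memset(req, 0, sizeof(*req));
	req->status = -1;
	req->ahg_idx = -1;
}

/* After: touch only the handful of fields that are read before
 * being written; skip payload_state entirely. */
static void toy_init_fast(struct toy_req *req)
{
	req->data_iovs = 0;
	req->data_len = 0;
	req->status = -1;
	req->ahg_idx = -1;
	req->sent = 0;
	req->seqnum = 0;
	req->seqcomp = 0;
	req->seqsubmitted = 0;
	req->flags = 0;
	req->tids = NULL;
}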
From aa560df381f7199fafa4e7f71382de28f0400270 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 12 May 2017 09:19:47 -0700 Subject: IB/hfi1: Remove unused mk_qpn function Leftover function that is not used. Remove it. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 650305cc0373..e91be05062e6 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -73,12 +73,6 @@ static void iowait_wakeup(struct iowait *wait, int reason); static void iowait_sdma_drained(struct iowait *wait); static void qp_pio_drain(struct rvt_qp *qp); -static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, - struct rvt_qpn_map *map, unsigned off) -{ - return (map - qpt->map) * RVT_BITS_PER_PAGE + off; -} - const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = { [IB_WR_RDMA_WRITE] = { .length = sizeof(struct ib_rdma_wr), -- cgit v1.2.3-59-g8ed1b From 7dafbab3753fcf59bc81748e5b2c5bf04e1c62c7 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:19:55 -0700 Subject: IB/hfi1: Add functions to parse BTH/IB headers Improve code readability by adding inline functions to read specific BTH/IB fields without knowledge of byte offsets. Reviewed-by: Brian Welty Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 6 +-- drivers/infiniband/hw/hfi1/rc.c | 8 ++-- drivers/infiniband/hw/hfi1/uc.c | 2 +- drivers/infiniband/hw/hfi1/ud.c | 6 +-- drivers/infiniband/hw/hfi1/verbs.c | 6 +-- include/rdma/ib_hdrs.h | 84 +++++++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 2 + include/rdma/rdmavt_qp.h | 2 +- 8 files changed, 101 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a50870e455a3..0583479f2576 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -286,7 +286,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, goto drop; } /* Get the destination QP number. 
*/ - qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; + qp_num = ib_bth_get_qpn(ohdr); if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { struct rvt_qp *qp; unsigned long flags; @@ -438,7 +438,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, case IB_QPT_GSI: case IB_QPT_UD: rlid = ib_get_slid(hdr); - rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; + rqpn = ib_get_sqpn(ohdr); svc_type = IB_CC_SVCTYPE_UD; is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); @@ -461,7 +461,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bth1 = be32_to_cpu(ohdr->bth[1]); if (do_cnp && (bth1 & IB_FECN_SMASK)) { - u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); + u16 pkey = ib_bth_get_pkey(ohdr); return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); } diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1080778a1f7c..66e6843aab48 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -765,7 +765,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, ohdr->u.aeth = rvt_compute_aeth(qp); sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT); + pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4; hdr.lrh[0] = cpu_to_be16(lrh0); @@ -1009,7 +1009,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) return; } - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); reset_sending_psn(qp, psn); /* @@ -1943,7 +1943,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) is_fecn = process_ecn(qp, packet, false); - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* @@ -2388,7 +2388,7 @@ void hfi1_rc_hdrerr( if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) return; - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* Only deal with RDMA Writes for now */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 5da1e4546543..2a5650f8aee0 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -319,7 +319,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) process_ecn(qp, packet, true); - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* Compare the PSN verses the expected PSN. 
*/ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 6a4e95cefae5..49fe179ad3ae 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -549,7 +549,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, hdr.lrh[3] = cpu_to_be16(slid); plen = 2 /* PBC */ + hwords; - pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); vl = sc_to_vlt(ppd->dd, sc5); pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { @@ -689,8 +689,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u16 slid; u8 extra_bytes; - qkey = be32_to_cpu(ohdr->u.ud.deth[0]); - src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; + qkey = ib_get_qkey(ohdr); + src_qp = ib_get_sqpn(ohdr); dlid = ib_get_dlid(hdr); bth1 = be32_to_cpu(ohdr->bth[1]); slid = ib_get_slid(hdr); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 90e7b77d68e8..128d2917a2d9 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -595,7 +595,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) inc_opstats(tlen, &rcd->opstats->stats[opcode]); /* Get the destination QP number. */ - qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK; + qp_num = ib_bth_get_qpn(packet->ohdr); lid = ib_get_dlid(hdr); if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { @@ -863,7 +863,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, /* No vl15 here */ /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) pbc = hfi1_fault_tx(qp, opcode, pbc); @@ -999,7 +999,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u8 opcode = get_opcode(&tx->phdr.hdr); /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) pbc = hfi1_fault_tx(qp, opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 5519f31f043a..c124d515f7d5 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -193,8 +193,12 @@ static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth) #define IB_LNH_MASK 3 #define IB_SC_MASK 0xf #define IB_SC_SHIFT 12 +#define IB_SC5_MASK 0x10 #define IB_SL_MASK 0xf #define IB_SL_SHIFT 4 +#define IB_SL_SHIFT 4 +#define IB_LVER_MASK 0xf +#define IB_LVER_SHIFT 8 static inline u8 ib_get_lnh(struct ib_header *hdr) { @@ -206,6 +210,11 @@ static inline u8 ib_get_sc(struct ib_header *hdr) return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK); } +static inline bool ib_is_sc5(u16 sc5) +{ + return !!(sc5 & IB_SC5_MASK); +} + static inline u8 ib_get_sl(struct ib_header *hdr) { return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK); @@ -221,6 +230,27 @@ static inline u16 ib_get_slid(struct ib_header *hdr) return (be16_to_cpu(hdr->lrh[3])); } +static inline u8 ib_get_lver(struct ib_header *hdr) +{ + return (u8)((be16_to_cpu(hdr->lrh[0]) >> IB_LVER_SHIFT) & + IB_LVER_MASK); +} + +static inline u16 ib_get_len(struct ib_header *hdr) +{ + return (u16)(be16_to_cpu(hdr->lrh[2])); +} + +static inline u32 ib_get_qkey(struct ib_other_headers *ohdr) +{ + return 
be32_to_cpu(ohdr->u.ud.deth[0]); +} + +static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr) +{ + return ((be32_to_cpu(ohdr->u.ud.deth[1])) & IB_QPN_MASK); +} + /* * BTH */ @@ -229,6 +259,14 @@ static inline u16 ib_get_slid(struct ib_header *hdr) #define IB_BTH_PAD_MASK 3 #define IB_BTH_PKEY_MASK 0xffff #define IB_BTH_PAD_SHIFT 20 +#define IB_BTH_A_MASK 1 +#define IB_BTH_A_SHIFT 31 +#define IB_BTH_M_MASK 1 +#define IB_BTH_M_SHIFT 22 +#define IB_BTH_SE_MASK 1 +#define IB_BTH_SE_SHIFT 23 +#define IB_BTH_TVER_MASK 0xf +#define IB_BTH_TVER_SHIFT 16 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) { @@ -247,4 +285,50 @@ static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr) IB_BTH_OPCODE_MASK); } +static inline u8 ib_bth_get_ackreq(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[2]) >> IB_BTH_A_SHIFT) & + IB_BTH_A_MASK); +} + +static inline u8 ib_bth_get_migreq(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_M_SHIFT) & + IB_BTH_M_MASK); +} + +static inline u8 ib_bth_get_se(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_SE_SHIFT) & + IB_BTH_SE_MASK); +} + +static inline u32 ib_bth_get_psn(struct ib_other_headers *ohdr) +{ + return (u32)(be32_to_cpu(ohdr->bth[2])); +} + +static inline u32 ib_bth_get_qpn(struct ib_other_headers *ohdr) +{ + return (u32)((be32_to_cpu(ohdr->bth[1])) & IB_QPN_MASK); +} + +static inline u8 ib_bth_get_becn(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & + IB_BECN_MASK); +} + +static inline u8 ib_bth_get_fecn(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & + IB_FECN_MASK); +} + +static inline u8 ib_bth_get_tver(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_TVER_SHIFT) & + IB_BTH_TVER_MASK); +} + #endif /* IB_HDRS_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ba8314ec5768..8f1ce4e27bbd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -664,6 +664,8 @@ union rdma_network_hdr { }; }; +#define IB_QPN_MASK 0xFFFFFF + enum { IB_MULTICAST_QPN = 0xffffff }; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index be6472e5b06b..13f43b3527a8 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -396,7 +396,7 @@ struct rvt_srq { #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) #define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) -#define RVT_QPN_MASK 0xFFFFFF +#define RVT_QPN_MASK IB_QPN_MASK /* * QPN-map pages start out as NULL, they get allocated upon -- cgit v1.2.3-59-g8ed1b
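As a usage sketch of the accessors this patch introduces (the classifier below is hypothetical, not part of the patch): receive paths can now read BTH fields by name instead of open-coding be32_to_cpu() plus shift/mask arithmetic at each call site.

#include <rdma/ib_hdrs.h>

/* Hypothetical classifier showing the accessor style; each helper
 * hides the byte swap and bit layout of the BTH words. */
static void toy_classify(struct ib_other_headers *ohdr)
{
	u8 opcode = ib_bth_get_opcode(ohdr); /* (bth[0] >> 24) & 0xff */
	u16 pkey = ib_bth_get_pkey(ohdr);    /* bth[0] & 0xffff */
	u32 qpn = ib_bth_get_qpn(ohdr);      /* bth[1] & IB_QPN_MASK */
	u32 psn = ib_bth_get_psn(ohdr);      /* be32_to_cpu(bth[2]) */

	pr_debug("op 0x%x pkey 0x%x qpn 0x%x psn 0x%x\n",
		 opcode, pkey, qpn, psn);
}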
From 228d2af1b723deedee38f03d144b7d25b39f6f86 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:20:08 -0700 Subject: IB/hfi1: Separate input/output header tracing Calls to trace incoming packets will now receive the packet context as a parameter. This enables trace support for future packet types. Header trace output is in the format field:value, which makes parsing easier. input_ibhdr trace before change: -0 [001] d.h. 5904.250925: input_ibhdr: [0000:05:00.0] vl 0 lver 0 sl 0 lnh 2,LRH_BTH dlid 0002 len 18 slid 0001 op 0x64,UD_SEND_ONLY se 0 m 0 pad 0 tver 0 pkey 0xffff f 0 b 0 qpn 0x000001 a 0 psn 0x000001b2 deth qkey 0x80010000 sqpn 0x000001 input_ibhdr trace after change: -0 [001] d.h. 6655.714488: input_ibhdr: [0000:05:00.0] (IB) len:124 sc:0 dlid:0x0001 slid:0x0002 lnh:2,LRH_BTH lver:0 sl:0 age:0 becn:0 fecn:0 l4:0 rc:0 entropy:0 op:0x64,UD_SEND_ONLY se:0 m:0 pad:0 tver:0 pkey:0x7fff f:0 b:0 qpn:0x000001 a:0 psn:0x00000036 hlen:8 deth qkey:0x80010000 sqpn:0x000001 Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 49 ----- drivers/infiniband/hw/hfi1/rc.c | 3 +- drivers/infiniband/hw/hfi1/trace.c | 58 +++++- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 322 ++++++++++++++++++++---------- drivers/infiniband/hw/hfi1/trace_rx.h | 9 + drivers/infiniband/hw/hfi1/verbs.c | 7 +- 6 files changed, 279 insertions(+), 169 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 414a04a481c2..3b76631cbcbd 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -2086,53 +2086,4 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) - -#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } -#define show_packettype(etype) \ -__print_symbolic(etype, \ - packettype_name(EXPECTED), \ - packettype_name(EAGER), \ - packettype_name(IB), \ - packettype_name(ERROR), \ - packettype_name(BYPASS)) - -#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } -#define show_ib_opcode(opcode) \ -__print_symbolic(opcode, \ - ib_opcode_name(RC_SEND_FIRST), \ - ib_opcode_name(RC_SEND_MIDDLE), \ - ib_opcode_name(RC_SEND_LAST), \ - ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_SEND_ONLY), \ - ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_FIRST), \ - ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(RC_RDMA_WRITE_LAST), \ - ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_READ_REQUEST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ - ib_opcode_name(RC_ACKNOWLEDGE), \ - ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ - ib_opcode_name(RC_COMPARE_SWAP), \ - ib_opcode_name(RC_FETCH_ADD), \ - ib_opcode_name(UC_SEND_FIRST), \ - ib_opcode_name(UC_SEND_MIDDLE), \ - ib_opcode_name(UC_SEND_LAST), \ - ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_SEND_ONLY), \ - ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_FIRST), \ - ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(UC_RDMA_WRITE_LAST), \ - ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UD_SEND_ONLY), \ - ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(CNP)) #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 66e6843aab48..b443c1e01543 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -798,7 +798,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, goto queue_ack; } - 
trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr); + trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), + &hdr, ib_is_sc5(sc5)); /* write the pbc and data */ ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index eafae487face..b80b74d0c252 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -47,7 +47,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -u8 ibhdr_exhdr_len(struct ib_header *hdr) +u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr) { struct ib_other_headers *ohdr; u8 opcode; @@ -61,13 +61,18 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr) 0 : hdr_len_by_opcode[opcode] - (12 + 8); } -#define IMM_PRN "imm %d" -#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x" -#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" -#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" -#define IETH_PRN "ieth rkey 0x%.8x" -#define ATOMICACKETH_PRN "origdata %llx" -#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx" +const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet) +{ + return "IB"; +} + +#define IMM_PRN "imm:%d" +#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x" +#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x" +#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x" +#define IETH_PRN "ieth rkey:0x%.8x" +#define ATOMICACKETH_PRN "origdata:%llx" +#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx" #define OP(transport, op) IB_OPCODE_## transport ## _ ## op @@ -84,6 +89,43 @@ static const char *parse_syndrome(u8 syndrome) return ""; } +void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn) +{ + *ack = ib_bth_get_ackreq(ohdr); + *becn = ib_bth_get_becn(ohdr); + *fecn = ib_bth_get_fecn(ohdr); + *mig = ib_bth_get_migreq(ohdr); + *se = ib_bth_get_se(ohdr); + *pad = ib_bth_get_pad(ohdr); + *opcode = ib_bth_get_opcode(ohdr); + *tver = ib_bth_get_tver(ohdr); + *pkey = ib_bth_get_pkey(ohdr); + *psn = ib_bth_get_psn(ohdr); + *qpn = ib_bth_get_qpn(ohdr); +} + +void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, + struct ib_other_headers **ohdr, + u8 *lnh, u8 *lver, u8 *sl, u8 *sc, + u16 *len, u32 *dlid, u32 *slid) +{ + *lnh = ib_get_lnh(hdr); + *lver = ib_get_lver(hdr); + *sl = ib_get_sl(hdr); + *sc = ib_get_sc(hdr) | (sc5 << 4); + *len = ib_get_len(hdr); + *dlid = ib_get_dlid(hdr); + *slid = ib_get_slid(hdr); + + if (*lnh == HFI1_LRH_BTH) + *ohdr = &hdr->u.oth; + else + *ohdr = &hdr->u.l.oth; +} + const char *parse_everbs_hdrs( struct trace_seq *p, u8 opcode, diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 090f6b506953..0f2d2da057ec 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -55,8 +55,57 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_ibhdrs -u8 ibhdr_exhdr_len(struct ib_header *hdr); +#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } +#define show_ib_opcode(opcode) \ +__print_symbolic(opcode, \ + ib_opcode_name(RC_SEND_FIRST), \ + ib_opcode_name(RC_SEND_MIDDLE), \ + ib_opcode_name(RC_SEND_LAST), \ + ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_SEND_ONLY), \ + ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_FIRST), \ + ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(RC_RDMA_WRITE_LAST), \ 
+ ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_READ_REQUEST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ + ib_opcode_name(RC_ACKNOWLEDGE), \ + ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ + ib_opcode_name(RC_COMPARE_SWAP), \ + ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(UC_SEND_FIRST), \ + ib_opcode_name(UC_SEND_MIDDLE), \ + ib_opcode_name(UC_SEND_LAST), \ + ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_SEND_ONLY), \ + ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_FIRST), \ + ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(UC_RDMA_WRITE_LAST), \ + ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UD_SEND_ONLY), \ + ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(CNP)) + const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); +u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr); +const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet); +void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn); +void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, + struct ib_other_headers **ohdr, + u8 *lnh, u8 *lver, u8 *sl, u8 *sc, + u16 *len, u32 *dlid, u32 *slid); #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) @@ -66,139 +115,198 @@ __print_symbolic(lrh, \ lrh_name(LRH_BTH), \ lrh_name(LRH_GRH)) -#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x" +#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x" +#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d " #define BTH_PRN \ - "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \ - "f %d b %d qpn 0x%.6x a %d psn 0x%.8x" -#define EHDR_PRN "%s" + "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \ + "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x" +#define EHDR_PRN "hlen:%d %s" -DECLARE_EVENT_CLASS(hfi1_ibhdr_template, +DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr), - TP_ARGS(dd, hdr), + struct hfi1_packet *packet, + bool sc5), + TP_ARGS(dd, packet, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) - /* LRH */ - __field(u8, vl) + __field(u8, lnh) __field(u8, lver) __field(u8, sl) + __field(u16, len) + __field(u32, dlid) + __field(u8, sc) + __field(u32, slid) + __field(u8, opcode) + __field(u8, se) + __field(u8, mig) + __field(u8, pad) + __field(u8, tver) + __field(u16, pkey) + __field(u8, fecn) + __field(u8, becn) + __field(u32, qpn) + __field(u8, ack) + __field(u32, psn) + /* extended headers */ + __dynamic_array(u8, ehdrs, + hfi1_trace_ib_hdr_len(packet->hdr)) + ), + TP_fast_assign( + struct ib_other_headers *ohdr; + + DD_DEV_ASSIGN(dd); + + hfi1_trace_parse_9b_hdr(packet->hdr, sc5, + &ohdr, + &__entry->lnh, + &__entry->lver, + &__entry->sl, + &__entry->sc, + &__entry->len, + &__entry->dlid, + &__entry->slid); + + hfi1_trace_parse_bth(ohdr, &__entry->ack, + &__entry->becn, &__entry->fecn, + &__entry->mig, &__entry->se, + &__entry->pad, &__entry->opcode, + &__entry->tver, &__entry->pkey, + &__entry->psn, &__entry->qpn); + /* extended headers */ + 
memcpy(__get_dynamic_array(ehdrs), &ohdr->u, + __get_dynamic_array_len(ehdrs)); + ), + TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " + BTH_PRN " " EHDR_PRN, + __get_str(dev), + __entry->len, + __entry->sc, + __entry->dlid, + __entry->slid, + __entry->lnh, show_lnh(__entry->lnh), + __entry->lver, + __entry->sl, + /* BTH */ + __entry->opcode, show_ib_opcode(__entry->opcode), + __entry->se, + __entry->mig, + __entry->pad, + __entry->tver, + __entry->pkey, + __entry->fecn, + __entry->becn, + __entry->qpn, + __entry->ack, + __entry->psn, + /* extended headers */ + __get_dynamic_array_len(ehdrs), + __parse_ib_ehdrs( + __entry->opcode, + (void *)__get_dynamic_array(ehdrs)) + ) +); + +DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct hfi1_packet *packet, bool sc5), + TP_ARGS(dd, packet, sc5)); + +DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, + bool sc5), + TP_ARGS(dd, hdr, sc5), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) __field(u8, lnh) - __field(u16, dlid) + __field(u8, lver) + __field(u8, sl) __field(u16, len) - __field(u16, slid) - /* BTH */ + __field(u32, dlid) + __field(u8, sc) + __field(u32, slid) __field(u8, opcode) __field(u8, se) - __field(u8, m) + __field(u8, mig) __field(u8, pad) __field(u8, tver) __field(u16, pkey) - __field(u8, f) - __field(u8, b) + __field(u8, fecn) + __field(u8, becn) __field(u32, qpn) - __field(u8, a) + __field(u8, ack) __field(u32, psn) /* extended headers */ - __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) + __dynamic_array(u8, ehdrs, + hfi1_trace_ib_hdr_len(hdr)) ), - TP_fast_assign( + TP_fast_assign( struct ib_other_headers *ohdr; DD_DEV_ASSIGN(dd); - /* LRH */ - __entry->vl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); - __entry->lver = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; - __entry->sl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->lnh = - (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - __entry->dlid = - be16_to_cpu(hdr->lrh[1]); - /* allow for larger len */ - __entry->len = - be16_to_cpu(hdr->lrh[2]); - __entry->slid = - be16_to_cpu(hdr->lrh[3]); - /* BTH */ - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - __entry->opcode = - (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->se = - (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; - __entry->m = - (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; - __entry->pad = - (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - __entry->tver = - (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; - __entry->pkey = - be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->f = - (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & - IB_FECN_MASK; - __entry->b = - (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & - IB_BECN_MASK; - __entry->qpn = - be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->a = - (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; - /* allow for larger PSN */ - __entry->psn = - be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; + + hfi1_trace_parse_9b_hdr(hdr, sc5, + &ohdr, &__entry->lnh, + &__entry->lver, &__entry->sl, + &__entry->sc, &__entry->len, + &__entry->dlid, &__entry->slid); + + hfi1_trace_parse_bth(ohdr, &__entry->ack, + &__entry->becn, &__entry->fecn, + &__entry->mig, &__entry->se, + &__entry->pad, &__entry->opcode, + &__entry->tver, &__entry->pkey, + &__entry->psn, &__entry->qpn); + /* extended headers */ - memcpy(__get_dynamic_array(ehdrs), &ohdr->u, - ibhdr_exhdr_len(hdr)); - ), - TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, - __get_str(dev), - /* LRH */ - __entry->vl, - __entry->lver, - 
__entry->sl, - __entry->lnh, show_lnh(__entry->lnh), - __entry->dlid, - __entry->len, - __entry->slid, - /* BTH */ - __entry->opcode, show_ib_opcode(__entry->opcode), - __entry->se, - __entry->m, - __entry->pad, - __entry->tver, - __entry->pkey, - __entry->f, - __entry->b, - __entry->qpn, - __entry->a, - __entry->psn, - /* extended headers */ - __parse_ib_ehdrs( - __entry->opcode, - (void *)__get_dynamic_array(ehdrs)) - ) + memcpy(__get_dynamic_array(ehdrs), + &ohdr->u, __get_dynamic_array_len(ehdrs)); + ), + TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " + BTH_PRN " " EHDR_PRN, + __get_str(dev), + __entry->len, + __entry->sc, + __entry->dlid, + __entry->slid, + __entry->lnh, show_lnh(__entry->lnh), + __entry->lver, + __entry->sl, + /* BTH */ + __entry->opcode, show_ib_opcode(__entry->opcode), + __entry->se, + __entry->mig, + __entry->pad, + __entry->tver, + __entry->pkey, + __entry->fecn, + __entry->becn, + __entry->qpn, + __entry->ack, + __entry->psn, + /* extended headers */ + __get_dynamic_array_len(ehdrs), + __parse_ib_ehdrs( + __entry->opcode, + (void *)__get_dynamic_array(ehdrs)) + ) ); -DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, pio_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, ack_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, sdma_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); #endif /* __HFI1_TRACE_IBHDRS_H */ diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index f77e59fb43fe..05fc6d68ffe8 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -55,6 +55,15 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rx +#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } +#define show_packettype(etype) \ +__print_symbolic(etype, \ + packettype_name(EXPECTED), \ + packettype_name(EAGER), \ + packettype_name(IB), \ + packettype_name(ERROR), \ + packettype_name(BYPASS)) + TRACE_EVENT(hfi1_rcvhdr, TP_PROTO(struct hfi1_devdata *dd, u32 ctxt, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 128d2917a2d9..5f4be35f31b6 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -589,8 +589,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) goto drop; } - trace_input_ibhdr(rcd->dd, hdr); - + trace_input_ibhdr(rcd->dd, packet, !!(packet->rhf & RHF_DC_INFO_SMASK)); opcode = ib_bth_get_opcode(packet->ohdr); inc_opstats(tlen, &rcd->opstats->stats[opcode]); @@ -885,7 +884,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, return ret; } trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &ps->s_txreq->phdr.hdr); + &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); return ret; bail_ecomm: @@ -1058,7 +1057,7 @@ int 
hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &ps->s_txreq->phdr.hdr); + &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); pio_bail: if (qp->s_wqe) { -- cgit v1.2.3-59-g8ed1b From 9039746cdf39dcbf2ddfcc4a68f729cbbbc853df Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:20:20 -0700 Subject: IB/hfi1: Setup common IB fields in hfi1_packet struct We move many common IB fields into the hfi1_packet structure and set them up in a single function. This allows us to set the fields in a single place and not deal with them throughout the driver. Reviewed-by: Brian Welty Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 9 -- drivers/infiniband/hw/hfi1/chip.h | 2 - drivers/infiniband/hw/hfi1/common.h | 1 + drivers/infiniband/hw/hfi1/driver.c | 158 +++++++++++++++++++++++++----------- drivers/infiniband/hw/hfi1/hfi.h | 21 ++++- drivers/infiniband/hw/hfi1/rc.c | 33 +++----- drivers/infiniband/hw/hfi1/ruc.c | 89 ++++++++++---------- drivers/infiniband/hw/hfi1/uc.c | 16 +--- drivers/infiniband/hw/hfi1/ud.c | 23 ++---- drivers/infiniband/hw/hfi1/verbs.c | 89 +++++++++----------- drivers/infiniband/hw/hfi1/verbs.h | 6 +- 11 files changed, 236 insertions(+), 211 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2ba00b89df6a..5dbee3c1bd45 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9810,15 +9810,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd) hfi1_put_tid(dd, i, PT_INVALID, 0, 0); } -struct ib_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr) -{ - u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); - - return (struct ib_header *) - (rhf_addr - dd->rhf_offset + offset); -} - static const char * const ib_cfg_name_strings[] = { "HFI1_IB_CFG_LIDLMC", "HFI1_IB_CFG_LWID_DG_ENB", diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index cbe455d9ab8b..0b4f418ba0ac 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1347,8 +1347,6 @@ enum { u64 get_all_cpu_total(u64 __percpu *cntr); void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); -struct ib_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr); void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 995d62c7f9a7..ba9ab971ced9 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -325,6 +325,7 @@ struct diag_pkt { #define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ /* misc. 
*/ +#define SC15_PACKET 0xF #define SIZE_OF_CRC 1 #define LIM_MGMT_P_KEY 0x7FFF diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 0583479f2576..2a1022e374a5 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -224,6 +224,20 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf, (offset * RCV_BUF_BLOCK_SIZE)); } +static inline void *hfi1_get_header(struct hfi1_devdata *dd, + __le32 *rhf_addr) +{ + u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); + + return (void *)(rhf_addr - dd->rhf_offset + offset); +} + +static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd, + __le32 *rhf_addr) +{ + return (struct ib_header *)hfi1_get_header(dd, rhf_addr); +} + /* * Validate and encode the a given RcvArray Buffer size. * The function will check whether the given size falls within @@ -249,7 +263,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, { struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); - int lnh = ib_get_lnh(rhdr); + u8 lnh = ib_get_lnh(rhdr); + bool has_grh = false; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct hfi1_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -257,37 +272,42 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) return; + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &rhdr->u.oth; + } else if (lnh == HFI1_LRH_GRH) { + has_grh = true; + packet->ohdr = &rhdr->u.l.oth; + packet->grh = &rhdr->u.l.grh; + } else { + goto drop; + } + if (packet->rhf & RHF_TID_ERR) { /* For TIDERR and RC QPs preemptively schedule a NAK */ - struct ib_other_headers *ohdr = NULL; u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ - u16 lid = ib_get_dlid(rhdr); + u32 dlid = ib_get_dlid(rhdr); u32 qp_num; - u32 rcv_flags = 0; + u32 mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE); /* Sanity check packet */ if (tlen < 24) goto drop; /* Check for GRH */ - if (lnh == HFI1_LRH_BTH) { - ohdr = &rhdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { + if (has_grh) { u32 vtf; + struct ib_grh *grh = packet->grh; - ohdr = &rhdr->u.l.oth; - if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + if (grh->next_hdr != IB_GRH_NEXT_HDR) goto drop; - vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow); + vtf = be32_to_cpu(grh->version_tclass_flow); if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; - rcv_flags |= HFI1_HAS_GRH; - } else { - goto drop; } + /* Get the destination QP number. 
*/ - qp_num = ib_bth_get_qpn(ohdr); - if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { + qp_num = ib_bth_get_qpn(packet->ohdr); + if (dlid < mlid_base) { struct rvt_qp *qp; unsigned long flags; @@ -312,11 +332,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, switch (qp->ibqp.qp_type) { case IB_QPT_RC: - hfi1_rc_hdrerr( - rcd, - rhdr, - rcv_flags, - qp); + hfi1_rc_hdrerr(rcd, packet, qp); break; default: /* For now don't handle any other QP types */ @@ -332,9 +348,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, switch (rte) { case RHF_RTE_ERROR_OP_CODE_ERR: { - u32 opcode; void *ebuf = NULL; - __be32 *bth = NULL; + u8 opcode; if (rhf_use_egr_bfr(packet->rhf)) ebuf = packet->ebuf; @@ -342,16 +357,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (!ebuf) goto drop; /* this should never happen */ - if (lnh == HFI1_LRH_BTH) - bth = (__be32 *)ebuf; - else if (lnh == HFI1_LRH_GRH) - bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh)); - else - goto drop; - - opcode = be32_to_cpu(bth[0]) >> 24; - opcode &= 0xff; - + opcode = ib_bth_get_opcode(packet->ohdr); if (opcode == IB_OPCODE_CNP) { /* * Only in pre-B0 h/w is the CNP_OPCODE handled @@ -365,7 +371,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; - lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; + lqpn = ib_bth_get_qpn(packet->ohdr); rcu_read_lock(); qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn); if (!qp) { @@ -415,7 +421,6 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd, packet->rhf = rhf_to_cpu(packet->rhf_addr); packet->rhqoff = rcd->head; packet->numpkt = 0; - packet->rcv_flags = 0; } void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, @@ -424,15 +429,12 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct ib_header *hdr = pkt->hdr; struct ib_other_headers *ohdr = pkt->ohdr; - struct ib_grh *grh = NULL; + struct ib_grh *grh = pkt->grh; u32 rqpn = 0, bth1; u16 rlid, dlid = ib_get_dlid(hdr); u8 sc, svc_type; bool is_mcast = false; - if (pkt->rcv_flags & HFI1_HAS_GRH) - grh = &hdr->u.l.grh; - switch (qp->ibqp.qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: @@ -591,9 +593,10 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (lnh == HFI1_LRH_BTH) { packet->ohdr = &hdr->u.oth; + packet->grh = NULL; } else if (lnh == HFI1_LRH_GRH) { packet->ohdr = &hdr->u.l.oth; - packet->rcv_flags |= HFI1_HAS_GRH; + packet->grh = &hdr->u.l.grh; } else { goto next; /* just in case */ } @@ -698,10 +701,9 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; - packet->hdr = hfi1_get_msgheader(packet->rcd->dd, - packet->rhf_addr); - packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; packet->etype = rhf_rcv_type(packet->rhf); + + packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; /* total length */ packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */ /* retrieve eager buffer details */ @@ -759,7 +761,7 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet) packet->etail, 0, 0); packet->updegr = 0; } - packet->rcv_flags = 0; + packet->grh = NULL; } static inline void finish_packet(struct hfi1_packet *packet) @@ -896,12 +898,15 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, struct hfi1_devdata *dd) { struct work_struct *lsaw = 
&rcd->ppd->linkstate_active_work; - struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, - packet->rhf_addr); u8 etype = rhf_rcv_type(packet->rhf); + u8 sc = SC15_PACKET; - if (etype == RHF_RCV_TYPE_IB && - hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB) { + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); + sc = hfi1_9B_get_sc5(hdr, packet->rhf); + } + if (sc != SC15_PACKET) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { @@ -1321,6 +1326,58 @@ bail: return ret; } +static inline void hfi1_setup_ib_header(struct hfi1_packet *packet) +{ + packet->hdr = (struct hfi1_ib_message_header *) + hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); + packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; +} + +static int hfi1_setup_9B_packet(struct hfi1_packet *packet) +{ + struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); + struct ib_header *hdr; + u8 lnh; + + hfi1_setup_ib_header(packet); + hdr = packet->hdr; + + lnh = ib_get_lnh(hdr); + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &hdr->u.oth; + packet->grh = NULL; + } else if (lnh == HFI1_LRH_GRH) { + u32 vtf; + + packet->ohdr = &hdr->u.l.oth; + packet->grh = &hdr->u.l.grh; + if (packet->grh->next_hdr != IB_GRH_NEXT_HDR) + goto drop; + vtf = be32_to_cpu(packet->grh->version_tclass_flow); + if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) + goto drop; + } else { + goto drop; + } + + /* Query commonly used fields from packet header */ + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->slid = ib_get_slid(hdr); + packet->dlid = ib_get_dlid(hdr); + packet->sl = ib_get_sl(hdr); + packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf); + packet->pad = ib_bth_get_pad(packet->ohdr); + packet->extra_byte = 0; + packet->fecn = ib_bth_get_fecn(packet->ohdr); + packet->becn = ib_bth_get_becn(packet->ohdr); + + return 0; +drop: + ibp->rvp.n_pkt_drops++; + return -EINVAL; +} + void handle_eflags(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; @@ -1351,6 +1408,9 @@ int process_receive_ib(struct hfi1_packet *packet) if (unlikely(hfi1_dbg_fault_packet(packet))) return RHF_RCV_CONTINUE; + if (hfi1_setup_9B_packet(packet)) + return RHF_RCV_CONTINUE; + trace_hfi1_rcvhdr(packet->rcd->ppd->dd, packet->rcd->ctxt, rhf_err_flags(packet->rhf), @@ -1422,6 +1482,7 @@ int process_receive_error(struct hfi1_packet *packet) rhf_rcv_type_err(packet->rhf) == 3)) return RHF_RCV_CONTINUE; + hfi1_setup_ib_header(packet); handle_eflags(packet); if (unlikely(rhf_err_flags(packet->rhf))) @@ -1435,6 +1496,8 @@ int kdeth_process_expected(struct hfi1_packet *packet) { if (unlikely(hfi1_dbg_fault_packet(packet))) return RHF_RCV_CONTINUE; + + hfi1_setup_ib_header(packet); if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); @@ -1445,6 +1508,7 @@ int kdeth_process_expected(struct hfi1_packet *packet) int kdeth_process_eager(struct hfi1_packet *packet) { + hfi1_setup_ib_header(packet); if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); if (unlikely(hfi1_dbg_fault_packet(packet))) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 3b76631cbcbd..9c6c73448461 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -356,17 +356,26 @@ struct hfi1_packet { __le32 *rhf_addr; struct rvt_qp *qp; struct ib_other_headers *ohdr; + struct ib_grh *grh; u64 rhf; u32 maxcnt; u32 rhqoff; + u32 dlid; + u32 slid; u16 tlen; s16 etail; u8 hlen; u8 numpkt; u8 rsize; u8 updegr; - u8 rcv_flags; 
u8 etype; + u8 extra_byte; + u8 pad; + u8 sc; + u8 sl; + u8 opcode; + bool becn; + bool fecn; }; struct rvt_sge_state; @@ -2086,4 +2095,14 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) + +/* + * hfi1_check_mcast- Check if the given lid is + * in the IB multicast range. + */ +static inline bool hfi1_check_mcast(u16 lid) +{ + return ((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (lid != be16_to_cpu(IB_LID_PERMISSIVE))); +} #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index b443c1e01543..baa67bf0772b 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1916,17 +1916,16 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void hfi1_rc_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct ib_other_headers *ohdr = packet->ohdr; - u32 bth0, opcode; + u32 bth0; + u32 opcode = packet->opcode; u32 hdrsize = packet->hlen; u32 psn; - u32 pad; + u32 pad = packet->pad; struct ib_wc wc; u32 pmtu = qp->pmtu; int diff; @@ -1938,14 +1937,13 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) u32 rkey; lockdep_assert_held(&qp->r_lock); + bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; is_fecn = process_ecn(qp, packet, false); - psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); /* * Process responses (ACKs) before anything else. Note that the @@ -2075,8 +2073,6 @@ no_immediate_data: wc.wc_flags = 0; wc.ex.imm_data = 0; send_last: - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -2369,28 +2365,19 @@ send_ack: void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct ib_header *hdr, - u32 rcv_flags, + struct hfi1_packet *packet, struct rvt_qp *qp) { - int has_grh = rcv_flags & HFI1_HAS_GRH; - struct ib_other_headers *ohdr; struct hfi1_ibport *ibp = rcd_to_iport(rcd); int diff; u32 opcode; - u32 psn, bth0; - - /* Check for GRH */ - ohdr = &hdr->u.oth; - if (has_grh) - ohdr = &hdr->u.l.oth; + u32 psn; - bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; - psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); + psn = ib_bth_get_psn(packet->ohdr); + opcode = ib_bth_get_opcode(packet->ohdr); /* Only deal with RDMA Writes for now */ if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 3a17daba28a9..9cc9c7be9dd4 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -214,100 +214,95 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the hfi1_migrate_qp() call. 
*/ -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, - int has_grh, struct rvt_qp *qp, u32 bth0) +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) { __be64 guid; unsigned long flags; + struct rvt_qp *qp = packet->qp; u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; - - if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { - if (!has_grh) { + u32 dlid = packet->dlid; + u32 slid = packet->slid; + u32 sl = packet->sl; + int migrated; + u32 bth0, bth1; + + bth0 = be32_to_cpu(packet->ohdr->bth[0]); + bth1 = be32_to_cpu(packet->ohdr->bth[1]); + migrated = bth0 & IB_BTH_MIG_REQ; + + if (qp->s_mig_state == IB_MIG_ARMED && migrated) { + if (!packet->grh) { if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) & IB_AH_GRH) - goto err; + return 1; } else { const struct ib_global_route *grh; if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) & IB_AH_GRH)) - goto err; + return 1; grh = rdma_ah_read_grh(&qp->alt_ah_attr); guid = get_sguid(ibp, grh->sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, + if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix, guid)) - goto err; + return 1; if (!gid_ok( - &hdr->u.l.grh.sgid, + &packet->grh->sgid, grh->dgid.global.subnet_prefix, grh->dgid.global.interface_id)) - goto err; + return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, - ib_get_slid(hdr)))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - (u16)bth0, - ib_get_sl(hdr), - 0, qp->ibqp.qp_num, - ib_get_slid(hdr), - ib_get_dlid(hdr)); - goto err; + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + sc5, slid))) { + hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); + return 1; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ - if (ib_get_slid(hdr) != - rdma_ah_get_dlid(&qp->alt_ah_attr) || + if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) || ppd_from_ibp(ibp)->port != rdma_ah_get_port_num(&qp->alt_ah_attr)) - goto err; + return 1; spin_lock_irqsave(&qp->s_lock, flags); hfi1_migrate_qp(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } else { - if (!has_grh) { + if (!packet->grh) { if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - goto err; + return 1; } else { const struct ib_global_route *grh; if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) - goto err; + return 1; grh = rdma_ah_read_grh(&qp->remote_ah_attr); guid = get_sguid(ibp, grh->sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, + if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix, guid)) - goto err; + return 1; if (!gid_ok( - &hdr->u.l.grh.sgid, + &packet->grh->sgid, grh->dgid.global.subnet_prefix, grh->dgid.global.interface_id)) - goto err; + return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, - ib_get_slid(hdr)))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - (u16)bth0, - ib_get_sl(hdr), - 0, qp->ibqp.qp_num, - ib_get_slid(hdr), - ib_get_dlid(hdr)); - goto err; + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + sc5, slid))) { + hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); + return 1; } /* Validate the SLID. See Ch. 
9.6.1.5 */ - if (ib_get_slid(hdr) != - rdma_ah_get_dlid(&qp->remote_ah_attr) || + if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) || ppd_from_ibp(ibp)->port != qp->port_num) - goto err; - if (qp->s_mig_state == IB_MIG_REARM && - !(bth0 & IB_BTH_MIG_REQ)) + return 1; + if (qp->s_mig_state == IB_MIG_REARM && !migrated) qp->s_mig_state = IB_MIG_ARMED; } return 0; - -err: - return 1; } /** diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 2a5650f8aee0..76c2451a53d7 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -297,31 +297,25 @@ bail_no_tx: void hfi1_uc_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); - struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct ib_other_headers *ohdr = packet->ohdr; - u32 bth0, opcode; + u32 opcode = packet->opcode; u32 hdrsize = packet->hlen; u32 psn; - u32 pad; + u32 pad = packet->pad; struct ib_wc wc; u32 pmtu = qp->pmtu; struct ib_reth *reth; - int has_grh = rcv_flags & HFI1_HAS_GRH; int ret; - bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; process_ecn(qp, packet, true); psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); - /* Compare the PSN verses the expected PSN. */ if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) { /* @@ -432,8 +426,6 @@ no_immediate_data: wc.ex.imm_data = 0; wc.wc_flags = 0; send_last: - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -527,8 +519,6 @@ rdma_first: rdma_last_imm: wc.wc_flags = IB_WC_WITH_IMM; - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. 
*/ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 49fe179ad3ae..c995aa58c36a 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -668,36 +668,31 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, void hfi1_ud_rcv(struct hfi1_packet *packet) { struct ib_other_headers *ohdr = packet->ohdr; - int opcode; u32 hdrsize = packet->hlen; struct ib_wc wc; u32 qkey; u32 src_qp; - u16 dlid, pkey; + u16 pkey; int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; - bool has_grh = rcv_flags & HFI1_HAS_GRH; u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); u32 bth1; - u8 sl_from_sc, sl; - u16 slid; - u8 extra_bytes; + u8 sl_from_sc; + u8 extra_bytes = packet->pad; + u8 opcode = packet->opcode; + u8 sl = packet->sl; + u32 dlid = packet->dlid; + u32 slid = packet->slid; + bth1 = be32_to_cpu(ohdr->bth[1]); qkey = ib_get_qkey(ohdr); src_qp = ib_get_sqpn(ohdr); - dlid = ib_get_dlid(hdr); - bth1 = be32_to_cpu(ohdr->bth[1]); - slid = ib_get_slid(hdr); pkey = ib_bth_get_pkey(ohdr); - opcode = ib_bth_get_opcode(ohdr); - sl = ib_get_sl(hdr); - extra_bytes = ib_bth_get_pad(ohdr); extra_bytes += (SIZE_OF_CRC << 2); sl_from_sc = ibp->sc_to_sl[sc5]; @@ -811,7 +806,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) qp->r_flags |= RVT_R_REUSE_SGE; goto drop; } - if (has_grh) { + if (packet->grh) { hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh, sizeof(struct ib_grh), true, false); wc.wc_flags |= IB_WC_GRH; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 5f4be35f31b6..af54d3f4696a 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -508,13 +508,14 @@ again: /* * Make sure the QP is ready and able to accept the given opcode. */ -static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) +static inline opcode_handler qp_ok(struct hfi1_packet *packet) { if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) return NULL; - if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) || - (opcode == IB_OPCODE_CNP)) - return opcode_handler_tbl[opcode]; + if (((packet->opcode & RVT_OPCODE_QP_MASK) == + packet->qp->allowed_ops) || + (packet->opcode == IB_OPCODE_CNP)) + return opcode_handler_tbl[packet->opcode]; return NULL; } @@ -548,68 +549,34 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) return pbc; } -/** - * hfi1_ib_rcv - process an incoming packet - * @packet: data packet information - * - * This is called to process an incoming packet at interrupt level. - * - * Tlen is the length of the header + data + CRC in bytes. 
- */ -void hfi1_ib_rcv(struct hfi1_packet *packet) +static inline void hfi1_handle_packet(struct hfi1_packet *packet, + bool is_mcast) { + u32 qp_num; struct hfi1_ctxtdata *rcd = packet->rcd; - struct ib_header *hdr = packet->hdr; - u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; opcode_handler packet_handler; unsigned long flags; - u32 qp_num; - int lnh; - u8 opcode; - u16 lid; - - /* Check for GRH */ - lnh = ib_get_lnh(hdr); - if (lnh == HFI1_LRH_BTH) { - packet->ohdr = &hdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { - u32 vtf; - - packet->ohdr = &hdr->u.l.oth; - if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) - goto drop; - vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); - if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) - goto drop; - packet->rcv_flags |= HFI1_HAS_GRH; - } else { - goto drop; - } - trace_input_ibhdr(rcd->dd, packet, !!(packet->rhf & RHF_DC_INFO_SMASK)); - opcode = ib_bth_get_opcode(packet->ohdr); - inc_opstats(tlen, &rcd->opstats->stats[opcode]); + inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]); - /* Get the destination QP number. */ - qp_num = ib_bth_get_qpn(packet->ohdr); - lid = ib_get_dlid(hdr); - if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { + if (unlikely(is_mcast)) { struct rvt_mcast *mcast; struct rvt_mcast_qp *p; - if (lnh != HFI1_LRH_GRH) + if (!packet->grh) goto drop; - mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid); + mcast = rvt_mcast_find(&ibp->rvp, + &packet->grh->dgid, + packet->dlid); if (!mcast) goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; spin_lock_irqsave(&packet->qp->r_lock, flags); - packet_handler = qp_ok(opcode, packet); + packet_handler = qp_ok(packet); if (likely(packet_handler)) packet_handler(packet); else @@ -623,19 +590,21 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) if (atomic_dec_return(&mcast->refcount) <= 1) wake_up(&mcast->wait); } else { + /* Get the destination QP number. */ + qp_num = ib_bth_get_qpn(packet->ohdr); rcu_read_lock(); packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); if (!packet->qp) { rcu_read_unlock(); goto drop; } - if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode, + if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode, true))) { rcu_read_unlock(); goto drop; } spin_lock_irqsave(&packet->qp->r_lock, flags); - packet_handler = qp_ok(opcode, packet); + packet_handler = qp_ok(packet); if (likely(packet_handler)) packet_handler(packet); else @@ -644,11 +613,29 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) rcu_read_unlock(); } return; - drop: ibp->rvp.n_pkt_drops++; } +/** + * hfi1_ib_rcv - process an incoming packet + * @packet: data packet information + * + * This is called to process an incoming packet at interrupt level. + */ +void hfi1_ib_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; + bool is_mcast = false; + + if (unlikely(hfi1_check_mcast(packet->dlid))) + is_mcast = true; + + trace_input_ibhdr(rcd->dd, packet, + !!(packet->rhf & RHF_DC_INFO_SMASK)); + hfi1_handle_packet(packet, is_mcast); +} + /* * This is called from a timer to check for QPs * which need kernel memory in order to send a packet. 
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index cd635d0c1d3b..17b38cd5f654 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -307,8 +307,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet); void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct ib_header *hdr, - u32 rcv_flags, + struct hfi1_packet *packet, struct rvt_qp *qp); u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); @@ -346,8 +345,7 @@ static inline u8 get_opcode(struct ib_header *h) return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; } -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, - int has_grh, struct rvt_qp *qp, u32 bth0); +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, const struct ib_global_route *grh, u32 hwords, u32 nwords); -- cgit v1.2.3-59-g8ed1b From 14fe13fcd3afb96b06809f280b586be1c998332c Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 12 May 2017 09:20:31 -0700 Subject: IB/rdmavt: Compress adjacent SGEs in rvt_lkey_ok() SGEs that are contiguous needlessly consume driver-dependent TX resources. The lkey validation logic is enhanced to compress the SGE that ends up in the send wqe when consecutive addresses are detected. The lkey validation API used to return 1 (success) or 0 (fail). The return value is now an -errno, 0 (compressed), or 1 (uncompressed). An additional argument is added to pass the last SGE for the compression. Loopback callers always pass a NULL to last_sge since the optimization is of little benefit in that situation. Reviewed-by: Dennis Dalessandro Signed-off-by: Brian Welty Signed-off-by: Venkata Sandeep Dhanalakota Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/ruc.c | 2 +- drivers/infiniband/hw/qib/qib_ruc.c | 2 +- drivers/infiniband/sw/rdmavt/mr.c | 51 +++++++++++++++++++++++---- drivers/infiniband/sw/rdmavt/qp.c | 23 ++++++------ drivers/infiniband/sw/rdmavt/trace_mr.h | 62 +++++++++++++++++++++++++++++++++ drivers/infiniband/sw/rdmavt/trace_tx.h | 11 +++--- include/rdma/rdma_vt.h | 3 +- 7 files changed, 130 insertions(+), 24 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 9cc9c7be9dd4..476fe5da2992 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -75,7 +75,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) continue; /* Check LKEY */ if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index bd09de7c6e56..88d84cbf7e5a 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -59,7 +59,7 @@ static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) continue; /* Check LKEY */ if (!rvt_lkey_ok(rkt, pd, j ?
&ss->sg_list[j - 1] : &ss->sge, - &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index aa5f9ea318e4..ea95672d9675 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -777,24 +777,55 @@ out: return ret; } +/** + * rvt_sge_adjacent - is isge compressible + * @isge: outgoing internal SGE + * @last_sge: last outgoing SGE written + * @sge: SGE to check + * + * If adjacent will update last_sge to add length. + * + * Return: true if isge is adjacent to last sge + */ +static inline bool rvt_sge_adjacent(struct rvt_sge *isge, + struct rvt_sge *last_sge, + struct ib_sge *sge) +{ + if (last_sge && sge->lkey == last_sge->mr->lkey && + ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) { + if (sge->lkey) { + if (unlikely((sge->addr - last_sge->mr->user_base + + sge->length > last_sge->mr->length))) + return false; /* overrun, caller will catch */ + } else { + last_sge->length += sge->length; + } + last_sge->sge_length += sge->length; + trace_rvt_sge_adjacent(last_sge, sge); + return true; + } + return false; +} + /** * rvt_lkey_ok - check IB SGE for validity and initialize * @rkt: table containing lkey to check SGE against * @pd: protection domain * @isge: outgoing internal SGE + * @last_sge: last outgoing SGE written * @sge: SGE to check * @acc: access flags * * Check the IB SGE for validity and initialize our internal version * of it. * - * Return: 1 if valid and successful, otherwise returns 0. - * - * increments the reference count upon success + * Increments the reference count when a new sge is stored. * + * Return: 0 if compressed, 1 if added , otherwise returns -errno. */ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, - struct rvt_sge *isge, struct ib_sge *sge, int acc) + struct rvt_sge *isge, struct rvt_sge *last_sge, + struct ib_sge *sge, int acc) { struct rvt_mregion *mr; unsigned n, m; @@ -804,12 +835,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, * We use LKEY == zero for kernel virtual addresses * (see rvt_get_dma_mr() and dma_virt_ops). 
*/ - rcu_read_lock(); if (sge->lkey == 0) { struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); if (pd->user) - goto bail; + return -EINVAL; + if (rvt_sge_adjacent(isge, last_sge, sge)) + return 0; + rcu_read_lock(); mr = rcu_dereference(dev->dma_mr); if (!mr) goto bail; @@ -824,6 +857,9 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = 0; goto ok; } + if (rvt_sge_adjacent(isge, last_sge, sge)) + return 0; + rcu_read_lock(); mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); if (!mr) goto bail; @@ -874,12 +910,13 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->m = m; isge->n = n; ok: + trace_rvt_sge_new(isge, sge); return 1; bail_unref: rvt_put_mr(mr); bail: rcu_read_unlock(); - return 0; + return -EINVAL; } EXPORT_SYMBOL(rvt_lkey_ok); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 727e81cc2c8f..a3dd1e536860 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1646,7 +1646,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct rvt_pd *pd; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); u8 log_pmtu; - int ret; + int ret, incr; size_t cplen; bool reserved_op; int local_ops_delayed = 0; @@ -1719,22 +1719,23 @@ static int rvt_post_one_wr(struct rvt_qp *qp, wqe->length = 0; j = 0; if (wr->num_sge) { + struct rvt_sge *last_sge = NULL; + acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0; for (i = 0; i < wr->num_sge; i++) { u32 length = wr->sg_list[i].length; - int ok; if (length == 0) continue; - ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], - &wr->sg_list[i], acc); - if (!ok) { - ret = -EINVAL; - goto bail_inval_free; - } + incr = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge, + &wr->sg_list[i], acc); + if (unlikely(incr < 0)) + goto bail_lkey_error; wqe->length += length; - j++; + if (incr) + last_sge = &wqe->sg_list[j]; + j += incr; } wqe->wr.num_sge = j; } @@ -1781,12 +1782,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp, wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED; qp->s_avail--; } - trace_rvt_post_one_wr(qp, wqe); + trace_rvt_post_one_wr(qp, wqe, wr->num_sge); smp_wmb(); /* see request builders */ qp->s_head = next; return 0; +bail_lkey_error: + ret = incr; bail_inval_free: /* release mr holds */ while (j) { diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h index 3318a6c36373..976e482930a3 100644 --- a/drivers/infiniband/sw/rdmavt/trace_mr.h +++ b/drivers/infiniband/sw/rdmavt/trace_mr.h @@ -103,6 +103,68 @@ DEFINE_EVENT( TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), TP_ARGS(mr, m, n, v, len)); +DECLARE_EVENT_CLASS( + rvt_sge_template, + TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge), + TP_ARGS(sge, isge), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(sge->mr->pd->device)) + __field(struct rvt_mregion *, mr) + __field(struct rvt_sge *, sge) + __field(struct ib_sge *, isge) + __field(void *, vaddr) + __field(u64, ivaddr) + __field(u32, lkey) + __field(u32, sge_length) + __field(u32, length) + __field(u32, ilength) + __field(int, user) + __field(u16, m) + __field(u16, n) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(sge->mr->pd->device)); + __entry->mr = sge->mr; + __entry->sge = sge; + __entry->isge = isge; + __entry->vaddr = sge->vaddr; + __entry->ivaddr = isge->addr; + __entry->lkey = sge->mr->lkey; + __entry->sge_length = sge->sge_length; + __entry->length = sge->length; + __entry->ilength = isge->length; + __entry->m = sge->m; + __entry->n = 
sge->n; + __entry->user = ibpd_to_rvtpd(sge->mr->pd)->user; + ), + TP_printk( + "[%s] mr %p sge %p isge %p vaddr %p ivaddr %llx lkey %x sge_length %u length %u ilength %u m %u n %u user %u", + __get_str(dev), + __entry->mr, + __entry->sge, + __entry->isge, + __entry->vaddr, + __entry->ivaddr, + __entry->lkey, + __entry->sge_length, + __entry->length, + __entry->ilength, + __entry->m, + __entry->n, + __entry->user + ) +); + +DEFINE_EVENT( + rvt_sge_template, rvt_sge_adjacent, + TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge), + TP_ARGS(sge, isge)); + +DEFINE_EVENT( + rvt_sge_template, rvt_sge_new, + TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge), + TP_ARGS(sge, isge)); + #endif /* __RVT_TRACE_MR_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h index a613a2223751..0ef25fc49f25 100644 --- a/drivers/infiniband/sw/rdmavt/trace_tx.h +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -84,12 +84,12 @@ __print_symbolic(opcode, \ wr_opcode_name(RESERVED10)) #define POS_PRN \ -"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u" +"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u wr_num_sge %u" TRACE_EVENT( rvt_post_one_wr, - TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), - TP_ARGS(qp, wqe), + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, int wr_num_sge), + TP_ARGS(qp, wqe, wr_num_sge), TP_STRUCT__entry( RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) __field(u64, wr_id) @@ -108,6 +108,7 @@ TRACE_EVENT( __field(int, send_flags) __field(pid_t, pid) __field(int, num_sge) + __field(int, wr_num_sge) ), TP_fast_assign( RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) @@ -127,6 +128,7 @@ TRACE_EVENT( __entry->ssn = wqe->ssn; __entry->send_flags = wqe->wr.send_flags; __entry->num_sge = wqe->wr.num_sge; + __entry->wr_num_sge = wr_num_sge; ), TP_printk( POS_PRN, @@ -146,7 +148,8 @@ TRACE_EVENT( __entry->head, __entry->last, __entry->pid, - __entry->num_sge + __entry->num_sge, + __entry->wr_num_sge ) ); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4878aaf7bdff..d0b9f91e5f4d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -515,7 +515,8 @@ int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, - struct rvt_sge *isge, struct ib_sge *sge, int acc); + struct rvt_sge *isge, struct rvt_sge *last_sge, + struct ib_sge *sge, int acc); struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, u16 lid); -- cgit v1.2.3-59-g8ed1b From 7be85676f1d13c77a7e0c72e04903bfd39580d4f Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:12 -0700 Subject: IB/hfi1: Don't remove RB entry when not needed. An RB tree is used for the SDMA pinning cache. Cache entries are extracted from the tree and reinserted when their address range changes. However, if the address range for an entry doesn't change, deleting the entry from the RB tree is not necessary. This affects performance since the tree needs to be rebalanced for each insertion, and this happens in the hot path. Optimize the RB search by not removing entries when removal is not needed.
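For illustration, the change reduces to the following sketch; a hypothetical singly linked cache stands in for the driver's interval RB tree, and none of the names below are hfi1's own:

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical cache entry; the driver's real node is sdma_mmu_node. */
struct cache_node {
	struct cache_node *next;
	unsigned long addr;
	unsigned long len;
	int refcount;
};

/*
 * Look up [addr, addr + len). On an exact match, leave the entry where
 * it is; unlink it (for reinsertion with the new range) only when the
 * range actually changed. Skipping the unlink on exact hits is what
 * avoids the tree rebalance in the hot path.
 */
static bool cache_remove_unless_exact(struct cache_node **head,
				      unsigned long addr, unsigned long len,
				      struct cache_node **out)
{
	struct cache_node **pp;
	struct cache_node *node = NULL;
	bool removed = false;

	for (pp = head; *pp; pp = &(*pp)->next) {
		if (addr >= (*pp)->addr &&
		    addr + len <= (*pp)->addr + (*pp)->len) {
			node = *pp;
			if (node->addr != addr || node->len != len) {
				*pp = node->next;	/* range changed: unlink */
				removed = true;
			}
			break;
		}
	}
	*out = node;
	return removed;
}

As in the diff below, the node is handed back either way; on an exact hit the caller simply takes a reference and reuses the already pinned pages.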
Reviewed-by: Mike Marciniszyn Reviewed-by: Mitko Haralanov Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mmu_rb.c | 14 ++++++++++---- drivers/infiniband/hw/hfi1/mmu_rb.h | 5 +++-- drivers/infiniband/hw/hfi1/user_sdma.c | 23 ++++++++++++++++------- 3 files changed, 29 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index ccbf52c8ff6f..d41fd87a39f2 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -217,21 +217,27 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, return node; } -struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, - unsigned long addr, unsigned long len) +bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len, + struct mmu_rb_node **rb_node) { struct mmu_rb_node *node; unsigned long flags; + bool ret = false; spin_lock_irqsave(&handler->lock, flags); node = __mmu_rb_search(handler, addr, len); if (node) { + if (node->addr == addr && node->len == len) + goto unlock; __mmu_int_rb_remove(node, &handler->root); list_del(&node->list); /* remove from LRU list */ + ret = true; } +unlock: spin_unlock_irqrestore(&handler->lock, flags); - - return node; + *rb_node = node; + return ret; } void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index 754f6ebf13fb..f04cec1e99d1 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h @@ -81,7 +81,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg); void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, struct mmu_rb_node *mnode); -struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, - unsigned long addr, unsigned long len); +bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len, + struct mmu_rb_node **rb_node); #endif /* _HFI1_MMU_RB_H */ diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 16fd519216dc..79450cf2a3d5 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1165,14 +1165,23 @@ static int pin_vector_pages(struct user_sdma_request *req, struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; - - rb_node = hfi1_mmu_rb_extract(pq->handler, - (unsigned long)iovec->iov.iov_base, - iovec->iov.iov_len); - if (rb_node) + bool extracted; + + extracted = + hfi1_mmu_rb_remove_unless_exact(pq->handler, + (unsigned long) + iovec->iov.iov_base, + iovec->iov.iov_len, &rb_node); + if (rb_node) { node = container_of(rb_node, struct sdma_mmu_node, rb); - else - rb_node = NULL; + if (!extracted) { + atomic_inc(&node->refcount); + iovec->pages = node->pages; + iovec->npages = node->npages; + iovec->node = node; + return 0; + } + } if (!node) { node = kzalloc(sizeof(*node), GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From e3304b7cc4f14d365f46d6847a35563ae8b017f7 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:18 -0700 Subject: IB/hfi1: Optimize cachelines for user SDMA request structure The current user SDMA request structure layout has holes. 
The cachelines can be reduced to improve cacheline trading. Separate fields in the following categories: mostly read, writable and shared with interrupt. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 106 ++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 48 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 79450cf2a3d5..92517cebb4c7 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -117,6 +117,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define AHG_KDETH_INTR_SHIFT 12 #define AHG_KDETH_SH_SHIFT 13 +#define AHG_KDETH_ARRAY_SIZE 9 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -204,25 +205,42 @@ struct evict_data { }; struct user_sdma_request { - struct sdma_req_info info; - struct hfi1_user_sdma_pkt_q *pq; - struct hfi1_user_sdma_comp_q *cq; /* This is the original header from user space */ struct hfi1_pkt_header hdr; + + /* Read mostly fields */ + struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp; + struct hfi1_user_sdma_comp_q *cq; /* * Pointer to the SDMA engine for this request. * Since different request could be on different VLs, * each request will need it's own engine pointer. */ struct sdma_engine *sde; - s8 ahg_idx; - u32 ahg[9]; + struct sdma_req_info info; + /* TID array values copied from the tid_iov vector */ + u32 *tids; + /* total length of the data in the request */ + u32 data_len; + /* number of elements copied to the tids array */ + u16 n_tids; /* - * KDETH.Offset (Eager) field - * We need to remember the initial value so the headers - * can be updated properly. + * We copy the iovs for this request (based on + * info.iovcnt). These are only the data vectors */ - u32 koffset; + u8 data_iovs; + s8 ahg_idx; + + /* Writeable fields shared with interrupt */ + u64 seqcomp ____cacheline_aligned_in_smp; + u64 seqsubmitted; + unsigned long flags; + /* status of the last txreq completed */ + int status; + + /* Send side fields */ + struct list_head txps ____cacheline_aligned_in_smp; + u64 seqnum; /* * KDETH.OFFSET (TID) field * The offset can cover multiple packets, depending on the @@ -230,29 +248,19 @@ struct user_sdma_request { */ u32 tidoffset; /* - * We copy the iovs for this request (based on - * info.iovcnt). These are only the data vectors + * KDETH.Offset (Eager) field + * We need to remember the initial value so the headers + * can be updated properly. 
*/ - unsigned data_iovs; - /* total length of the data in the request */ - u32 data_len; + u32 koffset; + u32 sent; + /* TID index copied from the tid_iov vector */ + u16 tididx; /* progress index moving along the iovs array */ - unsigned iov_idx; + u8 iov_idx; + struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; - /* number of elements copied to the tids array */ - u16 n_tids; - /* TID array values copied from the tid_iov vector */ - u32 *tids; - u16 tididx; - u32 sent; - u64 seqnum; - u64 seqcomp; - u64 seqsubmitted; - struct list_head txps; - unsigned long flags; - /* status of the last txreq completed */ - int status; -}; +} ____cacheline_aligned_in_smp; /* * A single txreq could span up to 3 physical pages when the MTU @@ -1034,11 +1042,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) datalen); if (changes < 0) goto free_tx; - sdma_txinit_ahg(&tx->txreq, - SDMA_TXREQ_F_USE_AHG, - datalen, req->ahg_idx, changes, - req->ahg, sizeof(req->hdr), - user_sdma_txreq_cb); } } else { ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) + @@ -1442,21 +1445,22 @@ done: } static int set_txreq_header_ahg(struct user_sdma_request *req, - struct user_sdma_txreq *tx, u32 len) + struct user_sdma_txreq *tx, u32 datalen) { + u32 ahg[AHG_KDETH_ARRAY_SIZE]; int diff = 0; u8 omfactor; /* KDETH.OM */ struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &req->hdr; u16 pbclen = le16_to_cpu(hdr->pbc[0]); - u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len)); + u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); if (PBC2LRH(pbclen) != lrhlen) { /* PBC.PbcLengthDWs */ - AHG_HEADER_SET(req->ahg, diff, 0, 0, 12, + AHG_HEADER_SET(ahg, diff, 0, 0, 12, cpu_to_le16(LRH2PBC(lrhlen))); /* LRH.PktLen (we need the full 16 bits due to byte swap) */ - AHG_HEADER_SET(req->ahg, diff, 3, 0, 16, + AHG_HEADER_SET(ahg, diff, 3, 0, 16, cpu_to_be16(lrhlen >> 2)); } @@ -1468,13 +1472,12 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff); if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) val32 |= 1UL << 31; - AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); - AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); + AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); + AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); /* KDETH.Offset */ - AHG_HEADER_SET(req->ahg, diff, 15, 0, 16, + AHG_HEADER_SET(ahg, diff, 15, 0, 16, cpu_to_le16(req->koffset & 0xffff)); - AHG_HEADER_SET(req->ahg, diff, 15, 16, 16, - cpu_to_le16(req->koffset >> 16)); + AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16)); if (req_opcode(req->info.ctrl) == EXPECTED) { __le16 val; @@ -1492,9 +1495,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, * we have to check again. */ if (++req->tididx > req->n_tids - 1 || - !req->tids[req->tididx]) { + !req->tids[req->tididx]) return -EINVAL; - } tidval = req->tids[req->tididx]; } omfactor = ((EXP_TID_GET(tidval, LEN) * @@ -1502,7 +1504,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, KDETH_OM_MAX_SIZE) ? 
KDETH_OM_LARGE_SHIFT : KDETH_OM_SMALL_SHIFT; /* KDETH.OM and KDETH.OFFSET (TID) */ - AHG_HEADER_SET(req->ahg, diff, 7, 0, 16, + AHG_HEADER_SET(ahg, diff, 7, 0, 16, ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 | ((req->tidoffset >> omfactor) & 0x7fff))); @@ -1522,12 +1524,20 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, AHG_KDETH_INTR_SHIFT)); } - AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); + AHG_HEADER_SET(ahg, diff, 7, 16, 14, val); } + if (diff < 0) + return diff; trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx, req->sde->this_idx, - req->ahg_idx, req->ahg, diff, tidval); + req->ahg_idx, ahg, diff, tidval); + sdma_txinit_ahg(&tx->txreq, + SDMA_TXREQ_F_USE_AHG, + datalen, req->ahg_idx, diff, + ahg, sizeof(req->hdr), + user_sdma_txreq_cb); + return diff; } -- cgit v1.2.3-59-g8ed1b From 721c462123b4b53745c1ccd99619b16e8f4e091b Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 26 May 2017 05:35:25 -0700 Subject: IB/hfi1: Name function prototype parameters for affinity module To improve the readability of function prototypes, give the parameters names in the affinity module. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index e78c7aa094e0..2a1e374169c0 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -75,24 +75,26 @@ struct hfi1_msix_entry; /* Initialize non-HT cpu cores mask */ void init_real_cpu_mask(void); /* Initialize driver affinity data */ -int hfi1_dev_affinity_init(struct hfi1_devdata *); +int hfi1_dev_affinity_init(struct hfi1_devdata *dd); /* * Set IRQ affinity to a CPU. The function will determine the * CPU and set the affinity to it. */ -int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); +int hfi1_get_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix); /* * Remove the IRQ's CPU affinity. This function also updates * any internal CPU tracking data */ -void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); +void hfi1_put_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix); /* * Determine a CPU affinity for a user process, if the process does not * have an affinity set yet. */ -int hfi1_get_proc_affinity(int); +int hfi1_get_proc_affinity(int node); /* Release a CPU used by a user process. */ -void hfi1_put_proc_affinity(int); +void hfi1_put_proc_affinity(int cpu); struct hfi1_affinity_node { int node; -- cgit v1.2.3-59-g8ed1b From bb7dde8784913c06ccd1456bed6dcc5ebd0b3c24 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 26 May 2017 05:35:31 -0700 Subject: IB/hfi1: Replace deprecated pci functions with new API pci_enable_msix_range() and pci_disable_msix() have been deprecated. Updating to the new pci_alloc_irq_vectors() interface. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 18 +++--- drivers/infiniband/hw/hfi1/chip.c | 105 +++++++++++++++++----------------- drivers/infiniband/hw/hfi1/hfi.h | 6 +- drivers/infiniband/hw/hfi1/pcie.c | 80 ++++++-------------------- 4 files changed, 78 insertions(+), 131 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index e2cd2cd3b28a..a97055dd4fbd 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -335,10 +335,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu) sde->cpu = cpu; cpumask_clear(&msix->mask); cpumask_set_cpu(cpu, &msix->mask); - dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n", - msix->msix.vector, irq_type_names[msix->type], + dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n", + msix->irq, irq_type_names[msix->type], sde->this_idx, cpu); - irq_set_affinity_hint(msix->msix.vector, &msix->mask); + irq_set_affinity_hint(msix->irq, &msix->mask); /* * Set the new cpu in the hfi1_affinity_node and clean @@ -387,7 +387,7 @@ static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix) { struct irq_affinity_notify *notify = &msix->notify; - notify->irq = msix->msix.vector; + notify->irq = msix->irq; notify->notify = hfi1_irq_notifier_notify; notify->release = hfi1_irq_notifier_release; @@ -472,10 +472,10 @@ static int get_irq_affinity(struct hfi1_devdata *dd, } cpumask_set_cpu(cpu, &msix->mask); - dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n", - msix->msix.vector, irq_type_names[msix->type], + dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n", + msix->irq, irq_type_names[msix->type], extra, cpu); - irq_set_affinity_hint(msix->msix.vector, &msix->mask); + irq_set_affinity_hint(msix->irq, &msix->mask); if (msix->type == IRQ_SDMA) { sde->cpu = cpu; @@ -533,7 +533,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, } } - irq_set_affinity_hint(msix->msix.vector, NULL); + irq_set_affinity_hint(msix->irq, NULL); cpumask_clear(&msix->mask); mutex_unlock(&node_affinity.lock); } diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 5dbee3c1bd45..9118618d28e5 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12800,30 +12800,24 @@ static void clean_up_interrupts(struct hfi1_devdata *dd) for (i = 0; i < dd->num_msix_entries; i++, me++) { if (!me->arg) /* => no irq, no affinity */ continue; - hfi1_put_irq_affinity(dd, &dd->msix_entries[i]); - free_irq(me->msix.vector, me->arg); + hfi1_put_irq_affinity(dd, me); + free_irq(me->irq, me->arg); } + + /* clean structures */ + kfree(dd->msix_entries); + dd->msix_entries = NULL; + dd->num_msix_entries = 0; } else { /* INTx */ if (dd->requested_intx_irq) { free_irq(dd->pcidev->irq, dd); dd->requested_intx_irq = 0; } - } - - /* turn off interrupts */ - if (dd->num_msix_entries) { - /* MSI-X */ - pci_disable_msix(dd->pcidev); - } else { - /* INTx */ disable_intx(dd->pcidev); } - /* clean structures */ - kfree(dd->msix_entries); - dd->msix_entries = NULL; - dd->num_msix_entries = 0; + pci_free_irq_vectors(dd->pcidev); } /* @@ -12972,13 +12966,21 @@ static 
int request_msix_irqs(struct hfi1_devdata *dd) continue; /* make sure the name is terminated */ me->name[sizeof(me->name) - 1] = 0; + me->irq = pci_irq_vector(dd->pcidev, i); + /* + * On err return me->irq. Don't need to clear this + * because 'arg' has not been set, and cleanup will + * do the right thing. + */ + if (me->irq < 0) + return me->irq; - ret = request_threaded_irq(me->msix.vector, handler, thread, 0, + ret = request_threaded_irq(me->irq, handler, thread, 0, me->name, arg); if (ret) { dd_dev_err(dd, - "unable to allocate %s interrupt, vector %d, index %d, err %d\n", - err_info, me->msix.vector, idx, ret); + "unable to allocate %s interrupt, irq %d, index %d, err %d\n", + err_info, me->irq, idx, ret); return ret; } /* @@ -12989,8 +12991,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) ret = hfi1_get_irq_affinity(dd, me); if (ret) - dd_dev_err(dd, - "unable to pin IRQ %d\n", ret); + dd_dev_err(dd, "unable to pin IRQ %d\n", ret); } return ret; @@ -13009,7 +13010,7 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; - synchronize_irq(me->msix.vector); + synchronize_irq(me->irq); } } @@ -13022,7 +13023,7 @@ void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) return; hfi1_put_irq_affinity(dd, me); - free_irq(me->msix.vector, me->arg); + free_irq(me->irq, me->arg); me->arg = NULL; } @@ -13050,14 +13051,19 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) DRIVER_NAME "_%d kctxt%d", dd->unit, idx); me->name[sizeof(me->name) - 1] = 0; me->type = IRQ_RCVCTXT; - + me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr); + if (me->irq < 0) { + dd_dev_err(dd, "vnic irq vector request (idx %d) fail %d\n", + idx, me->irq); + return; + } remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); - ret = request_threaded_irq(me->msix.vector, receive_context_interrupt, + ret = request_threaded_irq(me->irq, receive_context_interrupt, receive_context_thread, 0, me->name, arg); if (ret) { - dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n", - me->msix.vector, idx, ret); + dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n", + me->irq, idx, ret); return; } /* @@ -13070,7 +13076,7 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) if (ret) { dd_dev_err(dd, "unable to pin IRQ %d\n", ret); - free_irq(me->msix.vector, me->arg); + free_irq(me->irq, me->arg); } } @@ -13093,9 +13099,8 @@ static void reset_interrupts(struct hfi1_devdata *dd) static int set_up_interrupts(struct hfi1_devdata *dd) { - struct hfi1_msix_entry *entries; - u32 total, request; - int i, ret; + u32 total; + int ret, request; int single_interrupt = 0; /* we expect to have all the interrupts */ /* @@ -13107,39 +13112,31 @@ static int set_up_interrupts(struct hfi1_devdata *dd) */ total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; - entries = kcalloc(total, sizeof(*entries), GFP_KERNEL); - if (!entries) { - ret = -ENOMEM; - goto fail; - } - /* 1-1 MSI-X entry assignment */ - for (i = 0; i < total; i++) - entries[i].msix.entry = i; - /* ask for MSI-X interrupts */ - request = total; - request_msix(dd, &request, entries); - - if (request == 0) { + request = request_msix(dd, total); + if (request < 0) { + ret = request; + goto fail; + } else if (request == 0) { /* using INTx */ /* dd->num_msix_entries already zero */ - kfree(entries); single_interrupt = 1; dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n"); + } else if (request < total) { + /* using MSI-X, 
with reduced interrupts */ + dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n", + total, request); + ret = -EINVAL; + goto fail; } else { - /* using MSI-X */ - dd->num_msix_entries = request; - dd->msix_entries = entries; - - if (request != total) { - /* using MSI-X, with reduced interrupts */ - dd_dev_err( - dd, - "cannot handle reduced interrupt case, want %u, got %u\n", - total, request); - ret = -EINVAL; + dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries), + GFP_KERNEL); + if (!dd->msix_entries) { + ret = -ENOMEM; goto fail; } + /* using MSI-X */ + dd->num_msix_entries = total; dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total); } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 9c6c73448461..8f74cf6d6c9a 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -521,7 +521,7 @@ static inline void incr_cntr32(u32 *cntr) #define MAX_NAME_SIZE 64 struct hfi1_msix_entry { enum irq_type type; - struct msix_entry msix; + int irq; void *arg; char name[MAX_NAME_SIZE]; cpumask_t mask; @@ -1838,9 +1838,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev); int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *dd); -void request_msix(struct hfi1_devdata *dd, u32 *nent, - struct hfi1_msix_entry *entry); -void hfi1_enable_intx(struct pci_dev *pdev); +int request_msix(struct hfi1_devdata *dd, u32 msireq); void restore_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 6a9f6f9819e1..f01841b51946 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -240,50 +240,6 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd) iounmap(dd->piobase); } -static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt, - struct hfi1_msix_entry *hfi1_msix_entry) -{ - int ret; - int nvec = *msixcnt; - struct msix_entry *msix_entry; - int i; - - /* - * We can't pass hfi1_msix_entry array to msix_setup - * so use a dummy msix_entry array and copy the allocated - * irq back to the hfi1_msix_entry array. 
- */ - msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL); - if (!msix_entry) { - ret = -ENOMEM; - goto do_intx; - } - - for (i = 0; i < nvec; i++) - msix_entry[i] = hfi1_msix_entry[i].msix; - - ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec); - if (ret < 0) - goto free_msix_entry; - nvec = ret; - - for (i = 0; i < nvec; i++) - hfi1_msix_entry[i].msix = msix_entry[i]; - - kfree(msix_entry); - *msixcnt = nvec; - return; - -free_msix_entry: - kfree(msix_entry); - -do_intx: - dd_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n", - nvec, ret); - *msixcnt = 0; - hfi1_enable_intx(dd->pcidev); -} - /* return the PCIe link speed from the given link status */ static u32 extract_speed(u16 linkstat) { @@ -364,33 +320,29 @@ int pcie_speeds(struct hfi1_devdata *dd) } /* - * Returns in *nent: - * - actual number of interrupts allocated + * Returns: + * - actual number of interrupts allocated or * - 0 if fell back to INTx. + * - error */ -void request_msix(struct hfi1_devdata *dd, u32 *nent, - struct hfi1_msix_entry *entry) +int request_msix(struct hfi1_devdata *dd, u32 msireq) { - int pos; + int nvec; - pos = dd->pcidev->msix_cap; - if (*nent && pos) { - msix_setup(dd, pos, nent, entry); - /* did it, either MSI-X or INTx */ - } else { - *nent = 0; - hfi1_enable_intx(dd->pcidev); + nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, + PCI_IRQ_MSIX | PCI_IRQ_LEGACY); + if (nvec < 0) { + dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec); + return nvec; } tune_pcie_caps(dd); -} -void hfi1_enable_intx(struct pci_dev *pdev) -{ - /* first, turn on INTx */ - pci_intx(pdev, 1); - /* then turn off MSI-X */ - pci_disable_msix(pdev); + /* check for legacy IRQ */ + if (nvec == 1 && !dd->pcidev->msix_enabled) + return 0; + + return nvec; } /* restore command and BARs after a reset has wiped them out */ -- cgit v1.2.3-59-g8ed1b From cb49366f3616fdf197893c24a5b2677b8c26ce29 Mon Sep 17 00:00:00 2001 From: "Vishwanathapura, Niranjana" Date: Thu, 1 Jun 2017 17:04:02 -0700 Subject: IB/core,rdmavt,hfi1,opa-vnic: Send OPA cap_mask3 in trap Provide the ability for IB clients to modify the OPA specific capability mask and include this mask in the subsequent trap data. Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Michael N. 
Henry Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 7 ++----- drivers/infiniband/hw/hfi1/mad.h | 2 +- drivers/infiniband/hw/hfi1/verbs.c | 6 +++++- drivers/infiniband/sw/rdmavt/vt.c | 9 +++++++-- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 27 ++++++++++++++++++++++++- include/rdma/ib_verbs.h | 3 ++- include/rdma/rdma_vt.h | 1 + 7 files changed, 44 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 5977673a52d4..70831ad621b0 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -260,6 +260,7 @@ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) data.issuer_lid = cpu_to_be32(lid); data.ntc_144.lid = data.issuer_lid; data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); + data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags); send_trap(ibp, &data, sizeof(data)); } @@ -704,11 +705,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT; pi->buffer_units = cpu_to_be32(buffer_units); - pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported | - OPA_CAP_MASK3_IsEthOnFabricSupported); - /* Driver does not support mcast/collective configuration */ - pi->opa_cap_mask &= - cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported); + pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags); pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7) << 3 | (HFI1_MCAST_NR & 0x7)); diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 5aa3fd1be653..a4e2506bd5ca 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -115,7 +115,7 @@ struct opa_mad_notice_attr { __be32 lid; /* LID where change occurred */ __be32 new_cap_mask; /* new capability mask */ __be16 reserved2; - __be16 cap_mask; + __be16 cap_mask3; __be16 change_flags; /* low 4 bits only */ } __packed ntc_144; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index af54d3f4696a..2d7759f0c6b4 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1537,9 +1537,13 @@ static void init_ibport(struct hfi1_pportdata *ppd) /* Set the prefix to the default value (see ch. 
4.1.1) */ ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; ibp->rvp.sm_lid = 0; - /* Below should only set bits defined in OPA PortInfo.CapabilityMask */ + /* + * Below should only set bits defined in OPA PortInfo.CapabilityMask + * and PortInfo.CapabilityMask3 + */ ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP | IB_PORT_CAP_MASK_NOTICE_SUP; + ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported; ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 0d7c6bb551d9..64bdd442078a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -202,8 +202,13 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, return -EINVAL; rvp = rdi->ports[port_index]; - rvp->port_cap_flags |= props->set_port_cap_mask; - rvp->port_cap_flags &= ~props->clr_port_cap_mask; + if (port_modify_mask & IB_PORT_OPA_MASK_CHG) { + rvp->port_cap3_flags |= props->set_port_cap_mask; + rvp->port_cap3_flags &= ~props->clr_port_cap_mask; + } else { + rvp->port_cap_flags |= props->set_port_cap_mask; + rvp->port_cap_flags &= ~props->clr_port_cap_mask; + } if (props->set_port_cap_mask || props->clr_port_cap_mask) rdi->driver_f.cap_mask_chg(rdi, port_num); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 875694f9a7f9..32cdd7a35415 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -52,7 +52,9 @@ #include #include -#include +#include +#include +#include #include "opa_vnic_internal.h" @@ -979,6 +981,27 @@ static int vema_register(struct opa_vnic_ctrl_port *cport) return 0; } +/** + * opa_vnic_ctrl_config_dev -- This function sends a trap to the EM + * by way of ib_modify_port to indicate support for ethernet on the + * fabric. 
+ * @cport: pointer to control port + * @en: enable or disable ethernet on fabric support + */ +static void opa_vnic_ctrl_config_dev(struct opa_vnic_ctrl_port *cport, bool en) +{ + struct ib_port_modify pm = { 0 }; + int i; + + if (en) + pm.set_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported; + else + pm.clr_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported; + + for (i = 1; i <= cport->num_ports; i++) + ib_modify_port(cport->ibdev, i, IB_PORT_OPA_MASK_CHG, &pm); +} + /** * opa_vnic_vema_add_one -- Handle new ib device * @device: ib device pointer @@ -1007,6 +1030,7 @@ static void opa_vnic_vema_add_one(struct ib_device *device) c_info("VNIC client initialized\n"); ib_set_client_data(device, &opa_vnic_client, cport); + opa_vnic_ctrl_config_dev(cport, true); } /** @@ -1025,6 +1049,7 @@ static void opa_vnic_vema_rem_one(struct ib_device *device, return; c_info("removing VNIC client\n"); + opa_vnic_ctrl_config_dev(cport, false); vema_unregister(cport); kfree(cport); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8f1ce4e27bbd..9d4d2a74c95e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -577,7 +577,8 @@ struct ib_device_modify { enum ib_port_modify_flags { IB_PORT_SHUTDOWN = 1, IB_PORT_INIT_TYPE = (1<<2), - IB_PORT_RESET_QKEY_CNTR = (1<<3) + IB_PORT_RESET_QKEY_CNTR = (1<<3), + IB_PORT_OPA_MASK_CHG = (1<<4) }; struct ib_port_modify { diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index d0b9f91e5f4d..0f18ffd98dd7 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -75,6 +75,7 @@ struct rvt_ibport { __be64 mkey; u64 tid; u32 port_cap_flags; + u16 port_cap3_flags; u32 pma_sample_start; u32 pma_sample_interval; __be16 pma_counter_select[5]; -- cgit v1.2.3-59-g8ed1b From b888429c202197916c8c549811a2dd62f090280d Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:44 -0700 Subject: IB/hfi1: Remove atomic SDMA_REQ_SEND_DONE bit operation The atomic SDMA_REQ_SEND_DONE bit is set by the process-level code, and then the same process-level code uses the bit to test that all packets have been submitted incurring a costly atomic read. Use a bool type with a READ_ONCE/WRITE_ONCE pairing for this bit, and use the same condition that is used to set the bit to test that all packets have been submitted. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 92517cebb4c7..6fb70f0064a6 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -154,10 +154,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. 
Default: 12 #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ /* SDMA request flag bits */ -#define SDMA_REQ_FOR_THREAD 1 -#define SDMA_REQ_SEND_DONE 2 -#define SDMA_REQ_HAS_ERROR 3 -#define SDMA_REQ_DONE_ERROR 4 +#define SDMA_REQ_HAS_ERROR 1 +#define SDMA_REQ_DONE_ERROR 2 #define SDMA_PKT_Q_INACTIVE BIT(0) #define SDMA_PKT_Q_ACTIVE BIT(1) @@ -258,6 +256,7 @@ struct user_sdma_request { u16 tididx; /* progress index moving along the iovs array */ u8 iov_idx; + u8 done; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; } ____cacheline_aligned_in_smp; @@ -628,6 +627,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->seqsubmitted = 0; req->flags = 0; req->tids = NULL; + req->done = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -809,7 +809,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, * request have been submitted to the SDMA engine. However, it * will not wait for send completions. */ - while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) { + while (req->seqsubmitted != req->info.npkts) { ret = user_sdma_send_pkts(req, pcount); if (ret < 0) { if (ret != -EBUSY) { @@ -1118,7 +1118,7 @@ dosend: ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); req->seqsubmitted += count; if (req->seqsubmitted == req->info.npkts) { - set_bit(SDMA_REQ_SEND_DONE, &req->flags); + WRITE_ONCE(req->done, 1); /* * The txreq has already been submitted to the HW queue * so we can free the AHG entry now. Corruption will not @@ -1585,7 +1585,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) if (status != SDMA_TXREQ_S_OK) req->status = status; if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && - (test_bit(SDMA_REQ_SEND_DONE, &req->flags) || + (READ_ONCE(req->done) || test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) { user_sdma_free_request(req, false); pq_update(pq); -- cgit v1.2.3-59-g8ed1b From e9c48ebd0cb4d31bbaf60ddec2a2fa40227b8cb5 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:50 -0700 Subject: IB/hfi1: Remove atomic SDMA_REQ_HAS_ERROR bit operation Atomic bit tests are used to signal errors and the completion of request submissions. These operations don't need to be atomic and show up as expensive in the profile. Replace each atomic bit operation with a bool type and a READ_ONCE/WRITE_ONCE pairing. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 6fb70f0064a6..fcadbb9978ca 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -153,10 +153,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring.
Default: 12 #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ -/* SDMA request flag bits */ -#define SDMA_REQ_HAS_ERROR 1 -#define SDMA_REQ_DONE_ERROR 2 - #define SDMA_PKT_Q_INACTIVE BIT(0) #define SDMA_PKT_Q_ACTIVE BIT(1) #define SDMA_PKT_Q_DEFERRED BIT(2) @@ -232,7 +228,6 @@ struct user_sdma_request { /* Writeable fields shared with interrupt */ u64 seqcomp ____cacheline_aligned_in_smp; u64 seqsubmitted; - unsigned long flags; /* status of the last txreq completed */ int status; @@ -257,6 +252,7 @@ struct user_sdma_request { /* progress index moving along the iovs array */ u8 iov_idx; u8 done; + u8 has_error; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; } ____cacheline_aligned_in_smp; @@ -625,9 +621,9 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->seqnum = 0; req->seqcomp = 0; req->seqsubmitted = 0; - req->flags = 0; req->tids = NULL; req->done = 0; + req->has_error = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -814,7 +810,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, if (ret < 0) { if (ret != -EBUSY) { req->status = ret; - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + WRITE_ONCE(req->has_error, 1); if (ACCESS_ONCE(req->seqcomp) == req->seqsubmitted - 1) goto free_req; @@ -916,10 +912,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) pq = req->pq; /* If tx completion has reported an error, we are done. */ - if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + if (READ_ONCE(req->has_error)) return -EFAULT; - } /* * Check if we might have sent the entire request already @@ -942,10 +936,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) * with errors. If so, we are not going to process any * more packets from this request. */ - if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + if (READ_ONCE(req->has_error)) return -EFAULT; - } tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL); if (!tx) @@ -1566,7 +1558,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) if (status != SDMA_TXREQ_S_OK) { SDMA_DBG(req, "SDMA completion with error %d", status); - set_bit(SDMA_REQ_HAS_ERROR, &req->flags); + WRITE_ONCE(req->has_error, 1); } req->seqcomp = tx->seqnum; @@ -1586,7 +1578,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) req->status = status; if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && (READ_ONCE(req->done) || - test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) { + READ_ONCE(req->has_error))) { user_sdma_free_request(req, false); pq_update(pq); set_comp_state(pq, cq, idx, ERROR, req->status); -- cgit v1.2.3-59-g8ed1b From b4e9e2f0fc87b876a4e2162733c1940e861785b1 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:57 -0700 Subject: IB/hfi1: Reclassify type of messages printed for platform config logic Reclassify messages printed out to /var/log/messages into warnings and errors to facilitate debugging in the future for issues related to the platform config logic. 
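The severity convention being applied can be sketched as follows; dev_log() is a hypothetical stand-in for the driver's dd_dev_info()/dd_dev_warn()/dd_dev_err() macros, not hfi1 code:

#include <stdarg.h>
#include <stdio.h>

enum log_sev { LOG_INFO, LOG_WARN, LOG_ERR };

/* Hypothetical logger standing in for dd_dev_info/dd_dev_warn/dd_dev_err. */
static void dev_log(enum log_sev sev, const char *fmt, ...)
{
	static const char * const tag[] = { "info", "warn", "err" };
	va_list ap;

	fprintf(stderr, "hfi1 [%s]: ", tag[sev]);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}

int main(void)
{
	/* Port stays disabled and operator action is required: error. */
	dev_log(LOG_ERR, "%s: Port disabled due to system power restrictions\n",
		"qual_power");
	/* Unknown cable, but the link may still train: warning. */
	dev_log(LOG_WARN, "%s: Unknown/unsupported cable\n", "tune_qsfp");
	return 0;
}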
Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 838fe84e285a..cbda9b189216 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -242,7 +242,7 @@ static int qual_power(struct hfi1_pportdata *ppd) if (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) { - dd_dev_info( + dd_dev_err( ppd->dd, "%s: Port disabled due to system power restrictions\n", __func__); @@ -268,7 +268,7 @@ static int qual_bitrate(struct hfi1_pportdata *ppd) if (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) { - dd_dev_info( + dd_dev_err( ppd->dd, "%s: Cable failed bitrate check, disabling port\n", __func__); @@ -709,15 +709,15 @@ static void apply_tunings( ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) - dd_dev_info(ppd->dd, - "%s: Failed set ext device config params\n", - __func__); + dd_dev_err(ppd->dd, + "%s: Failed set ext device config params\n", + __func__); } if (tx_preset_index == OPA_INVALID_INDEX) { if (ppd->port_type == PORT_TYPE_QSFP && limiting_active) - dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n", - __func__); + dd_dev_err(ppd->dd, "%s: Invalid Tx preset index\n", + __func__); return; } @@ -900,7 +900,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd, case 0xD: /* fallthrough */ case 0xF: default: - dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n", + dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n", __func__); break; } @@ -942,7 +942,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) case PORT_TYPE_DISCONNECTED: ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED); - dd_dev_info(dd, "%s: Port disconnected, disabling port\n", + dd_dev_warn(dd, "%s: Port disconnected, disabling port\n", __func__); goto bail; case PORT_TYPE_FIXED: @@ -1027,7 +1027,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) } break; default: - dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__); + dd_dev_warn(ppd->dd, "%s: Unknown port type\n", __func__); ppd->port_type = PORT_TYPE_UNKNOWN; tuning_method = OPA_UNKNOWN_TUNING; total_atten = 0; -- cgit v1.2.3-59-g8ed1b From 90dba23e1e30af72dbf4379842a5161e811b26b8 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 26 May 2017 05:36:04 -0700 Subject: IB/hfi1: Fix up sdma_init function comment sdma_init does not take a number of sdma engine parameters, rather it initializes all of the sdma engines. Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sdma.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index bfd0d5187e9b..d82ff57214c5 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1340,10 +1340,8 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) * @dd: hfi1_devdata * @port: port number (currently only zero) * - * sdma_init initializes the specified number of engines. - * - * The code initializes each sde, its csrs. Interrupts - * are not required to be enabled. 
+ * Initializes each sde and its csrs. + * Interrupts are not required to be enabled. * * Returns: * 0 - success, -errno on failure -- cgit v1.2.3-59-g8ed1b From bc54f6714c3a5d1f7ac6e7e5a5f7c390b1a01285 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:18:14 -0700 Subject: IB/hfi1: Ensure dd->gi_mask cannot be overflowed As the code stands today, the array access in remap_intr() is OK. To future-proof the code, though, we should explicitly check that the index value is not outside the valid range. This is not a straightforward calculation, so err on the side of caution. Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9118618d28e5..d6af715c35bf 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12832,7 +12832,12 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) /* clear from the handled mask of the general interrupt */ m = isrc / 64; n = isrc % 64; - dd->gi_mask[m] &= ~((u64)1 << n); + if (likely(m < CCE_NUM_INT_CSRS)) { + dd->gi_mask[m] &= ~((u64)1 << n); + } else { + dd_dev_err(dd, "remap interrupt err\n"); + return; + } /* direct the chip source to the given MSI-X interrupt */ m = isrc / 8; -- cgit v1.2.3-59-g8ed1b From 67838e64fa63415fe4e0da7149bcb123ed9f5612 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:18:46 -0700 Subject: IB/hfi1: Fix spelling mistake in linkdown reason Spell receive correctly in OPA_LINKDOWN_REASON_RCV_ERROR Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index d6af715c35bf..99c29ddcca35 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6906,7 +6906,7 @@ static void reset_neighbor_info(struct hfi1_pportdata *ppd) static const char * const link_down_reason_strs[] = { [OPA_LINKDOWN_REASON_NONE] = "None", - [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Recive error 0", + [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Receive error 0", [OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length", [OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long", [OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short", -- cgit v1.2.3-59-g8ed1b From 52d86e72c515ebd4fb0d329470aa50698e28fb36 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:19:46 -0700 Subject: IB/hfi1: Remove subtraction of uninitialized value In process_receive_packet, the packet header field is used to calculate the length of the packet. However, this is not necessarily set up. In fact, only if the ECN prescan is enabled will the packet header be valid at this point. The code works as is because we do not do anything with the packet length at this point in the packet processing.
The length and header are set up correctly in hfi1_setup_ib_header(),
which is reached through the following call sequence:

process_rcv_packet()
 -> rhf_rcv_function_map[]()
 --> process_receive_ib()
 ---> hfi1_setup_9B_packet()
 ----> hfi1_setup_ib_header()

Reviewed-by: Mike Marciniszyn
Signed-off-by: Dennis Dalessandro
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hfi1/driver.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/infiniband/hw')

diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 2a1022e374a5..9e59430bc55c 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -703,7 +703,6 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)

 	packet->etype = rhf_rcv_type(packet->rhf);

-	packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
 	/* total length */
 	packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
 	/* retrieve eager buffer details */
-- cgit v1.2.3-59-g8ed1b

From f1685179820bb7f9d9c4f5d0ac9bfb269529a919 Mon Sep 17 00:00:00 2001
From: Neel Desai
Date: Mon, 29 May 2017 17:20:27 -0700
Subject: IB/hfi1: Add error checking for buffer overrun in OPA aggregate

Improve code safety by checking the size of the data buffer to prevent
buffer overrun.

Reviewed-by: Dennis Dalessandro
Reviewed-by: Brian Welty
Signed-off-by: Neel Desai
Signed-off-by: Dennis Dalessandro
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hfi1/mad.c | 296 ++++++++++++++++++++++++---------------
 1 file changed, 185 insertions(+), 111 deletions(-)

(limited to 'drivers/infiniband/hw')

diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 70831ad621b0..b180dff5f2e9 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -59,6 +59,14 @@
 #define OPA_LINK_WIDTH_RESET_OLD 0x0fff
 #define OPA_LINK_WIDTH_RESET 0xffff

+static int smp_length_check(u32 data_size, u32 request_len)
+{
+	if (unlikely(request_len < data_size))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int reply(struct ib_mad_hdr *smp)
 {
 	/*
@@ -308,11 +316,11 @@ void hfi1_node_desc_chg(struct hfi1_ibport *ibp)

 static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
 				   u8 *data, struct ib_device *ibdev,
-				   u8 port, u32 *resp_len)
+				   u8 port, u32 *resp_len, u32 max_len)
 {
 	struct opa_node_description *nd;

-	if (am) {
+	if (am || smp_length_check(sizeof(*nd), max_len)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -329,7 +337,7 @@ static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,

 static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
 				   struct ib_device *ibdev, u8 port,
-				   u32 *resp_len)
+				   u32 *resp_len, u32 max_len)
 {
 	struct opa_node_info *ni;
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -339,6 +347,7 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,

 	/* GUID 0 is illegal */
 	if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
+	    smp_length_check(sizeof(*ni), max_len) ||
 	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
@@ -520,7 +529,7 @@ void read_ltp_rtt(struct hfi1_devdata *dd)

 static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 				   struct ib_device *ibdev, u8 port,
-				   u32 *resp_len)
+				   u32 *resp_len, u32 max_len)
 {
 	int i;
 	struct hfi1_devdata *dd;
 	struct hfi1_pportdata *ppd;
 	struct hfi1_ibport *ibp;
 	struct opa_port_info *pi = (struct opa_port_info *)data;
 	u8 mtu;
 	u8 credit_rate;
 	u8 is_beaconing_active;
 	u32 state;
 	u32 num_ports = OPA_AM_NPORT(am);
 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
 	u32 buffer_units;
 	u64 tmp
= 0; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -745,7 +754,7 @@ static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 n_blocks_req = OPA_AM_NBLK(am); @@ -768,6 +777,11 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16); + if (smp_length_check(size, max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + if (start_block + n_blocks_req > n_blocks_avail || n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) { pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; " @@ -1071,7 +1085,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, */ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_port_info *pi = (struct opa_port_info *)data; struct ib_event event; @@ -1092,7 +1106,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, int ret, i, invalid = 0, call_set_mtu = 0; int call_link_downgrade_policy = 0; - if (num_ports != 1) { + if (num_ports != 1 || + smp_length_check(sizeof(*pi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1343,7 +1358,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, if (ret) return ret; - ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len); + ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len, + max_len); /* restore re-reg bit per o14-12.2.1 */ pi->clientrereg_subnettimeout |= clientrereg; @@ -1360,7 +1376,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, return ret; get_only: - return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len, + max_len); } /** @@ -1421,7 +1438,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 n_blocks_sent = OPA_AM_NBLK(am); @@ -1431,6 +1448,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, int i; u16 n_blocks_avail; unsigned npkeys = hfi1_get_npkeys(dd); + u32 size = 0; if (n_blocks_sent == 0) { pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n", @@ -1441,6 +1459,13 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1; + size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE); + + if (smp_length_check(size, max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + if (start_block + n_blocks_sent > n_blocks_avail || n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) { pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n", @@ -1458,7 +1483,8 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); 
} - return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len, + max_len); } #define ILLEGAL_VL 12 @@ -1519,14 +1545,14 @@ static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data) static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */ unsigned i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1542,14 +1568,15 @@ static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; + size_t size = ARRAY_SIZE(ibp->sl_to_sc); int i; u8 sc; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1564,19 +1591,20 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, } } - return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */ unsigned i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1592,13 +1620,14 @@ static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); + size_t size = ARRAY_SIZE(ibp->sc_to_sl); u8 *p = data; int i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1606,19 +1635,20 @@ static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++) ibp->sc_to_sl[i] = *p++; - return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NBLK(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); void *vp = (void *)data; size_t size = 4 * sizeof(u64); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1633,7 +1663,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NBLK(am); int async_update = OPA_AM_ASYNC(am); @@ -1641,8 +1671,15 @@ static int 
__subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, void *vp = (void *)data; struct hfi1_pportdata *ppd; int lstate; + /* + * set_sc2vlt_tables writes the information contained in *data + * to four 64-bit registers SendSC2VLt[0-3]. We need to make + * sure *max_len is not greater than the total size of the four + * SendSC2VLt[0-3] registers. + */ + size_t size = 4 * sizeof(u64); - if (n_blocks != 1 || async_update) { + if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1662,27 +1699,28 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, set_sc2vlt_tables(dd, vp); - return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; void *vp = (void *)data; - int size; + int size = sizeof(struct sc2vlnt); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } ppd = dd->pport + (port - 1); - size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp); + fm_get_table(ppd, FM_TBL_SC2VLNT, vp); if (resp_len) *resp_len += size; @@ -1692,15 +1730,16 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; void *vp = (void *)data; int lstate; + int size = sizeof(struct sc2vlnt); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1718,12 +1757,12 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, fm_set_table(ppd, FM_TBL_SC2VLNT, vp); return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); } static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 nports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -1732,7 +1771,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct hfi1_pportdata *ppd; struct opa_port_state_info *psi = (struct opa_port_state_info *)data; - if (nports != 1) { + if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1765,7 +1804,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 nports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -1776,7 +1815,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct opa_port_state_info *psi = (struct opa_port_state_info *)data; int ret, invalid = 0; - if (nports != 1) { + if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) { smp->status 
|= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1806,19 +1845,21 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, if (invalid) smp->status |= IB_SMP_INVALID_FIELD; - return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 addr = OPA_AM_CI_ADDR(am); u32 len = OPA_AM_CI_LEN(am) + 1; int ret; - if (dd->pport->port_type != PORT_TYPE_QSFP) { + if (dd->pport->port_type != PORT_TYPE_QSFP || + smp_length_check(len, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1861,21 +1902,22 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, } static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, u8 port, u32 *resp_len, + u32 max_len) { u32 num_ports = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; struct buffer_control *p = (struct buffer_control *)data; - int size; + int size = sizeof(struct buffer_control); - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } ppd = dd->pport + (port - 1); - size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p); + fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p); trace_bct_get(dd, p); if (resp_len) *resp_len += size; @@ -1884,14 +1926,15 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data, } static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, u8 port, u32 *resp_len, + u32 max_len) { u32 num_ports = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; struct buffer_control *p = (struct buffer_control *)data; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1902,41 +1945,43 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); u32 num_ports = OPA_AM_NPORT(am); u8 section = (am & 0x00ff0000) >> 16; u8 *p = data; - int size = 0; + int size = 256; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } switch (section) { case OPA_VLARB_LOW_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p); + fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p); break; case OPA_VLARB_HIGH_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p); + fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p); break; case OPA_VLARB_PREEMPT_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p); + fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p); break; case 
OPA_VLARB_PREEMPT_MATRIX: - size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p); + fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p); break; default: pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n", be32_to_cpu(smp->attr_mod)); smp->status |= IB_SMP_INVALID_FIELD; + size = 0; break; } @@ -1948,14 +1993,15 @@ static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); u32 num_ports = OPA_AM_NPORT(am); u8 section = (am & 0x00ff0000) >> 16; u8 *p = data; + int size = 256; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1983,7 +2029,8 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, break; } - return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len, + max_len); } struct opa_pma_mad { @@ -3279,13 +3326,18 @@ struct opa_congestion_info_attr { static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_congestion_info_attr *p = (struct opa_congestion_info_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + p->congestion_info = 0; p->control_table_cap = ppd->cc_max_table_entries; p->congestion_log_length = OPA_CONG_LOG_ELEMS; @@ -3298,7 +3350,7 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data, static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 port, u32 *resp_len, u32 max_len) { int i; struct opa_congestion_setting_attr *p = @@ -3308,6 +3360,11 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, struct opa_congestion_setting_entry_shadow *entries; struct cc_state *cc_state; + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + rcu_read_lock(); cc_state = get_cc_state(ppd); @@ -3382,7 +3439,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_congestion_setting_attr *p = (struct opa_congestion_setting_attr *)data; @@ -3391,6 +3448,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct opa_congestion_setting_entry_shadow *entries; int i; + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + /* * Save details from packet into the ppd. Hold the cc_state_lock so * our information is consistent with anyone trying to apply the state. 
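Distilled from the hunks above and below, every handler now runs the
same guard before touching its payload. A stand-alone sketch of the
pattern (the struct, status value, and helper here are stand-ins for
the driver's definitions, not the exact kernel code):

	typedef unsigned int u32;

	struct opa_smp { u32 status; };		/* stand-in type */
	#define IB_SMP_INVALID_FIELD 0x1cU	/* stand-in status bit */

	/* Mirrors smp_length_check(): nonzero when the request buffer
	 * (max_len) is smaller than the attribute it must hold. */
	static int smp_length_check(u32 data_size, u32 max_len)
	{
		return max_len < data_size;
	}

	/* Prologue now run by each __subn_{get,set}_opa_* handler. */
	static int check_request(struct opa_smp *smp, u32 attr_size, u32 max_len)
	{
		if (smp_length_check(attr_size, max_len)) {
			smp->status |= IB_SMP_INVALID_FIELD;
			return 1;	/* caller replies with the error MAD */
		}
		return 0;
	}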
@@ -3412,12 +3474,12 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, apply_cc_state(ppd); return __subn_get_opa_cong_setting(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); } static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 port, u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -3425,7 +3487,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, s64 ts; int i; - if (am != 0) { + if (am || smp_length_check(sizeof(*cong_log), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3483,7 +3545,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct ib_cc_table_attr *cc_table_attr = (struct ib_cc_table_attr *)data; @@ -3495,9 +3557,10 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; struct cc_state *cc_state; + u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); /* sanity check n_blocks, start_block */ - if (n_blocks == 0 || + if (n_blocks == 0 || smp_length_check(size, max_len) || start_block + n_blocks > ppd->cc_max_table_entries) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -3527,14 +3590,14 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, rcu_read_unlock(); if (resp_len) - *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); + *resp_len += size; return reply((struct ib_mad_hdr *)smp); } static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3545,9 +3608,10 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; u16 ccti_limit; + u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); /* sanity check n_blocks, start_block */ - if (n_blocks == 0 || + if (n_blocks == 0 || smp_length_check(size, max_len) || start_block + n_blocks > ppd->cc_max_table_entries) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -3578,7 +3642,8 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, /* now apply the information */ apply_cc_state(ppd); - return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len, + max_len); } struct opa_led_info { @@ -3591,7 +3656,7 @@ struct opa_led_info { static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd = dd->pport; @@ -3599,7 +3664,7 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, u32 nport = OPA_AM_NPORT(am); u32 is_beaconing_active; - if (nport != 1) { + if (nport != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3621,14 +3686,14 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, static 
int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct opa_led_info *p = (struct opa_led_info *)data; u32 nport = OPA_AM_NPORT(am); int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK); - if (nport != 1) { + if (nport != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3638,12 +3703,13 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, else shutdown_led_override(dd->pport); - return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len, + max_len); } static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { int ret; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3651,71 +3717,71 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, switch (attr_id) { case IB_SMP_ATTR_NODE_DESC: ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_NODE_INFO: ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PORT_INFO: ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PKEY_TABLE: ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SL_TO_SC_MAP: ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_SL_MAP: ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLT_MAP: ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLNT_MAP: ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_get_opa_psi(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE: ret = __subn_get_opa_bct(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_CABLE_INFO: ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_VL_ARB_TABLE: ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_INFO: ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING: ret = __subn_get_opa_cong_setting(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_LOG: ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE: ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_LED_INFO: ret = __subn_get_opa_led_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_SM_INFO: if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) @@ -3733,7 +3799,7 @@ static int 
subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { int ret; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3741,51 +3807,51 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, switch (attr_id) { case IB_SMP_ATTR_PORT_INFO: ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PKEY_TABLE: ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SL_TO_SC_MAP: ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_SL_MAP: ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLT_MAP: ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLNT_MAP: ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_set_opa_psi(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE: ret = __subn_set_opa_bct(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_VL_ARB_TABLE: ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING: ret = __subn_set_opa_cong_setting(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE: ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_LED_INFO: ret = __subn_set_opa_led_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_SM_INFO: if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) @@ -3841,7 +3907,10 @@ static int subn_get_opa_aggregate(struct opa_smp *smp, memset(next_smp + sizeof(*agg), 0, agg_data_len); (void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data, - ibdev, port, NULL); + ibdev, port, NULL, (u32)agg_data_len); + + if (smp->status & IB_SMP_INVALID_FIELD) + break; if (smp->status & ~IB_SMP_DIRECTION) { set_aggr_error(agg); return reply((struct ib_mad_hdr *)smp); @@ -3884,7 +3953,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp, } (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data, - ibdev, port, NULL); + ibdev, port, NULL, (u32)agg_data_len); + if (smp->status & IB_SMP_INVALID_FIELD) + break; if (smp->status & ~IB_SMP_DIRECTION) { set_aggr_error(agg); return reply((struct ib_mad_hdr *)smp); @@ -3994,12 +4065,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, struct opa_smp *smp = (struct opa_smp *)out_mad; struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *data; - u32 am; + u32 am, data_size; __be16 attr_id; int ret; *out_mad = *in_mad; data = opa_get_smp_data(smp); + data_size = (u32)opa_get_smp_data_size(smp); am = be32_to_cpu(smp->attr_mod); attr_id = smp->attr_id; @@ -4043,7 +4115,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, default: clear_opa_smp_data(smp); ret = subn_get_opa_sma(attr_id, smp, am, data, - ibdev, port, resp_len); + ibdev, port, resp_len, + data_size); break; case OPA_ATTRIB_ID_AGGREGATE: ret = 
subn_get_opa_aggregate(smp, ibdev, port,
@@ -4055,7 +4128,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
 		switch (attr_id) {
 		default:
 			ret = subn_set_opa_sma(attr_id, smp, am, data,
-					       ibdev, port, resp_len);
+					       ibdev, port, resp_len,
+					       data_size);
 			break;
 		case OPA_ATTRIB_ID_AGGREGATE:
 			ret = subn_set_opa_aggregate(smp, ibdev, port,
-- cgit v1.2.3-59-g8ed1b

From bec7c79cd8f764ba84c8ec6d8c402b8a7cd3a54f Mon Sep 17 00:00:00 2001
From: "Byczkowski, Jakub"
Date: Mon, 29 May 2017 17:21:32 -0700
Subject: IB/hfi1: Modify handling of physical link state by Host Driver

Ensure states returned to the Fabric Manager are consistent with the
OPA specification by caching the physical state along with the logical
state.

Reviewed-by: Stuart Summers
Reviewed-by: Ira Weiny
Reviewed-by: Andrzej Kotlowski
Signed-off-by: Jakub Byczkowski
Signed-off-by: Dennis Dalessandro
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hfi1/chip.c   | 102 +++++++++++++++++++++++++-----------
 drivers/infiniband/hw/hfi1/chip.h   |   2 +-
 drivers/infiniband/hw/hfi1/driver.c |   8 +--
 drivers/infiniband/hw/hfi1/hfi.h    |  18 ++++++-
 drivers/infiniband/hw/hfi1/mad.c    |   6 +--
 drivers/infiniband/hw/hfi1/verbs.c  |   2 +-
 6 files changed, 97 insertions(+), 41 deletions(-)

(limited to 'drivers/infiniband/hw')

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 99c29ddcca35..3fae98d079e4 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1066,6 +1066,8 @@ static int thermal_init(struct hfi1_devdata *dd);
 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
 				  int msecs);
+static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
+				   int msecs);
 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
 static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
 static void handle_temp_err(struct hfi1_devdata *dd);
@@ -10028,28 +10030,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
 	sdma_update_lmc(dd, mask, ppd->lid);
 }

-static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
-{
-	unsigned long timeout;
-	u32 curr_state;
-
-	timeout = jiffies + msecs_to_jiffies(msecs);
-	while (1) {
-		curr_state = read_physical_state(dd);
-		if (curr_state == state)
-			break;
-		if (time_after(jiffies, timeout)) {
-			dd_dev_err(dd,
-				   "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
-				   state, curr_state);
-			return -ETIMEDOUT;
-		}
-		usleep_range(1950, 2050); /* sleep 2ms-ish */
-	}
-
-	return 0;
-}
-
 static const char *state_completed_string(u32 completed)
 {
 	static const char * const state_completed[] = {
@@ -10283,7 +10263,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 	if (do_wait) {
 		/* it can take a while for the link to go down */
-		ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
+		ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
 		if (ret < 0)
 			return ret;
 	}
@@ -10536,6 +10516,19 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 			goto unexpected;
 		}

+		/*
+		 * Wait for Link_Up physical state.
+		 * Physical and Logical states should already be
+		 * transitioned to LinkUp and LinkInit respectively.
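+		 *
+		 * (wait_physical_linkstate() polls via cache_physical_state(),
+		 * which refreshes ppd->pstate from the chip on each poll.)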
+		 */
+		ret = wait_physical_linkstate(ppd, PLS_LINKUP, 1000);
+		if (ret) {
+			dd_dev_err(dd,
+				   "%s: physical state did not change to LINK-UP\n",
+				   __func__);
+			break;
+		}
+
 		ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
 		if (ret) {
 			dd_dev_err(dd,
@@ -10649,6 +10642,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 		 */
 		if (ret)
 			goto_offline(ppd, 0);
+		else
+			cache_physical_state(ppd);
 		break;
 	case HLS_DN_DISABLE:
 		/* link is disabled */
@@ -10673,6 +10668,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 				ret = -EINVAL;
 				break;
 			}
+			ret = wait_physical_linkstate(ppd, PLS_DISABLED, 10000);
+			if (ret) {
+				dd_dev_err(dd,
+					   "%s: physical state did not change to DISABLED\n",
+					   __func__);
+				break;
+			}
 			dc_shutdown(dd);
 		}
 		ppd->host_link_state = HLS_DN_DISABLE;
@@ -10690,6 +10692,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 		if (ppd->host_link_state != HLS_DN_POLL)
 			goto unexpected;
 		ppd->host_link_state = HLS_VERIFY_CAP;
+		cache_physical_state(ppd);
 		break;
 	case HLS_GOING_UP:
 		if (ppd->host_link_state != HLS_VERIFY_CAP)
@@ -12663,21 +12666,56 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
 	return -ETIMEDOUT;
 }

-u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
+/*
+ * Read the physical hardware link state and set the driver's cached value
+ * of it.
+ */
+void cache_physical_state(struct hfi1_pportdata *ppd)
 {
-	u32 pstate;
+	u32 read_pstate;
 	u32 ib_pstate;

-	pstate = read_physical_state(ppd->dd);
-	ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
-	if (ppd->last_pstate != ib_pstate) {
+	read_pstate = read_physical_state(ppd->dd);
+	ib_pstate = chip_to_opa_pstate(ppd->dd, read_pstate);
+	/* check if OPA pstate changed */
+	if (chip_to_opa_pstate(ppd->dd, ppd->pstate) != ib_pstate) {
 		dd_dev_info(ppd->dd,
 			    "%s: physical state changed to %s (0x%x), phy 0x%x\n",
 			    __func__, opa_pstate_name(ib_pstate), ib_pstate,
-			    pstate);
-		ppd->last_pstate = ib_pstate;
+			    read_pstate);
+	}
+	ppd->pstate = read_pstate;
+}
+
+/*
+ * wait_physical_linkstate - wait for a physical link state change to occur
+ * @ppd: port device
+ * @state: the state to wait for
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for the physical link state change to occur.
+ * Returns 0 if state reached, otherwise -ETIMEDOUT.
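+ *
+ * Typical use, mirroring the goto_offline() caller updated above:
+ *
+ *	ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
+ *	if (ret < 0)
+ *		return ret;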
+ */ +static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, + int msecs) +{ + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msecs); + while (1) { + cache_physical_state(ppd); + if (ppd->pstate == state) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for phy link state 0x%x, current state is 0x%x\n", + state, ppd->pstate); + return -ETIMEDOUT; + } + usleep_range(1950, 2050); /* sleep 2ms-ish */ } - return ib_pstate; + + return 0; } #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ @@ -14781,7 +14819,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* start in offline */ ppd->host_link_state = HLS_DN_OFFLINE; init_vl_arb_caches(ppd); - ppd->last_pstate = 0xff; /* invalid value */ + ppd->pstate = PLS_OFFLINE; } dd->link_default = HLS_DN_POLL; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 0b4f418ba0ac..3dab3156ba4a 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -744,6 +744,7 @@ int is_bx(struct hfi1_devdata *dd); u32 read_physical_state(struct hfi1_devdata *dd); u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate); u32 get_logical_state(struct hfi1_pportdata *ppd); +void cache_physical_state(struct hfi1_pportdata *ppd); const char *opa_lstate_name(u32 lstate); const char *opa_pstate_name(u32 pstate); u32 driver_physical_state(struct hfi1_pportdata *ppd); @@ -1354,7 +1355,6 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt); u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); -u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 9e59430bc55c..e64e9e28c936 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -906,10 +906,12 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, sc = hfi1_9B_get_sc5(hdr, packet->rhf); } if (sc != SC15_PACKET) { - int hwstate = read_logical_state(dd); + int hwstate = driver_lstate(rcd->ppd); - if (hwstate != LSTATE_ACTIVE) { - dd_dev_info(dd, "Unexpected link state %d\n", hwstate); + if (hwstate != IB_PORT_ACTIVE) { + dd_dev_info(dd, + "Unexpected link state %s\n", + opa_lstate_name(hwstate)); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 8f74cf6d6c9a..bca781c3b5ac 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -663,7 +663,7 @@ struct hfi1_pportdata { u8 link_enabled; /* link enabled? */ u8 linkinit_reason; u8 local_tx_rate; /* rate given to 8051 firmware */ - u8 last_pstate; /* info only */ + u8 pstate; /* info only */ u8 qsfp_retry_count; /* placeholders for IB MAD packet settings */ @@ -1330,6 +1330,22 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) return ppd->lstate; } +/* return the driver's idea of the physical OPA port state */ +static inline u32 driver_pstate(struct hfi1_pportdata *ppd) +{ + /* + * The driver does some processing from the time the physical + * link state is at LINKUP to the time the SM can be notified + * as such. 
Return IB_PORTPHYSSTATE_TRAINING until the software + * state is ready. + */ + if (ppd->pstate == PLS_LINKUP && + !(ppd->host_link_state & HLS_UP)) + return IB_PORTPHYSSTATE_TRAINING; + else + return chip_to_opa_pstate(ppd->dd, ppd->pstate); +} + void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index b180dff5f2e9..c8daf633212d 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -113,7 +113,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) return; /* o14-3.2.1 */ - if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE) + if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) return; /* o14-2 */ @@ -615,7 +615,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ppd->offline_disabled_reason; pi->port_states.portphysstate_portstate = - (hfi1_ibphys_portstate(ppd) << 4) | state; + (driver_pstate(ppd) << 4) | state; pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc; @@ -1791,7 +1791,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, ppd->offline_disabled_reason; psi->port_states.portphysstate_portstate = - (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf); + (driver_pstate(ppd) << 4) | (lstate & 0xf); psi->link_width_downgrade_tx_active = cpu_to_be16(ppd->link_width_downgrade_tx_active); psi->link_width_downgrade_rx_active = diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2d7759f0c6b4..5b53faf47042 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1354,7 +1354,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->lmc = ppd->lmc; /* OPA logical states match IB logical states */ props->state = driver_lstate(ppd); - props->phys_state = hfi1_ibphys_portstate(ppd); + props->phys_state = driver_pstate(ppd); props->gid_tbl_len = HFI1_GUIDS_PER_PORT; props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); /* see rate_show() in ib core/sysfs.c */ -- cgit v1.2.3-59-g8ed1b From 13d84914db56c1afd1c9bf4f41e9bf91f061a7dd Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:22:01 -0700 Subject: IB/hfi1,qib: Do not send QKey trap for UD qps According to IBTA spec a QKey violation should not result in a bad qkey trap being triggered for UD queue pairs. Also since it is a silent error we do not increment the q_key violation or the dropped packet counters. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 14 ++++-------- drivers/infiniband/hw/hfi1/ruc.c | 8 +++---- drivers/infiniband/hw/hfi1/ud.c | 37 ++++++++++-------------------- drivers/infiniband/hw/hfi1/verbs.h | 4 ++-- drivers/infiniband/hw/qib/qib_mad.c | 13 ++++------- drivers/infiniband/hw/qib/qib_ruc.c | 20 ++++++++-------- drivers/infiniband/hw/qib/qib_ud.c | 43 ++++++++++++----------------------- drivers/infiniband/hw/qib/qib_verbs.h | 4 ++-- 8 files changed, 54 insertions(+), 89 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index c8daf633212d..a081a98d728a 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -180,10 +180,10 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) } /* - * Send a bad [PQ]_Key trap (ch. 14.3.8). 
+ * Send a bad P_Key trap (ch. 14.3.8). */ -void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2) +void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, u16 lid1, u16 lid2) { struct opa_mad_notice_attr data; u32 lid = ppd_from_ibp(ibp)->lid; @@ -191,17 +191,13 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, u32 _lid2 = lid2; memset(&data, 0, sizeof(data)); - - if (trap_num == OPA_TRAP_BAD_P_KEY) - ibp->rvp.pkey_violations++; - else - ibp->rvp.qkey_violations++; ibp->rvp.n_pkt_drops++; + ibp->rvp.pkey_violations++; /* Send violation trap */ data.generic_type = IB_NOTICE_TYPE_SECURITY; data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = trap_num; + data.trap_num = OPA_TRAP_BAD_P_KEY; data.issuer_lid = cpu_to_be32(lid); data.ntc_257_258.lid1 = cpu_to_be32(_lid1); data.ntc_257_258.lid2 = cpu_to_be32(_lid2); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 476fe5da2992..9cf506a9a796 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -254,8 +254,8 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) } if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); return 1; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ @@ -290,8 +290,8 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) } if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); return 1; } /* Validate the SLID. See Ch. 9.6.1.5 */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index c995aa58c36a..6bf7a1b08491 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -110,10 +110,10 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) ((1 << ppd->lmc) - 1)); if (unlikely(ingress_pkey_check(ppd, pkey, sc5, qp->s_pkey_index, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - slid, rdma_ah_get_dlid(ah_attr)); + hfi1_bad_pkey(ibp, pkey, + rdma_ah_get_sl(ah_attr), + sqp->ibqp.qp_num, qp->ibqp.qp_num, + slid, rdma_ah_get_dlid(ah_attr)); goto drop; } } @@ -128,18 +128,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qkey = (int)swqe->ud_wr.remote_qkey < 0 ? 
sqp->qkey : swqe->ud_wr.remote_qkey; - if (unlikely(qkey != qp->qkey)) { - u16 lid; - - lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1)); - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - lid, - rdma_ah_get_dlid(ah_attr)); - goto drop; - } + if (unlikely(qkey != qp->qkey)) + goto drop; /* silently drop per IBTA spec */ } /* @@ -722,10 +712,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) * for invalid pkeys is optional according to * IB spec (release 1.3, section 10.9.4) */ - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - pkey, sl, - src_qp, qp->ibqp.qp_num, - slid, dlid); + hfi1_bad_pkey(ibp, + pkey, sl, + src_qp, qp->ibqp.qp_num, + slid, dlid); return; } } else { @@ -734,12 +724,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (mgmt_pkey_idx < 0) goto drop; } - if (unlikely(qkey != qp->qkey)) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl, - src_qp, qp->ibqp.qp_num, - slid, dlid); + if (unlikely(qkey != qp->qkey)) /* Silent drop */ return; - } + /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && (tlen > 2048 || (sc5 == 0xF)))) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 17b38cd5f654..fdf1e1fb880c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -236,8 +236,8 @@ static inline int hfi1_send_ok(struct rvt_qp *qp) /* * This must be called with s_lock held. */ -void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2); +void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, u16 lid1, u16 lid2); void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num); void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index da295e0392ed..a4a7f2a76f24 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -134,24 +134,21 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) } /* - * Send a bad [PQ]_Key trap (ch. 14.3.8). + * Send a bad P_Key trap (ch. 14.3.8). 
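+ *
+ * Bad QKeys on UD QPs no longer raise a trap; per the IBTA spec they
+ * are dropped silently (see the qkey checks in qib_ud.c).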
*/ -void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, __be16 lid1, __be16 lid2) +void qib_bad_pkey(struct qib_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, __be16 lid1, __be16 lid2) { struct ib_mad_notice_attr data; - if (trap_num == IB_NOTICE_TRAP_BAD_PKEY) - ibp->rvp.pkey_violations++; - else - ibp->rvp.qkey_violations++; ibp->rvp.n_pkt_drops++; + ibp->rvp.pkey_violations++; /* Send violation trap */ data.generic_type = IB_NOTICE_TYPE_SECURITY; data.prod_type_msb = 0; data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = trap_num; + data.trap_num = IB_NOTICE_TRAP_BAD_PKEY; data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); data.toggle_count = 0; memset(&data.details, 0, sizeof(data.details)); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 88d84cbf7e5a..28528459a052 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -256,11 +256,11 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, } if (!qib_pkey_ok((u16)bth0, qib_get_pkey(ibp, qp->s_alt_pkey_index))) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, - (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - 0, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + qib_bad_pkey(ibp, + (u16)bth0, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + 0, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); goto err; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ @@ -295,11 +295,11 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, } if (!qib_pkey_ok((u16)bth0, qib_get_pkey(ibp, qp->s_pkey_index))) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, - (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - 0, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + qib_bad_pkey(ibp, + (u16)bth0, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + 0, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); goto err; } /* Validate the SLID. See Ch. 9.6.1.5 */ diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 341a123ee95c..be4907453ac4 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -66,8 +66,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn); if (!qp) { ibp->rvp.n_pkt_drops++; - rcu_read_unlock(); - return; + goto drop; } sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ? @@ -94,11 +93,11 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (unlikely(!qib_pkey_ok(pkey1, pkey2))) { lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - cpu_to_be16(lid), - cpu_to_be16(rdma_ah_get_dlid(ah_attr))); + qib_bad_pkey(ibp, pkey1, + rdma_ah_get_sl(ah_attr), + sqp->ibqp.qp_num, qp->ibqp.qp_num, + cpu_to_be16(lid), + cpu_to_be16(rdma_ah_get_dlid(ah_attr))); goto drop; } } @@ -113,18 +112,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qkey = (int)swqe->ud_wr.remote_qkey < 0 ? 
sqp->qkey : swqe->ud_wr.remote_qkey; - if (unlikely(qkey != qp->qkey)) { - u16 lid; - - lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1)); - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - cpu_to_be16(lid), - cpu_to_be16(rdma_ah_get_dlid(ah_attr))); + if (unlikely(qkey != qp->qkey)) goto drop; - } } /* @@ -487,22 +476,18 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, pkey1 = be32_to_cpu(ohdr->bth[0]); pkey2 = qib_get_pkey(ibp, qp->s_pkey_index); if (unlikely(!qib_pkey_ok(pkey1, pkey2))) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, - pkey1, - (be16_to_cpu(hdr->lrh[0]) >> 4) & + qib_bad_pkey(ibp, + pkey1, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - src_qp, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + src_qp, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); return; } } - if (unlikely(qkey != qp->qkey)) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - src_qp, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + if (unlikely(qkey != qp->qkey)) return; - } + /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && (tlen != 256 || diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index da0db5485ddc..33d5691a9b2d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -241,8 +241,8 @@ static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) return p1 && p1 == p2 && ((__s16)pkey1 < 0 || (__s16)pkey2 < 0); } -void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, __be16 lid1, __be16 lid2); +void qib_bad_pkey(struct qib_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, __be16 lid1, __be16 lid2); void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num); void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); -- cgit v1.2.3-59-g8ed1b From ddbf2efff4ad85135b9fd1cb53b10069a160bd58 Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Fri, 9 Jun 2017 15:59:26 -0700 Subject: IB/hfi1: Fix DC 8051 host info flag array Fix info array of host message flags by adding entry for link width downgrade and reverse values for BC SMA and BC PWR_MSG messages Reviewed-by: Jakub Byczkowski Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 3fae98d079e4..ebcb2c405c5f 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1012,14 +1012,15 @@ static struct flag_table dc8051_info_err_flags[] = { */ static struct flag_table dc8051_info_host_msg_flags[] = { FLAG_ENTRY0("Host request done", 0x0001), - FLAG_ENTRY0("BC SMA message", 0x0002), - FLAG_ENTRY0("BC PWR_MGM message", 0x0004), + FLAG_ENTRY0("BC PWR_MGM message", 0x0002), + FLAG_ENTRY0("BC SMA message", 0x0004), FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008), FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010), FLAG_ENTRY0("External device config request", 0x0020), FLAG_ENTRY0("VerifyCap all frames received", 0x0040), FLAG_ENTRY0("LinkUp achieved", 0x0080), FLAG_ENTRY0("Link going down", 0x0100), + FLAG_ENTRY0("Link width downgraded", 0x0200), }; static u32 encoded_size(u32 size); -- cgit v1.2.3-59-g8ed1b From 
702265fc0002497fd0ddaae4a5bec00a8a40da3e Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 15:59:33 -0700 Subject: IB/hfi1: Set proper logging levels on QSFP cable error events Change QSFP cable error events logging levels from info to error. Reviewed-by: Jakub Byczkowski Reviewed-by: Dennis Dalessandro Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 64 +++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index ebcb2c405c5f..b8ee0e27dae6 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9376,13 +9376,13 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) || (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING)) - dd_dev_info(dd, "%s: QSFP cable temperature too high\n", - __func__); + dd_dev_err(dd, "%s: QSFP cable temperature too high\n", + __func__); if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) || (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING)) - dd_dev_info(dd, "%s: QSFP cable temperature too low\n", - __func__); + dd_dev_err(dd, "%s: QSFP cable temperature too low\n", + __func__); /* * The remaining alarms/warnings don't matter if the link is down. @@ -9392,75 +9392,75 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) - dd_dev_info(dd, "%s: QSFP supply voltage too high\n", - __func__); + dd_dev_err(dd, "%s: QSFP supply voltage too high\n", + __func__); if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING)) - dd_dev_info(dd, "%s: QSFP supply voltage too low\n", - __func__); + dd_dev_err(dd, "%s: QSFP supply voltage too low\n", + __func__); /* Byte 2 is vendor specific */ if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 1/2 power too high\n", + __func__); if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 1/2 power too low\n", + __func__); if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 3/4 power too high\n", + __func__); if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 3/4 power too low\n", + __func__); if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) || (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too high\n", + __func__); if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) || (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n", - __func__); + 
dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too low\n", + __func__); if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) || (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too high\n", + __func__); if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) || (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too low\n", + __func__); if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 power too high\n", + __func__); if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 power too low\n", + __func__); if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 power too high\n", + __func__); if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 power too low\n", + __func__); /* Bytes 9-10 and 11-12 are reserved */ /* Bytes 13-15 are vendor specific */ -- cgit v1.2.3-59-g8ed1b From 9c1a99c3882beb9e88ed41d914e75bab2d593926 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:40 -0700 Subject: IB/hfi1: Create common expected receive verbs/PSM code Declarations and code in common between verbs and PSM are now moved to exp_rcv.[ch]. Reviewed-by: Michael J. 
Ruhl Reviewed-by: Mitko Haralanov Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/Makefile | 2 +- drivers/infiniband/hw/hfi1/exp_rcv.c | 118 +++++++++++++++++++ drivers/infiniband/hw/hfi1/exp_rcv.h | 187 ++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/file_ops.c | 4 +- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 121 ------------------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 23 +--- drivers/infiniband/hw/hfi1/user_sdma.c | 38 ------ 7 files changed, 309 insertions(+), 184 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/exp_rcv.c create mode 100644 drivers/infiniband/hw/hfi1/exp_rcv.h (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 88085f65432e..66d538c033b0 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ - eprom.o file_ops.o firmware.o \ + eprom.o exp_rcv.o file_ops.o firmware.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c new file mode 100644 index 000000000000..08d13ed1b574 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -0,0 +1,118 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "exp_rcv.h" +#include "trace.h" + +/** + * exp_tid_group_init - initialize exp_tid_set + * @set - the set + */ +void hfi1_exp_tid_group_init(struct exp_tid_set *set) +{ + INIT_LIST_HEAD(&set->list); + set->count = 0; +} + +/** + * alloc_ctxt_rcv_groups - initialize expected receive groups + * @rcd - the context to add the groupings to + */ +int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + u32 tidbase; + struct tid_group *grp; + int i; + + hfi1_exp_tid_group_init(&rcd->tid_group_list); + hfi1_exp_tid_group_init(&rcd->tid_used_list); + hfi1_exp_tid_group_init(&rcd->tid_full_list); + + tidbase = rcd->expected_base; + for (i = 0; i < rcd->expected_count / + dd->rcv_entries.group_size; i++) { + grp = kzalloc(sizeof(*grp), GFP_KERNEL); + if (!grp) + goto bail; + grp->size = dd->rcv_entries.group_size; + grp->base = tidbase; + tid_group_add_tail(grp, &rcd->tid_group_list); + tidbase += dd->rcv_entries.group_size; + } + + return 0; +bail: + hfi1_free_ctxt_rcv_groups(rcd); + return -ENOMEM; +} + +/** + * free_ctxt_rcv_groups - free expected receive groups + * @rcd - the context to free + * + * The routine dismantles the expect receive linked + * list and clears any tids associated with the receive + * context. + * + * This should only be called for kernel contexts and the + * a base user context. + */ +void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) +{ + struct tid_group *grp, *gptr; + + WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list)); + WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list)); + + list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) { + tid_group_remove(grp, &rcd->tid_group_list); + kfree(grp); + } + + hfi1_clear_tids(rcd); +} diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h new file mode 100644 index 000000000000..c7d02bcddded --- /dev/null +++ b/drivers/infiniband/hw/hfi1/exp_rcv.h @@ -0,0 +1,187 @@ +#ifndef _HFI1_EXP_RCV_H +#define _HFI1_EXP_RCV_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "hfi.h" + +#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) + +#define EXP_TID_TIDLEN_MASK 0x7FFULL +#define EXP_TID_TIDLEN_SHIFT 0 +#define EXP_TID_TIDCTRL_MASK 0x3ULL +#define EXP_TID_TIDCTRL_SHIFT 20 +#define EXP_TID_TIDIDX_MASK 0x3FFULL +#define EXP_TID_TIDIDX_SHIFT 22 +#define EXP_TID_GET(tid, field) \ + (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK) + +#define EXP_TID_SET(field, value) \ + (((value) & EXP_TID_TID##field##_MASK) << \ + EXP_TID_TID##field##_SHIFT) +#define EXP_TID_CLEAR(tid, field) ({ \ + (tid) &= ~(EXP_TID_TID##field##_MASK << \ + EXP_TID_TID##field##_SHIFT); \ + }) +#define EXP_TID_RESET(tid, field, value) do { \ + EXP_TID_CLEAR(tid, field); \ + (tid) |= EXP_TID_SET(field, (value)); \ + } while (0) + +/* + * Define fields in the KDETH header so we can update the header + * template. + */ +#define KDETH_OFFSET_SHIFT 0 +#define KDETH_OFFSET_MASK 0x7fff +#define KDETH_OM_SHIFT 15 +#define KDETH_OM_MASK 0x1 +#define KDETH_TID_SHIFT 16 +#define KDETH_TID_MASK 0x3ff +#define KDETH_TIDCTRL_SHIFT 26 +#define KDETH_TIDCTRL_MASK 0x3 +#define KDETH_INTR_SHIFT 28 +#define KDETH_INTR_MASK 0x1 +#define KDETH_SH_SHIFT 29 +#define KDETH_SH_MASK 0x1 +#define KDETH_KVER_SHIFT 30 +#define KDETH_KVER_MASK 0x3 +#define KDETH_JKEY_SHIFT 0x0 +#define KDETH_JKEY_MASK 0xff +#define KDETH_HCRC_UPPER_SHIFT 16 +#define KDETH_HCRC_UPPER_MASK 0xff +#define KDETH_HCRC_LOWER_SHIFT 24 +#define KDETH_HCRC_LOWER_MASK 0xff + +#define KDETH_GET(val, field) \ + (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK) +#define KDETH_SET(dw, field, val) do { \ + u32 dwval = le32_to_cpu(dw); \ + dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \ + dwval |= (((val) & KDETH_##field##_MASK) << \ + KDETH_##field##_SHIFT); \ + dw = cpu_to_le32(dwval); \ + } while (0) + +#define KDETH_RESET(dw, field, val) ({ dw = 0; KDETH_SET(dw, field, val); }) + +/* KDETH OM multipliers and switch over point */ +#define KDETH_OM_SMALL 4 +#define KDETH_OM_SMALL_SHIFT 2 +#define KDETH_OM_LARGE 64 +#define KDETH_OM_LARGE_SHIFT 6 +#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) + +struct tid_group { + struct list_head list; + u32 base; + u8 size; + u8 used; + u8 map; +}; + +/* + * Write an "empty" RcvArray entry. 
+ * This function exists so the TID registaration code can use it + * to write to unused/unneeded entries and still take advantage + * of the WC performance improvements. The HFI will ignore this + * write to the RcvArray entry. + */ +static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) +{ + /* + * Doing the WC fill writes only makes sense if the device is + * present and the RcvArray has been mapped as WC memory. + */ + if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) + writeq(0, dd->rcvarray_wc + (index * 8)); +} + +static inline void tid_group_add_tail(struct tid_group *grp, + struct exp_tid_set *set) +{ + list_add_tail(&grp->list, &set->list); + set->count++; +} + +static inline void tid_group_remove(struct tid_group *grp, + struct exp_tid_set *set) +{ + list_del_init(&grp->list); + set->count--; +} + +static inline void tid_group_move(struct tid_group *group, + struct exp_tid_set *s1, + struct exp_tid_set *s2) +{ + tid_group_remove(group, s1); + tid_group_add_tail(group, s2); +} + +static inline struct tid_group *tid_group_pop(struct exp_tid_set *set) +{ + struct tid_group *grp = + list_first_entry(&set->list, struct tid_group, list); + list_del_init(&grp->list); + set->count--; + return grp; +} + +static inline u32 rcventry2tidinfo(u32 rcventry) +{ + u32 pair = rcventry & ~0x1; + + return EXP_TID_SET(IDX, pair >> 1) | + EXP_TID_SET(CTRL, 1 << (rcventry - pair)); +} + +int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); +void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); +void hfi1_exp_tid_group_init(struct exp_tid_set *set); + +#endif /* _HFI1_EXP_RCV_H */ diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 3158128d57e8..2dd8758f0644 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -804,7 +804,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) dd->rcd[uctxt->ctxt] = NULL; - hfi1_user_exp_rcv_grp_free(uctxt); + hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); uctxt->rcvwait_to = 0; @@ -1260,7 +1260,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) if (ret) goto setup_failed; - ret = hfi1_user_exp_rcv_grp_init(fd); + ret = hfi1_alloc_ctxt_rcv_groups(uctxt); if (ret) goto setup_failed; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index a8f0aa4722f6..6318e6ca1b18 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -51,14 +51,6 @@ #include "trace.h" #include "mmu_rb.h" -struct tid_group { - struct list_head list; - u32 base; - u8 size; - u8 used; - u8 map; -}; - struct tid_rb_node { struct mmu_rb_node mmu; unsigned long phys; @@ -75,8 +67,6 @@ struct tid_pageset { u16 count; }; -#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) - #define num_user_pages(vaddr, len) \ (1 + (((((unsigned long)(vaddr) + \ (unsigned long)(len) - 1) & PAGE_MASK) - \ @@ -109,88 +99,6 @@ static struct mmu_rb_ops tid_rb_ops = { .invalidate = tid_rb_invalidate }; -static inline u32 rcventry2tidinfo(u32 rcventry) -{ - u32 pair = rcventry & ~0x1; - - return EXP_TID_SET(IDX, pair >> 1) | - EXP_TID_SET(CTRL, 1 << (rcventry - pair)); -} - -static inline void exp_tid_group_init(struct exp_tid_set *set) -{ - INIT_LIST_HEAD(&set->list); - set->count = 0; -} - -static inline void tid_group_remove(struct tid_group *grp, - struct exp_tid_set *set) -{ - list_del_init(&grp->list); - set->count--; -} - -static inline void 
tid_group_add_tail(struct tid_group *grp, - struct exp_tid_set *set) -{ - list_add_tail(&grp->list, &set->list); - set->count++; -} - -static inline struct tid_group *tid_group_pop(struct exp_tid_set *set) -{ - struct tid_group *grp = - list_first_entry(&set->list, struct tid_group, list); - list_del_init(&grp->list); - set->count--; - return grp; -} - -static inline void tid_group_move(struct tid_group *group, - struct exp_tid_set *s1, - struct exp_tid_set *s2) -{ - tid_group_remove(group, s1); - tid_group_add_tail(group, s2); -} - -int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd) -{ - struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_devdata *dd = fd->dd; - u32 tidbase; - u32 i; - struct tid_group *grp, *gptr; - - exp_tid_group_init(&uctxt->tid_group_list); - exp_tid_group_init(&uctxt->tid_used_list); - exp_tid_group_init(&uctxt->tid_full_list); - - tidbase = uctxt->expected_base; - for (i = 0; i < uctxt->expected_count / - dd->rcv_entries.group_size; i++) { - grp = kzalloc(sizeof(*grp), GFP_KERNEL); - if (!grp) - goto grp_failed; - - grp->size = dd->rcv_entries.group_size; - grp->base = tidbase; - tid_group_add_tail(grp, &uctxt->tid_group_list); - tidbase += dd->rcv_entries.group_size; - } - - return 0; - -grp_failed: - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - - return -ENOMEM; -} - /* * Initialize context and file private data needed for Expected * receive caching. This needs to be done after the context has @@ -266,18 +174,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) return ret; } -void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt) -{ - struct tid_group *grp, *gptr; - - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - hfi1_clear_tids(uctxt); -} - void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; @@ -302,23 +198,6 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) fd->entry_to_rb = NULL; } -/* - * Write an "empty" RcvArray entry. - * This function exists so the TID registaration code can use it - * to write to unused/unneeded entries and still take advantage - * of the WC performance improvements. The HFI will ignore this - * write to the RcvArray entry. - */ -static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) -{ - /* - * Doing the WC fill writes only makes sense if the device is - * present and the RcvArray has been mapped as WC memory. 
- */ - if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) - writeq(0, dd->rcvarray_wc + (index * 8)); -} - /* * RcvArray entry allocation for Expected Receives is done by the * following algorithm: diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 5250c897298d..1bdc61be53cb 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -49,29 +49,8 @@ #include "hfi.h" -#define EXP_TID_TIDLEN_MASK 0x7FFULL -#define EXP_TID_TIDLEN_SHIFT 0 -#define EXP_TID_TIDCTRL_MASK 0x3ULL -#define EXP_TID_TIDCTRL_SHIFT 20 -#define EXP_TID_TIDIDX_MASK 0x3FFULL -#define EXP_TID_TIDIDX_SHIFT 22 -#define EXP_TID_GET(tid, field) \ - (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK) +#include "exp_rcv.h" -#define EXP_TID_SET(field, value) \ - (((value) & EXP_TID_TID##field##_MASK) << \ - EXP_TID_TID##field##_SHIFT) -#define EXP_TID_CLEAR(tid, field) ({ \ - (tid) &= ~(EXP_TID_TID##field##_MASK << \ - EXP_TID_TID##field##_SHIFT); \ - }) -#define EXP_TID_RESET(tid, field, value) do { \ - EXP_TID_CLEAR(tid, field); \ - (tid) |= EXP_TID_SET(field, (value)); \ - } while (0) - -void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt); -int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd); void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index fcadbb9978ca..8f7cfdd9e9ec 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -94,27 +94,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 /* Number of BTH.PSN bits used for sequence number in expected rcvs */ #define BTH_SEQ_MASK 0x7ffull -/* - * Define fields in the KDETH header so we can update the header - * template. - */ -#define KDETH_OFFSET_SHIFT 0 -#define KDETH_OFFSET_MASK 0x7fff -#define KDETH_OM_SHIFT 15 -#define KDETH_OM_MASK 0x1 -#define KDETH_TID_SHIFT 16 -#define KDETH_TID_MASK 0x3ff -#define KDETH_TIDCTRL_SHIFT 26 -#define KDETH_TIDCTRL_MASK 0x3 -#define KDETH_INTR_SHIFT 28 -#define KDETH_INTR_MASK 0x1 -#define KDETH_SH_SHIFT 29 -#define KDETH_SH_MASK 0x1 -#define KDETH_HCRC_UPPER_SHIFT 16 -#define KDETH_HCRC_UPPER_MASK 0xff -#define KDETH_HCRC_LOWER_SHIFT 24 -#define KDETH_HCRC_LOWER_MASK 0xff - #define AHG_KDETH_INTR_SHIFT 12 #define AHG_KDETH_SH_SHIFT 13 #define AHG_KDETH_ARRAY_SIZE 9 @@ -122,16 +101,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) -#define KDETH_GET(val, field) \ - (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK) -#define KDETH_SET(dw, field, val) do { \ - u32 dwval = le32_to_cpu(dw); \ - dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \ - dwval |= (((val) & KDETH_##field##_MASK) << \ - KDETH_##field##_SHIFT); \ - dw = cpu_to_le32(dwval); \ - } while (0) - #define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \ do { \ if ((idx) < ARRAY_SIZE((arr))) \ @@ -142,13 +111,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. 
Default: 12 return -ERANGE; \ } while (0) -/* KDETH OM multipliers and switch over point */ -#define KDETH_OM_SMALL 4 -#define KDETH_OM_SMALL_SHIFT 2 -#define KDETH_OM_LARGE 64 -#define KDETH_OM_LARGE_SHIFT 6 -#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) - /* Tx request flag bits */ #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ -- cgit v1.2.3-59-g8ed1b From f5114440c5491d4737b061c3a77195088bbaca52 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 15:59:46 -0700 Subject: IB/hfi1: Remove reading platform configuration from EFI variable Currently, platform configuration can be read from EFI variable for discrete cards. It will happen when reading from EPROM fails. EFI variables should not be queried for platform configuration in any scenario. Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index cbda9b189216..41307e474525 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -136,7 +136,6 @@ static void save_platform_config_fields(struct hfi1_devdata *dd) void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; - unsigned long size = 0; u8 *temp_platform_config = NULL; u32 esize; @@ -160,15 +159,6 @@ void get_platform_config(struct hfi1_devdata *dd) dd->platform_config.size = esize; return; } - /* fail, try EFI variable */ - - ret = read_hfi1_efi_var(dd, "configuration", &size, - (void **)&temp_platform_config); - if (!ret) { - dd->platform_config.data = temp_platform_config; - dd->platform_config.size = size; - return; - } } dd_dev_err(dd, "%s: Failed to get platform config, falling back to sub-optimal default file\n", -- cgit v1.2.3-59-g8ed1b From f523984fb85d16e098aac94642cc16803a4cc61f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:53 -0700 Subject: IB/hfi1: Use a template for tid reg/unreg This is the preferred way to add a duplicate trace call. 
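For reference, the shape of the change below is the standard trace event-class idiom, sketched here with hypothetical "foo" names that are not part of this patch: one DECLARE_EVENT_CLASS() carries the TP_STRUCT__entry/TP_fast_assign/TP_printk boilerplate exactly once, and each DEFINE_EVENT() then stamps out a named trace call that shares it.

#include <linux/tracepoint.h>	/* in a real driver this sits in a trace
				 * header with the usual TRACE_SYSTEM /
				 * CREATE_TRACE_POINTS boilerplate */

/* the class defines the record layout and formatting once */
DECLARE_EVENT_CLASS(foo_template,
	TP_PROTO(unsigned int ctxt, u32 value),
	TP_ARGS(ctxt, value),
	TP_STRUCT__entry(
		__field(unsigned int, ctxt)
		__field(u32, value)
	),
	TP_fast_assign(
		__entry->ctxt = ctxt;
		__entry->value = value;
	),
	TP_printk("[%u] value:%u", __entry->ctxt, __entry->value)
);

/* each event is now a three-line stanza; callers get trace_foo_reg()
 * and trace_foo_unreg() with guaranteed-identical output */
DEFINE_EVENT(foo_template, foo_reg,
	TP_PROTO(unsigned int ctxt, u32 value),
	TP_ARGS(ctxt, value));

DEFINE_EVENT(foo_template, foo_unreg,
	TP_PROTO(unsigned int ctxt, u32 value),
	TP_ARGS(ctxt, value));

The class keeps the paired events from drifting apart, which is exactly the failure mode that hand-copied TRACE_EVENT() blocks invite.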
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/trace_rx.h | 46 ++++++++++------------------------- 1 file changed, 13 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 05fc6d68ffe8..7af593827d37 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -138,7 +138,8 @@ TRACE_EVENT(hfi1_receive_interrupt, ) ); -TRACE_EVENT(hfi1_exp_tid_reg, +DECLARE_EVENT_CLASS( + hfi1_exp_tid_reg_unreg, TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, unsigned long va, unsigned long pa, dma_addr_t dma), @@ -172,38 +173,17 @@ TRACE_EVENT(hfi1_exp_tid_reg, ) ); -TRACE_EVENT(hfi1_exp_tid_unreg, - TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, - unsigned long va, unsigned long pa, dma_addr_t dma), - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), - TP_STRUCT__entry( - __field(unsigned int, ctxt) - __field(u16, subctxt) - __field(u32, rarr) - __field(u32, npages) - __field(unsigned long, va) - __field(unsigned long, pa) - __field(dma_addr_t, dma) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->rarr = rarr; - __entry->npages = npages; - __entry->va = va; - __entry->pa = pa; - __entry->dma = dma; - ), - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", - __entry->ctxt, - __entry->subctxt, - __entry->rarr, - __entry->npages, - __entry->pa, - __entry->va, - __entry->dma - ) - ); +DEFINE_EVENT( + hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, + unsigned long va, unsigned long pa, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); + +DEFINE_EVENT( + hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, + unsigned long va, unsigned long pa, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); TRACE_EVENT(hfi1_exp_tid_inval, TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr, -- cgit v1.2.3-59-g8ed1b From 8cb1021b806bda3f5fda6f3699e1f98df14245df Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:59 -0700 Subject: IB/hfi1: Add traces for TID operations This patch adds a trace for putting a TID and for writing the RcvArray CSR. The CSR access template can be easily extended for additional CSR readq/writeq calls. Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 16 ++------------- drivers/infiniband/hw/hfi1/trace_misc.h | 20 +++++++++++++++++++ drivers/infiniband/hw/hfi1/trace_rx.h | 35 +++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b8ee0e27dae6..937350d9deab 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9745,17 +9745,6 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd) return 0; } -static const char * const pt_names[] = { - "expected", - "eager", - "invalid" -}; - -static const char *pt_name(u32 type) -{ - return type >= ARRAY_SIZE(pt_names) ? 
"unknown" : pt_names[type]; -} - /* * index is the index into the receive array */ @@ -9777,15 +9766,14 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, type, index); goto done; } - - hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx", - pt_name(type), index, pa, (unsigned long)order); + trace_hfi1_put_tid(dd, index, type, pa, order); #define RT_ADDR_SHIFT 12 /* 4KB kernel address boundary */ reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK) << RCV_ARRAY_RT_ADDR_SHIFT; + trace_hfi1_write_rcvarray(base + (index * 8), reg); writeq(reg, base + (index * 8)); if (type == PT_EAGER) diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h index deac77ddaeab..8db2253523ff 100644 --- a/drivers/infiniband/hw/hfi1/trace_misc.h +++ b/drivers/infiniband/hw/hfi1/trace_misc.h @@ -72,6 +72,26 @@ TRACE_EVENT(hfi1_interrupt, __entry->src) ); +DECLARE_EVENT_CLASS( + hfi1_csr_template, + TP_PROTO(void __iomem *addr, u64 value), + TP_ARGS(addr, value), + TP_STRUCT__entry( + __field(void __iomem *, addr) + __field(u64, value) + ), + TP_fast_assign( + __entry->addr = addr; + __entry->value = value; + ), + TP_printk("addr %p value %llx", __entry->addr, __entry->value) +); + +DEFINE_EVENT( + hfi1_csr_template, hfi1_write_rcvarray, + TP_PROTO(void __iomem *addr, u64 value), + TP_ARGS(addr, value)); + #ifdef CONFIG_FAULT_INJECTION TRACE_EVENT(hfi1_fault_opcode, TP_PROTO(struct rvt_qp *qp, u8 opcode), diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 7af593827d37..84929578cfe6 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -52,6 +52,13 @@ #include "hfi.h" +#define tidtype_name(type) { PT_##type, #type } +#define show_tidtype(type) \ +__print_symbolic(type, \ + tidtype_name(EXPECTED), \ + tidtype_name(EAGER), \ + tidtype_name(INVALID)) \ + #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rx @@ -185,6 +192,34 @@ DEFINE_EVENT( unsigned long va, unsigned long pa, dma_addr_t dma), TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); +TRACE_EVENT( + hfi1_put_tid, + TP_PROTO(struct hfi1_devdata *dd, + u32 index, u32 type, unsigned long pa, u16 order), + TP_ARGS(dd, index, type, pa, order), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(unsigned long, pa); + __field(u32, index); + __field(u32, type); + __field(u16, order); + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd); + __entry->pa = pa; + __entry->index = index; + __entry->type = type; + __entry->order = order; + ), + TP_printk("[%s] type %s pa %lx index %u order %u", + __get_str(dev), + show_tidtype(__entry->type), + __entry->pa, + __entry->index, + __entry->order + ) +); + TRACE_EVENT(hfi1_exp_tid_inval, TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr, u32 npages, dma_addr_t dma), -- cgit v1.2.3-59-g8ed1b From 581d01aaaca1fbb9df83cf3337c77e85215dcc5b Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 9 Jun 2017 16:00:06 -0700 Subject: IB/qib: Replace deprecated pci functions with new API pci_enable_msix_range() and pci_disable_msix() have been deprecated. Updating to the new pci_alloc_irq_vectors() interface. Reviewed-by: Sebastian Sanchez Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib.h | 8 +- drivers/infiniband/hw/qib/qib_iba6120.c | 6 +- drivers/infiniband/hw/qib/qib_iba7220.c | 7 +- drivers/infiniband/hw/qib/qib_iba7322.c | 48 +++++----- drivers/infiniband/hw/qib/qib_pcie.c | 149 +++++++++++++------------------- 5 files changed, 98 insertions(+), 120 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index a3e21a25cea5..f9e1c69603a5 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1,7 +1,7 @@ #ifndef _QIB_KERNEL_H #define _QIB_KERNEL_H /* - * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -443,7 +443,7 @@ struct qib_irq_notify; #endif struct qib_msix_entry { - struct msix_entry msix; + int irq; void *arg; #ifdef CONFIG_INFINIBAND_QIB_DCA int dca; @@ -1433,9 +1433,9 @@ int qib_pcie_init(struct pci_dev *, const struct pci_device_id *); int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *, const struct pci_device_id *); void qib_pcie_ddcleanup(struct qib_devdata *); -int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *); +int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent); int qib_reinit_intr(struct qib_devdata *); -void qib_enable_intx(struct pci_dev *); +void qib_enable_intx(struct qib_devdata *dd); void qib_nomsi(struct qib_devdata *); void qib_nomsix(struct qib_devdata *); void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index e423b71e6ea0..46045fc28fa0 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2013 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -1838,7 +1838,7 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) bail: if (ret) { - if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) + if (qib_pcie_params(dd, dd->lbus_width, NULL)) qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; continuing anyway\n"); /* clear the reset error, init error/hwerror mask */ @@ -3562,7 +3562,7 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, if (qib_mini_init) goto bail; - if (qib_pcie_params(dd, 8, NULL, NULL)) + if (qib_pcie_params(dd, 8, NULL)) qib_dev_err(dd, "Failed to setup PCIe or interrupts; continuing anyway\n"); dd->cspec->irq = pdev->irq; /* save IRQ */ diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index c3679c48e61c..49cd6e3beb72 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2011 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 
@@ -2148,7 +2149,7 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) bail: if (ret) { - if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) + if (qib_pcie_params(dd, dd->lbus_width, NULL)) qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; continuing anyway\n"); @@ -3309,7 +3310,7 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd) qib_devinfo(dd->pcidev, "MSI interrupt not detected, trying INTx interrupts\n"); qib_7220_free_irq(dd); - qib_enable_intx(dd->pcidev); + qib_enable_intx(dd); /* * Some newer kernels require free_irq before disable_msi, * and irq can be changed during disable and INTx enable @@ -4619,7 +4620,7 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, minwidth = 8; /* x8 capable boards */ break; } - if (qib_pcie_params(dd, minwidth, NULL, NULL)) + if (qib_pcie_params(dd, minwidth, NULL)) qib_dev_err(dd, "Failed to setup PCIe or interrupts; continuing anyway\n"); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index bb2439fff8fa..2653064ce9e9 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two @@ -2841,10 +2841,10 @@ static void qib_7322_nomsix(struct qib_devdata *dd) reset_dca_notifier(dd, &dd->cspec->msix_entries[i]); #endif irq_set_affinity_hint( - dd->cspec->msix_entries[i].msix.vector, NULL); + dd->cspec->msix_entries[i].irq, NULL); free_cpumask_var(dd->cspec->msix_entries[i].mask); - free_irq(dd->cspec->msix_entries[i].msix.vector, - dd->cspec->msix_entries[i].arg); + free_irq(dd->cspec->msix_entries[i].irq, + dd->cspec->msix_entries[i].arg); } qib_nomsix(dd); } @@ -3336,9 +3336,9 @@ static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) qib_devinfo(dd->pcidev, "Disabling notifier on HCA %d irq %d\n", dd->unit, - m->msix.vector); + m->irq); irq_set_affinity_notifier( - m->msix.vector, + m->irq, NULL); m->notifier = NULL; } @@ -3354,7 +3354,7 @@ static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) int ret; m->notifier = n; - n->notify.irq = m->msix.vector; + n->notify.irq = m->irq; n->notify.notify = qib_irq_notifier_notify; n->notify.release = qib_irq_notifier_release; n->arg = m->arg; @@ -3500,10 +3500,21 @@ try_intx: - 1, QIB_DRV_NAME "%d (kctx)", dd->unit); } - ret = request_irq( - dd->cspec->msix_entries[msixnum].msix.vector, - handler, 0, dd->cspec->msix_entries[msixnum].name, - arg); + + dd->cspec->msix_entries[msixnum].irq = pci_irq_vector( + dd->pcidev, msixnum); + if (dd->cspec->msix_entries[msixnum].irq < 0) { + qib_dev_err(dd, + "Couldn't get MSIx irq (vec=%d): %d\n", + msixnum, + dd->cspec->msix_entries[msixnum].irq); + qib_7322_nomsix(dd); + goto try_intx; + } + ret = request_irq(dd->cspec->msix_entries[msixnum].irq, + handler, 0, + dd->cspec->msix_entries[msixnum].name, + arg); if (ret) { /* * Shouldn't happen since the enable said we could @@ -3512,7 +3523,7 @@ try_intx: qib_dev_err(dd, "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n", msixnum, - dd->cspec->msix_entries[msixnum].msix.vector, + dd->cspec->msix_entries[msixnum].irq, ret); qib_7322_nomsix(dd); goto try_intx; @@ -3548,7 +3559,7 @@ try_intx: dd->cspec->msix_entries[msixnum].mask); } irq_set_affinity_hint( - 
dd->cspec->msix_entries[msixnum].msix.vector, + dd->cspec->msix_entries[msixnum].irq, dd->cspec->msix_entries[msixnum].mask); } msixnum++; @@ -3744,7 +3755,6 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (msix_entries) { /* restore the MSIx vector address and data if saved above */ for (i = 0; i < msix_entries; i++) { - dd->cspec->msix_entries[i].msix.entry = i; if (!msix_vecsave || !msix_vecsave[2 * i]) continue; qib_write_kreg(dd, 2 * i + @@ -3762,8 +3772,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd) write_7322_initregs(dd); if (qib_pcie_params(dd, dd->lbus_width, - &dd->cspec->num_msix_entries, - dd->cspec->msix_entries)) + &dd->cspec->num_msix_entries)) qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; continuing anyway\n"); @@ -5195,7 +5204,7 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd) qib_devinfo(dd->pcidev, "MSIx interrupt not detected, trying INTx interrupts\n"); qib_7322_nomsix(dd); - qib_enable_intx(dd->pcidev); + qib_enable_intx(dd); qib_setup_7322_interrupt(dd, 0); return 1; } @@ -7327,10 +7336,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, if (!dd->cspec->msix_entries) tabsize = 0; - for (i = 0; i < tabsize; i++) - dd->cspec->msix_entries[i].msix.entry = i; - - if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries)) + if (qib_pcie_params(dd, 8, &tabsize)) qib_dev_err(dd, "Failed to setup PCIe or interrupts; continuing anyway\n"); /* may be less than we wanted, if not enough available */ diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index c379b8342a09..d90403e31a9d 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2010 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2008, 2009 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two @@ -187,112 +188,84 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) pci_set_drvdata(dd->pcidev, NULL); } -static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, - struct qib_msix_entry *qib_msix_entry) -{ - int ret; - int nvec = *msixcnt; - struct msix_entry *msix_entry; - int i; - - ret = pci_msix_vec_count(dd->pcidev); - if (ret < 0) - goto do_intx; - - nvec = min(nvec, ret); - - /* We can't pass qib_msix_entry array to qib_msix_setup - * so use a dummy msix_entry array and copy the allocated - * irq back to the qib_msix_entry array. */ - msix_entry = kcalloc(nvec, sizeof(*msix_entry), GFP_KERNEL); - if (!msix_entry) - goto do_intx; - - for (i = 0; i < nvec; i++) - msix_entry[i] = qib_msix_entry[i].msix; - - ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec); - if (ret < 0) - goto free_msix_entry; - else - nvec = ret; - - for (i = 0; i < nvec; i++) - qib_msix_entry[i].msix = msix_entry[i]; - - kfree(msix_entry); - *msixcnt = nvec; - return; - -free_msix_entry: - kfree(msix_entry); - -do_intx: - qib_dev_err( - dd, - "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n", - nvec, ret); - *msixcnt = 0; - qib_enable_intx(dd->pcidev); -} - /** * We save the msi lo and hi values, so we can restore them after * chip reset (the kernel PCI infrastructure doesn't yet handle that * correctly. 
*/ -static int qib_msi_setup(struct qib_devdata *dd, int pos) +static void qib_msi_setup(struct qib_devdata *dd, int pos) { struct pci_dev *pdev = dd->pcidev; u16 control; - int ret; - ret = pci_enable_msi(pdev); - if (ret) - qib_dev_err(dd, - "pci_enable_msi failed: %d, interrupts may not work\n", - ret); - /* continue even if it fails, we may still be OK... */ - - pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, - &dd->msi_lo); - pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, - &dd->msi_hi); + pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, &dd->msi_lo); + pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, &dd->msi_hi); pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &control); + /* now save the data (vector) info */ - pci_read_config_word(pdev, pos + ((control & PCI_MSI_FLAGS_64BIT) - ? 12 : 8), + pci_read_config_word(pdev, + pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8), &dd->msi_data); - return ret; } -int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, - struct qib_msix_entry *entry) +static int qib_allocate_irqs(struct qib_devdata *dd, u32 maxvec) +{ + unsigned int flags = PCI_IRQ_LEGACY; + + /* Check our capabilities */ + if (dd->pcidev->msix_cap) { + flags |= PCI_IRQ_MSIX; + } else { + if (dd->pcidev->msi_cap) { + flags |= PCI_IRQ_MSI; + /* Get msi_lo and msi_hi */ + qib_msi_setup(dd, dd->pcidev->msi_cap); + } + } + + if (!(flags & (PCI_IRQ_MSIX | PCI_IRQ_MSI))) + qib_dev_err(dd, "No PCI MSI or MSIx capability!\n"); + + return pci_alloc_irq_vectors(dd->pcidev, 1, maxvec, flags); +} + +int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent) { u16 linkstat, speed; - int pos = 0, ret = 1; + int nvec; + int maxvec; + int ret = 0; if (!pci_is_pcie(dd->pcidev)) { qib_dev_err(dd, "Can't find PCI Express capability!\n"); /* set up something... */ dd->lbus_width = 1; dd->lbus_speed = 2500; /* Gen1, 2.5GHz */ + ret = -1; goto bail; } - pos = dd->pcidev->msix_cap; - if (nent && *nent && pos) { - qib_msix_setup(dd, pos, nent, entry); - ret = 0; /* did it, either MSIx or INTx */ - } else { - pos = dd->pcidev->msi_cap; - if (pos) - ret = qib_msi_setup(dd, pos); - else - qib_dev_err(dd, "No PCI MSI or MSIx capability!\n"); + maxvec = (nent && *nent) ? *nent : 1; + nvec = qib_allocate_irqs(dd, maxvec); + if (nvec < 0) { + ret = nvec; + goto bail; + } + + /* + * If nent exists, make sure to record how many vectors were allocated + */ + if (nent) { + *nent = nvec; + + /* + * If we requested (nent) MSIX, but msix_enabled is not set, + * pci_alloc_irq_vectors() enabled INTx. + */ + if (!dd->pcidev->msix_enabled) + qib_dev_err(dd, + "no msix vectors allocated, using INTx\n"); } - if (!pos) - qib_enable_intx(dd->pcidev); pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); /* @@ -379,7 +352,7 @@ int qib_reinit_intr(struct qib_devdata *dd) ret = 1; bail: if (!ret && (dd->flags & QIB_HAS_INTX)) { - qib_enable_intx(dd->pcidev); + qib_enable_intx(dd); ret = 1; } @@ -397,7 +370,7 @@ bail: void qib_nomsi(struct qib_devdata *dd) { dd->msi_lo = 0; - pci_disable_msi(dd->pcidev); + pci_free_irq_vectors(dd->pcidev); } /* @@ -405,23 +378,21 @@ void qib_nomsi(struct qib_devdata *dd) */ void qib_nomsix(struct qib_devdata *dd) { - pci_disable_msix(dd->pcidev); + pci_free_irq_vectors(dd->pcidev); } /* * Similar to pci_intx(pdev, 1), except that we make sure * msi(x) is off. 
*/ -void qib_enable_intx(struct pci_dev *pdev) +void qib_enable_intx(struct qib_devdata *dd) { u16 cw, new; int pos; + struct pci_dev *pdev = dd->pcidev; - /* first, turn on INTx */ - pci_read_config_word(pdev, PCI_COMMAND, &cw); - new = cw & ~PCI_COMMAND_INTX_DISABLE; - if (new != cw) - pci_write_config_word(pdev, PCI_COMMAND, new); + if (pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_LEGACY) < 0) + qib_dev_err(dd, "Failed to enable INTx\n"); pos = pdev->msi_cap; if (pos) { -- cgit v1.2.3-59-g8ed1b From fe4e74eeb24286c730672e776ac4c2c3caa19137 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 9 Jun 2017 16:00:12 -0700 Subject: IB/hfi1: Initialize TID lists to avoid crash on cleanup The expected receive lists (tid_xxx_list) are not initialized until late in the receive context initialization. If an error happens before the initialization, a NULL pointer access will occur during cleanup. Initialize the lists sooner rather than later to avoid this Oops: IP: unlock_exp_tids.isra.11+0x26/0xd0 [hfi1] RIP: 0010:unlock_exp_tids.isra.11+0x26/0xd0 [hfi1] Call Trace: hfi1_user_exp_rcv_free+0x79/0xb0 [hfi1] hfi1_file_close+0x87/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/exp_rcv.c | 4 ---- drivers/infiniband/hw/hfi1/init.c | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index 08d13ed1b574..0af91675acc6 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -69,10 +69,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) struct tid_group *grp; int i; - hfi1_exp_tid_group_init(&rcd->tid_group_list); - hfi1_exp_tid_group_init(&rcd->tid_used_list); - hfi1_exp_tid_group_init(&rcd->tid_full_list); - tidbase = rcd->expected_base; for (i = 0; i < rcd->expected_count / dd->rcv_entries.group_size; i++) { diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 4a11d4da4c92..a00308ccf016 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -67,6 +67,7 @@ #include "aspm.h" #include "affinity.h" #include "vnic.h" +#include "exp_rcv.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -221,6 +222,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, hfi1_cdbg(PROC, "setting up context %u\n", ctxt); INIT_LIST_HEAD(&rcd->qp_wait_list); + hfi1_exp_tid_group_init(&rcd->tid_group_list); + hfi1_exp_tid_group_init(&rcd->tid_used_list); + hfi1_exp_tid_group_init(&rcd->tid_full_list); rcd->ppd = ppd; rcd->dd = dd; __set_bit(0, rcd->in_use_ctxts); -- cgit v1.2.3-59-g8ed1b From f683c80ca68e087b55c6f9ab6ca6beb88ebc6d69 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 9 Jun 2017 16:00:19 -0700 Subject: IB/hfi1: Resolve kernel panics by reference counting receive contexts Base receive contexts can be used by sub contexts. Because of this, resources for the context cannot be completely freed until all sub contexts are done using the base context. Introduce a reference count so that the base receive context can be freed only when all sub contexts are done with it. Use the provided function call for setting default send context integrity rather than the manual method.
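The counting itself is the stock kernel kref idiom. A minimal sketch with an illustrative "obj" type (not code from this patch; the real change embeds the kref in struct hfi1_ctxtdata) looks like this:

#include <linux/kref.h>
#include <linux/slab.h>

struct obj {
	struct kref kref;
	/* ... payload ... */
};

/* runs only when the last reference is dropped */
static void obj_release(struct kref *kref)
{
	struct obj *o = container_of(kref, struct obj, kref);

	kfree(o);
}

static struct obj *obj_alloc(void)
{
	struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

	if (o)
		kref_init(&o->kref);	/* refcount starts at 1 */
	return o;
}

/* every additional holder (here, each sub context) pairs one get
 * with one put; kref_put() invokes obj_release() on the final put */
static void obj_get(struct obj *o)
{
	kref_get(&o->kref);
}

static int obj_put(struct obj *o)
{
	return kref_put(&o->kref, obj_release);
}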
The cleanup path does not set all variables back to NULL after freeing resources. Since the cleanup code can get called more than once (e.g. during context close and on the error path), it is necessary to make sure that all the variables are NULLed. Possible crashes are: BUG: unable to handle kernel paging request at 0000000001908900 IP: read_csr+0x24/0x30 [hfi1] RIP: 0010:read_csr+0x24/0x30 [hfi1] Call Trace: sc_disable+0x40/0x110 [hfi1] hfi1_file_close+0x16f/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 or kernel BUG at mm/slub.c:3877! RIP: 0010:kfree+0x14f/0x170 Call Trace: hfi1_free_ctxtdata+0x19a/0x2b0 [hfi1] ? hfi1_user_exp_rcv_grp_free+0x73/0x80 [hfi1] hfi1_file_close+0x20f/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 Fixes: Commit 62239fc6e554 ("IB/hfi1: Clean up on context initialization failure") Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 39 +++++++++++++++-------- drivers/infiniband/hw/hfi1/hfi.h | 11 +++---- drivers/infiniband/hw/hfi1/init.c | 58 ++++++++++++++++++++++++++++------ drivers/infiniband/hw/hfi1/vnic_main.c | 11 ++++--- 4 files changed, 85 insertions(+), 34 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 2dd8758f0644..bbf80b1dd9d9 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -774,6 +774,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) *ev = 0; __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); + fdata->uctxt = NULL; + hfi1_rcd_put(uctxt); /* fdata reference */ if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { mutex_unlock(&hfi1_mutex); goto done; @@ -794,16 +796,15 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) /* Clear the context's J_KEY */ hfi1_clear_ctxt_jkey(dd, uctxt->ctxt); /* - * Reset context integrity checks to default. - * (writes to CSRs probably belong in chip.c) + * If a send context is allocated, reset context integrity + * checks to default and disable the send context.
*/ - write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, - hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type)); - sc_disable(uctxt->sc); + if (uctxt->sc) { + set_pio_integrity(uctxt->sc); + sc_disable(uctxt->sc); + } spin_unlock_irqrestore(&dd->uctxt_lock, flags); - dd->rcd[uctxt->ctxt] = NULL; - hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); @@ -816,8 +817,11 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_stats.sps_ctxts--; if (++dd->freectxts == dd->num_user_contexts) aspm_enable_all(dd); + + /* _rcd_put() should be done after releasing mutex */ + dd->rcd[uctxt->ctxt] = NULL; mutex_unlock(&hfi1_mutex); - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); /* dd reference */ done: mmdrop(fdata->mm); kobject_put(&dd->kobj); @@ -887,16 +891,17 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) ret = wait_event_interruptible(fd->uctxt->wait, !test_bit( HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags)); - if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) { - clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); - return -ENOMEM; - } + if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) + ret = -ENOMEM; + /* The only thing a sub context needs is the user_xxx stuff */ if (!ret) ret = init_user_ctxt(fd); - if (ret) + if (ret) { clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); + hfi1_rcd_put(fd->uctxt); + } } else if (!ret) { ret = setup_base_ctxt(fd); if (fd->uctxt->subctxt_cnt) { @@ -961,6 +966,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, fd->uctxt = uctxt; fd->subctxt = subctxt; + + hfi1_rcd_get(uctxt); __set_bit(fd->subctxt, uctxt->in_use_ctxts); return 1; @@ -1069,11 +1076,14 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, aspm_disable_all(dd); fd->uctxt = uctxt; + /* Count the reference for the fd */ + hfi1_rcd_get(uctxt); + return 0; ctxdata_free: dd->rcd[ctxt] = NULL; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); return ret; } @@ -1273,6 +1283,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) return 0; setup_failed: + /* Call _free_ctxtdata, not _rcd_put(). We still need the context. */ hfi1_free_ctxtdata(dd, uctxt); return ret; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index bca781c3b5ac..1a33a5087734 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -213,11 +213,9 @@ struct hfi1_ctxtdata { /* dynamic receive available interrupt timeout */ u32 rcvavail_timeout; - /* - * number of opens (including slave sub-contexts) on this instance - * (ignoring forks, dup, etc. 
for now) - */ - int cnt; + /* Reference count the base context usage */ + struct kref kref; + /* Device context index */ unsigned ctxt; /* @@ -1291,7 +1289,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); - +int hfi1_rcd_put(struct hfi1_ctxtdata *rcd); +void hfi1_rcd_get(struct hfi1_ctxtdata *rcd); int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index a00308ccf016..dfdb4126ca05 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -191,15 +191,45 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) nomem: ret = -ENOMEM; - if (dd->rcd) { - for (i = 0; i < dd->num_rcv_contexts; ++i) - hfi1_free_ctxtdata(dd, dd->rcd[i]); - } + for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) + hfi1_rcd_put(dd->rcd[i]); + + /* All the contexts should be freed, free the array */ kfree(dd->rcd); dd->rcd = NULL; return ret; } +/* + * Helper routines for the receive context reference count (rcd and uctxt) + */ +static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) +{ + kref_init(&rcd->kref); +} + +static void hfi1_rcd_free(struct kref *kref) +{ + struct hfi1_ctxtdata *rcd = + container_of(kref, struct hfi1_ctxtdata, kref); + + hfi1_free_ctxtdata(rcd->dd, rcd); + kfree(rcd); +} + +int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) +{ + if (rcd) + return kref_put(&rcd->kref, hfi1_rcd_free); + + return 0; +} + +void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) +{ + kref_get(&rcd->kref); +} + /* * Common code for user and kernel context setup. */ @@ -332,6 +362,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, if (!rcd->opstats) goto bail; } + + hfi1_rcd_init(rcd); } return rcd; bail: @@ -931,14 +963,11 @@ static void shutdown_device(struct hfi1_devdata *dd) * @rcd: the ctxtdata structure * * free up any allocated data for a context - * This should not touch anything that would affect a simultaneous - * re-allocation of context data, because it is called after hfi1_mutex - * is released (and can be called from reinit as well). * It should never change any chip state, or global driver state. 
*/ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { - unsigned e; + u32 e; if (!rcd) return; @@ -957,6 +986,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) /* all the RcvArray entries should have been cleared by now */ kfree(rcd->egrbufs.rcvtids); + rcd->egrbufs.rcvtids = NULL; for (e = 0; e < rcd->egrbufs.alloced; e++) { if (rcd->egrbufs.buffers[e].dma) @@ -966,13 +996,21 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) rcd->egrbufs.buffers[e].dma); } kfree(rcd->egrbufs.buffers); + rcd->egrbufs.alloced = 0; + rcd->egrbufs.buffers = NULL; sc_free(rcd->sc); + rcd->sc = NULL; + vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); kfree(rcd->opstats); - kfree(rcd); + + rcd->subctxt_uregbase = NULL; + rcd->subctxt_rcvegrbuf = NULL; + rcd->subctxt_rcvhdr_base = NULL; + rcd->opstats = NULL; } /* @@ -1366,7 +1404,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) tmp[ctxt] = NULL; /* debugging paranoia */ if (rcd) { hfi1_clear_tids(rcd); - hfi1_free_ctxtdata(dd, rcd); + hfi1_rcd_put(rcd); } } kfree(tmp); diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index b601c2929f8f..950c1b4df442 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -156,11 +156,11 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, return ret; bail: /* - * hfi1_free_ctxtdata() also releases send_context - * structure if uctxt->sc is not null + * hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will + * release send_context structure if uctxt->sc is not null */ dd->rcd[uctxt->ctxt] = NULL; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); return ret; } @@ -208,7 +208,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, hfi1_clear_ctxt_pkey(dd, uctxt); hfi1_stats.sps_ctxts--; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); } void hfi1_vnic_setup(struct hfi1_devdata *dd) @@ -751,6 +751,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); if (rc) break; + hfi1_rcd_get(dd->vnic.ctxt[i]); dd->vnic.ctxt[i]->vnic_q_idx = i; } @@ -762,6 +763,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) */ while (i-- > dd->vnic.num_ctxt) { deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + hfi1_rcd_put(dd->vnic.ctxt[i]); dd->vnic.ctxt[i] = NULL; } goto alloc_fail; @@ -791,6 +793,7 @@ static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) if (--dd->vnic.num_vports == 0) { for (i = 0; i < dd->vnic.num_ctxt; i++) { deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + hfi1_rcd_put(dd->vnic.ctxt[i]); dd->vnic.ctxt[i] = NULL; } hfi1_deinit_vnic_rsm(dd); -- cgit v1.2.3-59-g8ed1b From bc5214ee29220251e5507882696ded5ca183b169 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 16:00:26 -0700 Subject: IB/hfi1: Handle missing magic values in config file The driver does not check whether a proper configuration file exists in EPROM, and treats an empty partition as a possibly valid configuration, preventing fallback to the default firmware. Change the EPROM read function to treat a missing magic number as a read error.
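The intended behaviour is easiest to read as a commented sketch of the new path (simplified from the hunk below; the EPROM read and the allocation of the P1_SIZE buffer are elided):

/* scan for the image magic that trails the actual data */
p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
if (!p) {
	/* no magic anywhere in the partition: previously this was
	 * silently accepted as a full-size "configuration"; now it
	 * is reported as a read error so the caller can fall back
	 * to the default firmware file */
	kfree(buffer);
	return -ENOENT;
}
*data = buffer;
*size = p - buffer;	/* payload ends where the trailing magic begins */
return 0;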
Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/eprom.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index 26da124c88e2..d46b17107901 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -250,7 +250,6 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, { void *buffer; void *p; - u32 length; int ret; buffer = kmalloc(P1_SIZE, GFP_KERNEL); @@ -265,13 +264,13 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, /* scan for image magic that may trail the actual data */ p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); - if (p) - length = p - buffer; - else - length = P1_SIZE; + if (!p) { + kfree(buffer); + return -ENOENT; + } *data = buffer; - *size = length; + *size = p - buffer; return 0; } -- cgit v1.2.3-59-g8ed1b From 4619ed40d916d32c253cabaeffa32ac98e0876fa Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 1 Jun 2017 09:24:34 +0300 Subject: RDMA/bnxt_re: Delete unsupported modify_port function There is no need to always return zero for a function that is not supported. The IB stack treats uninitialized ib_device->functions as not implemented. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Reviewed-by: Yuval Shaia Acked-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 17 ----------------- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 3 --- drivers/infiniband/hw/bnxt_re/main.c | 1 - 3 files changed, 21 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index f0e01b3ac711..5dc6e7ce3ab9 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -313,23 +313,6 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, return 0; } -int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num, - int port_modify_mask, - struct ib_port_modify *port_modify) -{ - switch (port_modify_mask) { - case IB_PORT_SHUTDOWN: - break; - case IB_PORT_INIT_TYPE: - break; - case IB_PORT_RESET_QKEY_CNTR: - break; - default: - break; - } - return 0; -} - int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index a0bb7e33d7ca..1df11ed272ea 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -141,9 +141,6 @@ int bnxt_re_modify_device(struct ib_device *ibdev, struct ib_device_modify *device_modify); int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr); -int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num, - int port_modify_mask, - struct ib_port_modify *port_modify); int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable); int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index ceae2d92fb08..deec8002dd36 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -474,7 +474,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->modify_device = bnxt_re_modify_device; ibdev->query_port = bnxt_re_query_port; - ibdev->modify_port = bnxt_re_modify_port; ibdev->get_port_immutable = bnxt_re_get_port_immutable; ibdev->query_pkey = bnxt_re_query_pkey; ibdev->query_gid = bnxt_re_query_gid; -- cgit v1.2.3-59-g8ed1b From f201777a5382e0a4152342365e928c1de99815ad Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 7 Jun 2017 15:42:02 -0500 Subject: IB/qib: remove duplicate code Remove duplicate code. Addresses-Coverity-ID: 1226951 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_mad.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index da295e0392ed..c5eaa3c0ed24 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -874,8 +874,6 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, ib_dispatch_event(&event); } - ret = subn_get_portinfo(smp, ibdev, port); - /* restore re-reg bit per o14-12.2.1 */ pip->clientrereg_resv_subnetto |= clientrereg; -- cgit v1.2.3-59-g8ed1b From 44b0b7455f7a6c4bff3cda804719d2ed4494b4da Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 14 Jun 2017 23:13:33 +0300 Subject: IB/usnic: Implement get_netdev hook usnic's get_netdev hook for struct ib_device is missing - add it. Signed-off-by: Yuval Shaia Reviewed-by: Christian Benvenuti Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 1 + drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 10 ++++++++++ drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 1 + 3 files changed, 12 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index c0c1e8b027b1..80577b9d2796 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -409,6 +409,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.query_port = usnic_ib_query_port; us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey; us_ibdev->ib_dev.query_gid = usnic_ib_query_gid; + us_ibdev->ib_dev.get_netdev = usnic_get_netdev; us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer; us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd; us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 4996984885c2..f9dc1e80c3b7 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -424,6 +424,16 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, return 0; } +struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num) +{ + struct usnic_ib_dev *us_ibdev = to_usdev(device); + + if (us_ibdev->netdev) + dev_hold(us_ibdev->netdev); + + return us_ibdev->netdev; +} + int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 172e43b6fa95..1fda94425116 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -48,6 +48,7 @@ int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, struct ib_qp_init_attr *qp_init_attr); int 
usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); +struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num); int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, -- cgit v1.2.3-59-g8ed1b From d41861942fc55c14b6280d9568a0d0112037f065 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 14 Jun 2017 23:13:34 +0300 Subject: IB/core: Add generic function to extract IB speed from netdev The logic of retrieving the netdev speed from a net_device and translating it to an IB speed is implemented in the rxe, usnic and bnxt drivers. Define a new function that merges them all. Signed-off-by: Yuval Shaia Reviewed-by: Christian Benvenuti Reviewed-by: Selvin Xavier Reviewed-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 55 ++++++++++++++++++++++++++++ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 49 ++----------------------- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 31 +++------------- drivers/infiniband/sw/rxe/rxe_verbs.c | 53 +++------------------------ include/rdma/ib_verbs.h | 1 + 5 files changed, 73 insertions(+), 118 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index fb98ed67d5bc..40de69bf07cd 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1302,6 +1302,61 @@ int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr, } EXPORT_SYMBOL(ib_modify_qp_with_udata); +int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width) +{ + int rc; + u32 netdev_speed; + struct net_device *netdev; + struct ethtool_link_ksettings lksettings; + + if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) + return -EINVAL; + + if (!dev->get_netdev) + return -EOPNOTSUPP; + + netdev = dev->get_netdev(dev, port_num); + if (!netdev) + return -ENODEV; + + rtnl_lock(); + rc = __ethtool_get_link_ksettings(netdev, &lksettings); + rtnl_unlock(); + + dev_put(netdev); + + if (!rc) { + netdev_speed = lksettings.base.speed; + } else { + netdev_speed = SPEED_1000; + pr_warn("%s speed is unknown, defaulting to %d\n", netdev->name, + netdev_speed); + } + + if (netdev_speed <= SPEED_1000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_SDR; + } else if (netdev_speed <= SPEED_10000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_FDR10; + } else if (netdev_speed <= SPEED_20000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_DDR; + } else if (netdev_speed <= SPEED_25000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_40000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_FDR10; + } else { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_EDR; + } + + return 0; +} +EXPORT_SYMBOL(ib_get_eth_speed); + int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 5dc6e7ce3ab9..b10e1a6dce84 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++
b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -223,50 +223,6 @@ int bnxt_re_modify_device(struct ib_device *ibdev, return 0; } -static void __to_ib_speed_width(struct net_device *netdev, u8 *speed, u8 *width) -{ - struct ethtool_link_ksettings lksettings; - u32 espeed; - - if (netdev->ethtool_ops && netdev->ethtool_ops->get_link_ksettings) { - memset(&lksettings, 0, sizeof(lksettings)); - rtnl_lock(); - netdev->ethtool_ops->get_link_ksettings(netdev, &lksettings); - rtnl_unlock(); - espeed = lksettings.base.speed; - } else { - espeed = SPEED_UNKNOWN; - } - switch (espeed) { - case SPEED_1000: - *speed = IB_SPEED_SDR; - *width = IB_WIDTH_1X; - break; - case SPEED_10000: - *speed = IB_SPEED_QDR; - *width = IB_WIDTH_1X; - break; - case SPEED_20000: - *speed = IB_SPEED_DDR; - *width = IB_WIDTH_4X; - break; - case SPEED_25000: - *speed = IB_SPEED_EDR; - *width = IB_WIDTH_1X; - break; - case SPEED_40000: - *speed = IB_SPEED_QDR; - *width = IB_WIDTH_4X; - break; - case SPEED_50000: - break; - default: - *speed = IB_SPEED_SDR; - *width = IB_WIDTH_1X; - break; - } -} - /* Port */ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr) @@ -308,8 +264,9 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, * IB stack to avoid race in the NETDEV_UNREG path */ if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) - __to_ib_speed_width(rdev->netdev, &port_attr->active_speed, - &port_attr->active_width); + if (ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed, + &port_attr->active_width)) + return -EINVAL; return 0; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index f9dc1e80c3b7..e5f57dd49980 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -226,27 +226,6 @@ static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) spin_unlock(&vf->lock); } -static void eth_speed_to_ib_speed(int speed, u8 *active_speed, - u8 *active_width) -{ - if (speed <= 10000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_FDR10; - } else if (speed <= 20000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_DDR; - } else if (speed <= 30000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_QDR; - } else if (speed <= 40000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_FDR10; - } else { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_EDR; - } -} - static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd) { if (cmd.spec.trans_type <= USNIC_TRANSPORT_UNKNOWN || @@ -326,12 +305,16 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); - struct ethtool_link_ksettings cmd; usnic_dbg("\n"); mutex_lock(&us_ibdev->usdev_lock); - __ethtool_get_link_ksettings(us_ibdev->netdev, &cmd); + if (ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width)) { + mutex_unlock(&us_ibdev->usdev_lock); + return -EINVAL; + } + /* props being zeroed by the caller, avoid zeroing it here */ props->lid = 0; @@ -355,8 +338,6 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; props->qkey_viol_cntr = 0; - eth_speed_to_ib_speed(cmd.base.speed, &props->active_speed, - &props->active_width); props->max_mtu = IB_MTU_4096; props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu); /* Userspace will adjust for hdrs */ diff --git
a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index af90a7d42b96..e6c10e43a6d7 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -51,40 +51,16 @@ static int rxe_query_device(struct ib_device *dev, return 0; } -static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed, - u8 *active_width) -{ - if (speed <= 1000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_SDR; - } else if (speed <= 10000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_FDR10; - } else if (speed <= 20000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_DDR; - } else if (speed <= 30000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_QDR; - } else if (speed <= 40000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_FDR10; - } else { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_EDR; - } -} - static int rxe_query_port(struct ib_device *dev, u8 port_num, struct ib_port_attr *attr) { struct rxe_dev *rxe = to_rdev(dev); struct rxe_port *port; - u32 speed; + int rc = -EINVAL; if (unlikely(port_num != 1)) { pr_warn("invalid port_number %d\n", port_num); - goto err1; + goto out; } port = &rxe->port; @@ -93,29 +69,12 @@ static int rxe_query_port(struct ib_device *dev, *attr = port->attr; mutex_lock(&rxe->usdev_lock); - if (rxe->ndev->ethtool_ops->get_link_ksettings) { - struct ethtool_link_ksettings ks; - - rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks); - speed = ks.base.speed; - } else if (rxe->ndev->ethtool_ops->get_settings) { - struct ethtool_cmd cmd; - - rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd); - speed = cmd.speed; - } else { - pr_warn("%s speed is unknown, defaulting to 1000\n", - rxe->ndev->name); - speed = 1000; - } - rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, - &attr->active_width); + rc = ib_get_eth_speed(dev, port_num, &attr->active_speed, + &attr->active_width); mutex_unlock(&rxe->usdev_lock); - return 0; - -err1: - return -EINVAL; +out: + return rc; } static int rxe_query_gid(struct ib_device *device, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b5732432bb29..68d947dac9a2 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3555,6 +3555,7 @@ void ib_drain_qp(struct ib_qp *qp); int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr); +int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width); static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) { -- cgit v1.2.3-59-g8ed1b From e1267b01240ab031a9c9dd84c1ffeb23670b590f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 26 Jun 2017 08:58:22 +0300 Subject: RDMA: Remove useless MODULE_VERSION All modules in drivers/infiniband defined and used MODULE_VERSION, which was pointless because the kernel version describes their state more accurately than those arbitrary numbers.
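For illustration, a driver's module metadata block after this series reduces to something like the following (the strings are generic placeholders, not taken from any one driver); the kernel release recorded in the module's vermagic, as reported by modinfo, is what identifies the code state now:

	MODULE_AUTHOR("Example Author");
	MODULE_DESCRIPTION("Example RDMA driver");
	MODULE_LICENSE("Dual BSD/GPL");
	/* no MODULE_VERSION(): the kernel version is authoritative */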
Signed-off-by: Leon Romanovsky Acked-by: Sagi Grimberg Acked-by: Dennis Dalessandro Acked-by: Selvin Xavier Acked-by: Ram Amrani Reviewed-by: Johannes Thumshirn Acked-by: Adit Ranadive Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/main.c | 1 - drivers/infiniband/hw/cxgb3/iwch.c | 1 - drivers/infiniband/hw/cxgb4/device.c | 1 - drivers/infiniband/hw/hfi1/driver.c | 1 - drivers/infiniband/hw/i40iw/i40iw_main.c | 1 - drivers/infiniband/hw/mlx4/main.c | 1 - drivers/infiniband/hw/mlx5/main.c | 1 - drivers/infiniband/hw/mthca/mthca_main.c | 1 - drivers/infiniband/hw/nes/nes.c | 1 - drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 - drivers/infiniband/hw/qedr/main.c | 1 - drivers/infiniband/hw/qedr/qedr.h | 1 - drivers/infiniband/hw/qib/qib_driver.c | 1 - drivers/infiniband/hw/usnic/usnic_ib_main.c | 1 - drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 1 - drivers/infiniband/sw/rxe/rxe.c | 1 - drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 - drivers/infiniband/ulp/iser/iscsi_iser.c | 1 - drivers/infiniband/ulp/isert/ib_isert.c | 1 - drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 1 - drivers/infiniband/ulp/srp/ib_srp.c | 1 - 21 files changed, 21 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index deec8002dd36..fa123e9d6b59 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -70,7 +70,6 @@ static char version[] = MODULE_AUTHOR("Eddie Wai "); MODULE_DESCRIPTION(BNXT_RE_DESC " Driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(ROCE_DRV_MODULE_VERSION); /* globals */ static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list); diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 47b2ce2ef203..591de319c178 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -45,7 +45,6 @@ MODULE_AUTHOR("Boyd Faulkner, Steve Wise"); MODULE_DESCRIPTION("Chelsio T3 RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); static void open_rnic_dev(struct t3cdev *); static void close_rnic_dev(struct t3cdev *); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ae0b79aeea2e..fc886f81b885 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -44,7 +44,6 @@ MODULE_AUTHOR("Steve Wise"); MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); static int allow_db_fc_on_t5; module_param(allow_db_fc_on_t5, int, 0644); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a50870e455a3..79a8b05855ac 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -96,7 +96,6 @@ MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("Intel Omni-Path Architecture driver"); -MODULE_VERSION(HFI1_DRIVER_VERSION); /* * MAX_PKT_RCV is the max # of packets processed per receive interrupt.
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index ae8463ff59a7..cc742c3132c6 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -77,7 +77,6 @@ MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp 1 or 2"); MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Ethernet Connection X722 iWARP RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); static struct i40e_client i40iw_client; static char i40iw_client_name[I40E_CLIENT_STR_LENGTH] = "i40iw"; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d1b43cbbfea7..a422df781caf 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -70,7 +70,6 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); int mlx4_ib_sm_guid_assign = 0; module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a7f2e60085c4..faafea0b7c2f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -65,7 +65,6 @@ MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRIVER_VERSION); static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index c309e5c96383..1b10d21c8026 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -49,7 +49,6 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index a30aa6527f7e..26262919f5d1 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -63,7 +63,6 @@ MODULE_AUTHOR("NetEffect"); MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); int interrupt_mod_interval = 0; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 57c9a2ad0260..757c65816295 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -58,7 +58,6 @@ #include "ocrdma_stats.h" #include -MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index b5851fd67d4f..0ae30f5c8cbc 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -47,7 +47,6 @@ MODULE_DESCRIPTION("QLogic 40G/100G ROCE Driver"); MODULE_AUTHOR("QLogic Corporation"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(QEDR_MODULE_VERSION); #define QEDR_WQ_MULTIPLIER_DFT (3) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index ab7784bfdac6..392e76e26840 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -41,7 +41,6 @@ #include #include "qedr_hsi_rdma.h" -#define 
QEDR_MODULE_VERSION "8.10.10.0" #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" #define DP_NAME(dev) ((dev)->ibdev.name) diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 2b5982f743ef..719906a9fd51 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -66,7 +66,6 @@ MODULE_PARM_DESC(compat_ddr_negotiate, MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel "); MODULE_DESCRIPTION("Intel IB driver"); -MODULE_VERSION(QIB_DRIVER_VERSION); /* * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 80577b9d2796..e69c8e476a2b 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -721,7 +721,6 @@ static void __exit usnic_ib_destroy(void) MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver"); MODULE_AUTHOR("Upinder Malhi "); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); module_param(usnic_log_lvl, uint, S_IRUGO | S_IWUSR); module_param(usnic_ib_share_vf, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(usnic_log_lvl, " Off=0, Err=1, Info=2, Debug=3"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 34ebc7615411..e76565280afa 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -1119,5 +1119,4 @@ module_exit(pvrdma_cleanup); MODULE_AUTHOR("VMware, Inc"); MODULE_DESCRIPTION("VMware Paravirtual RDMA driver"); -MODULE_VERSION(DRV_VERSION); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index c21c913f911a..8c3d30b3092d 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -38,7 +38,6 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); MODULE_DESCRIPTION("Soft RDMA transport"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION("0.2"); /* free resources for all ports on a device */ static void rxe_cleanup_ports(struct rxe_dev *rxe) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 4ce315c92b48..7dcdbbacbf46 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -60,7 +60,6 @@ const char ipoib_driver_version[] = DRV_VERSION; MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE; int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 37b33d708c2d..19624e023ebd 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -77,7 +77,6 @@ MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz"); -MODULE_VERSION(DRV_VER); static struct scsi_host_template iscsi_iser_sht; static struct iscsi_transport iscsi_iser_transport; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 0e662656ef42..ceabdb85df8b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2710,7 +2710,6 @@ static void __exit isert_exit(void) } 
MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure"); -MODULE_VERSION("1.0"); MODULE_AUTHOR("nab@Linux-iSCSI.org"); MODULE_LICENSE("GPL"); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index cf768dd78d1b..6beddef26018 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -1053,4 +1053,3 @@ module_exit(opa_vnic_deinit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("Intel OPA Virtual Network driver"); -MODULE_VERSION(DRV_VERSION); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 2354c742caa1..fa5ccdb3bb2a 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -62,7 +62,6 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); MODULE_INFO(release_date, DRV_RELDATE); #if !defined(CONFIG_DYNAMIC_DEBUG) -- cgit v1.2.3-59-g8ed1b From 5fac5b1b297fe1702f4c65d3b16aefb7d52967ab Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 29 Jun 2017 12:28:10 -0700 Subject: RDMA/bnxt_re: Add vlan tag for untagged RoCE traffic when PFC is configured Current implementation does not program vlan header insertion in RoCE packet if no vlan is configured. Firmware does not add prority when there is no vlan tag in the packet. Modify the code to insert vlan header when PFC is enabled on the interface. Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/main.c | 53 ++++++++++++++++++--- drivers/infiniband/hw/bnxt_re/qplib_res.c | 10 ++++ drivers/infiniband/hw/bnxt_re/qplib_res.h | 2 + drivers/infiniband/hw/bnxt_re/qplib_sp.c | 77 ++++++++++++++++++++++++------- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 2 + drivers/infiniband/hw/bnxt_re/roce_hsi.h | 4 +- 6 files changed, 124 insertions(+), 24 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index fa123e9d6b59..22527e35e13d 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -833,6 +833,42 @@ static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev) mutex_unlock(&rdev->qp_lock); } +static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; + struct bnxt_qplib_gid gid; + u16 gid_idx, index; + int rc = 0; + + if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) + return 0; + + if (!sgid_tbl) { + dev_err(rdev_to_dev(rdev), "QPLIB: SGID table not allocated"); + return -EINVAL; + } + + for (index = 0; index < sgid_tbl->active; index++) { + gid_idx = sgid_tbl->hw_id[index]; + + if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero, + sizeof(bnxt_qplib_gid_zero))) + continue; + /* need to modify the VLAN enable setting of non VLAN GID only + * as setting is done for VLAN GID while adding GID + */ + if (sgid_tbl->vlan[index]) + continue; + + memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid)); + + rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx, + rdev->qplib_res.netdev->dev_addr); + } + + return rc; +} + static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev) { u32 prio_map = 0, tmp_map = 0; @@ -852,8 +888,6 @@ static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev) tmp_map = 
dcb_ieee_getapp_mask(netdev, &app); prio_map |= tmp_map; - if (!prio_map) - prio_map = -EFAULT; return prio_map; } @@ -879,10 +913,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) int rc; /* Get priority for roce */ - rc = bnxt_re_get_priority_mask(rdev); - if (rc < 0) - return rc; - prio_map = (u8)rc; + prio_map = bnxt_re_get_priority_mask(rdev); if (prio_map == rdev->cur_prio_map) return 0; @@ -904,6 +935,16 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) return rc; } + /* Actual priorities are not programmed as they are already + * done by L2 driver; just enable or disable priority vlan tagging + */ + if ((prio_map == 0 && rdev->qplib_res.prio) || + (prio_map != 0 && !rdev->qplib_res.prio)) { + rdev->qplib_res.prio = prio_map ? true : false; + + bnxt_re_update_gid(rdev); + } + return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 62447b3badec..4e101704e801 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -468,9 +468,11 @@ static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res, kfree(sgid_tbl->tbl); kfree(sgid_tbl->hw_id); kfree(sgid_tbl->ctx); + kfree(sgid_tbl->vlan); sgid_tbl->tbl = NULL; sgid_tbl->hw_id = NULL; sgid_tbl->ctx = NULL; + sgid_tbl->vlan = NULL; sgid_tbl->max = 0; sgid_tbl->active = 0; } @@ -491,8 +493,15 @@ static int bnxt_qplib_alloc_sgid_tbl(struct bnxt_qplib_res *res, if (!sgid_tbl->ctx) goto out_free2; + sgid_tbl->vlan = kcalloc(max, sizeof(u8), GFP_KERNEL); + if (!sgid_tbl->vlan) + goto out_free3; + sgid_tbl->max = max; return 0; +out_free3: + kfree(sgid_tbl->ctx); + sgid_tbl->ctx = NULL; out_free2: kfree(sgid_tbl->hw_id); sgid_tbl->hw_id = NULL; @@ -514,6 +523,7 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res, } memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max); memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max); + memset(sgid_tbl->vlan, 0, sizeof(u8) * sgid_tbl->max); sgid_tbl->active = 0; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 2e4855509719..e87207526d2c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -116,6 +116,7 @@ struct bnxt_qplib_sgid_tbl { u16 max; u16 active; void *ctx; + u8 *vlan; }; struct bnxt_qplib_pkey_tbl { @@ -188,6 +189,7 @@ struct bnxt_qplib_res { struct bnxt_qplib_sgid_tbl sgid_tbl; struct bnxt_qplib_pkey_tbl pkey_tbl; struct bnxt_qplib_dpi_tbl dpi_tbl; + bool prio; }; #define to_bnxt_qplib(ptr, type, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index ef91ab786dd4..e277e54a05eb 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -213,6 +213,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, } memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero, sizeof(bnxt_qplib_gid_zero)); + sgid_tbl->vlan[index] = 0; sgid_tbl->active--; dev_dbg(&res->pdev->dev, "QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x", @@ -265,28 +266,32 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct cmdq_add_gid req; struct creq_add_gid_resp resp; u16 cmd_flags = 0; - u32 temp32[4]; - u16 temp16[3]; int rc; RCFW_CMD_PREP(req, ADD_GID, cmd_flags); - memcpy(temp32, gid->data, sizeof(struct bnxt_qplib_gid)); - req.gid[0] = cpu_to_be32(temp32[3]); - req.gid[1] = cpu_to_be32(temp32[2]); - req.gid[2] = 
cpu_to_be32(temp32[1]); - req.gid[3] = cpu_to_be32(temp32[0]); - if (vlan_id != 0xFFFF) - req.vlan = cpu_to_le16((vlan_id & - CMDQ_ADD_GID_VLAN_VLAN_ID_MASK) | - CMDQ_ADD_GID_VLAN_TPID_TPID_8100 | - CMDQ_ADD_GID_VLAN_VLAN_EN); + req.gid[0] = cpu_to_be32(((u32 *)gid->data)[3]); + req.gid[1] = cpu_to_be32(((u32 *)gid->data)[2]); + req.gid[2] = cpu_to_be32(((u32 *)gid->data)[1]); + req.gid[3] = cpu_to_be32(((u32 *)gid->data)[0]); + /* + * driver should ensure that all RoCE traffic is always VLAN + * tagged if RoCE traffic is running on non-zero VLAN ID or + * RoCE traffic is running on non-zero Priority. + */ + if ((vlan_id != 0xFFFF) || res->prio) { + if (vlan_id != 0xFFFF) + req.vlan = cpu_to_le16 + (vlan_id & CMDQ_ADD_GID_VLAN_VLAN_ID_MASK); + req.vlan |= cpu_to_le16 + (CMDQ_ADD_GID_VLAN_TPID_TPID_8100 | + CMDQ_ADD_GID_VLAN_VLAN_EN); + } /* MAC in network format */ - memcpy(temp16, smac, 6); - req.src_mac[0] = cpu_to_be16(temp16[0]); - req.src_mac[1] = cpu_to_be16(temp16[1]); - req.src_mac[2] = cpu_to_be16(temp16[2]); + req.src_mac[0] = cpu_to_be16(((u16 *)smac)[0]); + req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]); + req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, NULL, 0); @@ -297,6 +302,9 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, /* Add GID to the sgid_tbl */ memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid)); sgid_tbl->active++; + if (vlan_id != 0xFFFF) + sgid_tbl->vlan[free_idx] = 1; + dev_dbg(&res->pdev->dev, "QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x", free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active); @@ -306,6 +314,43 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, return 0; } +int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, + struct bnxt_qplib_gid *gid, u16 gid_idx, + u8 *smac) +{ + struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl, + struct bnxt_qplib_res, + sgid_tbl); + struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct creq_modify_gid_resp resp; + struct cmdq_modify_gid req; + int rc; + u16 cmd_flags = 0; + + RCFW_CMD_PREP(req, MODIFY_GID, cmd_flags); + + req.gid[0] = cpu_to_be32(((u32 *)gid->data)[3]); + req.gid[1] = cpu_to_be32(((u32 *)gid->data)[2]); + req.gid[2] = cpu_to_be32(((u32 *)gid->data)[1]); + req.gid[3] = cpu_to_be32(((u32 *)gid->data)[0]); + if (res->prio) { + req.vlan |= cpu_to_le16 + (CMDQ_ADD_GID_VLAN_TPID_TPID_8100 | + CMDQ_ADD_GID_VLAN_VLAN_EN); + } + + /* MAC in network format */ + req.src_mac[0] = cpu_to_be16(((u16 *)smac)[0]); + req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]); + req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]); + + req.gid_index = cpu_to_le16(gid_idx); + + rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, + (void *)&resp, NULL, 0); + return rc; +} + /* pkeys */ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 2ce7e2a32cf0..11322582f5e4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -135,6 +135,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id, bool update, u32 *index); +int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, + struct bnxt_qplib_gid *gid, u16 gid_idx, u8 *smac); int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, struct bnxt_qplib_pkey_tbl 
*pkey_tbl, u16 index, u16 *pkey); diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index fc23477ac52f..eeb55b2db57e 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -1473,8 +1473,8 @@ struct cmdq_modify_gid { u8 resp_size; u8 reserved8; __le64 resp_addr; - __le32 gid[4]; - __le16 src_mac[3]; + __be32 gid[4]; + __be16 src_mac[3]; __le16 vlan; #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_MASK 0xfffUL #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_SFT 0 -- cgit v1.2.3-59-g8ed1b From f218d67ef00431728ab7317e829006d00ecd5ca4 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 29 Jun 2017 12:28:15 -0700 Subject: RDMA/bnxt_re: Allow posting when QPs are in error This patch allows the driver to post send and receive requests on QPs which are in the error state. Instead of flushing the QP in the context of polling error CQEs, the QPs will be added to a flush list maintained per CQ. The QP state is moved to error. The QP is also added to the flush list if the user moves it to the error state using modify_qp. After polling the HW CQ in the poll_cq routine, this flush list is traversed and the driver completes work requests on each QP in the flush list until the budget expires. The QP is moved out of the flush list during QP destroy or during modify_QP to RESET. When ULPs post work requests while the QP is in the error state, the driver stores the ULP data and then increments the QP producer s/w index without ringing the doorbell. It then schedules a worker to invoke the CQ handler, since interrupts won't be generated by the HW for this request. Signed-off-by: Sriharsha Basavapatna Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 22 ++ drivers/infiniband/hw/bnxt_re/main.c | 3 +- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 465 +++++++++++++++++++++++------ drivers/infiniband/hw/bnxt_re/qplib_fp.h | 25 +- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 26 +- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 10 +- 6 files changed, 462 insertions(+), 89 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b10e1a6dce84..d78fedc654d0 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -786,6 +786,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) struct bnxt_re_dev *rdev = qp->rdev; int rc; + bnxt_qplib_del_flush_qp(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP"); @@ -800,6 +801,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) return rc; } + bnxt_qplib_del_flush_qp(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &rdev->qp1_sqp->qplib_qp); if (rc) { @@ -1344,6 +1346,21 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, } qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE; qp->qplib_qp.state = __from_ib_qp_state(qp_attr->qp_state); + + if (!qp->sumem && + qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + dev_dbg(rdev_to_dev(rdev), + "Move QP = %p to flush list\n", + qp); + bnxt_qplib_add_flush_qp(&qp->qplib_qp); + } + if (!qp->sumem && + qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { + dev_dbg(rdev_to_dev(rdev), + "Move QP = %p out of flush list\n", + qp); + bnxt_qplib_del_flush_qp(&qp->qplib_qp); + } } if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { qp->qplib_qp.modify_flags |= @@ -2354,6 +2371,7 @@ struct ib_cq
*bnxt_re_create_cq(struct ib_device *ibdev, } cq->qplib_cq.max_wqe = entries; cq->qplib_cq.cnq_hw_ring_id = rdev->nq.ring_id; + cq->qplib_cq.nq = &rdev->nq; rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq); if (rc) { @@ -2861,6 +2879,10 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) sq->send_phantom = false; } } + if (ncqe < budget) + ncqe += bnxt_qplib_process_flush_list(&cq->qplib_cq, + cqe + ncqe, + budget - ncqe); if (!ncqe) break; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 22527e35e13d..2f71123e0a9f 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1037,7 +1037,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) /* Establish RCFW Communication Channel to initialize the context * memory for the function and all child VFs */ - rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw); + rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw, + BNXT_RE_MAX_QPC_COUNT); if (rc) goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 9af1514e5944..31e15f31bba5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -51,6 +51,168 @@ #include "qplib_fp.h" static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq); +static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp); + +static void bnxt_qplib_cancel_phantom_processing(struct bnxt_qplib_qp *qp) +{ + qp->sq.condition = false; + qp->sq.send_phantom = false; + qp->sq.single = false; +} + +/* Flush list */ +static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp) +{ + struct bnxt_qplib_cq *scq, *rcq; + + scq = qp->scq; + rcq = qp->rcq; + + if (!qp->sq.flushed) { + dev_dbg(&scq->hwq.pdev->dev, + "QPLIB: FP: Adding to SQ Flush list = %p", + qp); + bnxt_qplib_cancel_phantom_processing(qp); + list_add_tail(&qp->sq_flush, &scq->sqf_head); + qp->sq.flushed = true; + } + if (!qp->srq) { + if (!qp->rq.flushed) { + dev_dbg(&rcq->hwq.pdev->dev, + "QPLIB: FP: Adding to RQ Flush list = %p", + qp); + list_add_tail(&qp->rq_flush, &rcq->rqf_head); + qp->rq.flushed = true; + } + } +} + +void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp, + unsigned long *flags) + __acquires(&qp->scq->hwq.lock) __acquires(&qp->rcq->hwq.lock) +{ + spin_lock_irqsave(&qp->scq->hwq.lock, *flags); + if (qp->scq == qp->rcq) + __acquire(&qp->rcq->hwq.lock); + else + spin_lock(&qp->rcq->hwq.lock); +} + +void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp, + unsigned long *flags) + __releases(&qp->scq->hwq.lock) __releases(&qp->rcq->hwq.lock) +{ + if (qp->scq == qp->rcq) + __release(&qp->rcq->hwq.lock); + else + spin_unlock(&qp->rcq->hwq.lock); + spin_unlock_irqrestore(&qp->scq->hwq.lock, *flags); +} + +static struct bnxt_qplib_cq *bnxt_qplib_find_buddy_cq(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_cq *cq) +{ + struct bnxt_qplib_cq *buddy_cq = NULL; + + if (qp->scq == qp->rcq) + buddy_cq = NULL; + else if (qp->scq == cq) + buddy_cq = qp->rcq; + else + buddy_cq = qp->scq; + return buddy_cq; +} + +static void bnxt_qplib_lock_buddy_cq(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_cq *cq) + __acquires(&buddy_cq->hwq.lock) +{ + struct bnxt_qplib_cq *buddy_cq = NULL; + + buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq); + if (!buddy_cq) + __acquire(&cq->hwq.lock); + else + spin_lock(&buddy_cq->hwq.lock); +} + +static void bnxt_qplib_unlock_buddy_cq(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_cq 
*cq) + __releases(&buddy_cq->hwq.lock) +{ + struct bnxt_qplib_cq *buddy_cq = NULL; + + buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq); + if (!buddy_cq) + __release(&cq->hwq.lock); + else + spin_unlock(&buddy_cq->hwq.lock); +} + +void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp) +{ + unsigned long flags; + + bnxt_qplib_acquire_cq_locks(qp, &flags); + __bnxt_qplib_add_flush_qp(qp); + bnxt_qplib_release_cq_locks(qp, &flags); +} + +static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp) +{ + struct bnxt_qplib_cq *scq, *rcq; + + scq = qp->scq; + rcq = qp->rcq; + + if (qp->sq.flushed) { + qp->sq.flushed = false; + list_del(&qp->sq_flush); + } + if (!qp->srq) { + if (qp->rq.flushed) { + qp->rq.flushed = false; + list_del(&qp->rq_flush); + } + } +} + +void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp) +{ + unsigned long flags; + + bnxt_qplib_acquire_cq_locks(qp, &flags); + __clean_cq(qp->scq, (u64)(unsigned long)qp); + qp->sq.hwq.prod = 0; + qp->sq.hwq.cons = 0; + __clean_cq(qp->rcq, (u64)(unsigned long)qp); + qp->rq.hwq.prod = 0; + qp->rq.hwq.cons = 0; + + __bnxt_qplib_del_flush_qp(qp); + bnxt_qplib_release_cq_locks(qp, &flags); +} + +static void bnxt_qpn_cqn_sched_task(struct work_struct *work) +{ + struct bnxt_qplib_nq_work *nq_work = + container_of(work, struct bnxt_qplib_nq_work, work); + + struct bnxt_qplib_cq *cq = nq_work->cq; + struct bnxt_qplib_nq *nq = nq_work->nq; + + if (cq && nq) { + spin_lock_bh(&cq->compl_lock); + if (atomic_read(&cq->arm_state) && nq->cqn_handler) { + dev_dbg(&nq->pdev->dev, + "%s:Trigger cq = %p event nq = %p\n", + __func__, cq, nq); + nq->cqn_handler(nq, cq); + } + spin_unlock_bh(&cq->compl_lock); + } + kfree(nq_work); +} static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) @@ -119,6 +281,7 @@ static void bnxt_qplib_service_nq(unsigned long data) struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data; struct bnxt_qplib_hwq *hwq = &nq->hwq; struct nq_base *nqe, **nq_ptr; + struct bnxt_qplib_cq *cq; int num_cqne_processed = 0; u32 sw_cons, raw_cons; u16 type; @@ -143,15 +306,17 @@ static void bnxt_qplib_service_nq(unsigned long data) q_handle = le32_to_cpu(nqcne->cq_handle_low); q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high) << 32; - bnxt_qplib_arm_cq_enable((struct bnxt_qplib_cq *) - ((unsigned long)q_handle)); - if (!nq->cqn_handler(nq, (struct bnxt_qplib_cq *) - ((unsigned long)q_handle))) + cq = (struct bnxt_qplib_cq *)(unsigned long)q_handle; + bnxt_qplib_arm_cq_enable(cq); + spin_lock_bh(&cq->compl_lock); + atomic_set(&cq->arm_state, 0); + if (!nq->cqn_handler(nq, (cq))) num_cqne_processed++; else dev_warn(&nq->pdev->dev, "QPLIB: cqn - type 0x%x not handled", type); + spin_unlock_bh(&cq->compl_lock); break; } case NQ_BASE_TYPE_DBQ_EVENT: @@ -190,6 +355,10 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) { + if (nq->cqn_wq) { + destroy_workqueue(nq->cqn_wq); + nq->cqn_wq = NULL; + } /* Make sure the HW is stopped! 
*/ synchronize_irq(nq->vector); tasklet_disable(&nq->worker); @@ -216,7 +385,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, void *, u8 event)) { resource_size_t nq_base; - int rc; + int rc = -1; nq->pdev = pdev; nq->vector = msix_vector; @@ -227,6 +396,11 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq); + /* Have a task to schedule CQ notifiers in post send case */ + nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq"); + if (!nq->cqn_wq) + goto fail; + nq->requested = false; rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, "bnxt_qplib_nq", nq); if (rc) { @@ -401,8 +575,8 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->id = le32_to_cpu(resp.xid); qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET; - sq->flush_in_progress = false; - rq->flush_in_progress = false; + rcfw->qp_tbl[qp->id].qp_id = qp->id; + rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; return 0; @@ -615,8 +789,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->id = le32_to_cpu(resp.xid); qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET; - sq->flush_in_progress = false; - rq->flush_in_progress = false; + INIT_LIST_HEAD(&qp->sq_flush); + INIT_LIST_HEAD(&qp->rq_flush); + rcfw->qp_tbl[qp->id].qp_id = qp->id; + rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; return 0; @@ -963,13 +1139,19 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, u16 cmd_flags = 0; int rc; + rcfw->qp_tbl[qp->id].qp_id = BNXT_QPLIB_QP_ID_INVALID; + rcfw->qp_tbl[qp->id].qp_handle = NULL; + RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags); req.qp_cid = cpu_to_le32(qp->id); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, NULL, 0); - if (rc) + if (rc) { + rcfw->qp_tbl[qp->id].qp_id = qp->id; + rcfw->qp_tbl[qp->id].qp_handle = qp; return rc; + } /* Must walk the associated CQs to nullified the QP ptr */ spin_lock_irqsave(&qp->scq->hwq.lock, flags); @@ -1074,14 +1256,21 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swq *swq; struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr; struct sq_sge *hw_sge; + struct bnxt_qplib_nq_work *nq_work = NULL; + bool sch_handler = false; u32 sw_prod; u8 wqe_size16; int i, rc = 0, data_len = 0, pkt_num = 0; __le32 temp32; if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) { - rc = -EINVAL; - goto done; + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + sch_handler = true; + dev_dbg(&sq->hwq.pdev->dev, + "%s Error QP. 
Scheduling for poll_cq\n", + __func__); + goto queue_err; + } } if (bnxt_qplib_queue_full(sq)) { @@ -1301,12 +1490,35 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & SQ_PSN_SEARCH_NEXT_PSN_MASK)); } - +queue_err: + if (sch_handler) { + /* Store the ULP info in the software structures */ + sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); + swq = &sq->swq[sw_prod]; + swq->wr_id = wqe->wr_id; + swq->type = wqe->type; + swq->flags = wqe->flags; + if (qp->sig_type) + swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; + swq->start_psn = sq->psn & BTH_PSN_MASK; + } sq->hwq.prod++; - qp->wqe_cnt++; done: + if (sch_handler) { + nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); + if (nq_work) { + nq_work->cq = qp->scq; + nq_work->nq = qp->scq->nq; + INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); + queue_work(qp->scq->nq->cqn_wq, &nq_work->work); + } else { + dev_err(&sq->hwq.pdev->dev, + "QPLIB: FP: Failed to allocate SQ nq_work!"); + rc = -ENOMEM; + } + } return rc; } @@ -1334,15 +1546,17 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, struct bnxt_qplib_q *rq = &qp->rq; struct rq_wqe *rqe, **rqe_ptr; struct sq_sge *hw_sge; + struct bnxt_qplib_nq_work *nq_work = NULL; + bool sch_handler = false; u32 sw_prod; int i, rc = 0; if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { - dev_err(&rq->hwq.pdev->dev, - "QPLIB: FP: QP (0x%x) is in the 0x%x state", - qp->id, qp->state); - rc = -EINVAL; - goto done; + sch_handler = true; + dev_dbg(&rq->hwq.pdev->dev, + "%s Error QP. Scheduling for poll_cq\n", + __func__); + goto queue_err; } if (bnxt_qplib_queue_full(rq)) { dev_err(&rq->hwq.pdev->dev, @@ -1378,7 +1592,27 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, /* Supply the rqe->wr_id index to the wr_id_tbl for now */ rqe->wr_id[0] = cpu_to_le32(sw_prod); +queue_err: + if (sch_handler) { + /* Store the ULP info in the software structures */ + sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); + rq->swq[sw_prod].wr_id = wqe->wr_id; + } + rq->hwq.prod++; + if (sch_handler) { + nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); + if (nq_work) { + nq_work->cq = qp->rcq; + nq_work->nq = qp->rcq->nq; + INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); + queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); + } else { + dev_err(&rq->hwq.pdev->dev, + "QPLIB: FP: Failed to allocate RQ nq_work!"); + rc = -ENOMEM; + } + } done: return rc; } @@ -1471,6 +1705,9 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem; cq->period = BNXT_QPLIB_QUEUE_START_PERIOD; init_waitqueue_head(&cq->waitq); + INIT_LIST_HEAD(&cq->sqf_head); + INIT_LIST_HEAD(&cq->rqf_head); + spin_lock_init(&cq->compl_lock); bnxt_qplib_arm_cq_enable(cq); return 0; @@ -1513,9 +1750,13 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, while (*budget) { sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); if (sw_cons == sw_prod) { - sq->flush_in_progress = false; break; } + /* Skip the FENCE WQE completions */ + if (sq->swq[sw_cons].wr_id == BNXT_QPLIB_FENCE_WRID) { + bnxt_qplib_cancel_phantom_processing(qp); + goto skip_compl; + } memset(cqe, 0, sizeof(*cqe)); cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR; cqe->opcode = CQ_BASE_CQE_TYPE_REQ; @@ -1525,6 +1766,7 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, cqe->type = sq->swq[sw_cons].type; cqe++; (*budget)--; +skip_compl: sq->hwq.cons++; } *pcqe = cqe; @@ -1536,11 +1778,24 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp 
*qp, } static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, - int opcode, struct bnxt_qplib_cqe **pcqe, int *budget) + struct bnxt_qplib_cqe **pcqe, int *budget) { struct bnxt_qplib_cqe *cqe; u32 sw_prod, sw_cons; int rc = 0; + int opcode = 0; + + switch (qp->type) { + case CMDQ_CREATE_QP1_TYPE_GSI: + opcode = CQ_BASE_CQE_TYPE_RES_RAWETH_QP1; + break; + case CMDQ_CREATE_QP_TYPE_RC: + opcode = CQ_BASE_CQE_TYPE_RES_RC; + break; + case CMDQ_CREATE_QP_TYPE_UD: + opcode = CQ_BASE_CQE_TYPE_RES_UD; + break; + } /* Flush the rest of the RQ */ sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); @@ -1567,6 +1822,21 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, return rc; } +void bnxt_qplib_mark_qp_error(void *qp_handle) +{ + struct bnxt_qplib_qp *qp = qp_handle; + + if (!qp) + return; + + /* Must block new posting of SQ and RQ */ + qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; + bnxt_qplib_cancel_phantom_processing(qp); + + /* Add qp to flush list of the CQ */ + __bnxt_qplib_add_flush_qp(qp); +} + /* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive) * CQE is track from sw_cq_cons to max_element but valid only if VALID=1 */ @@ -1694,10 +1964,12 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, cqe_sq_cons, sq->hwq.max_elements); return -EINVAL; } - /* If we were in the middle of flushing the SQ, continue */ - if (sq->flush_in_progress) - goto flush; + if (qp->sq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + goto done; + } /* Require to walk the sq's swq to fabricate CQEs for all previously * signaled SWQEs due to CQE aggregation from the current sq cons * to the cqe_sq_cons @@ -1733,11 +2005,9 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, sw_sq_cons, cqe->wr_id, cqe->status); cqe++; (*budget)--; - sq->flush_in_progress = true; - /* Must block new posting of SQ and RQ */ - qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; - sq->condition = false; - sq->single = false; + bnxt_qplib_lock_buddy_cq(qp, cq); + bnxt_qplib_mark_qp_error(qp); + bnxt_qplib_unlock_buddy_cq(qp, cq); } else { if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) { /* Before we complete, do WA 9060 */ @@ -1768,15 +2038,6 @@ out: * the WC for this CQE */ sq->single = false; - if (!sq->flush_in_progress) - goto done; -flush: - /* Require to walk the sq's swq to fabricate CQEs for all - * previously posted SWQEs due to the error CQE received - */ - rc = __flush_sq(sq, qp, pcqe, budget); - if (!rc) - sq->flush_in_progress = false; done: return rc; } @@ -1798,6 +2059,12 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL"); return -EINVAL; } + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + goto done; + } + cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; cqe->length = le32_to_cpu(hwcqe->length); @@ -1817,8 +2084,6 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, wr_id_idx, rq->hwq.max_elements); return -EINVAL; } - if (rq->flush_in_progress) - goto flush_rq; cqe->wr_id = rq->swq[wr_id_idx].wr_id; cqe++; @@ -1827,12 +2092,13 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { - rq->flush_in_progress = true; -flush_rq: - rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RC, pcqe, budget); - if (!rc) - rq->flush_in_progress = false; + /* Add qp to flush list of the CQ */ + 
bnxt_qplib_lock_buddy_cq(qp, cq); + __bnxt_qplib_add_flush_qp(qp); + bnxt_qplib_unlock_buddy_cq(qp, cq); } + +done: return rc; } @@ -1853,6 +2119,11 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL"); return -EINVAL; } + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + goto done; + } cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; cqe->length = le32_to_cpu(hwcqe->length); @@ -1876,8 +2147,6 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, wr_id_idx, rq->hwq.max_elements); return -EINVAL; } - if (rq->flush_in_progress) - goto flush_rq; cqe->wr_id = rq->swq[wr_id_idx].wr_id; cqe++; @@ -1886,12 +2155,12 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { - rq->flush_in_progress = true; -flush_rq: - rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_UD, pcqe, budget); - if (!rc) - rq->flush_in_progress = false; + /* Add qp to flush list of the CQ */ + bnxt_qplib_lock_buddy_cq(qp, cq); + __bnxt_qplib_add_flush_qp(qp); + bnxt_qplib_unlock_buddy_cq(qp, cq); } +done: return rc; } @@ -1932,6 +2201,11 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, "QPLIB: process_cq Raw/QP1 qp is NULL"); return -EINVAL; } + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + goto done; + } cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; cqe->flags = le16_to_cpu(hwcqe->flags); @@ -1960,8 +2234,6 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, wr_id_idx, rq->hwq.max_elements); return -EINVAL; } - if (rq->flush_in_progress) - goto flush_rq; cqe->wr_id = rq->swq[wr_id_idx].wr_id; cqe++; @@ -1970,13 +2242,13 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { - rq->flush_in_progress = true; -flush_rq: - rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RAWETH_QP1, pcqe, - budget); - if (!rc) - rq->flush_in_progress = false; + /* Add qp to flush list of the CQ */ + bnxt_qplib_lock_buddy_cq(qp, cq); + __bnxt_qplib_add_flush_qp(qp); + bnxt_qplib_unlock_buddy_cq(qp, cq); } + +done: return rc; } @@ -1990,7 +2262,6 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe; u32 sw_cons = 0, cqe_cons; int rc = 0; - u8 opcode = 0; /* Check the Status */ if (hwcqe->status != CQ_TERMINAL_STATUS_OK) @@ -2005,6 +2276,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, "QPLIB: FP: CQ Process terminal qp is NULL"); return -EINVAL; } + /* Must block new posting of SQ and RQ */ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; @@ -2023,9 +2295,12 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, cqe_cons, sq->hwq.max_elements); goto do_rq; } - /* If we were in the middle of flushing, continue */ - if (sq->flush_in_progress) - goto flush_sq; + + if (qp->sq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + goto sq_done; + } /* Terminal CQE can also include aggregated successful CQEs prior. 
* So we must complete all CQEs from the current sq's cons to the @@ -2055,11 +2330,6 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, rc = -EAGAIN; goto sq_done; } - sq->flush_in_progress = true; -flush_sq: - rc = __flush_sq(sq, qp, pcqe, budget); - if (!rc) - sq->flush_in_progress = false; sq_done: if (rc) return rc; @@ -2075,26 +2345,23 @@ do_rq: cqe_cons, rq->hwq.max_elements); goto done; } + + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + rc = 0; + goto done; + } + /* Terminal CQE requires all posted RQEs to complete with FLUSHED_ERR * from the current rq->cons to the rq->prod regardless what the * rq->cons the terminal CQE indicates */ - rq->flush_in_progress = true; - switch (qp->type) { - case CMDQ_CREATE_QP1_TYPE_GSI: - opcode = CQ_BASE_CQE_TYPE_RES_RAWETH_QP1; - break; - case CMDQ_CREATE_QP_TYPE_RC: - opcode = CQ_BASE_CQE_TYPE_RES_RC; - break; - case CMDQ_CREATE_QP_TYPE_UD: - opcode = CQ_BASE_CQE_TYPE_RES_UD; - break; - } - rc = __flush_rq(rq, qp, opcode, pcqe, budget); - if (!rc) - rq->flush_in_progress = false; + /* Add qp to flush list of the CQ */ + bnxt_qplib_lock_buddy_cq(qp, cq); + __bnxt_qplib_add_flush_qp(qp); + bnxt_qplib_unlock_buddy_cq(qp, cq); done: return rc; } @@ -2115,6 +2382,33 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq, return 0; } +int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, + struct bnxt_qplib_cqe *cqe, + int num_cqes) +{ + struct bnxt_qplib_qp *qp = NULL; + u32 budget = num_cqes; + unsigned long flags; + + spin_lock_irqsave(&cq->hwq.lock, flags); + list_for_each_entry(qp, &cq->sqf_head, sq_flush) { + dev_dbg(&cq->hwq.pdev->dev, + "QPLIB: FP: Flushing SQ QP= %p", + qp); + __flush_sq(&qp->sq, qp, &cqe, &budget); + } + + list_for_each_entry(qp, &cq->rqf_head, rq_flush) { + dev_dbg(&cq->hwq.pdev->dev, + "QPLIB: FP: Flushing RQ QP= %p", + qp); + __flush_rq(&qp->rq, qp, &cqe, &budget); + } + spin_unlock_irqrestore(&cq->hwq.lock, flags); + + return num_cqes - budget; +} + int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num_cqes, struct bnxt_qplib_qp **lib_qp) { @@ -2205,6 +2499,7 @@ void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type) spin_lock_irqsave(&cq->hwq.lock, flags); if (arm_type) bnxt_qplib_arm_cq(cq, arm_type); - + /* Using cq->arm_state variable to track whether to issue cq handler */ + atomic_set(&cq->arm_state, 1); spin_unlock_irqrestore(&cq->hwq.lock, flags); } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 19176e06c98a..23a26d5cbe18 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -220,19 +220,20 @@ struct bnxt_qplib_q { u16 q_full_delta; u16 max_sge; u32 psn; - bool flush_in_progress; bool condition; bool single; bool send_phantom; u32 phantom_wqe_cnt; u32 phantom_cqe_cnt; u32 next_cq_cons; + bool flushed; }; struct bnxt_qplib_qp { struct bnxt_qplib_pd *pd; struct bnxt_qplib_dpi *dpi; u64 qp_handle; +#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF u32 id; u8 type; u8 sig_type; @@ -296,6 +297,8 @@ struct bnxt_qplib_qp { dma_addr_t sq_hdr_buf_map; void *rq_hdr_buf; dma_addr_t rq_hdr_buf_map; + struct list_head sq_flush; + struct list_head rq_flush; }; #define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base) @@ -351,6 +354,7 @@ struct bnxt_qplib_cq { u16 period; struct bnxt_qplib_hwq hwq; u32 cnq_hw_ring_id; + struct bnxt_qplib_nq *nq; bool resize_in_progress; struct 
scatterlist *sghead; u32 nmap; @@ -360,6 +364,9 @@ struct bnxt_qplib_cq { unsigned long flags; #define CQ_FLAGS_RESIZE_IN_PROG 1 wait_queue_head_t waitq; + struct list_head sqf_head, rqf_head; + atomic_t arm_state; + spinlock_t compl_lock; /* synch CQ handlers */ }; #define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq) @@ -417,6 +424,13 @@ struct bnxt_qplib_nq { (struct bnxt_qplib_nq *nq, void *srq, u8 event); + struct workqueue_struct *cqn_wq; +}; + +struct bnxt_qplib_nq_work { + struct work_struct work; + struct bnxt_qplib_nq *nq; + struct bnxt_qplib_cq *cq; }; void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq); @@ -453,4 +467,13 @@ bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq); void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type); void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq); int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq); +void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp); +void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp); +void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp, + unsigned long *flags); +void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp, + unsigned long *flags); +int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, + struct bnxt_qplib_cqe *cqe, + int num_cqes); #endif /* __BNXT_QPLIB_FP_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 16e42754dbec..391bb7006e8f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -44,6 +44,9 @@ #include "roce_hsi.h" #include "qplib_res.h" #include "qplib_rcfw.h" +#include "qplib_sp.h" +#include "qplib_fp.h" + static void bnxt_qplib_service_creq(unsigned long data); /* Hardware communication channel */ @@ -279,16 +282,29 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, struct creq_qp_event *qp_event) { struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq; + struct creq_qp_error_notification *err_event; struct bnxt_qplib_crsq *crsqe; unsigned long flags; + struct bnxt_qplib_qp *qp; u16 cbit, blocked = 0; u16 cookie; __le16 mcookie; + u32 qp_id; switch (qp_event->event) { case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION: + err_event = (struct creq_qp_error_notification *)qp_event; + qp_id = le32_to_cpu(err_event->xid); + qp = rcfw->qp_tbl[qp_id].qp_handle; dev_dbg(&rcfw->pdev->dev, "QPLIB: Received QP error notification"); + dev_dbg(&rcfw->pdev->dev, + "QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", + qp_id, err_event->req_err_state_reason, + err_event->res_err_state_reason); + bnxt_qplib_acquire_cq_locks(qp, &flags); + bnxt_qplib_mark_qp_error(qp); + bnxt_qplib_release_cq_locks(qp, &flags); break; default: /* Command Response */ @@ -507,6 +523,7 @@ skip_ctx_setup: void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) { + kfree(rcfw->qp_tbl); kfree(rcfw->crsqe_tbl); bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq); bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq); @@ -514,7 +531,8 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) } int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, - struct bnxt_qplib_rcfw *rcfw) + struct bnxt_qplib_rcfw *rcfw, + int qp_tbl_sz) { rcfw->pdev = pdev; rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT; @@ -541,6 +559,12 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, if (!rcfw->crsqe_tbl) goto fail; + rcfw->qp_tbl_size = qp_tbl_sz; + rcfw->qp_tbl = kcalloc(qp_tbl_sz, sizeof(struct bnxt_qplib_qp_node), + GFP_KERNEL); + if (!rcfw->qp_tbl) + goto 
fail; + return 0; fail: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 09ce121770cd..0ed312f17c8d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -148,6 +148,11 @@ struct bnxt_qplib_rcfw_sbuf { u32 size; }; +struct bnxt_qplib_qp_node { + u32 qp_id; /* QP id */ + void *qp_handle; /* ptr to qplib_qp */ +}; + /* RCFW Communication Channels */ struct bnxt_qplib_rcfw { struct pci_dev *pdev; @@ -181,11 +186,13 @@ struct bnxt_qplib_rcfw { /* Actual Cmd and Resp Queues */ struct bnxt_qplib_hwq cmdq; struct bnxt_qplib_crsq *crsqe_tbl; + int qp_tbl_size; + struct bnxt_qplib_qp_node *qp_tbl; }; void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, - struct bnxt_qplib_rcfw *rcfw); + struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz); void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, struct bnxt_qplib_rcfw *rcfw, @@ -207,4 +214,5 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx, int is_virtfn); +void bnxt_qplib_mark_qp_error(void *qp_handle); #endif /* __BNXT_QPLIB_RCFW_H__ */ -- cgit v1.2.3-59-g8ed1b From c85023e153e3824661d07307138fdeff41f6d86a Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 30 May 2017 09:42:54 +0300 Subject: IB/mlx5: Add raw ethernet local loopback support Currently, unicast/multicast loopback raw ethernet (non-RDMA) packets are sent back to the vport. A unicast loopback packet is a packet whose destination MAC address is the same as its source MAC address. For multicast, the destination MAC address is in the vport's multicast filter list. Moreover, the local loopback is not needed if there is at most one user space context. After this patch, the raw ethernet unicast and multicast local loopback are disabled by default. When there is more than one user space context, the local loopback is enabled. Note that when local loopback is disabled, raw ethernet packets are not looped back to the vport and are forwarded to the next routing level (eswitch, or multihost switch, or out to the wire depending on the configuration).
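To make the mechanism concrete, here is a minimal sketch of the enable-side reference counting (condensed from mlx5_ib_alloc_transport_domain() in the diff below; the helper name and the omitted capability checks are illustrative only):

static int alloc_td_count_contexts(struct mlx5_ib_dev *dev, u32 *tdn)
{
	int err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);

	if (err)
		return err;

	/* lb_mutex serializes updates of the user context count */
	mutex_lock(&dev->lb_mutex);
	dev->user_td++;
	/* the second user space context makes local loopback necessary */
	if (dev->user_td == 2)
		err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
	mutex_unlock(&dev->lb_mutex);

	return err;
}

The dealloc path mirrors this: it decrements user_td and turns local loopback back off once fewer than two user space contexts remain.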
Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 51 ++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 10 +++-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 19 ++++++++++ 3 files changed, 73 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a7f2e60085c4..85bf17a616d5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -58,6 +58,7 @@ #include #include "mlx5_ib.h" #include "cmd.h" +#include #define DRIVER_NAME "mlx5_ib" #define DRIVER_VERSION "5.0-0" @@ -1187,6 +1188,45 @@ static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *con return 0; } +static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) +{ + int err; + + err = mlx5_core_alloc_transport_domain(dev->mdev, tdn); + if (err) + return err; + + if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || + !MLX5_CAP_GEN(dev->mdev, disable_local_lb)) + return err; + + mutex_lock(&dev->lb_mutex); + dev->user_td++; + + if (dev->user_td == 2) + err = mlx5_nic_vport_update_local_lb(dev->mdev, true); + + mutex_unlock(&dev->lb_mutex); + return err; +} + +static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) +{ + mlx5_core_dealloc_transport_domain(dev->mdev, tdn); + + if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || + !MLX5_CAP_GEN(dev->mdev, disable_local_lb)) + return; + + mutex_lock(&dev->lb_mutex); + dev->user_td--; + + if (dev->user_td < 2) + mlx5_nic_vport_update_local_lb(dev->mdev, false); + + mutex_unlock(&dev->lb_mutex); +} + static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { @@ -1295,8 +1335,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, mutex_init(&context->upd_xlt_page_mutex); if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { - err = mlx5_core_alloc_transport_domain(dev->mdev, - &context->tdn); + err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); if (err) goto out_page; } @@ -1362,7 +1401,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, out_td: if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); out_page: free_page(context->upd_xlt_page); @@ -1390,7 +1429,7 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) bfregi = &context->bfregi; if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); free_page(context->upd_xlt_page); deallocate_uars(dev, context); @@ -3797,6 +3836,10 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) goto err_umrc; } + if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + MLX5_CAP_GEN(mdev, disable_local_lb)) + mutex_init(&dev->lb_mutex); + dev->ib_active = true; return dev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bdcf25410c99..d2b27647f939 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -652,9 +652,13 @@ struct mlx5_ib_dev { struct list_head qp_list; /* Array with num_ports elements */ struct mlx5_ib_port *port; - struct mlx5_sq_bfreg bfreg; - struct 
mlx5_sq_bfreg fp_bfreg; - u8 umr_fence; + struct mlx5_sq_bfreg bfreg; + struct mlx5_sq_bfreg fp_bfreg; + + /* protect the user_td */ + struct mutex lb_mutex; + u32 user_td; + u8 umr_fence; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c065132b956d..3cec683fd70f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -47,6 +47,7 @@ #include #include #include +#include #ifdef CONFIG_RFS_ACCEL #include #endif @@ -579,6 +580,18 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) return err; } +static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev) +{ + int ret = 0; + + /* Disable local_lb by default */ + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + MLX5_CAP_GEN(dev, disable_local_lb)) + ret = mlx5_nic_vport_update_local_lb(dev, false); + + return ret; +} + int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) { u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0}; @@ -1155,6 +1168,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_fs; } + err = mlx5_core_set_hca_defaults(dev); + if (err) { + dev_err(&pdev->dev, "Failed to set hca defaults\n"); + goto err_fs; + } + #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_attach(dev->priv.eswitch); #endif -- cgit v1.2.3-59-g8ed1b From fd65f1b8eefdd20bd70cc920a7306e1e1b113d81 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 30 May 2017 09:56:05 +0300 Subject: IB/mlx5: Change logic for dispatching IB events for port state The old logic ignored link state. This led to missed IB events, such as when the link goes down on the switch while the admin state is up, or to redundant events, such as when the admin state goes up while the link is down. To fix that, probe the port state on NETDEV events and compare it to the last known state to decide whether an IB event needs to be dispatched.
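The new dispatch rule reduces to the following sketch (condensed from the mlx5_netdev_event() hunk below; declarations and surrounding checks trimmed):

	enum ib_port_state port_state;

	if (get_port_state(&ibdev->ib_dev, 1, &port_state))
		return NOTIFY_DONE;

	/* report only real transitions, never a repeat of the last state */
	if (ibdev->roce.last_port_state == port_state)
		return NOTIFY_DONE;

	ibdev->roce.last_port_state = port_state;
	if (port_state == IB_PORT_DOWN)
		ibev.event = IB_EVENT_PORT_ERR;
	else if (port_state == IB_PORT_ACTIVE)
		ibev.event = IB_EVENT_PORT_ACTIVE;
	else
		return NOTIFY_DONE;	/* still transitioning, no event */

	ib_dispatch_event(&ibev);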
Fixes: 5ec8c83e3ad3 ("IB/mlx5: Port events in RoCE now rely on netdev events") Signed-off-by: Moni Shoua Reviewed-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 33 +++++++++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 85bf17a616d5..bfd9117da10c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -98,6 +98,20 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num) return mlx5_port_type_cap_to_rdma_ll(port_type_cap); } +static int get_port_state(struct ib_device *ibdev, + u8 port_num, + enum ib_port_state *state) +{ + struct ib_port_attr attr; + int ret; + + memset(&attr, 0, sizeof(attr)); + ret = mlx5_ib_query_port(ibdev, port_num, &attr); + if (!ret) + *state = attr.state; + return ret; +} + static int mlx5_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -115,6 +129,7 @@ static int mlx5_netdev_event(struct notifier_block *this, write_unlock(&ibdev->roce.netdev_lock); break; + case NETDEV_CHANGE: case NETDEV_UP: case NETDEV_DOWN: { struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); @@ -128,10 +143,23 @@ static int mlx5_netdev_event(struct notifier_block *this, if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) && ibdev->ib_active) { struct ib_event ibev = { }; + enum ib_port_state port_state; + if (get_port_state(&ibdev->ib_dev, 1, &port_state)) + return NOTIFY_DONE; + + if (ibdev->roce.last_port_state == port_state) + return NOTIFY_DONE; + + ibdev->roce.last_port_state = port_state; ibev.device = &ibdev->ib_dev; - ibev.event = (event == NETDEV_UP) ? - IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + if (port_state == IB_PORT_DOWN) + ibev.event = IB_EVENT_PORT_ERR; + else if (port_state == IB_PORT_ACTIVE) + ibev.event = IB_EVENT_PORT_ACTIVE; + else + return NOTIFY_DONE; + ibev.element.port_num = 1; ib_dispatch_event(&ibev); } @@ -3793,6 +3821,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) err = mlx5_enable_eth(dev); if (err) goto err_free_port; + dev->roce.last_port_state = IB_PORT_DOWN; } err = create_dev_resources(&dev->devr); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d2b27647f939..5fb4547bbcb8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -616,6 +616,7 @@ struct mlx5_roce { struct net_device *netdev; struct notifier_block nb; atomic_t next_port; + enum ib_port_state last_port_state; }; struct mlx5_ib_dev { -- cgit v1.2.3-59-g8ed1b From 4a2da0b8c0782816f3ae6846ae7942fcbb0f8172 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 30 May 2017 10:05:15 +0300 Subject: IB/mlx5: Add debug control parameters for congestion control This patch adds debug control parameters for congestion control that can be read or written through debugfs. They are for reaction point and notification point nodes.
These control parameters are listed below:
+------------------------------+-----------------------------------------+
| Name                         | Description                             |
|------------------------------+-----------------------------------------|
|rp_clamp_tgt_rate             | When set, target rate is updated to     |
|                              | current rate                            |
|------------------------------+-----------------------------------------|
|rp_clamp_tgt_rate_ati         | When set, update target rate based on   |
|                              | timer as well                           |
|------------------------------+-----------------------------------------|
|rp_time_reset                 | Time between rate increases if no       |
|                              | CNP is received, unit in usec           |
|------------------------------+-----------------------------------------|
|rp_byte_reset                 | Number of bytes between rate increases  |
|                              | if no CNP is received                   |
|------------------------------+-----------------------------------------|
|rp_threshold                  | Threshold for reaction point rate       |
|                              | control                                 |
|------------------------------+-----------------------------------------|
|rp_ai_rate                    | Rate for target rate, unit in Mbps      |
|------------------------------+-----------------------------------------|
|rp_hai_rate                   | Rate for hyper increase state,          |
|                              | unit in Mbps                            |
|------------------------------+-----------------------------------------|
|rp_min_dec_fac                | Minimum factor by which the current     |
|                              | transmit rate can be changed when       |
|                              | processing a CNP, unit is percentage    |
|------------------------------+-----------------------------------------|
|rp_min_rate                   | Minimum value for rate limit,           |
|                              | unit in Mbps                            |
|------------------------------+-----------------------------------------|
|rp_rate_to_set_on_first_cnp   | Rate that is set when first CNP is      |
|                              | received, unit is Mbps                  |
|------------------------------+-----------------------------------------|
|rp_dce_tcp_g                  | Used to calculate alpha                 |
|------------------------------+-----------------------------------------|
|rp_dce_tcp_rtt                | Time between updates of alpha value,    |
|                              | unit is usec                            |
|------------------------------+-----------------------------------------|
|rp_rate_reduce_monitor_period | Minimum time between consecutive rate   |
|                              | reductions                              |
|------------------------------+-----------------------------------------|
|rp_initial_alpha_value        | Initial value of alpha                  |
|------------------------------+-----------------------------------------|
|rp_gd                         | When CNP is received, flow rate is      |
|                              | reduced based on gd; rp_gd is given as  |
|                              | log2(rp_gd)                             |
|------------------------------+-----------------------------------------|
|np_cnp_dscp                   | dscp code point for generated cnp       |
|------------------------------+-----------------------------------------|
|np_cnp_prio_mode              | cnp priority mode                       |
|------------------------------+-----------------------------------------|
|np_cnp_prio                   | 802.1p priority for generated cnp       |
+------------------------------+-----------------------------------------+
Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/Makefile | 2 +- drivers/infiniband/hw/mlx5/cmd.c | 20 ++ drivers/infiniband/hw/mlx5/cmd.h | 4 + drivers/infiniband/hw/mlx5/cong.c | 421 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 9 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 37 +++ include/linux/mlx5/mlx5_ifc.h | 3 +- 7 files changed, 493 insertions(+), 3 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/cong.c (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/Makefile
b/drivers/infiniband/hw/mlx5/Makefile index 90ad2adc752f..bc6299697dda 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 18d5e1db93ed..470995fa38d2 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -57,3 +57,23 @@ int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, MLX5_SET(query_cong_statistics_in, in, clear, reset); return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } + +int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, + void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { }; + + MLX5_SET(query_cong_params_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_PARAMS); + MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); +} + +int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, + void *in, int in_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { }; + + return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index fa09228193a6..af4c24596274 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -39,4 +39,8 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, bool reset, void *out, int out_size); +int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, + void *out, int out_size); +int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, + void *in, int in_size); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c new file mode 100644 index 000000000000..2d32b519bb61 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "mlx5_ib.h" +#include "cmd.h" + +enum mlx5_ib_cong_node_type { + MLX5_IB_RROCE_ECN_RP = 1, + MLX5_IB_RROCE_ECN_NP = 2, +}; + +static const char * const mlx5_ib_dbg_cc_name[] = { + "rp_clamp_tgt_rate", + "rp_clamp_tgt_rate_ati", + "rp_time_reset", + "rp_byte_reset", + "rp_threshold", + "rp_ai_rate", + "rp_hai_rate", + "rp_min_dec_fac", + "rp_min_rate", + "rp_rate_to_set_on_first_cnp", + "rp_dce_tcp_g", + "rp_dce_tcp_rtt", + "rp_rate_reduce_monitor_period", + "rp_initial_alpha_value", + "rp_gd", + "np_cnp_dscp", + "np_cnp_prio_mode", + "np_cnp_prio", +}; + +#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1) +#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2) +#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3) +#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4) +#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5) +#define MLX5_IB_RP_AI_RATE_ATTR BIT(7) +#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8) +#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9) +#define MLX5_IB_RP_MIN_RATE_ATTR BIT(10) +#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11) +#define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12) +#define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13) +#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14) +#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15) +#define MLX5_IB_RP_GD_ATTR BIT(16) + +#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3) +#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4) + +static enum mlx5_ib_cong_node_type +mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset) +{ + if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE && + param_offset <= MLX5_IB_DBG_CC_RP_GD) + return MLX5_IB_RROCE_ECN_RP; + else + return MLX5_IB_RROCE_ECN_NP; +} + +static u32 mlx5_get_cc_param_val(void *field, int offset) +{ + switch (offset) { + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate); + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate_after_time_inc); + case MLX5_IB_DBG_CC_RP_TIME_RESET: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_time_reset); + case MLX5_IB_DBG_CC_RP_BYTE_RESET: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_byte_reset); + case MLX5_IB_DBG_CC_RP_THRESHOLD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_threshold); + case MLX5_IB_DBG_CC_RP_AI_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_ai_rate); + case MLX5_IB_DBG_CC_RP_HAI_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_hai_rate); + case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_min_dec_fac); + case MLX5_IB_DBG_CC_RP_MIN_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_min_rate); + case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rate_to_set_on_first_cnp); + case MLX5_IB_DBG_CC_RP_DCE_TCP_G: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + dce_tcp_g); + case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + dce_tcp_rtt); + case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rate_reduce_monitor_period); + case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE: + return 
MLX5_GET(cong_control_r_roce_ecn_rp, field, + initial_alpha_value); + case MLX5_IB_DBG_CC_RP_GD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_gd); + case MLX5_IB_DBG_CC_NP_CNP_DSCP: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_dscp); + case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_prio_mode); + case MLX5_IB_DBG_CC_NP_CNP_PRIO: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_802p_prio); + default: + return 0; + } +} + +static void mlx5_ib_set_cc_param_mask_val(void *field, int offset, + u32 var, u32 *attr_mask) +{ + switch (offset) { + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE: + *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate, var); + break; + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI: + *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate_after_time_inc, var); + break; + case MLX5_IB_DBG_CC_RP_TIME_RESET: + *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_time_reset, var); + break; + case MLX5_IB_DBG_CC_RP_BYTE_RESET: + *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_byte_reset, var); + break; + case MLX5_IB_DBG_CC_RP_THRESHOLD: + *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_threshold, var); + break; + case MLX5_IB_DBG_CC_RP_AI_RATE: + *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_ai_rate, var); + break; + case MLX5_IB_DBG_CC_RP_HAI_RATE: + *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_hai_rate, var); + break; + case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC: + *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_min_dec_fac, var); + break; + case MLX5_IB_DBG_CC_RP_MIN_RATE: + *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_min_rate, var); + break; + case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP: + *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rate_to_set_on_first_cnp, var); + break; + case MLX5_IB_DBG_CC_RP_DCE_TCP_G: + *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + dce_tcp_g, var); + break; + case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT: + *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + dce_tcp_rtt, var); + break; + case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD: + *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rate_reduce_monitor_period, var); + break; + case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE: + *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + initial_alpha_value, var); + break; + case MLX5_IB_DBG_CC_RP_GD: + *attr_mask |= MLX5_IB_RP_GD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_gd, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_DSCP: + *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE: + *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_PRIO: + *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, 
field, cnp_prio_mode, 0); + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var); + break; + } +} + +static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out); + void *out; + void *field; + int err; + enum mlx5_ib_cong_node_type node; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + node = mlx5_ib_param_to_node(offset); + + err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen); + if (err) + goto free; + + field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters); + *var = mlx5_get_cc_param_val(field, offset); + +free: + kvfree(out); + return err; +} + +static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in); + void *in; + void *field; + enum mlx5_ib_cong_node_type node; + u32 attr_mask = 0; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_cong_params_in, in, opcode, + MLX5_CMD_OP_MODIFY_CONG_PARAMS); + + node = mlx5_ib_param_to_node(offset); + MLX5_SET(modify_cong_params_in, in, cong_protocol, node); + + field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters); + mlx5_ib_set_cc_param_mask_val(field, offset, var, &attr_mask); + + field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select); + MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp, + attr_mask); + + err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen); + kvfree(in); + return err; +} + +static ssize_t set_param(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_ib_dbg_param *param = filp->private_data; + int offset = param->offset; + char lbuf[11] = { }; + u32 var; + int ret; + + if (count > sizeof(lbuf)) + return -EINVAL; + + if (copy_from_user(lbuf, buf, count)) + return -EFAULT; + + lbuf[sizeof(lbuf) - 1] = '\0'; + + if (kstrtou32(lbuf, 0, &var)) + return -EINVAL; + + ret = mlx5_ib_set_cc_params(param->dev, offset, var); + return ret ? 
ret : count; +} + +static ssize_t get_param(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_ib_dbg_param *param = filp->private_data; + int offset = param->offset; + u32 var = 0; + int ret; + char lbuf[11]; + + if (*pos) + return 0; + + ret = mlx5_ib_get_cc_params(param->dev, offset, &var); + if (ret) + return ret; + + ret = snprintf(lbuf, sizeof(lbuf), "%d\n", var); + if (ret < 0) + return ret; + + if (copy_to_user(buf, lbuf, ret)) + return -EFAULT; + + *pos += ret; + return ret; +} + +static const struct file_operations dbg_cc_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = set_param, + .read = get_param, +}; + +void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev) +{ + if (!mlx5_debugfs_root || + !dev->dbg_cc_params || + !dev->dbg_cc_params->root) + return; + + debugfs_remove_recursive(dev->dbg_cc_params->root); + kfree(dev->dbg_cc_params); + dev->dbg_cc_params = NULL; +} + +int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev) +{ + struct mlx5_ib_dbg_cc_params *dbg_cc_params; + int i; + + if (!mlx5_debugfs_root) + goto out; + + if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) || + !MLX5_CAP_GEN(dev->mdev, cc_modify_allowed)) + goto out; + + dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL); + if (!dbg_cc_params) + goto out; + + dev->dbg_cc_params = dbg_cc_params; + + dbg_cc_params->root = debugfs_create_dir("cc_params", + dev->mdev->priv.dbg_root); + if (!dbg_cc_params->root) + goto err; + + for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { + dbg_cc_params->params[i].offset = i; + dbg_cc_params->params[i].dev = dev; + dbg_cc_params->params[i].dentry = + debugfs_create_file(mlx5_ib_dbg_cc_name[i], + 0600, dbg_cc_params->root, + &dbg_cc_params->params[i], + &dbg_cc_fops); + if (!dbg_cc_params->params[i].dentry) + goto err; + } +out: return 0; + +err: + mlx5_ib_warn(dev, "cong debugfs failure\n"); + mlx5_ib_cleanup_cong_debugfs(dev); + /* + * We don't want to fail driver if debugfs failed to initialize, + * so we are not forwarding error to the user. 
+ */ + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index bfd9117da10c..a903728627f1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3838,9 +3838,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) goto err_odp; } + err = mlx5_ib_init_cong_debugfs(dev); + if (err) + goto err_cnt; + dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); if (!dev->mdev->priv.uar) - goto err_cnt; + goto err_cong; err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false); if (err) @@ -3889,6 +3893,8 @@ err_uar_page: mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); err_cnt: + mlx5_ib_cleanup_cong_debugfs(dev); +err_cong: if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); @@ -3923,6 +3929,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); mlx5_free_bfreg(dev->mdev, &dev->bfreg); mlx5_put_uars_page(dev->mdev, mdev->priv.uar); + mlx5_ib_cleanup_cong_debugfs(dev); if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); destroy_umrc_res(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5fb4547bbcb8..f0682f383b52 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -619,6 +619,39 @@ struct mlx5_roce { enum ib_port_state last_port_state; }; +struct mlx5_ib_dbg_param { + int offset; + struct mlx5_ib_dev *dev; + struct dentry *dentry; +}; + +enum mlx5_ib_dbg_cc_types { + MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE, + MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI, + MLX5_IB_DBG_CC_RP_TIME_RESET, + MLX5_IB_DBG_CC_RP_BYTE_RESET, + MLX5_IB_DBG_CC_RP_THRESHOLD, + MLX5_IB_DBG_CC_RP_AI_RATE, + MLX5_IB_DBG_CC_RP_HAI_RATE, + MLX5_IB_DBG_CC_RP_MIN_DEC_FAC, + MLX5_IB_DBG_CC_RP_MIN_RATE, + MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP, + MLX5_IB_DBG_CC_RP_DCE_TCP_G, + MLX5_IB_DBG_CC_RP_DCE_TCP_RTT, + MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD, + MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE, + MLX5_IB_DBG_CC_RP_GD, + MLX5_IB_DBG_CC_NP_CNP_DSCP, + MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE, + MLX5_IB_DBG_CC_NP_CNP_PRIO, + MLX5_IB_DBG_CC_MAX, +}; + +struct mlx5_ib_dbg_cc_params { + struct dentry *root; + struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX]; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -655,6 +688,7 @@ struct mlx5_ib_dev { struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; struct mlx5_sq_bfreg fp_bfreg; + struct mlx5_ib_dbg_cc_params *dbg_cc_params; /* protect the user_td */ struct mutex lb_mutex; @@ -909,6 +943,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, int index, enum ib_gid_type *gid_type); +void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev); +int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev); + /* GSI QP helper functions */ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 57c75e8b3c19..3e697f672de5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1188,7 +1188,8 @@ struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { u8 reserved_at_c0[0x12]; u8 cnp_dscp[0x6]; - u8 reserved_at_d8[0x5]; + u8 reserved_at_d8[0x4]; + u8 cnp_prio_mode[0x1]; u8 cnp_802p_prio[0x3]; u8 reserved_at_e0[0x720]; -- cgit v1.2.3-59-g8ed1b From 7ecf6d8ff154e6f7471ee537a400d43a5f3b1c57 Mon 
Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 30 May 2017 10:18:24 +0300 Subject: IB/mlx5: Restore IB guid/policy for virtual functions When a user sets port_guid, node_guid or policy of an IB virtual function, save this information in "struct mlx5_vf_context". This information will be restored later when pci_resume is called. To make sure this works, one can use aer-inject to generate PCI errors on mlx5 devices and verify if relevant fields are restored after PCI resume. Signed-off-by: Bodong Wang Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/ib_virt.c | 9 ++++++ drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 42 +++++++++++++++++++++++++ include/linux/mlx5/driver.h | 17 +++++----- 3 files changed, 61 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index c1b9de800fe5..649a3364f838 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -96,6 +96,7 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -109,6 +110,8 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, } in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].policy = in->policy; out: kfree(in); @@ -151,6 +154,7 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -160,6 +164,8 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID; in->node_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].node_guid = guid; kfree(in); return err; } @@ -169,6 +175,7 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -178,6 +185,8 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID; in->port_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].port_guid = guid; kfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index bcdf7779c48d..090b29e05a6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_core.h" #ifdef CONFIG_MLX5_CORE_EN #include "eswitch.h" @@ -44,6 +45,38 @@ bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return !!sriov->num_vfs; } +static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct mlx5_hca_vport_context *in; + 
int err = 0; + + /* Restore sriov guid and policy settings */ + if (sriov->vfs_ctx[vf].node_guid || + sriov->vfs_ctx[vf].port_guid || + sriov->vfs_ctx[vf].policy != MLX5_POLICY_INVALID) { + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->node_guid = sriov->vfs_ctx[vf].node_guid; + in->port_guid = sriov->vfs_ctx[vf].port_guid; + in->policy = sriov->vfs_ctx[vf].policy; + in->field_select = + !!(in->port_guid) * MLX5_HCA_VPORT_SEL_PORT_GUID | + !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID | + !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY; + + err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in); + if (err) + mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf); + + kfree(in); + } + + return err; +} + static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -74,6 +107,15 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) } sriov->vfs_ctx[vf].enabled = 1; sriov->enabled_vfs++; + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { + err = sriov_restore_guids(dev, vf); + if (err) { + mlx5_core_warn(dev, + "failed to restore VF %d settings, err %d\n", + vf, err); + continue; + } + } mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index df6ce59a1f95..54221be5f69e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -162,6 +162,13 @@ enum dbg_rsc_type { MLX5_DBG_RSC_CQ, }; +enum port_state_policy { + MLX5_POLICY_DOWN = 0, + MLX5_POLICY_UP = 1, + MLX5_POLICY_FOLLOW = 2, + MLX5_POLICY_INVALID = 0xffffffff +}; + struct mlx5_field_desc { struct dentry *dent; int i; @@ -525,6 +532,9 @@ struct mlx5_mkey_table { struct mlx5_vf_context { int enabled; + u64 port_guid; + u64 node_guid; + enum port_state_policy policy; }; struct mlx5_core_sriov { @@ -842,13 +852,6 @@ struct mlx5_pas { u8 log_sz; }; -enum port_state_policy { - MLX5_POLICY_DOWN = 0, - MLX5_POLICY_UP = 1, - MLX5_POLICY_FOLLOW = 2, - MLX5_POLICY_INVALID = 0xffffffff -}; - enum phy_port_state { MLX5_AAA_111 }; -- cgit v1.2.3-59-g8ed1b From 03404e8ae652e02a5e3388224836cef53d7a0988 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 May 2017 10:29:13 +0300 Subject: IB/mlx5: Add support to dropless RQ RQs that were configured for "delay drop" will prevent packet drops when their WQEs are depleted. Marking an RQ as drop-less is done by setting delay_drop_en in the RQ context using the CREATE_RQ command. Since this feature is globally activated/deactivated by the SET_DELAY_DROP command for all the marked RQs, the driver activates/deactivates it according to the number of RQs with 'delay_drop' enabled. When the timeout expires, the feature is deactivated; the driver therefore handles the delay drop timeout event and reactivates it.
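The activation policy reduces to this sketch (condensed from set_delay_drop() in the qp.c hunk below; error paths trimmed): the SET_DELAY_DROP command is issued only for the first dropless RQ, and later RQs reuse the already-active setting:

	mutex_lock(&dev->delay_drop.lock);
	if (!dev->delay_drop.activate) {
		err = mlx5_core_set_delay_drop(dev->mdev,
					       dev->delay_drop.timeout);
		if (!err)
			dev->delay_drop.activate = true;
	}
	mutex_unlock(&dev->delay_drop.lock);

When the timeout event fires, the handler re-issues the command to re-arm the feature, clearing activate only if that re-arming fails.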
Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 60 +++++++++++++++++++++++++++++++++--- drivers/infiniband/hw/mlx5/mlx5_ib.h | 19 ++++++++++++ drivers/infiniband/hw/mlx5/qp.c | 37 ++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 2 +- 4 files changed, 113 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a903728627f1..ad4b12decc23 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -697,6 +697,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_UD_TSO; } + if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) && + MLX5_CAP_GEN(dev->mdev, general_notification_event)) + props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP; + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { /* Legacy bit to support old userspace libraries */ @@ -2752,6 +2756,24 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); } +static void delay_drop_handler(struct work_struct *work) +{ + int err; + struct mlx5_ib_delay_drop *delay_drop = + container_of(work, struct mlx5_ib_delay_drop, + delay_drop_work); + + mutex_lock(&delay_drop->lock); + err = mlx5_core_set_delay_drop(delay_drop->dev->mdev, + delay_drop->timeout); + if (err) { + mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n", + delay_drop->timeout); + delay_drop->activate = false; + } + mutex_unlock(&delay_drop->lock); +} + static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param) { @@ -2804,8 +2826,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, ibev.event = IB_EVENT_CLIENT_REREGISTER; port = (u8)param; break; + case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: + schedule_work(&ibdev->delay_drop.delay_drop_work); + goto out; default: - return; + goto out; } ibev.device = &ibdev->ib_dev; @@ -2813,7 +2838,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, if (port < 1 || port > ibdev->num_ports) { mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); - return; + goto out; } if (ibdev->ib_active) @@ -2821,6 +2846,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, if (fatal) ibdev->ib_active = false; + +out: + return; } static int set_has_smi_cap(struct mlx5_ib_dev *dev) @@ -3623,6 +3651,26 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, return netdev; } +static void cancel_delay_drop(struct mlx5_ib_dev *dev) +{ + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + cancel_work_sync(&dev->delay_drop.delay_drop_work); +} + +static void init_delay_drop(struct mlx5_ib_dev *dev) +{ + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + mutex_init(&dev->delay_drop.lock); + dev->delay_drop.dev = dev; + dev->delay_drop.activate = false; + dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; + INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -3862,11 +3910,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_dev; + init_delay_drop(dev); + for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { err = 
device_create_file(&dev->ib_dev.dev, mlx5_class_attributes[i]); if (err) - goto err_umrc; + goto err_delay_drop; } if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && @@ -3877,7 +3927,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) return dev; -err_umrc: +err_delay_drop: + cancel_delay_drop(dev); destroy_umrc_res(dev); err_dev: @@ -3924,6 +3975,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_dev *dev = context; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); + cancel_delay_drop(dev); mlx5_remove_netdev_notifier(dev); ib_unregister_device(&dev->ib_dev); mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f0682f383b52..097f12dc65a6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -247,6 +247,10 @@ struct mlx5_ib_wq { void *qend; }; +enum mlx5_ib_wq_flags { + MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1, +}; + struct mlx5_ib_rwq { struct ib_wq ibwq; struct mlx5_core_qp core_qp; @@ -264,6 +268,7 @@ struct mlx5_ib_rwq { u32 wqe_count; u32 wqe_shift; int wq_sig; + u32 create_flags; /* Use enum mlx5_ib_wq_flags */ }; enum { @@ -652,6 +657,19 @@ struct mlx5_ib_dbg_cc_params { struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX]; }; +enum { + MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100, +}; + +struct mlx5_ib_delay_drop { + struct mlx5_ib_dev *dev; + struct work_struct delay_drop_work; + /* serialize setting of delay drop */ + struct mutex lock; + u32 timeout; + bool activate; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -688,6 +706,7 @@ struct mlx5_ib_dev { struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; struct mlx5_sq_bfreg fp_bfreg; + struct mlx5_ib_delay_drop delay_drop; struct mlx5_ib_dbg_cc_params *dbg_cc_params; /* protect the user_td */ diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0889ff367c86..939553d5c25f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4597,6 +4597,24 @@ static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) } } +static int set_delay_drop(struct mlx5_ib_dev *dev) +{ + int err = 0; + + mutex_lock(&dev->delay_drop.lock); + if (dev->delay_drop.activate) + goto out; + + err = mlx5_core_set_delay_drop(dev->mdev, dev->delay_drop.timeout); + if (err) + goto out; + + dev->delay_drop.activate = true; +out: + mutex_unlock(&dev->delay_drop.lock); + return err; +} + static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, struct ib_wq_init_attr *init_attr) { @@ -4651,9 +4669,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, } MLX5_SET(rqc, rqc, scatter_fcs, 1); } + if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { + if (!(dev->ib_dev.attrs.raw_packet_caps & + IB_RAW_PACKET_CAP_DELAY_DROP)) { + mlx5_ib_dbg(dev, "Delay drop is not supported\n"); + err = -EOPNOTSUPP; + goto out; + } + MLX5_SET(rqc, rqc, delay_drop_en, 1); + } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp); + if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { + err = set_delay_drop(dev); + if (err) { + mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n", + err); + mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); + } else { + rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP; + } + } out: kvfree(in); 
return err; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4bc57647fad8..f350688a12fa 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2519,7 +2519,7 @@ enum { struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; - u8 reserved_at_1[0x1]; + u8 delay_drop_en[0x1]; u8 scatter_fcs[0x1]; u8 vsd[0x1]; u8 mem_rq_type[0x4]; -- cgit v1.2.3-59-g8ed1b From fe248c3a5837848717ed566fb4aefe66f43a5e53 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 May 2017 10:29:14 +0300 Subject: IB/mlx5: Add delay drop configuration and statistics Add debugfs interface for monitor the number of delay drop timeout events and the number of existing dropless RQs in the system. In addition add debugfs interface for configuring the global timeout value which is used in the SET_DELAY_DROP command. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 103 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 10 ++++ drivers/infiniband/hw/mlx5/qp.c | 13 ++++- 3 files changed, 123 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ad4b12decc23..7455f95bf679 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include #include #include #include @@ -2763,6 +2764,8 @@ static void delay_drop_handler(struct work_struct *work) container_of(work, struct mlx5_ib_delay_drop, delay_drop_work); + atomic_inc(&delay_drop->events_cnt); + mutex_lock(&delay_drop->lock); err = mlx5_core_set_delay_drop(delay_drop->dev->mdev, delay_drop->timeout); @@ -3651,12 +3654,107 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, return netdev; } +static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev) +{ + if (!dev->delay_drop.dbg) + return; + debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs); + kfree(dev->delay_drop.dbg); + dev->delay_drop.dbg = NULL; +} + static void cancel_delay_drop(struct mlx5_ib_dev *dev) { if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) return; cancel_work_sync(&dev->delay_drop.delay_drop_work); + delay_drop_debugfs_cleanup(dev); +} + +static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_ib_delay_drop *delay_drop = filp->private_data; + char lbuf[20]; + int len; + + len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout); + return simple_read_from_buffer(buf, count, pos, lbuf, len); +} + +static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_ib_delay_drop *delay_drop = filp->private_data; + u32 timeout; + u32 var; + + if (kstrtouint_from_user(buf, count, 0, &var)) + return -EFAULT; + + timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS * + 1000); + if (timeout != var) + mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n", + timeout); + + delay_drop->timeout = timeout; + + return count; +} + +static const struct file_operations fops_delay_drop_timeout = { + .owner = THIS_MODULE, + .open = simple_open, + .write = delay_drop_timeout_write, + .read = delay_drop_timeout_read, +}; + +static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_ib_dbg_delay_drop *dbg; + + if (!mlx5_debugfs_root) + return 0; + + dbg = kzalloc(sizeof(*dbg), 
GFP_KERNEL); + if (!dbg) + return -ENOMEM; + + dbg->dir_debugfs = + debugfs_create_dir("delay_drop", + dev->mdev->priv.dbg_root); + if (!dbg->dir_debugfs) + return -ENOMEM; + + dbg->events_cnt_debugfs = + debugfs_create_atomic_t("num_timeout_events", 0400, + dbg->dir_debugfs, + &dev->delay_drop.events_cnt); + if (!dbg->events_cnt_debugfs) + goto out_debugfs; + + dbg->rqs_cnt_debugfs = + debugfs_create_atomic_t("num_rqs", 0400, + dbg->dir_debugfs, + &dev->delay_drop.rqs_cnt); + if (!dbg->rqs_cnt_debugfs) + goto out_debugfs; + + dbg->timeout_debugfs = + debugfs_create_file("timeout", 0600, + dbg->dir_debugfs, + &dev->delay_drop, + &fops_delay_drop_timeout); + if (!dbg->timeout_debugfs) + goto out_debugfs; + + return 0; + +out_debugfs: + delay_drop_debugfs_cleanup(dev); + return -ENOMEM; } static void init_delay_drop(struct mlx5_ib_dev *dev) @@ -3669,6 +3767,11 @@ static void init_delay_drop(struct mlx5_ib_dev *dev) dev->delay_drop.activate = false; dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); + atomic_set(&dev->delay_drop.rqs_cnt, 0); + atomic_set(&dev->delay_drop.events_cnt, 0); + + if (delay_drop_debugfs_init(dev)) + mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); } static void *mlx5_ib_add(struct mlx5_core_dev *mdev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 097f12dc65a6..0316147a39a8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -661,6 +661,13 @@ enum { MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100, }; +struct mlx5_ib_dbg_delay_drop { + struct dentry *dir_debugfs; + struct dentry *rqs_cnt_debugfs; + struct dentry *events_cnt_debugfs; + struct dentry *timeout_debugfs; +}; + struct mlx5_ib_delay_drop { struct mlx5_ib_dev *dev; struct work_struct delay_drop_work; @@ -668,6 +675,9 @@ struct mlx5_ib_delay_drop { struct mutex lock; u32 timeout; bool activate; + atomic_t events_cnt; + atomic_t rqs_cnt; + struct mlx5_ib_dbg_delay_drop *dbg; }; struct mlx5_ib_dev { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 939553d5c25f..c5d8ec839e99 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -675,10 +675,14 @@ err_umem: return err; } -static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq) +static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_rwq *rwq) { struct mlx5_ib_ucontext *context; + if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP) + atomic_dec(&dev->delay_drop.rqs_cnt); + context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &rwq->db); if (rwq->umem) @@ -4612,6 +4616,9 @@ static int set_delay_drop(struct mlx5_ib_dev *dev) dev->delay_drop.activate = true; out: mutex_unlock(&dev->delay_drop.lock); + + if (!err) + atomic_inc(&dev->delay_drop.rqs_cnt); return err; } @@ -4824,7 +4831,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, err_copy: mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); err_user_rq: - destroy_user_rq(pd, rwq); + destroy_user_rq(dev, pd, rwq); err: kfree(rwq); return ERR_PTR(err); @@ -4836,7 +4843,7 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq) struct mlx5_ib_rwq *rwq = to_mrwq(wq); mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); - destroy_user_rq(wq->pd, rwq); + destroy_user_rq(dev, wq->pd, rwq); kfree(rwq); return 0; -- cgit v1.2.3-59-g8ed1b From c2e53b2ce1ba351918ede492c0cb207f42e1228f Mon Sep 17 00:00:00 2001 From: Yishai 
Hadas Date: Thu, 8 Jun 2017 16:15:08 +0300 Subject: IB/mlx5: Add support for QP with a given source QPN
Allow user-space applications to accelerate send and receive traffic that is typically handled by the IPoIB ULP, by creating a UD QP with the source QPN of the IPoIB UD QP. A UD QP with a given source QPN is basically similar to a RAW QP in terms of the resources created for it. However, - Its TIS should point to the source QPN. - Only its state can be modified, as the transport attributes are managed by its source QP. This patch handles the following: - Creating/destroying/modifying a UD QP with a given source QPN.
Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + drivers/infiniband/hw/mlx5/qp.c | 72 ++++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/hw')
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 0316147a39a8..7ac991070020 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -383,6 +383,7 @@ struct mlx5_ib_qp { struct list_head cq_recv_list; struct list_head cq_send_list; u32 rate_limit; + u32 underlay_qpn; }; struct mlx5_ib_cq_buf { @@ -404,6 +405,7 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, MLX5_IB_QP_RSS = 1 << 8, MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9, + MLX5_IB_QP_UNDERLAY = 1 << 10, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c5d8ec839e99..5c7ce9bd466e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -34,6 +34,7 @@ #include #include #include +#include <linux/mlx5/fs.h> #include "mlx5_ib.h" /* not supported currently */ @@ -453,7 +454,8 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, return -EINVAL; } - if (attr->qp_type == IB_QPT_RAW_PACKET) { + if (attr->qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift; qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6; } else { @@ -1025,12 +1027,16 @@ static int is_connected(enum ib_qp_type qp_type) } static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, struct mlx5_ib_sq *sq, u32 tdn) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); MLX5_SET(tisc, tisc, transport_domain, tdn); + if (qp->flags & MLX5_IB_QP_UNDERLAY) + MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); + return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn); } @@ -1233,7 +1239,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u32 tdn = mucontext->tdn; if (qp->sq.wqe_cnt) { - err = create_raw_packet_qp_tis(dev, sq, tdn); + err = create_raw_packet_qp_tis(dev, qp, sq, tdn); if (err) return err; @@ -1506,10 +1512,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 *in; int err; - base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
- &qp->raw_packet_qp.rq.base : - &qp->trans_qp.base; - mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); @@ -1591,10 +1593,28 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); + + if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { + if (init_attr->qp_type != IB_QPT_UD || + (MLX5_CAP_GEN(dev->mdev, port_type) != + MLX5_CAP_PORT_TYPE_IB) || + !mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) { + mlx5_ib_dbg(dev, "Source QP option isn't supported\n"); + return -EOPNOTSUPP; + } + + qp->flags |= MLX5_IB_QP_UNDERLAY; + qp->underlay_qpn = init_attr->source_qpn; + } } else { qp->wq_sig = !!wq_signature; } + base = (init_attr->qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) ? + &qp->raw_packet_qp.rq.base : + &qp->trans_qp.base; + qp->has_rq = qp_has_rq(init_attr); err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, (pd && pd->uobject) ? &ucmd : NULL); @@ -1745,7 +1765,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX5_IB_QP_LSO; } - if (init_attr->qp_type == IB_QPT_RAW_PACKET) { + if (init_attr->qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, pd); @@ -1897,7 +1918,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { struct mlx5_ib_cq *send_cq, *recv_cq; - struct mlx5_ib_qp_base *base = &qp->trans_qp.base; + struct mlx5_ib_qp_base *base; unsigned long flags; int err; @@ -1906,12 +1927,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) return; } - base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ? + base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) ? &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; if (qp->state != IB_QPS_RESET) { - if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET && + !(qp->flags & MLX5_IB_QP_UNDERLAY)) { err = mlx5_core_qp_modify(dev->mdev, MLX5_CMD_OP_2RST_QP, 0, NULL, &base->mqp); @@ -1950,7 +1973,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) mlx5_ib_unlock_cqs(send_cq, recv_cq); spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { destroy_raw_packet_qp(dev, qp); } else { err = mlx5_core_destroy_qp(dev->mdev, &base->mqp); @@ -2706,7 +2730,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; - } else if (ibqp->qp_type == IB_QPT_UD || + } else if ((ibqp->qp_type == IB_QPT_UD && + !(qp->flags & MLX5_IB_QP_UNDERLAY)) || ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; } else if (attr_mask & IB_QP_PATH_MTU) { @@ -2803,6 +2828,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u8 port_num = (attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port) - 1; + + /* Underlay port should be used - index 0 function per port */ + if (qp->flags & MLX5_IB_QP_UNDERLAY) + port_num = 0; + mibport = &dev->port[port_num]; context->qp_counter_set_usr_page |= cpu_to_be32((u32)(mibport->cnts.set_id) << 24); @@ -2828,7 +2858,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { struct mlx5_modify_raw_qp_param raw_qp_param = {}; raw_qp_param.operation = op; @@ -2917,7 +2948,13 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); } - if (qp_type != MLX5_IB_QPT_REG_UMR && + if (qp->flags & MLX5_IB_QP_UNDERLAY) { + if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) { + mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n", + attr_mask); + goto out; + } + } else if (qp_type != MLX5_IB_QPT_REG_UMR && !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", cur_state, new_state, ibqp->qp_type, attr_mask); @@ -4481,9 +4518,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); + /* Not all of output fields are applicable, make sure to zero them */ + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + memset(qp_attr, 0, sizeof(*qp_attr)); + mutex_lock(&qp->mutex); - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state); if (err) goto out; -- cgit v1.2.3-59-g8ed1b
From 81e308804ba6b389ead57b54a259154a36a560a8 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Jun 2017 16:15:09 +0300 Subject: IB/mlx5: Add multicast flow steering support for underlay QP
In order to add multicast flow steering support, there is a need to block attaching an MCG flow to an underlay QP, to recognize a multicast IB_FLOW_SPEC_IPV4 based on its destination IP, and to enable creating/destroying flows for the IB link layer.
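For illustration only: the IB_FLOW_SPEC_IPV4 multicast recognition described above reduces to testing whether the destination address falls in 224.0.0.0/4, which is what the diff below does with the kernel's ipv4_is_multicast() helper. A minimal user-space sketch of the same test (the function name here is hypothetical):

#include <arpa/inet.h>  /* htonl, inet_addr */
#include <stdint.h>

/* An IPv4 address is multicast when its top nibble is 0xE, i.e. it falls
 * in 224.0.0.0/4. 'addr' is in network byte order, matching the dst_ip
 * field of an IPv4 flow spec. */
static int ipv4_dst_is_multicast(uint32_t addr)
{
        return (addr & htonl(0xf0000000)) == htonl(0xe0000000);
}

/* e.g. ipv4_dst_is_multicast(inet_addr("224.0.0.1")) is nonzero,
 * while ipv4_dst_is_multicast(inet_addr("10.0.0.1")) is 0. */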
Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 43 ++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7455f95bf679..bf74b2ee2f9c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2102,21 +2102,32 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, */ static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) { - struct ib_flow_spec_eth *eth_spec; + union ib_flow_spec *flow_spec; if (ib_attr->type != IB_FLOW_ATTR_NORMAL || - ib_attr->size < sizeof(struct ib_flow_attr) + - sizeof(struct ib_flow_spec_eth) || ib_attr->num_of_specs < 1) return false; - eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1); - if (eth_spec->type != IB_FLOW_SPEC_ETH || - eth_spec->size != sizeof(*eth_spec)) + flow_spec = (union ib_flow_spec *)(ib_attr + 1); + if (flow_spec->type == IB_FLOW_SPEC_IPV4) { + struct ib_flow_spec_ipv4 *ipv4_spec; + + ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; + if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) + return true; + return false; + } + + if (flow_spec->type == IB_FLOW_SPEC_ETH) { + struct ib_flow_spec_eth *eth_spec; + + eth_spec = (struct ib_flow_spec_eth *)flow_spec; + return is_multicast_ether_addr(eth_spec->mask.dst_mac) && + is_multicast_ether_addr(eth_spec->val.dst_mac); + } - return is_multicast_ether_addr(eth_spec->mask.dst_mac) && - is_multicast_ether_addr(eth_spec->val.dst_mac); + return false; } static bool is_valid_ethertype(struct mlx5_core_dev *mdev, @@ -2594,8 +2605,14 @@ unlock: static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *mqp = to_mqp(ibqp); int err; + if (mqp->flags & MLX5_IB_QP_UNDERLAY) { + mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n"); + return -EOPNOTSUPP; + } + err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); if (err) mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", @@ -3941,18 +3958,20 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + dev->ib_dev.create_flow = mlx5_ib_create_flow; + dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; + dev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) { - dev->ib_dev.create_flow = mlx5_ib_create_flow; - dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; dev->ib_dev.create_wq = mlx5_ib_create_wq; dev->ib_dev.modify_wq = mlx5_ib_modify_wq; dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | -- cgit v1.2.3-59-g8ed1b From 1d54f89094e2d2067948550ec52aab93e15b4b0c Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Jun 2017 16:15:11 +0300 Subject: IB/mlx5: Report RX checksum capabilities for IPoIB Report RX checksum capabilities when 'ipoib_enhanced_offloads' 
capability is set and checksum is supported.
Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband/hw')
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index bf74b2ee2f9c..2757d445b042 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -702,6 +702,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_CAP_GEN(dev->mdev, general_notification_event)) props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP; + if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && + MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap)) + props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { /* Legacy bit to support old userspace libraries */ -- cgit v1.2.3-59-g8ed1b
From 4c25b7a39005c9243a492b577c3e940eeac36a25 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Mon, 12 Jun 2017 10:36:15 +0300 Subject: IB/mlx5: Fix cached MR allocation flow
When we have a miss in one order of the mkey cache, we try to get an mkey from a higher order. We still need to check that the higher order can be used with UMR before using it. Otherwise, we will get an mkey with 0 entries and the post send operation that is used to fill it will complete with the following error: mlx5_0:dump_cqe:275:(pid 0): dump error cqe 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 0f007806 25000025 49ce59d2
Fixes: 49780d42dfc9 ("IB/mlx5: Expose MR cache for mlx5_ib") Cc: <stable@vger.kernel.org> # v4.10+ Signed-off-by: Majd Dibbiny Reviewed-by: Ilya Lesokhin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw')
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 2c40a2e989d2..a0eb2f96179a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -48,6 +48,7 @@ enum { #define MLX5_UMR_ALIGN 2048 static int clean_mr(struct mlx5_ib_mr *mr); +static int max_umr_order(struct mlx5_ib_dev *dev); static int use_umr(struct mlx5_ib_dev *dev, int order); static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); @@ -491,16 +492,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_ib_mr *mr = NULL; struct mlx5_cache_ent *ent; + int last_umr_cache_entry; int c; int i; c = order2idx(dev, order); - if (c < 0 || c > MAX_UMR_CACHE_ENTRY) { + last_umr_cache_entry = order2idx(dev, max_umr_order(dev)); + if (c < 0 || c > last_umr_cache_entry) { mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); return NULL; } - for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) { + for (i = c; i <= last_umr_cache_entry; i++) { ent = &cache->ent[i]; mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); @@ -816,11 +819,16 @@ static int get_octo_len(u64 addr, u64 len, int page_size) return (npages + 1) / 2; } -static int use_umr(struct mlx5_ib_dev *dev, int order) +static int max_umr_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return order <= MAX_UMR_CACHE_ENTRY + 2; - return order <= MLX5_MAX_UMR_SHIFT; + return MAX_UMR_CACHE_ENTRY + 2; + return MLX5_MAX_UMR_SHIFT; +} + +static int use_umr(struct
mlx5_ib_dev *dev, int order) +{ + return order <= max_umr_order(dev); } static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, -- cgit v1.2.3-59-g8ed1b
From 3fffc82ad6c78fcc9d5d4eca089f00db14ab0358 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 12 Jun 2017 10:36:16 +0300 Subject: IB/mlx5: Fix existence check for extended address vector
The extended address vector flag is the highest bit in a be32 variable, but it was compared against the lowest bit. This patch fixes the endianness of that check and removes the redundant define, since an equivalent one is already declared.
Fixes: 17d2f88f92ce ("IB/mlx5: Add ODP atomics support") Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/odp.c | 2 +- include/linux/mlx5/qp.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband/hw')
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ae0746754008..3d701c7a4c91 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( if (qp->ibqp.qp_type != IB_QPT_RC) { av = *wqe; - if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT)) + if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); else *wqe += sizeof(struct mlx5_base_av); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index fff4ec13f620..66d19b611fe4 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -212,7 +212,6 @@ struct mlx5_wqe_ctrl_seg { #define MLX5_WQE_CTRL_OPCODE_MASK 0xff #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8 -#define MLX5_WQE_AV_EXT 0x80000000 enum { MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, -- cgit v1.2.3-59-g8ed1b
From 58dcb60a226ee48cc66c96c27b751f06ec2bc5a9 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 19 Jun 2017 07:19:37 +0300 Subject: IB/mlx5: Expose extended error counters
This patch adds the below requester- and responder-side error counters, which are exposed through the hardware counters interface and are supported as part of the query Q counters command extension.
+---------------------------+-------------------------------------+
| Name                      | Description                         |
|---------------------------+-------------------------------------|
|resp_local_length_error    | Number of times responder detected  |
|                           | local length errors                 |
|---------------------------+-------------------------------------|
|resp_cqe_error             | Number of CQEs completed with error |
|                           | at responder                        |
|---------------------------+-------------------------------------|
|req_cqe_error              | Number of CQEs completed with error |
|                           | at requester                        |
|---------------------------+-------------------------------------|
|req_remote_invalid_request | Number of times requester detected  |
|                           | remote invalid request error        |
|---------------------------+-------------------------------------|
|req_remote_access_errors   | Number of times requester detected  |
|                           | remote access error                 |
|---------------------------+-------------------------------------|
|resp_remote_access_errors  | Number of times responder detected  |
|                           | remote access error                 |
|---------------------------+-------------------------------------|
|resp_cqe_flush_error       | Number of CQEs flushed with error   |
|                           | at responder                        |
|---------------------------+-------------------------------------|
|req_cqe_flush_error        | Number of CQEs flushed with error   |
|                           | at requester                        |
+---------------------------+-------------------------------------+
Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 22 ++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 44 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw')
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2757d445b042..9dd9759459fb 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3432,6 +3432,17 @@ static const struct mlx5_ib_counter cong_cnts[] = { INIT_CONG_COUNTER(np_cnp_sent), }; +static const struct mlx5_ib_counter extended_err_cnts[] = { + INIT_Q_COUNTER(resp_local_length_error), + INIT_Q_COUNTER(resp_cqe_error), + INIT_Q_COUNTER(req_cqe_error), + INIT_Q_COUNTER(req_remote_invalid_request), + INIT_Q_COUNTER(req_remote_access_errors), + INIT_Q_COUNTER(resp_remote_access_errors), + INIT_Q_COUNTER(resp_cqe_flush_error), + INIT_Q_COUNTER(req_cqe_flush_error), +}; + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { unsigned int i; @@ -3456,6 +3467,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) num_counters += ARRAY_SIZE(retrans_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) + num_counters += ARRAY_SIZE(extended_err_cnts); + cnts->num_q_counters = num_counters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { @@ -3505,6 +3520,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, } } + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { + for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { + names[j] = extended_err_cnts[i].name; + offsets[j] = extended_err_cnts[i].offset; + } + } + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { names[j] = cong_cnts[i].name; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f350688a12fa..5bae70eb25af 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++
b/include/linux/mlx5/mlx5_ifc.h @@ -858,7 +858,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pcam_reg[0x1]; u8 local_ca_ack_delay[0x5]; u8 port_module_event[0x1]; - u8 reserved_at_1b1[0x1]; + u8 enhanced_error_q_counters[0x1]; u8 ports_check[0x1]; u8 reserved_at_1b3[0x1]; u8 disable_link_up[0x1]; @@ -3953,7 +3953,47 @@ struct mlx5_ifc_query_q_counter_out_bits { u8 local_ack_timeout_err[0x20]; - u8 reserved_at_320[0x4e0]; + u8 reserved_at_320[0xa0]; + + u8 resp_local_length_error[0x20]; + + u8 req_local_length_error[0x20]; + + u8 resp_local_qp_error[0x20]; + + u8 local_operation_error[0x20]; + + u8 resp_local_protection[0x20]; + + u8 req_local_protection[0x20]; + + u8 resp_cqe_error[0x20]; + + u8 req_cqe_error[0x20]; + + u8 req_mw_binding[0x20]; + + u8 req_bad_response[0x20]; + + u8 req_remote_invalid_request[0x20]; + + u8 resp_remote_invalid_request[0x20]; + + u8 req_remote_access_errors[0x20]; + + u8 resp_remote_access_errors[0x20]; + + u8 req_remote_operation_errors[0x20]; + + u8 req_transport_retries_exceeded[0x20]; + + u8 cq_overflow[0x20]; + + u8 resp_cqe_flush_error[0x20]; + + u8 req_cqe_flush_error[0x20]; + + u8 reserved_at_620[0x1e0]; }; struct mlx5_ifc_query_q_counter_in_bits { -- cgit v1.2.3-59-g8ed1b
From ea30b966f7dd6bcfb20c98a7f99608c7bb10bfac Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 21 Jun 2017 09:26:28 +0300 Subject: IB/mlx4: Add inline-receive support
When inline-receive is enabled, the HCA may write received data into the receive WQE. Inline-receive is enabled by setting its matching bit in the QP context; each single-packet message whose payload does not exceed the receive WQE size will then be delivered to the WQE. The completion report will indicate that the payload was placed in the WQE. This patch includes: 1) Returning the maximum inline-receive size supported by the hardware in the vendor-specific part of the query_device response. 2) Enabling the feature when requested through the vendor data input.
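To make the sizing rule concrete: with inline-receive enabled the RQ WQE stride must be large enough for both the scatter list and the requested inline payload. The sketch below is illustrative arithmetic only; it rounds the required size up to a power of two for clarity, whereas the in-tree set_rq_size() in the qp.c diff that follows takes ilog2() of the max after max_gs has already been rounded to a power of two.

#include <stdint.h>

/* Smallest power-of-two stride (as a shift) that covers both max_gs
 * scatter entries (16 bytes each, i.e. sizeof(struct mlx4_wqe_data_seg))
 * and the requested inline-receive size. Illustrative only. */
static unsigned int rq_wqe_shift(uint32_t max_gs, uint32_t inl_recv_sz)
{
        uint32_t wqe_size = max_gs * 16;
        uint32_t need = wqe_size > inl_recv_sz ? wqe_size : inl_recv_sz;
        unsigned int shift = 0;

        while ((1u << shift) < need)    /* round up to a power of two */
                shift++;
        return shift;
}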
Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 7 +++++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 +++ drivers/infiniband/hw/mlx4/qp.c | 32 +++++++++++++++++++++++++------- include/uapi/rdma/mlx4-abi.h | 3 ++- 4 files changed, 37 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d1b43cbbfea7..e8c290edb1e1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -563,6 +563,13 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, } } + if (uhw->outlen >= resp.response_length + + sizeof(resp.max_inl_recv_sz)) { + resp.response_length += sizeof(resp.max_inl_recv_sz); + resp.max_inl_recv_sz = dev->dev->caps.max_rq_sg * + sizeof(struct mlx4_wqe_data_seg); + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); if (err) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 9db82e67e959..a6337f3161cf 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -318,6 +318,7 @@ struct mlx4_ib_qp { u8 sq_no_prefetch; u8 state; int mlx_type; + u32 inl_recv_sz; struct list_head gid_list; struct list_head steering_rules; struct mlx4_ib_buf *sqp_proxy_rcv; @@ -623,6 +624,8 @@ struct mlx4_uverbs_ex_query_device_resp { __u32 comp_mask; __u32 response_length; __u64 hca_core_clock_offset; + __u32 max_inl_recv_sz; + __u32 reserved; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 75c0e6c5dd56..d1caa39a3943 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -377,7 +377,8 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - int is_user, int has_rq, struct mlx4_ib_qp *qp) + int is_user, int has_rq, struct mlx4_ib_qp *qp, + u32 inl_recv_sz) { /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || @@ -385,18 +386,24 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, return -EINVAL; if (!has_rq) { - if (cap->max_recv_wr) + if (cap->max_recv_wr || inl_recv_sz) return -EINVAL; qp->rq.wqe_cnt = qp->rq.max_gs = 0; } else { + u32 max_inl_recv_sz = dev->dev->caps.max_rq_sg * + sizeof(struct mlx4_wqe_data_seg); + u32 wqe_size; + /* HW requires >= 1 RQ entry with >= 1 gather entry */ - if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) + if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge || + inl_recv_sz > max_inl_recv_sz)) return -EINVAL; qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr)); qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); - qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); + wqe_size = qp->rq.max_gs * sizeof(struct mlx4_wqe_data_seg); + qp->rq.wqe_shift = ilog2(max_t(u32, wqe_size, inl_recv_sz)); } /* leave userspace return values as they were, so as not to break ABI */ @@ -719,9 +726,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp); - if (err) - goto err; if 
(pd->uobject) { struct mlx4_ib_create_qp ucmd; @@ -731,6 +735,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; } + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + qp_has_rq(init_attr), qp, ucmd.inl_recv_sz); + if (err) + goto err; + + qp->inl_recv_sz = ucmd.inl_recv_sz; qp->sq_no_prefetch = ucmd.sq_no_prefetch; err = set_user_sq_size(dev, qp, &ucmd); @@ -760,6 +770,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_mtt; } } else { + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + qp_has_rq(init_attr), qp, 0); + if (err) + goto err; + qp->sq_no_prefetch = 0; if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) @@ -1651,6 +1666,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } + if (qp->inl_recv_sz) + context->param3 |= cpu_to_be32(1 << 25); + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; else if (ibqp->qp_type == IB_QPT_RAW_PACKET) diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index af431752655c..bf3bdba2f326 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -101,7 +101,8 @@ struct mlx4_ib_create_qp { __u8 log_sq_bb_count; __u8 log_sq_stride; __u8 sq_no_prefetch; - __u8 reserved[5]; + __u32 inl_recv_sz; + __u8 reserved; }; #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3-59-g8ed1b
From f3301870161ca293cd14b20a802c5646da02407f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 21 Jun 2017 09:29:36 +0300 Subject: (IB, net)/mlx4: Add resource utilization support
Add visibility into the resource usage of QPs, CQs and counters used by virtual functions. This feature gives the PF administrator more data while debugging VF status. Usage info was added to the ALLOC_RES command to notify the PF whether the resource being reserved or allocated for the VF will be used by the kernel driver or by user verbs. The reservation and allocation functions of QP, CQ and counter were updated with an additional usage parameter.
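The wire encoding used here is compact: the 2-bit usage tag rides in bits 31:30 of the ALLOC_RES in_modifier, alongside the resource type in the low bits, as the mlx4_cq_alloc_icm(), mlx4_counter_alloc() and mlx4_qp_reserve_range() hunks below show. A small stand-alone sketch of that packing (names are illustrative, not the driver's):

#include <stdint.h>

enum res_usage {                /* mirrors enum mlx4_resource_usage */
        RES_USAGE_NONE,
        RES_USAGE_DRIVER,
        RES_USAGE_USER_VERBS,
};

/* Pack the resource type (low bits) with a 2-bit usage tag in bits 31:30. */
static uint32_t alloc_res_in_modifier(uint32_t res_type, uint8_t usage)
{
        return res_type | (((uint32_t)usage & 3) << 30);
}

/* The PF-side wrapper can recover the tag with the inverse shift. */
static uint8_t alloc_res_usage_tag(uint32_t in_modifier)
{
        return (in_modifier >> 30) & 3;
}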
Signed-off-by: Moshe Shemesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 2 ++ drivers/infiniband/hw/mlx4/main.c | 6 ++++-- drivers/infiniband/hw/mlx4/qp.c | 13 +++++++++---- drivers/net/ethernet/mellanox/mlx4/cq.c | 7 ++++--- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/main.c | 7 ++++--- drivers/net/ethernet/mellanox/mlx4/qp.c | 5 +++-- include/linux/mlx4/device.h | 12 ++++++++++-- 11 files changed, 45 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index ff931c580557..95382faa7ad1 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -218,6 +218,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, goto err_mtt; uar = &to_mucontext(context)->uar; + cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS; } else { err = mlx4_db_alloc(dev->dev, &cq->db, 1); if (err) @@ -233,6 +234,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, goto err_db; uar = &dev->priv_uar; + cq->mcq.usage = MLX4_RES_USAGE_DRIVER; } if (dev->eq_table) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e8c290edb1e1..0944e224c0df 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2779,7 +2779,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) allocated = 0; if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { - err = mlx4_counter_alloc(ibdev->dev, &counter_index); + err = mlx4_counter_alloc(ibdev->dev, &counter_index, + MLX4_RES_USAGE_DRIVER); /* if failed to allocate a new counter, use default */ if (err) counter_index = @@ -2834,7 +2835,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, MLX4_IB_UC_STEER_QPN_ALIGN, - &ibdev->steer_qpn_base, 0); + &ibdev->steer_qpn_base, 0, + MLX4_RES_USAGE_DRIVER); if (err) goto err_counter; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index d1caa39a3943..247b9132e9de 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -769,6 +769,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; } + qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS; } else { err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp, 0); @@ -841,6 +842,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, err = -ENOMEM; goto err_wrid; } + qp->mqp.usage = MLX4_RES_USAGE_DRIVER; } if (sqpn) { @@ -860,13 +862,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, (init_attr->cap.max_send_wr ? MLX4_RESERVE_ETH_BF_QP : 0) | (init_attr->cap.max_recv_wr ? 
- MLX4_RESERVE_A0_QP : 0)); + MLX4_RESERVE_A0_QP : 0), + qp->mqp.usage); else if (qp->flags & MLX4_IB_QP_NETIF) err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn); else err = mlx4_qp_reserve_range(dev->dev, 1, 1, - &qpn, 0); + &qpn, 0, qp->mqp.usage); if (err) goto err_proxy; } @@ -1218,7 +1221,9 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, if (udata) return ERR_PTR(-EINVAL); if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) { - int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0); + int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, + 1, 1, &sqpn, 0, + MLX4_RES_USAGE_DRIVER); if (res) return ERR_PTR(res); @@ -1581,7 +1586,7 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK)) return 0; - err = mlx4_counter_alloc(dev->dev, &tmp_idx); + err = mlx4_counter_alloc(dev->dev, &tmp_idx, MLX4_RES_USAGE_DRIVER); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index c56a511b918e..72eb50cd5ecd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -241,13 +241,14 @@ err_out: return err; } -static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) +static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn, u8 usage) { + u32 in_modifier = RES_CQ | (((u32)usage & 3) << 30); u64 out_param; int err; if (mlx4_is_mfunc(dev)) { - err = mlx4_cmd_imm(dev, 0, &out_param, RES_CQ, + err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier, RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) @@ -303,7 +304,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq->vector = vector; - err = mlx4_cq_alloc_icm(dev, &cq->cqn); + err = mlx4_cq_alloc_icm(dev, &cq->cqn, cq->usage); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 85fe17e4dcfb..f849eec21824 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -140,6 +140,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, (cq->type == RX && priv->hwtstamp_config.rx_filter)) timestamp_en = 1; + cq->mcq.usage = MLX4_RES_USAGE_DRIVER; err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, cq->vector, 0, timestamp_en); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 3a291fc1780a..e3e6d9fa69fd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -651,7 +651,8 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv) return 0; } - err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP); + err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP, + MLX4_RES_USAGE_DRIVER); en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); if (err) { en_err(priv, "Failed to reserve qp for mac registration\n"); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 436f7689a032..ad1ffd5857cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1081,7 +1081,8 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) u32 qpn; err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, - MLX4_RESERVE_A0_QP); + MLX4_RESERVE_A0_QP, + MLX4_RES_USAGE_DRIVER); if (err) { 
en_err(priv, "Failed reserving drop qpn\n"); return err; @@ -1127,7 +1128,8 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) flags = priv->rx_ring_num == 1 ? MLX4_RESERVE_A0_QP : 0; err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, priv->rx_ring_num, - &rss_map->base_qpn, flags); + &rss_map->base_qpn, flags, + MLX4_RES_USAGE_DRIVER); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 73faa3d77921..a81db2582555 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -105,7 +105,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, (unsigned long long) ring->sp_wqres.buf.direct.map); err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn, - MLX4_RESERVE_ETH_BF_QP); + MLX4_RESERVE_ETH_BF_QP, + MLX4_RES_USAGE_DRIVER); if (err) { en_err(priv, "failed reserving qp for TX ring\n"); goto err_hwq_res; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a27c9c13a36e..fb2591d0e735 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2475,7 +2475,7 @@ static int mlx4_allocate_default_counters(struct mlx4_dev *dev) priv->def_counter[port] = -1; for (port = 0; port < dev->caps.num_ports; port++) { - err = mlx4_counter_alloc(dev, &idx); + err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER); if (!err || err == -ENOSPC) { priv->def_counter[port] = idx; @@ -2517,13 +2517,14 @@ int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) return 0; } -int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) +int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage) { + u32 in_modifier = RES_COUNTER | (((u32)usage & 3) << 30); u64 out_param; int err; if (mlx4_is_mfunc(dev)) { - err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER, + err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 26747212526b..5e5b4475b85e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -245,8 +245,9 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, } int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base, u8 flags) + int *base, u8 flags, u8 usage) { + u32 in_modifier = RES_QP | (((u32)usage & 3) << 30); u64 in_param = 0; u64 out_param; int err; @@ -258,7 +259,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, set_param_l(&in_param, (((u32)flags) << 24) | (u32)cnt); set_param_h(&in_param, align); err = mlx4_cmd_imm(dev, in_param, &out_param, - RES_QP, RES_OP_RESERVE, + in_modifier, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index aad5d81dfb44..3607da001ad3 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -428,6 +428,12 @@ enum mlx4_steer_type { MLX4_NUM_STEERS }; +enum mlx4_resource_usage { + MLX4_RES_USAGE_NONE, + MLX4_RES_USAGE_DRIVER, + MLX4_RES_USAGE_USER_VERBS, +}; + enum { MLX4_NUM_FEXCH = 64 * 1024, }; @@ -748,6 +754,7 @@ struct mlx4_cq { } tasklet_ctx; int reset_notify_added; struct list_head reset_notify; + u8 usage; }; struct mlx4_qp { @@ -757,6 +764,7 @@ 
struct mlx4_qp { atomic_t refcount; struct completion free; + u8 usage; }; struct mlx4_srq { @@ -1120,7 +1128,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, unsigned vector, int collapsed, int timestamp_en); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base, u8 flags); + int *base, u8 flags, u8 usage); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); @@ -1417,7 +1425,7 @@ int mlx4_get_phys_port_id(struct mlx4_dev *dev); int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); -int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); +int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port); -- cgit v1.2.3-59-g8ed1b
From 400b1ebcfe31279895f1baa8ecaa390d9a4a0eef Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:24 +0300 Subject: IB/mlx4: Add support for WQ related verbs
Support create/modify/destroy WQ related verbs. The base IB object to enable RSS functionality is a WQ (i.e. ib_wq). This patch implements the related WQ verbs: create, modify and destroy. In downstream patches the WQ will be used as part of an indirection table (i.e. ib_rwq_ind_table) to enable RSS QP creation. Notes: ConnectX-3 hardware requires a list of consecutive WQNs to serve as the receive descriptor queues of an RSS QP. Hence, the driver manages lists of consecutive WQN ranges per context, which the user must respect. Destroying a WQ does not return its WQN to its range for reuse. However, destroying all WQs from the same range releases the range and, in turn, its WQNs for reuse. Since the WQ is not a natural hardware object, the driver implements the WQ on top of a hardware QP. As such, the WQ inherits its port from its RSS QP parent upon the RST->INIT transition, at which time its state is applied to the hardware.
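The WQN range rules above (consecutive hand-out, no per-WQN reuse, whole-range release) can be modeled with a toy allocator. This is a simplified illustration of the behavior implemented by mlx4_ib_alloc_wqn()/mlx4_ib_release_wqn() in the diff below, not the driver code itself:

#include <stdbool.h>

struct wqn_range {
        int base;       /* first WQN of the consecutive range */
        int size;       /* number of WQNs in the range */
        int refcount;   /* WQNs currently handed out */
        bool dirty;     /* a WQN was destroyed; range serves no new WQs */
};

/* Hand out the next consecutive WQN, or fail if the range is full or dirty. */
static int range_alloc(struct wqn_range *r, int *wqn)
{
        if (r->dirty || r->refcount == r->size)
                return -1;
        *wqn = r->base + r->refcount++;
        return 0;
}

/* A destroyed WQN is never reused individually; the range only becomes
 * releasable as a whole, once every WQN in it has been destroyed. */
static bool range_release(struct wqn_range *r)
{
        r->dirty = true;
        return --r->refcount == 0;      /* true: caller frees the whole range */
}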
Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 24 ++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 25 +- drivers/infiniband/hw/mlx4/qp.c | 505 +++++++++++++++++++++++++++++++---- include/uapi/rdma/mlx4-abi.h | 14 + 4 files changed, 511 insertions(+), 57 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0944e224c0df..7c6f929ebd3e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -81,6 +81,8 @@ static const char mlx4_ib_version[] = DRV_VERSION "\n"; static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init); +static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device, + u8 port_num); static struct workqueue_struct *wq; @@ -552,6 +554,11 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->timestamp_mask = 0xFFFFFFFFFFFFULL; props->max_ah = INT_MAX; + if ((dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && + (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET || + mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) + props->max_wq_type_rq = props->max_qp; + if (!mlx4_is_slave(dev->dev)) err = mlx4_get_internal_clock_params(dev->dev, &clock_params); @@ -1076,6 +1083,9 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); + INIT_LIST_HEAD(&context->wqn_ranges_list); + mutex_init(&context->wqn_ranges_mutex); + if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); else @@ -2720,6 +2730,20 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; + if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && + ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == + IB_LINK_LAYER_ETHERNET) || + (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == + IB_LINK_LAYER_ETHERNET))) { + ibdev->ib_dev.create_wq = mlx4_ib_create_wq; + ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq; + ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq; + ibdev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ); + } + if (!mlx4_is_slave(ibdev->dev)) { ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index a6337f3161cf..ba4e78c064d2 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -88,6 +88,8 @@ struct mlx4_ib_ucontext { struct list_head db_page_list; struct mutex db_page_mutex; struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT]; + struct list_head wqn_ranges_list; + struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */ }; struct mlx4_ib_pd { @@ -289,8 +291,19 @@ struct mlx4_roce_smac_vlan_info { int update_vid; }; +struct mlx4_wqn_range { + int base_wqn; + int size; + int refcount; + bool dirty; + struct list_head list; +}; + struct mlx4_ib_qp { - struct ib_qp ibqp; + union { + struct ib_qp ibqp; + struct ib_wq ibwq; + }; struct mlx4_qp mqp; struct mlx4_buf buf; @@ -329,6 +342,9 @@ struct mlx4_ib_qp { struct list_head cq_recv_list; struct list_head cq_send_list; struct counter_index 
*counter_index; + struct mlx4_wqn_range *wqn_range; + /* Number of RSS QP parents that uses this WQ */ + u32 rss_usecnt; }; struct mlx4_ib_srq { @@ -893,4 +909,11 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); +struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); +int mlx4_ib_destroy_wq(struct ib_wq *wq); +int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 247b9132e9de..65e4ec549368 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -116,6 +116,11 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), }; +enum mlx4_ib_source_type { + MLX4_IB_QP_SRC = 0, + MLX4_IB_RWQ_SRC = 1, +}; + static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) { return container_of(mqp, struct mlx4_ib_sqp, qp); @@ -330,6 +335,12 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) } } +static void mlx4_ib_wq_event(struct mlx4_qp *qp, enum mlx4_event type) +{ + pr_warn_ratelimited("Unexpected event type %d on WQ 0x%06x. Events are not supported for WQs\n", + type, qp->qpn); +} + static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) { /* @@ -639,7 +650,91 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, qp->counter_index = NULL; } +/* + * This function allocates a WQN from a range which is consecutive and aligned + * to its size. In case the range is full, then it creates a new range and + * allocates WQN from it. The new range will be used for following allocations. + */ +static int mlx4_ib_alloc_wqn(struct mlx4_ib_ucontext *context, + struct mlx4_ib_qp *qp, int range_size, int *wqn) +{ + struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device); + struct mlx4_wqn_range *range; + int err = 0; + + mutex_lock(&context->wqn_ranges_mutex); + + range = list_first_entry_or_null(&context->wqn_ranges_list, + struct mlx4_wqn_range, list); + + if (!range || (range->refcount == range->size) || range->dirty) { + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (!range) { + err = -ENOMEM; + goto out; + } + + err = mlx4_qp_reserve_range(dev->dev, range_size, + range_size, &range->base_wqn, 0, + qp->mqp.usage); + if (err) { + kfree(range); + goto out; + } + + range->size = range_size; + list_add(&range->list, &context->wqn_ranges_list); + } else if (range_size != 1) { + /* + * Requesting a new range (>1) when last range is still open, is + * not valid. + */ + err = -EINVAL; + goto out; + } + + qp->wqn_range = range; + + *wqn = range->base_wqn + range->refcount; + + range->refcount++; + +out: + mutex_unlock(&context->wqn_ranges_mutex); + + return err; +} + +static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context, + struct mlx4_ib_qp *qp, bool dirty_release) +{ + struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device); + struct mlx4_wqn_range *range; + + mutex_lock(&context->wqn_ranges_mutex); + + range = qp->wqn_range; + + range->refcount--; + if (!range->refcount) { + mlx4_qp_release_range(dev->dev, range->base_wqn, + range->size); + list_del(&range->list); + kfree(range); + } else if (dirty_release) { + /* + * A range which one of its WQNs is destroyed, won't be able to be + * reused for further WQN allocations. 
+ * The next created WQ will allocate a new range. + */ + range->dirty = 1; + } + + mutex_unlock(&context->wqn_ranges_mutex); +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, + enum mlx4_ib_source_type src, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) @@ -652,6 +747,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; struct mlx4_ib_cq *mcq; unsigned long flags; + int range_size = 0; /* When tunneling special qps, we use a plain UD qp */ if (sqpn) { @@ -728,27 +824,69 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (pd->uobject) { - struct mlx4_ib_create_qp ucmd; + union { + struct mlx4_ib_create_qp qp; + struct mlx4_ib_create_wq wq; + } ucmd; + size_t copy_len; + + copy_len = (src == MLX4_IB_QP_SRC) ? + sizeof(struct mlx4_ib_create_qp) : + min(sizeof(struct mlx4_ib_create_wq), udata->inlen); - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + if (ib_copy_from_udata(&ucmd, udata, copy_len)) { err = -EFAULT; goto err; } - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, - qp_has_rq(init_attr), qp, ucmd.inl_recv_sz); - if (err) - goto err; + if (src == MLX4_IB_RWQ_SRC) { + if (ucmd.wq.comp_mask || ucmd.wq.reserved1 || + ucmd.wq.reserved[0] || ucmd.wq.reserved[1] || + ucmd.wq.reserved[2]) { + pr_debug("user command isn't supported\n"); + err = -EOPNOTSUPP; + goto err; + } - qp->inl_recv_sz = ucmd.inl_recv_sz; - qp->sq_no_prefetch = ucmd.sq_no_prefetch; + if (ucmd.wq.log_range_size > + ilog2(dev->dev->caps.max_rss_tbl_sz)) { + pr_debug("WQN range size must be equal or smaller than %d\n", + dev->dev->caps.max_rss_tbl_sz); + err = -EOPNOTSUPP; + goto err; + } + range_size = 1 << ucmd.wq.log_range_size; + } else { + qp->inl_recv_sz = ucmd.qp.inl_recv_sz; + } - err = set_user_sq_size(dev, qp, &ucmd); + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + qp_has_rq(init_attr), qp, qp->inl_recv_sz); if (err) goto err; - qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - qp->buf_size, 0, 0); + if (src == MLX4_IB_QP_SRC) { + qp->sq_no_prefetch = ucmd.qp.sq_no_prefetch; + + err = set_user_sq_size(dev, qp, + (struct mlx4_ib_create_qp *) + &ucmd); + if (err) + goto err; + } else { + qp->sq_no_prefetch = 1; + qp->sq.wqe_cnt = 1; + qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE; + /* Allocated buffer expects to have at least that SQ + * size. + */ + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); + } + + qp->umem = ib_umem_get(pd->uobject->context, + (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr : + ucmd.wq.buf_addr, qp->buf_size, 0, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; @@ -765,7 +903,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (qp_has_rq(init_attr)) { err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), - ucmd.db_addr, &qp->db); + (src == MLX4_IB_QP_SRC) ? 
ucmd.qp.db_addr : + ucmd.wq.db_addr, &qp->db); if (err) goto err_mtt; } @@ -853,6 +992,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_wrid; } } + } else if (src == MLX4_IB_RWQ_SRC) { + err = mlx4_ib_alloc_wqn(to_mucontext(pd->uobject->context), qp, + range_size, &qpn); + if (err) + goto err_wrid; } else { /* Raw packet QPNs may not have bits 6,7 set in their qp_num; * otherwise, the WQE BlueFlame setup flow wrongly causes @@ -891,7 +1035,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, */ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); - qp->mqp.event = mlx4_ib_qp_event; + qp->mqp.event = (src == MLX4_IB_QP_SRC) ? mlx4_ib_qp_event : + mlx4_ib_wq_event; + if (!*caller_qp) *caller_qp = qp; @@ -918,6 +1064,9 @@ err_qpn: if (!sqpn) { if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qpn, 1); + else if (src == MLX4_IB_RWQ_SRC) + mlx4_ib_release_wqn(to_mucontext(pd->uobject->context), + qp, 0); else mlx4_qp_release_range(dev->dev, qpn, 1); } @@ -1016,7 +1165,7 @@ static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp) return to_mpd(qp->ibqp.pd); } -static void get_cqs(struct mlx4_ib_qp *qp, +static void get_cqs(struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src, struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq) { switch (qp->ibqp.qp_type) { @@ -1029,14 +1178,16 @@ static void get_cqs(struct mlx4_ib_qp *qp, *recv_cq = *send_cq; break; default: - *send_cq = to_mcq(qp->ibqp.send_cq); - *recv_cq = to_mcq(qp->ibqp.recv_cq); + *recv_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.recv_cq) : + to_mcq(qp->ibwq.cq); + *send_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.send_cq) : + *recv_cq; break; } } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, - int is_user) + enum mlx4_ib_source_type src, int is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; unsigned long flags; @@ -1069,7 +1220,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, } } - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp, src, &send_cq, &recv_cq); spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx4_ib_lock_cqs(send_cq, recv_cq); @@ -1095,6 +1246,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) { if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1); + else if (src == MLX4_IB_RWQ_SRC) + mlx4_ib_release_wqn(to_mucontext( + qp->ibwq.uobject->context), qp, 1); else mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); } @@ -1102,9 +1256,12 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { - if (qp->rq.wqe_cnt) - mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), - &qp->db); + if (qp->rq.wqe_cnt) { + struct mlx4_ib_ucontext *mcontext = !src ? 
+ to_mucontext(qp->ibqp.uobject->context) : + to_mucontext(qp->ibwq.uobject->context); + mlx4_ib_db_unmap_user(mcontext, &qp->db); + } ib_umem_release(qp->umem); } else { kvfree(qp->sq.wrid); @@ -1200,8 +1357,8 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, /* fall through */ case IB_QPT_UD: { - err = create_qp_common(to_mdev(pd->device), pd, init_attr, - udata, 0, &qp); + err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC, + init_attr, udata, 0, &qp); if (err) { kfree(qp); return ERR_PTR(err); @@ -1231,8 +1388,8 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, sqpn = get_sqp_num(to_mdev(pd->device), init_attr); } - err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, - sqpn, &qp); + err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC, + init_attr, udata, sqpn, &qp); if (err) return ERR_PTR(err); @@ -1303,7 +1460,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) mlx4_ib_free_qp_counter(dev, mqp); pd = get_pd(mqp); - destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); @@ -1626,12 +1783,15 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type) } } -static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, +static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { - struct mlx4_ib_dev *dev = to_mdev(ibqp->device); - struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct ib_uobject *ibuobject; + struct ib_srq *ibsrq; + enum ib_qp_type qp_type; + struct mlx4_ib_dev *dev; + struct mlx4_ib_qp *qp; struct mlx4_ib_pd *pd; struct mlx4_ib_cq *send_cq, *recv_cq; struct mlx4_qp_context *context; @@ -1641,6 +1801,28 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, int err = -EINVAL; int counter_index; + if (src_type == MLX4_IB_RWQ_SRC) { + struct ib_wq *ibwq; + + ibwq = (struct ib_wq *)src; + ibuobject = ibwq->uobject; + ibsrq = NULL; + qp_type = IB_QPT_RAW_PACKET; + qp = to_mqp((struct ib_qp *)ibwq); + dev = to_mdev(ibwq->device); + pd = to_mpd(ibwq->pd); + } else { + struct ib_qp *ibqp; + + ibqp = (struct ib_qp *)src; + ibuobject = ibqp->uobject; + ibsrq = ibqp->srq; + qp_type = ibqp->qp_type; + qp = to_mqp(ibqp); + dev = to_mdev(ibqp->device); + pd = get_pd(qp); + } + /* APM is not supported under RoCE */ if (attr_mask & IB_QP_ALT_PATH && rdma_port_get_link_layer(&dev->ib_dev, qp->port) == @@ -1674,11 +1856,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (qp->inl_recv_sz) context->param3 |= cpu_to_be32(1 << 25); - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) + if (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; - else if (ibqp->qp_type == IB_QPT_RAW_PACKET) + else if (qp_type == IB_QPT_RAW_PACKET) context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX; - else if (ibqp->qp_type == IB_QPT_UD) { + else if (qp_type == IB_QPT_UD) { if (qp->flags & MLX4_IB_QP_LSO) context->mtu_msgmax = (IB_MTU_4096 << 5) | ilog2(dev->dev->caps.max_gso_sz); @@ -1708,14 +1890,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; context->xrcd = cpu_to_be32((u32) qp->xrcdn); - if (ibqp->qp_type == IB_QPT_RAW_PACKET) + if (qp_type == IB_QPT_RAW_PACKET) context->param3 |= cpu_to_be32(1 << 30); } - if (qp->ibqp.uobject) + if (ibuobject) 
context->usr_page = cpu_to_be32( mlx4_to_hw_uar_index(dev->dev, - to_mucontext(ibqp->uobject->context)->uar.index)); + to_mucontext(ibuobject->context) + ->uar.index)); else context->usr_page = cpu_to_be32( mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index)); @@ -1759,7 +1942,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, steer_qp = 1; } - if (ibqp->qp_type == IB_QPT_GSI) { + if (qp_type == IB_QPT_GSI) { enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ? IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE; u8 qpc_roce_mode = gid_type_to_qpc(gid_type); @@ -1776,7 +1959,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_AV) { - u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : + u8 port_num = mlx4_is_bonded(dev->dev) ? 1 : attr_mask & IB_QP_PORT ? attr->port_num : qp->port; union ib_gid gid; struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; @@ -1791,7 +1974,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, int index = rdma_ah_read_grh(&attr->ah_attr)->sgid_index; - status = ib_get_cached_gid(ibqp->device, port_num, + status = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &gid_attr); if (!status && !memcmp(&gid, &zgid, sizeof(gid))) status = -ENOENT; @@ -1848,15 +2031,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } - pd = get_pd(qp); - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp, src_type, &send_cq, &recv_cq); context->pd = cpu_to_be32(pd->pdn); context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); /* Set "fast registration enabled" for all kernel QPs */ - if (!qp->ibqp.uobject) + if (!ibuobject) context->params1 |= cpu_to_be32(1 << 11); if (attr_mask & IB_QP_RNR_RETRY) { @@ -1891,7 +2073,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE; } - if (ibqp->srq) + if (ibsrq) context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC); if (attr_mask & IB_QP_MIN_RNR_TIMER) { @@ -1922,17 +2104,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_Q_KEY; } - if (ibqp->srq) - context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); + if (ibsrq) + context->srqn = cpu_to_be32(1 << 24 | + to_msrq(ibsrq)->msrq.srqn); - if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (qp->rq.wqe_cnt && + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR && - (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || - ibqp->qp_type == IB_QPT_UD || - ibqp->qp_type == IB_QPT_RAW_PACKET)) { + (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI || + qp_type == IB_QPT_UD || qp_type == IB_QPT_RAW_PACKET)) { context->pri_path.sched_queue = (qp->port - 1) << 6; if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || qp->mlx4_ib_qp_type & @@ -1965,7 +2149,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp_type == IB_QPT_RAW_PACKET) { context->pri_path.ackto = (context->pri_path.ackto & 0xf8) | MLX4_IB_LINK_TYPE_ETH; if (dev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { @@ -1975,7 +2159,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { + if (qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { 
int is_eth = rdma_port_get_link_layer( &dev->ib_dev, qp->port) == IB_LINK_LAYER_ETHERNET; @@ -1985,14 +2169,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) sqd_event = 1; else sqd_event = 0; - if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (!ibuobject && + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT) context->rlkey_roce_mode |= (1 << 4); /* @@ -2001,7 +2186,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, * headroom is stamped so that the hardware doesn't start * processing stale work requests. */ - if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + if (!ibuobject && + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT) { struct mlx4_wqe_ctrl_seg *ctrl; int i; @@ -2058,9 +2245,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET) { - if (!ibqp->uobject) { + if (!ibuobject) { mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, - ibqp->srq ? to_msrq(ibqp->srq) : NULL); + ibsrq ? to_msrq(ibsrq) : NULL); if (send_cq != recv_cq) mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); @@ -2265,7 +2452,8 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + err = __mlx4_ib_modify_qp(ibqp, MLX4_IB_QP_SRC, attr, attr_mask, + cur_state, new_state); if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) attr->port_num = 1; @@ -3550,3 +3738,208 @@ out: return err; } +struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev; + struct ib_qp_init_attr ib_qp_init_attr; + struct mlx4_ib_qp *qp; + struct mlx4_ib_create_wq ucmd; + int err, required_cmd_sz; + + if (!(udata && pd->uobject)) + return ERR_PTR(-EINVAL); + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) { + pr_debug("invalid inlen\n"); + return ERR_PTR(-EINVAL); + } + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + pr_debug("inlen is not supported\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (udata->outlen) + return ERR_PTR(-EOPNOTSUPP); + + dev = to_mdev(pd->device); + + if (init_attr->wq_type != IB_WQT_RQ) { + pr_debug("unsupported wq type %d\n", init_attr->wq_type); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->create_flags) { + pr_debug("unsupported create_flags %u\n", + init_attr->create_flags); + return ERR_PTR(-EOPNOTSUPP); + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + + memset(&ib_qp_init_attr, 0, sizeof(ib_qp_init_attr)); + ib_qp_init_attr.qp_context = init_attr->wq_context; + ib_qp_init_attr.qp_type = IB_QPT_RAW_PACKET; + ib_qp_init_attr.cap.max_recv_wr = init_attr->max_wr; + ib_qp_init_attr.cap.max_recv_sge = init_attr->max_sge; + ib_qp_init_attr.recv_cq = init_attr->cq; + ib_qp_init_attr.send_cq = ib_qp_init_attr.recv_cq; /* Dummy CQ */ + + err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr, + udata, 0, &qp); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + qp->ibwq.event_handler = init_attr->event_handler; + qp->ibwq.wq_num = qp->mqp.qpn; + qp->ibwq.state = IB_WQS_RESET; + + return &qp->ibwq; +} + +static 
int ib_wq2qp_state(enum ib_wq_state state) +{ + switch (state) { + case IB_WQS_RESET: + return IB_QPS_RESET; + case IB_WQS_RDY: + return IB_QPS_RTR; + default: + return IB_QPS_ERR; + } +} + +static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state) +{ + struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); + enum ib_qp_state qp_cur_state; + enum ib_qp_state qp_new_state; + int attr_mask; + int err; + + /* ib_qp.state represents the WQ HW state while ib_wq.state represents + * the WQ logic state. + */ + qp_cur_state = qp->state; + qp_new_state = ib_wq2qp_state(new_state); + + if (ib_wq2qp_state(new_state) == qp_cur_state) + return 0; + + if (new_state == IB_WQS_RDY) { + struct ib_qp_attr attr = {}; + + attr.port_num = qp->port; + attr_mask = IB_QP_PORT; + + err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, &attr, + attr_mask, IB_QPS_RESET, IB_QPS_INIT); + if (err) { + pr_debug("WQN=0x%06x failed to apply RST->INIT on the HW QP\n", + ibwq->wq_num); + return err; + } + + qp_cur_state = IB_QPS_INIT; + } + + attr_mask = 0; + err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL, attr_mask, + qp_cur_state, qp_new_state); + + if (err && (qp_cur_state == IB_QPS_INIT)) { + qp_new_state = IB_QPS_RESET; + if (__mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL, + attr_mask, IB_QPS_INIT, IB_QPS_RESET)) { + pr_warn("WQN=0x%06x failed with reverting HW's resources failure\n", + ibwq->wq_num); + qp_new_state = IB_QPS_INIT; + } + } + + qp->state = qp_new_state; + + return err; +} + +int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata) +{ + struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); + struct mlx4_ib_modify_wq ucmd = {}; + size_t required_cmd_sz; + enum ib_wq_state cur_state, new_state; + int err = 0; + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) + return -EINVAL; + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) + return -EOPNOTSUPP; + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) + return -EFAULT; + + if (ucmd.comp_mask || ucmd.reserved) + return -EOPNOTSUPP; + + if (wq_attr_mask & IB_WQ_FLAGS) + return -EOPNOTSUPP; + + cur_state = wq_attr_mask & IB_WQ_CUR_STATE ? wq_attr->curr_wq_state : + ibwq->state; + new_state = wq_attr_mask & IB_WQ_STATE ? wq_attr->wq_state : cur_state; + + if (cur_state < IB_WQS_RESET || cur_state > IB_WQS_ERR || + new_state < IB_WQS_RESET || new_state > IB_WQS_ERR) + return -EINVAL; + + if ((new_state == IB_WQS_RDY) && (cur_state == IB_WQS_ERR)) + return -EINVAL; + + if ((new_state == IB_WQS_ERR) && (cur_state == IB_WQS_RESET)) + return -EINVAL; + + /* Can update HW state only if a RSS QP has already associated to this + * WQ, so we can apply its port on the WQ. 
+ */ + if (qp->rss_usecnt) + err = _mlx4_ib_modify_wq(ibwq, new_state); + + if (!err) + ibwq->state = new_state; + + return err; +} + +int mlx4_ib_destroy_wq(struct ib_wq *ibwq) +{ + struct mlx4_ib_dev *dev = to_mdev(ibwq->device); + struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); + + if (qp->counter_index) + mlx4_ib_free_qp_counter(dev, qp); + + destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, 1); + + kfree(qp); + + return 0; +} diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index bf3bdba2f326..c9702a5f0bda 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -105,4 +105,18 @@ struct mlx4_ib_create_qp { __u8 reserved; }; +struct mlx4_ib_create_wq { + __u64 buf_addr; + __u64 db_addr; + __u8 log_range_size; + __u8 reserved[3]; + __u32 comp_mask; + __u32 reserved1; +}; + +struct mlx4_ib_modify_wq { + __u32 comp_mask; + __u32 reserved; +}; + #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3-59-g8ed1b From b8d46ca035060e70f5f0da849d86720752d5aa17 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:25 +0300 Subject: IB/mlx4: Add support for WQ indirection table related verbs To enable RSS functionality the IB indirection table object (i.e. ib_rwq_ind_table) should be used. This patch implements the related verbs as of create and destroy an indirection table. In downstream patches the indirection table will be used as part of RSS QP creation. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 12 +++++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 6 ++++ drivers/infiniband/hw/mlx4/qp.c | 70 ++++++++++++++++++++++++++++++++++++ include/uapi/rdma/mlx4-abi.h | 4 +++ 4 files changed, 89 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 7c6f929ebd3e..b42234571a8c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2738,10 +2738,16 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.create_wq = mlx4_ib_create_wq; ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq; ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq; + ibdev->ib_dev.create_rwq_ind_table = + mlx4_ib_create_rwq_ind_table; + ibdev->ib_dev.destroy_rwq_ind_table = + mlx4_ib_destroy_rwq_ind_table; ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ); + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); } if (!mlx4_is_slave(ibdev->dev)) { diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index ba4e78c064d2..85525bc400a0 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -916,4 +916,10 @@ int mlx4_ib_destroy_wq(struct ib_wq *wq); int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); +struct ib_rwq_ind_table +*mlx4_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); +int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c 
b/drivers/infiniband/hw/mlx4/qp.c index 65e4ec549368..519919d15474 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -3943,3 +3943,73 @@ int mlx4_ib_destroy_wq(struct ib_wq *ibwq) return 0; } + +struct ib_rwq_ind_table +*mlx4_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) +{ + struct ib_rwq_ind_table *rwq_ind_table; + struct mlx4_ib_create_rwq_ind_tbl_resp resp = {}; + unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size; + unsigned int base_wqn; + size_t min_resp_len; + int i; + int err; + + if (udata->inlen > 0 && + !ib_is_udata_cleared(udata, 0, + udata->inlen)) + return ERR_PTR(-EOPNOTSUPP); + + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + if (udata->outlen && udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + if (ind_tbl_size > + device->attrs.rss_caps.max_rwq_indirection_table_size) { + pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n", + ind_tbl_size, + device->attrs.rss_caps.max_rwq_indirection_table_size); + return ERR_PTR(-EINVAL); + } + + base_wqn = init_attr->ind_tbl[0]->wq_num; + + if (base_wqn % ind_tbl_size) { + pr_debug("WQN=0x%x isn't aligned with indirection table size\n", + base_wqn); + return ERR_PTR(-EINVAL); + } + + for (i = 1; i < ind_tbl_size; i++) { + if (++base_wqn != init_attr->ind_tbl[i]->wq_num) { + pr_debug("indirection table's WQNs aren't consecutive\n"); + return ERR_PTR(-EINVAL); + } + } + + rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL); + if (!rwq_ind_table) + return ERR_PTR(-ENOMEM); + + if (udata->outlen) { + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + goto err; + } + + return rwq_ind_table; + +err: + kfree(rwq_ind_table); + return ERR_PTR(err); +} + +int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ + kfree(ib_rwq_ind_tbl); + return 0; +} diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index c9702a5f0bda..5591d955ba00 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -119,4 +119,8 @@ struct mlx4_ib_modify_wq { __u32 reserved; }; +struct mlx4_ib_create_rwq_ind_tbl_resp { + __u32 response_length; + __u32 reserved; +}; #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3-59-g8ed1b From 3078f5f1bd8b6c8aef77b8ef4d49671fa6eb058e Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:26 +0300 Subject: IB/mlx4: Add support for RSS QP Add support to work with a RSS QP by using an indirection table object upon QP creation. Other related QP verbs (e.g. modify/destroy/query) were updated as well for that QP mode. Notes: - The RX hash properties are supplied as driver private data. - The RSS QP port is used on the associated WQs in its indirection table. Applying different ports during WQ life time is not allowed. - The expected RSS QP flow is: create, modify(RST->INIT), modify(RST->RTR), destroy. 
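
For illustration only, the expected flow maps to userspace roughly as in the sketch below. This is not part of the series; it assumes the standard libibverbs extended-QP API (ibv_create_qp_ex() with an indirection table plus RX hash configuration), and WQ/CQ setup and teardown are elided.

    #include <stdint.h>
    #include <string.h>
    #include <infiniband/verbs.h>

    /* 40-byte Toeplitz key, matching MLX4_EN_RSS_KEY_SIZE in the patch. */
    static uint8_t rss_key[40];

    static struct ibv_qp *create_rss_qp(struct ibv_context *ctx,
                                        struct ibv_pd *pd,
                                        struct ibv_rwq_ind_table *ind_tbl,
                                        uint8_t port)
    {
            struct ibv_qp_init_attr_ex attr;
            struct ibv_qp_attr qpa;
            struct ibv_qp *qp;

            memset(&attr, 0, sizeof(attr));
            attr.qp_type = IBV_QPT_RAW_PACKET;
            attr.comp_mask = IBV_QP_INIT_ATTR_PD |
                             IBV_QP_INIT_ATTR_IND_TABLE |
                             IBV_QP_INIT_ATTR_RX_HASH;
            attr.pd = pd;
            attr.rwq_ind_tbl = ind_tbl;  /* receive side comes from the WQs */
            attr.rx_hash_conf.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ;
            attr.rx_hash_conf.rx_hash_key_len = sizeof(rss_key);
            attr.rx_hash_conf.rx_hash_key = rss_key;
            /* SRC and DST of a tuple must be set together, per set_qp_rss(). */
            attr.rx_hash_conf.rx_hash_fields_mask =
                    IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 |
                    IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP;

            qp = ibv_create_qp_ex(ctx, &attr);
            if (!qp)
                    return NULL;

            /* RESET -> INIT carries the port; it is applied to the WQs. */
            memset(&qpa, 0, sizeof(qpa));
            qpa.qp_state = IBV_QPS_INIT;
            qpa.port_num = port;
            if (ibv_modify_qp(qp, &qpa, IBV_QP_STATE | IBV_QP_PORT))
                    goto err;

            /* INIT -> RTR; no other transitions are accepted for an RSS QP. */
            memset(&qpa, 0, sizeof(qpa));
            qpa.qp_state = IBV_QPS_RTR;
            if (ibv_modify_qp(qp, &qpa, IBV_QP_STATE))
                    goto err;

            return qp;
    err:
            ibv_destroy_qp(qp);
            return NULL;
    }
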
Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 8 + drivers/infiniband/hw/mlx4/qp.c | 453 +++++++++++++++++++++++++++++++++-- include/uapi/rdma/mlx4-abi.h | 33 +++ 3 files changed, 472 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 85525bc400a0..1fa19820355a 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -46,6 +46,7 @@ #include #include +#include #define MLX4_IB_DRV_NAME "mlx4_ib" @@ -299,6 +300,12 @@ struct mlx4_wqn_range { struct list_head list; }; +struct mlx4_ib_rss { + unsigned int base_qpn_tbl_sz; + u8 flags; + u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; +}; + struct mlx4_ib_qp { union { struct ib_qp ibqp; @@ -345,6 +352,7 @@ struct mlx4_ib_qp { struct mlx4_wqn_range *wqn_range; /* Number of RSS QP parents that uses this WQ */ u32 rss_usecnt; + struct mlx4_ib_rss *rss_ctx; }; struct mlx4_ib_srq { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 519919d15474..e42acfb20588 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -53,6 +53,7 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq); static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq); +static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state); enum { MLX4_IB_ACK_REQ_FREQ = 8, @@ -650,6 +651,212 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, qp->counter_index = NULL; } +static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx, + struct ib_qp_init_attr *init_attr, + struct mlx4_ib_create_qp_rss *ucmd) +{ + rss_ctx->base_qpn_tbl_sz = init_attr->rwq_ind_tbl->ind_tbl[0]->wq_num | + (init_attr->rwq_ind_tbl->log_ind_tbl_size << 24); + + if ((ucmd->rx_hash_function == MLX4_IB_RX_HASH_FUNC_TOEPLITZ) && + (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) { + memcpy(rss_ctx->rss_key, ucmd->rx_hash_key, + MLX4_EN_RSS_KEY_SIZE); + } else { + pr_debug("RX Hash function is not supported\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) { + rss_ctx->flags = MLX4_RSS_IPV4; + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) { + pr_debug("RX Hash fields_mask is not supported - both IPv4 SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) { + rss_ctx->flags |= MLX4_RSS_IPV6; + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) { + pr_debug("RX Hash fields_mask is not supported - both IPv6 SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) { + if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS)) { + pr_debug("RX Hash fields_mask for UDP is not supported\n"); + return (-EOPNOTSUPP); + } + + if (rss_ctx->flags & MLX4_RSS_IPV4) { + rss_ctx->flags |= MLX4_RSS_UDP_IPV4; + } else if (rss_ctx->flags & MLX4_RSS_IPV6) { + rss_ctx->flags |= MLX4_RSS_UDP_IPV6; + } else { + pr_debug("RX Hash 
fields_mask is not supported - UDP must be set with IPv4 or IPv6\n"); + return (-EOPNOTSUPP); + } + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) { + pr_debug("RX Hash fields_mask is not supported - both UDP SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) { + if (rss_ctx->flags & MLX4_RSS_IPV4) { + rss_ctx->flags |= MLX4_RSS_TCP_IPV4; + } else if (rss_ctx->flags & MLX4_RSS_IPV6) { + rss_ctx->flags |= MLX4_RSS_TCP_IPV6; + } else { + pr_debug("RX Hash fields_mask is not supported - TCP must be set with IPv4 or IPv6\n"); + return (-EOPNOTSUPP); + } + + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) { + pr_debug("RX Hash fields_mask is not supported - both TCP SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + return 0; +} + +static int create_qp_rss(struct mlx4_ib_dev *dev, struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct mlx4_ib_create_qp_rss *ucmd, + struct mlx4_ib_qp *qp) +{ + int qpn; + int err; + + qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS; + + err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn, 0, qp->mqp.usage); + if (err) + return err; + + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); + if (err) + goto err_qpn; + + mutex_init(&qp->mutex); + + INIT_LIST_HEAD(&qp->gid_list); + INIT_LIST_HEAD(&qp->steering_rules); + + qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_ETHERTYPE; + qp->state = IB_QPS_RESET; + + /* Set dummy send resources to be compatible with HV and PRM */ + qp->sq_no_prefetch = 1; + qp->sq.wqe_cnt = 1; + qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE; + qp->buf_size = qp->sq.wqe_cnt << MLX4_IB_MIN_SQ_STRIDE; + qp->mtt = (to_mqp( + (struct ib_qp *)init_attr->rwq_ind_tbl->ind_tbl[0]))->mtt; + + qp->rss_ctx = kzalloc(sizeof(*qp->rss_ctx), GFP_KERNEL); + if (!qp->rss_ctx) { + err = -ENOMEM; + goto err_qp_alloc; + } + + err = set_qp_rss(dev, qp->rss_ctx, init_attr, ucmd); + if (err) + goto err; + + return 0; + +err: + kfree(qp->rss_ctx); + +err_qp_alloc: + mlx4_qp_remove(dev->dev, &qp->mqp); + mlx4_qp_free(dev->dev, &qp->mqp); + +err_qpn: + mlx4_qp_release_range(dev->dev, qpn, 1); + return err; +} + +static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx4_ib_qp *qp; + struct mlx4_ib_create_qp_rss ucmd = {}; + size_t required_cmd_sz; + int err; + + if (!udata) { + pr_debug("RSS QP with NULL udata\n"); + return ERR_PTR(-EINVAL); + } + + if (udata->outlen) + return ERR_PTR(-EOPNOTSUPP); + + required_cmd_sz = offsetof(typeof(ucmd), reserved1) + + sizeof(ucmd.reserved1); + if (udata->inlen < required_cmd_sz) { + pr_debug("invalid inlen\n"); + return ERR_PTR(-EINVAL); + } + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { + pr_debug("copy failed\n"); + return ERR_PTR(-EFAULT); + } + + if (ucmd.comp_mask || ucmd.reserved1) + return ERR_PTR(-EOPNOTSUPP); + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + pr_debug("inlen is not supported\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + pr_debug("RSS QP with unsupported QP type %d\n", + init_attr->qp_type); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->create_flags) { + pr_debug("RSS QP 
doesn't support create flags\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->send_cq || init_attr->cap.max_send_wr) { + pr_debug("RSS QP with unsupported send attributes\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + + err = create_qp_rss(to_mdev(pd->device), pd, init_attr, &ucmd, qp); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + qp->ibqp.qp_num = qp->mqp.qpn; + + return &qp->ibqp; +} + /* * This function allocates a WQN from a range which is consecutive and aligned * to its size. In case the range is full, then it creates a new range and @@ -1186,6 +1393,36 @@ static void get_cqs(struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src, } } +static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + if (qp->state != IB_QPS_RESET) { + int i; + + for (i = 0; i < (1 << qp->ibqp.rwq_ind_tbl->log_ind_tbl_size); + i++) { + struct ib_wq *ibwq = qp->ibqp.rwq_ind_tbl->ind_tbl[i]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + wq->rss_usecnt--; + + mutex_unlock(&wq->mutex); + } + + if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), + MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) + pr_warn("modify QP %06x to RESET failed.\n", + qp->mqp.qpn); + } + + mlx4_qp_remove(dev->dev, &qp->mqp); + mlx4_qp_free(dev->dev, &qp->mqp); + mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); + del_gid_entries(qp); + kfree(qp->rss_ctx); +} + static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src, int is_user) { @@ -1303,6 +1540,9 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; + if (init_attr->rwq_ind_tbl) + return _mlx4_ib_create_qp_rss(pd, init_attr, udata); + /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. @@ -1444,7 +1684,6 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); - struct mlx4_ib_pd *pd; if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); @@ -1459,8 +1698,14 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (mqp->counter_index) mlx4_ib_free_qp_counter(dev, mqp); - pd = get_pd(mqp); - destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); + if (qp->rwq_ind_tbl) { + destroy_qp_rss(dev, mqp); + } else { + struct mlx4_ib_pd *pd; + + pd = get_pd(mqp); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); + } if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); @@ -1783,12 +2028,116 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type) } } +/* + * Go over all RSS QP's childes (WQs) and apply their HW state according to + * their logic state if the RSS QP is the first RSS QP associated for the WQ. + */ +static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num) +{ + int i; + int err; + + for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { + struct ib_wq *ibwq = ind_tbl->ind_tbl[i]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + /* Mlx4_ib restrictions: + * WQ's is associated to a port according to the RSS QP it is + * associates to. + * In case the WQ is associated to a different port by another + * RSS QP, return a failure. 
+ */ + if ((wq->rss_usecnt > 0) && (wq->port != port_num)) { + err = -EINVAL; + mutex_unlock(&wq->mutex); + break; + } + wq->port = port_num; + if ((wq->rss_usecnt == 0) && (ibwq->state == IB_WQS_RDY)) { + err = _mlx4_ib_modify_wq(ibwq, IB_WQS_RDY); + if (err) { + mutex_unlock(&wq->mutex); + break; + } + } + wq->rss_usecnt++; + + mutex_unlock(&wq->mutex); + } + + if (i && err) { + int j; + + for (j = (i - 1); j >= 0; j--) { + struct ib_wq *ibwq = ind_tbl->ind_tbl[j]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + if ((wq->rss_usecnt == 1) && + (ibwq->state == IB_WQS_RDY)) + if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET)) + pr_warn("failed to reverse WQN=0x%06x\n", + ibwq->wq_num); + wq->rss_usecnt--; + + mutex_unlock(&wq->mutex); + } + } + + return err; +} + +static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl) +{ + int i; + + for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { + struct ib_wq *ibwq = ind_tbl->ind_tbl[i]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + if ((wq->rss_usecnt == 1) && (ibwq->state == IB_WQS_RDY)) + if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET)) + pr_warn("failed to reverse WQN=%x\n", + ibwq->wq_num); + wq->rss_usecnt--; + + mutex_unlock(&wq->mutex); + } +} + +static void fill_qp_rss_context(struct mlx4_qp_context *context, + struct mlx4_ib_qp *qp) +{ + struct mlx4_rss_context *rss_context; + + rss_context = (void *)context + offsetof(struct mlx4_qp_context, + pri_path) + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH; + + rss_context->base_qpn = cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz); + rss_context->default_qpn = + cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz & 0xffffff); + if (qp->rss_ctx->flags & (MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6)) + rss_context->base_qpn_udp = rss_context->default_qpn; + rss_context->flags = qp->rss_ctx->flags; + /* Currently support just toeplitz */ + rss_context->hash_fn = MLX4_RSS_HASH_TOP; + + memcpy(rss_context->rss_key, qp->rss_ctx->rss_key, + MLX4_EN_RSS_KEY_SIZE); +} + static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { struct ib_uobject *ibuobject; struct ib_srq *ibsrq; + struct ib_rwq_ind_table *rwq_ind_tbl; enum ib_qp_type qp_type; struct mlx4_ib_dev *dev; struct mlx4_ib_qp *qp; @@ -1804,23 +2153,25 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (src_type == MLX4_IB_RWQ_SRC) { struct ib_wq *ibwq; - ibwq = (struct ib_wq *)src; - ibuobject = ibwq->uobject; - ibsrq = NULL; - qp_type = IB_QPT_RAW_PACKET; - qp = to_mqp((struct ib_qp *)ibwq); - dev = to_mdev(ibwq->device); - pd = to_mpd(ibwq->pd); + ibwq = (struct ib_wq *)src; + ibuobject = ibwq->uobject; + ibsrq = NULL; + rwq_ind_tbl = NULL; + qp_type = IB_QPT_RAW_PACKET; + qp = to_mqp((struct ib_qp *)ibwq); + dev = to_mdev(ibwq->device); + pd = to_mpd(ibwq->pd); } else { struct ib_qp *ibqp; - ibqp = (struct ib_qp *)src; - ibuobject = ibqp->uobject; - ibsrq = ibqp->srq; - qp_type = ibqp->qp_type; - qp = to_mqp(ibqp); - dev = to_mdev(ibqp->device); - pd = get_pd(qp); + ibqp = (struct ib_qp *)src; + ibuobject = ibqp->uobject; + ibsrq = ibqp->srq; + rwq_ind_tbl = ibqp->rwq_ind_tbl; + qp_type = ibqp->qp_type; + qp = to_mqp(ibqp); + dev = to_mdev(ibqp->device); + pd = get_pd(qp); } /* APM is not supported under RoCE */ @@ -1836,6 +2187,11 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, context->flags = 
cpu_to_be32((to_mlx4_state(new_state) << 28) | (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16)); + if (rwq_ind_tbl) { + fill_qp_rss_context(context, qp); + context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET); + } + if (!(attr_mask & IB_QP_PATH_MIG_STATE)) context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); else { @@ -1876,9 +2232,11 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, ilog2(dev->dev->caps.max_msg_sz); } - if (qp->rq.wqe_cnt) - context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3; - context->rq_size_stride |= qp->rq.wqe_shift - 4; + if (!rwq_ind_tbl) { /* PRM RSS receive side should be left zeros */ + if (qp->rq.wqe_cnt) + context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3; + context->rq_size_stride |= qp->rq.wqe_shift - 4; + } if (qp->sq.wqe_cnt) context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; @@ -2031,8 +2389,14 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } - get_cqs(qp, src_type, &send_cq, &recv_cq); - context->pd = cpu_to_be32(pd->pdn); + context->pd = cpu_to_be32(pd->pdn); + + if (!rwq_ind_tbl) { + get_cqs(qp, src_type, &send_cq, &recv_cq); + } else { /* Set dummy CQs to be compatible with HV and PRM */ + send_cq = to_mcq(rwq_ind_tbl->ind_tbl[0]->cq); + recv_cq = send_cq; + } context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); @@ -2358,6 +2722,11 @@ out: return err; } +enum { + MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK = (IB_QP_STATE | + IB_QP_PORT), +}; + static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -2388,6 +2757,27 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } + if (ibqp->rwq_ind_tbl) { + if (!(((cur_state == IB_QPS_RESET) && + (new_state == IB_QPS_INIT)) || + ((cur_state == IB_QPS_INIT) && + (new_state == IB_QPS_RTR)))) { + pr_debug("qpn 0x%x: RSS QP unsupported transition %d to %d\n", + ibqp->qp_num, cur_state, new_state); + + err = -EOPNOTSUPP; + goto out; + } + + if (attr_mask & ~MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK) { + pr_debug("qpn 0x%x: RSS QP unsupported attribute mask 0x%x for transition %d to %d\n", + ibqp->qp_num, attr_mask, cur_state, new_state); + + err = -EOPNOTSUPP; + goto out; + } + } + if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) { if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { if ((ibqp->qp_type == IB_QPT_RC) || @@ -2452,9 +2842,18 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } + if (ibqp->rwq_ind_tbl && (new_state == IB_QPS_INIT)) { + err = bringup_rss_rwqs(ibqp->rwq_ind_tbl, attr->port_num); + if (err) + goto out; + } + err = __mlx4_ib_modify_qp(ibqp, MLX4_IB_QP_SRC, attr, attr_mask, cur_state, new_state); + if (ibqp->rwq_ind_tbl && err) + bring_down_rss_rwqs(ibqp->rwq_ind_tbl); + if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) attr->port_num = 1; @@ -3643,6 +4042,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr int mlx4_state; int err = 0; + if (ibqp->rwq_ind_tbl) + return -EOPNOTSUPP; + mutex_lock(&qp->mutex); if (qp->state == IB_QPS_RESET) { @@ -3917,6 +4319,11 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, if ((new_state == IB_WQS_ERR) && (cur_state == IB_WQS_RESET)) return -EINVAL; + /* Need to protect against the parent RSS which also may modify WQ + * state. 
+ */ + mutex_lock(&qp->mutex); + /* Can update HW state only if a RSS QP has already associated to this * WQ, so we can apply its port on the WQ. */ @@ -3926,6 +4333,8 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, if (!err) ibwq->state = new_state; + mutex_unlock(&qp->mutex); + return err; } diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 5591d955ba00..d915cab37ec3 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -95,6 +95,16 @@ struct mlx4_ib_create_srq_resp { __u32 reserved; }; +struct mlx4_ib_create_qp_rss { + __u64 rx_hash_fields_mask; + __u8 rx_hash_function; + __u8 rx_key_len; + __u8 reserved[6]; + __u8 rx_hash_key[40]; + __u32 comp_mask; + __u32 reserved1; +}; + struct mlx4_ib_create_qp { __u64 buf_addr; __u64 db_addr; @@ -123,4 +133,27 @@ struct mlx4_ib_create_rwq_ind_tbl_resp { __u32 response_length; __u32 reserved; }; + +/* RX Hash function flags */ +enum mlx4_ib_rx_hash_function_flags { + MLX4_IB_RX_HASH_FUNC_TOEPLITZ = 1 << 0, +}; + +/* + * RX Hash flags, these flags allows to set which incoming packet's field should + * participates in RX Hash. Each flag represent certain packet's field, + * when the flag is set the field that is represented by the flag will + * participate in RX Hash calculation. + */ +enum mlx4_ib_rx_hash_fields { + MLX4_IB_RX_HASH_SRC_IPV4 = 1 << 0, + MLX4_IB_RX_HASH_DST_IPV4 = 1 << 1, + MLX4_IB_RX_HASH_SRC_IPV6 = 1 << 2, + MLX4_IB_RX_HASH_DST_IPV6 = 1 << 3, + MLX4_IB_RX_HASH_SRC_PORT_TCP = 1 << 4, + MLX4_IB_RX_HASH_DST_PORT_TCP = 1 << 5, + MLX4_IB_RX_HASH_SRC_PORT_UDP = 1 << 6, + MLX4_IB_RX_HASH_DST_PORT_UDP = 1 << 7 +}; + #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3-59-g8ed1b From 6afff1c7152f6f0787d7ad5de2e124f8643f0a45 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:27 +0300 Subject: IB/mlx4: Expose RSS capabilities As a final step to support RSS feature, expose the RSS capabilities in query device verb. It includes: - Max rwq indirection tables. - Max rwq indirection table size. - Supported qp types. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b42234571a8c..1dee0a51ef67 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -556,8 +556,13 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, if ((dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET || - mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) + mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) { + props->rss_caps.max_rwq_indirection_tables = props->max_qp; + props->rss_caps.max_rwq_indirection_table_size = + dev->dev->caps.max_rss_tbl_sz; + props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET; props->max_wq_type_rq = props->max_qp; + } if (!mlx4_is_slave(dev->dev)) err = mlx4_get_internal_clock_params(dev->dev, &clock_params); -- cgit v1.2.3-59-g8ed1b From ff8d836efe0629ad473b06a94a379682baf43cb9 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Sat, 17 Jun 2017 10:37:34 -0700 Subject: IB/hfi1: Add receiving queue info to qp_stats This patch adds qp->s_ack_queue indices and size to qp_stats printout. This information will provide information about the receiving side. 
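
As a rough aid to reading the new R(tail, head, size) triple: the ack queue is a circular buffer, so its occupancy can be derived from the two printed indices roughly as below. This helper is illustrative only (name hypothetical, and it assumes the indices wrap at 'size').

    /* Sketch: entries outstanding in a circular queue given head/tail. */
    static unsigned int ack_queue_used(unsigned int head, unsigned int tail,
                                       unsigned int size)
    {
            return (head >= tail) ? head - tail : size - (tail - head);
    }
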
Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1a7af9f60c13..ee2c74dce386 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -607,7 +607,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); seq_printf(s, - "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n", + "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -630,6 +630,10 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->s_last, qp->s_acked, qp->s_cur, qp->s_tail, qp->s_head, qp->s_size, qp->s_avail, + /* ack_queue ring pointers, size */ + qp->s_tail_ack_queue, qp->r_head_ack_queue, + HFI1_MAX_RDMA_ATOMIC, + /* remote QP info */ qp->remote_qpn, rdma_ah_get_dlid(&qp->remote_ah_attr), rdma_ah_get_sl(&qp->remote_ah_attr), -- cgit v1.2.3-59-g8ed1b From 9c2d33d44dccfb0cdac987ddc3716527ebf971e0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 27 Jun 2017 08:40:59 +0100 Subject: net/mlx5: fix spelling mistake: "alloated" -> "allocated" Trivial fix to spelling mistake in mlx5_ib_dbg message Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index faafea0b7c2f..fe60c363d95b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1137,7 +1137,7 @@ static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k, if (req->num_low_latency_bfregs > req->total_num_bfregs - 1) return -EINVAL; - mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, alloated %d, using %d sys pages\n", + mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n", MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no", lib_uar_4k ? "yes" : "no", ref_bfregs, req->total_num_bfregs, *num_sys_pages); -- cgit v1.2.3-59-g8ed1b From ad84dad2160d5f36bb471b391462d651c887d693 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 26 Jun 2017 19:05:05 +0300 Subject: RDMA/qedr: notify user application if DPM is supported Direct Packet Mode support may be disabled, e.g, due to limited resources. Notifying the user application prevents wasting cycles on attempting to send these kind of packets. 
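
On the consumer side, a user library would simply gate its transmit fast path on the new flag; a minimal hedged sketch (helper name hypothetical, response struct as added to the ABI below):

    #include <stdbool.h>
    #include <rdma/qedr-abi.h>      /* struct qedr_alloc_ucontext_resp */

    /* Sketch only: skip building Direct Packet Mode WQEs when the kernel
     * reported that DPM is unavailable.
     */
    static bool qedr_use_dpm(const struct qedr_alloc_ucontext_resp *uresp)
    {
            return uresp->dpm_enabled;
    }
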
Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 1 + drivers/infiniband/hw/qedr/qedr.h | 2 ++ drivers/infiniband/hw/qedr/verbs.c | 1 + include/uapi/rdma/qedr-abi.h | 1 + 4 files changed, 5 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 0ae30f5c8cbc..199b6edbef92 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -777,6 +777,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev, if (rc) goto init_err; + dev->user_dpm_enabled = dev_info.user_dpm_enabled; dev->num_hwfns = dev_info.common.num_hwfns; dev->rdma_ctx = dev->ops->rdma_get_rdma_ctx(cdev); diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 392e76e26840..b2bb42e2805d 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -162,6 +162,8 @@ struct qedr_dev { struct qedr_qp *gsi_qp; unsigned long enet_state; + + u8 user_dpm_enabled; }; #define QEDR_MAX_SQ_PBL (0x8000) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 2ae71b8f1ba8..4322ee00498e 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -376,6 +376,7 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, memset(&uresp, 0, sizeof(uresp)); + uresp.dpm_enabled = dev->user_dpm_enabled; uresp.db_pa = ctx->dpi_phys_addr; uresp.db_size = ctx->dpi_size; uresp.max_send_wr = dev->attr.max_sqe; diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 75c270d839c8..2684004ec4fd 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -49,6 +49,7 @@ struct qedr_alloc_ucontext_resp { __u32 sges_per_recv_wr; __u32 sges_per_srq_wr; __u32 max_cqes; + __u8 dpm_enabled; }; struct qedr_alloc_pd_ureq { -- cgit v1.2.3-59-g8ed1b From 67cbe3532c2cd84303a2073cedad6b8bcad13be3 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 26 Jun 2017 19:05:06 +0300 Subject: RDMA/qedr: notify user application of supported WIDs The number of supported WIDs, if they are supported at all, can be limited due to resources. Notifying the user space application the number of available WIDs allows it to utilize them correctly. 
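
For example, a user library could spread its doorbell writes across the advertised WIDs without exceeding the count; a hedged sketch (the per-thread assignment policy here is hypothetical):

    #include <stdint.h>

    /* Sketch only: give each worker thread its own WID slot, falling back
     * to slot 0 when WIDs are not supported.
     */
    static uint16_t qedr_pick_wid(uint8_t wids_enabled, uint16_t wid_count,
                                  unsigned int thread_idx)
    {
            if (!wids_enabled || !wid_count)
                    return 0;
            return (uint16_t)(thread_idx % wid_count);
    }
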
Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 2 ++ include/uapi/rdma/qedr-abi.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 4322ee00498e..9ee2dce3e5bb 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -377,6 +377,8 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, memset(&uresp, 0, sizeof(uresp)); uresp.dpm_enabled = dev->user_dpm_enabled; + uresp.wids_enabled = 1; + uresp.wid_count = oparams.wid_count; uresp.db_pa = ctx->dpi_phys_addr; uresp.db_size = ctx->dpi_size; uresp.max_send_wr = dev->attr.max_sqe; diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 2684004ec4fd..54b64357ab24 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -50,6 +50,8 @@ struct qedr_alloc_ucontext_resp { __u32 sges_per_srq_wr; __u32 max_cqes; __u8 dpm_enabled; + __u8 wids_enabled; + __u16 wid_count; }; struct qedr_alloc_pd_ureq { -- cgit v1.2.3-59-g8ed1b From e89bf462b6bece63a60723af88c76dce9dbe6b85 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 8 Jun 2017 17:23:50 +0300 Subject: IB/hns: Support compile test for hns RoCE driver Compiling the hns RoCE driver requires ARM architecture. In order to simplify development of IB/core, support compile test. Add the necessary includes for that too. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/Kconfig | 2 +- drivers/infiniband/hw/hns/hns_roce_alloc.c | 1 + drivers/infiniband/hw/hns/hns_roce_mr.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index e1a6e055cd60..cbe6b51394fd 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,7 +1,7 @@ config INFINIBAND_HNS tristate "HNS RoCE Driver" depends on NET_VENDOR_HISILICON - depends on ARM64 && HNS && HNS_DSAF && HNS_ENET + depends on (ARM64 || COMPILE_TEST) && HNS && HNS_DSAF && HNS_ENET ---help--- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine is used in Hisilicon Hi1610 and more further ICT SoC. diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 605962f2828c..e1b433cdd5e2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -32,6 +32,7 @@ */ #include +#include #include "hns_roce_device.h" int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 80fc01ffd8bd..e387360e3780 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -32,6 +32,7 @@ */ #include +#include #include #include "hns_roce_device.h" #include "hns_roce_cmd.h" -- cgit v1.2.3-59-g8ed1b From 3756c7f59a4538bb40d77a25221dc411cc7dd2e9 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Tue, 25 Jul 2017 13:36:24 +0800 Subject: IB/hns: fix boolreturn.cocci warnings drivers/infiniband/hw/hns/hns_roce_qp.c:802:9-10: WARNING: return of 0/1 in function 'hns_roce_wq_overflow' with return type bool Return statements in functions returning bool should use true/false instead of 1/0. 
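
The shape the rule enforces, as a standalone illustration (names hypothetical):

    #include <stdbool.h>

    static bool wq_overflow(unsigned int cur, unsigned int nreq,
                            unsigned int max_post)
    {
            if (cur + nreq < max_post)
                    return false;   /* not: return 0; */
            return true;            /* not: return 1; */
    }
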
Generated by: scripts/coccinelle/misc/boolreturn.cocci Fixes: 7d1b6a678e0b ("IB/hns: Support compile test for hns RoCE driver") CC: Matan Barak Signed-off-by: Fengguang Wu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 054c52699090..f5dd21c2d275 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -799,7 +799,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, cur = hr_wq->head - hr_wq->tail; if (likely(cur + nreq < hr_wq->max_post)) - return 0; + return false; hr_cq = to_hr_cq(ib_cq); spin_lock(&hr_cq->lock); -- cgit v1.2.3-59-g8ed1b From 87809f83a4ba0bfe2b7f6b7d4611994fdbed8c8f Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Tue, 25 Jul 2017 13:36:25 +0800 Subject: IB/hns: fix returnvar.cocci warnings drivers/infiniband/hw/hns/hns_roce_hw_v1.c:2026:5-8: Unneeded variable: "ret". Return "0" on line 2046 Remove unneeded variable used to store return value. Generated by: scripts/coccinelle/misc/returnvar.cocci Fixes: 7d1b6a678e0b ("IB/hns: Support compile test for hns RoCE driver") CC: Matan Barak Signed-off-by: Fengguang Wu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 23fad6d96944..38f5c77baabf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2023,7 +2023,6 @@ int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); u32 notification_flag; u32 doorbell[2]; - int ret = 0; notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; @@ -2043,7 +2042,7 @@ int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) hns_roce_write64_k(doorbell, hr_cq->cq_db_l); - return ret; + return 0; } static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq, -- cgit v1.2.3-59-g8ed1b From d1d71499a65f8d0c7aa776aa70c56974ead45e56 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Tue, 25 Jul 2017 13:36:25 +0800 Subject: IB/hns: fix semicolon.cocci warnings drivers/infiniband/hw/hns/hns_roce_eq.c:295:3-4: Unneeded semicolon Remove unneeded semicolon. 
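
The flagged pattern, as a standalone illustration (function hypothetical): a switch is already a complete statement, so a semicolon after its closing brace is a stray empty statement.

    static void handle_event(int event_type)
    {
            switch (event_type) {
            default:
                    break;
            }       /* no ';' belongs here */
    }
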
Generated by: scripts/coccinelle/misc/semicolon.cocci Fixes: 7d1b6a678e0b ("IB/hns: Support compile test for hns RoCE driver") CC: Matan Barak Signed-off-by: Fengguang Wu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_eq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c index 50f864935a0e..0881813c9f14 100644 --- a/drivers/infiniband/hw/hns/hns_roce_eq.c +++ b/drivers/infiniband/hw/hns/hns_roce_eq.c @@ -292,7 +292,7 @@ static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) dev_warn(dev, "Unhandled event %d on EQ %d at index %u\n", event_type, eq->eqn, eq->cons_index); break; - }; + } eq->cons_index++; aeqes_found = 1; -- cgit v1.2.3-59-g8ed1b From ecd840ff9b793ac60e3e6658414525535349a17b Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Fri, 28 Jul 2017 15:17:18 -0400 Subject: RDMA/hns: fix build regression The 0day build system flags implicit includes as errors. A patch from Matan Barak to allow hns_roce, an aarch64 specific RDMA driver, to be built on other arches, but it resulted in build regressions. The problem is that hns_roce_device.h needs a definition for __raw_writeq but did not have an include to provide it. Add as an include to resolve the issue. Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e493a61e14e1..aeef2adf1a45 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -33,6 +33,7 @@ #ifndef _HNS_ROCE_DEVICE_H #define _HNS_ROCE_DEVICE_H +#include #include #define DRV_NAME "hns_roce" -- cgit v1.2.3-59-g8ed1b From 967de35826070026e51e66dcf020059ef6ce26b5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 28 Jul 2017 15:35:22 +0200 Subject: IB/hns: include linux/interrupt.h I ran into this build error on linux-next: drivers/infiniband/hw/hns/hns_roce_eq.c:477:8: error: unknown type name 'irqreturn_t' static irqreturn_t hns_roce_msi_x_interrupt(int irq, void *eq_ptr) ^~~~~~~~~~~ drivers/infiniband/hw/hns/hns_roce_eq.c: In function 'hns_roce_msi_x_interrupt': drivers/infiniband/hw/hns/hns_roce_eq.c:485:9: error: implicit declaration of function 'IRQ_RETVAL'; did you mean 'BPF_RVAL'? [-Werror=implicit-function-declaration] return IRQ_RETVAL(int_work); I have bisected this to a seemingly unrelated change that happened to remove some indirect header inclusions. Simply including the required header explicitly fixes the build failure. 
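
Reduced to a standalone sketch, the failing shape is simply a handler whose return type and macro both live in linux/interrupt.h:

    #include <linux/interrupt.h>    /* irqreturn_t, IRQ_RETVAL() */

    /* Hedged sketch of the handler that broke; previously the header only
     * arrived through indirect includes.
     */
    static irqreturn_t demo_msi_x_interrupt(int irq, void *eq_ptr)
    {
            int int_work = 0;

            /* ... service the event queue, counting completed work ... */

            return IRQ_RETVAL(int_work);
    }
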
Fixes: 09c7570480f7 ("xfrm: remove flow cache") Signed-off-by: Arnd Bergmann Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_eq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c index 0881813c9f14..b0f43735de1a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_eq.c +++ b/drivers/infiniband/hw/hns/hns_roce_eq.c @@ -31,6 +31,7 @@ */ #include +#include #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_eq.h" -- cgit v1.2.3-59-g8ed1b From c53df62c7a9a82c7375308af270f90a08e94f6b6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Fri, 30 Jun 2017 13:14:40 -0700 Subject: IB/hfi1: Check return values from PCI config API calls Ensure that return values from kernel PCI config access API calls in HFI driver are checked and react properly if they are not expected (i.e. not successful). Reviewed-by: Jakub Byczkowski Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 22 +++- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/pcie.c | 238 +++++++++++++++++++++++++++++++------- 3 files changed, 215 insertions(+), 47 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 937350d9deab..efc84810e821 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13846,9 +13846,10 @@ static void init_sc2vl_tables(struct hfi1_devdata *dd) * a reset following the (possible) FLR in this routine. * */ -static void init_chip(struct hfi1_devdata *dd) +static int init_chip(struct hfi1_devdata *dd) { int i; + int ret = 0; /* * Put the HFI CSRs in a known state. 
@@ -13896,12 +13897,22 @@ static void init_chip(struct hfi1_devdata *dd) pcie_flr(dd->pcidev); /* restore command and BARs */ - restore_pci_variables(dd); + ret = restore_pci_variables(dd); + if (ret) { + dd_dev_err(dd, "%s: Could not restore PCI variables\n", + __func__); + return ret; + } if (is_ax(dd)) { dd_dev_info(dd, "Resetting CSRs with FLR\n"); pcie_flr(dd->pcidev); - restore_pci_variables(dd); + ret = restore_pci_variables(dd); + if (ret) { + dd_dev_err(dd, "%s: Could not restore PCI variables\n", + __func__); + return ret; + } } } else { dd_dev_info(dd, "Resetting CSRs with writes\n"); @@ -13929,6 +13940,7 @@ static void init_chip(struct hfi1_devdata *dd) write_csr(dd, ASIC_QSFP1_OUT, 0x1f); write_csr(dd, ASIC_QSFP2_OUT, 0x1f); init_chip_resources(dd); + return ret; } static void init_early_variables(struct hfi1_devdata *dd) @@ -14914,7 +14926,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, goto bail_cleanup; /* obtain chip sizes, reset chip CSRs */ - init_chip(dd); + ret = init_chip(dd); + if (ret) + goto bail_cleanup; /* read in the PCIe link speed information */ ret = pcie_speeds(dd); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1a33a5087734..62f843d7b099 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1854,7 +1854,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *dd); int request_msix(struct hfi1_devdata *dd, u32 msireq); -void restore_pci_variables(struct hfi1_devdata *dd); +int restore_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); int get_platform_config_field(struct hfi1_devdata *dd, diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index f01841b51946..903ea45bf650 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -68,7 +68,7 @@ /* * Code to adjust PCIe capabilities. */ -static void tune_pcie_caps(struct hfi1_devdata *); +static int tune_pcie_caps(struct hfi1_devdata *); /* * Do all the common PCIe setup and initialization. @@ -161,6 +161,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) { unsigned long len; resource_size_t addr; + int ret = 0; dd->pcidev = pdev; pci_set_drvdata(pdev, dd); @@ -207,19 +208,56 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) /* * Save BARs and command to rewrite after device reset. 
*/ - pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); - pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1); - pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); - pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl); - pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl); - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, - &dd->pcie_devctl2); - pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); - pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3); - pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); + if (ret) + goto read_error; + + ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); + if (ret) + goto read_error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, + &dd->pcie_devctl); + if (ret) + goto read_error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, + &dd->pcie_lnkctl); + if (ret) + goto read_error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, + &dd->pcie_devctl2); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, + &dd->pci_lnkctl3); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); + if (ret) + goto read_error; return 0; + +read_error: + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; } /* @@ -270,8 +308,14 @@ static u32 extract_width(u16 linkstat) static void update_lbus_info(struct hfi1_devdata *dd) { u16 linkstat; + int ret; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return; + } - pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); dd->lbus_width = extract_width(linkstat); dd->lbus_speed = extract_speed(linkstat); snprintf(dd->lbus_info, sizeof(dd->lbus_info), @@ -286,6 +330,7 @@ int pcie_speeds(struct hfi1_devdata *dd) { u32 linkcap; struct pci_dev *parent = dd->pcidev->bus->self; + int ret; if (!pci_is_pcie(dd->pcidev)) { dd_dev_err(dd, "Can't find PCI Express capability!\n"); @@ -295,7 +340,12 @@ int pcie_speeds(struct hfi1_devdata *dd) /* find if our max speed is Gen3 and parent supports Gen3 speeds */ dd->link_gen3_capable = 1; - pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap); + ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; + } + if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) { dd_dev_info(dd, "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n", @@ -327,7 +377,7 @@ int pcie_speeds(struct hfi1_devdata *dd) */ int request_msix(struct hfi1_devdata *dd, u32 msireq) { - int nvec; + int nvec, ret; nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, PCI_IRQ_MSIX | PCI_IRQ_LEGACY); @@ -336,7 +386,12 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq) return nvec; } - tune_pcie_caps(dd); + ret = tune_pcie_caps(dd); + if (ret) { + 
dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret); + pci_free_irq_vectors(dd->pcidev); + return ret; + } /* check for legacy IRQ */ if (nvec == 1 && !dd->pcidev->msix_enabled) @@ -346,19 +401,61 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq) } /* restore command and BARs after a reset has wiped them out */ -void restore_pci_variables(struct hfi1_devdata *dd) +int restore_pci_variables(struct hfi1_devdata *dd) { - pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command); - pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0); - pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1); - pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom); - pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl); - pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl); - pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2, - dd->pcie_devctl2); - pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0); - pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3); - pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); + int ret = 0; + + ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, + dd->pcibar0); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, + dd->pcibar1); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom); + if (ret) + goto error; + + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, + dd->pcie_devctl); + if (ret) + goto error; + + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, + dd->pcie_lnkctl); + if (ret) + goto error; + + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2, + dd->pcie_devctl2); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, + dd->pci_lnkctl3); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); + if (ret) + goto error; + + return 0; + +error: + dd_dev_err(dd, "Unable to write to PCI config\n"); + return ret; } /* @@ -373,21 +470,33 @@ uint aspm_mode = ASPM_MODE_DISABLED; module_param_named(aspm, aspm_mode, uint, S_IRUGO); MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); -static void tune_pcie_caps(struct hfi1_devdata *dd) +static int tune_pcie_caps(struct hfi1_devdata *dd) { struct pci_dev *parent; u16 rc_mpss, rc_mps, ep_mpss, ep_mps; u16 rc_mrrs, ep_mrrs, max_mrrs, ectl; + int ret; /* * Turn on extended tags in DevCtl in case the BIOS has turned it off * to improve WFR SDMA bandwidth */ - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl); + ret = pcie_capability_read_word(dd->pcidev, + PCI_EXP_DEVCTL, &ectl); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; + } + if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) { dd_dev_info(dd, "Enabling PCIe extended tags\n"); ectl |= PCI_EXP_DEVCTL_EXT_TAG; - pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl); + ret = pcie_capability_write_word(dd->pcidev, + PCI_EXP_DEVCTL, ectl); + if (ret) { + dd_dev_err(dd, "Unable to write to PCI config\n"); + return ret; + } } /* Find out supported and configured values for parent (root) */ parent = dd->pcidev->bus->self; @@ -396,14 +505,14 @@ static void tune_pcie_caps(struct hfi1_devdata *dd) * 
access to the upstream component. */ if (!parent) - return; + return -EINVAL; if (!pci_is_root_bus(parent->bus)) { dd_dev_info(dd, "Parent not root\n"); - return; + return -EINVAL; } if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev)) - return; + return -EINVAL; rc_mpss = parent->pcie_mpss; rc_mps = ffs(pcie_get_mps(parent)) - 8; /* Find out supported and configured values for endpoint (us) */ @@ -449,6 +558,8 @@ static void tune_pcie_caps(struct hfi1_devdata *dd) ep_mrrs = max_mrrs; pcie_set_readrq(dd->pcidev, ep_mrrs); } + + return 0; } /* End of PCIe capability tuning */ @@ -680,6 +791,7 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs, u32 violation; u32 i; u8 c_minus1, c0, c_plus1; + int ret; for (i = 0; i < 11; i++) { /* set index */ @@ -691,8 +803,14 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs, pci_write_config_dword(pdev, PCIE_CFG_REG_PL102, eq_value(c_minus1, c0, c_plus1)); /* check if these coefficients violate EQ rules */ - pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105, - &violation); + ret = pci_read_config_dword(dd->pcidev, + PCIE_CFG_REG_PL105, &violation); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + hit_error = 1; + break; + } + if (violation & PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){ if (hit_error == 0) { @@ -1146,7 +1264,13 @@ retry: * that it is Gen3 capable earlier. */ dd_dev_info(dd, "%s: setting parent target link speed\n", __func__); - pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2); + ret = pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return_error = 1; + goto done; + } + dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__, (u32)lnkctl2); /* only write to parent if target is not as high as ours */ @@ -1155,20 +1279,37 @@ retry: lnkctl2 |= target_vector; dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__, (u32)lnkctl2); - pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2); + ret = pcie_capability_write_word(parent, + PCI_EXP_LNKCTL2, lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to write to PCI config\n"); + return_error = 1; + goto done; + } } else { dd_dev_info(dd, "%s: ..target speed is OK\n", __func__); } dd_dev_info(dd, "%s: setting target link speed\n", __func__); - pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2); + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return_error = 1; + goto done; + } + dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__, (u32)lnkctl2); lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK; lnkctl2 |= target_vector; dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__, (u32)lnkctl2); - pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2); + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to write to PCI config\n"); + return_error = 1; + goto done; + } /* step 5h: arm gasket logic */ /* hold DC in reset across the SBR */ @@ -1218,7 +1359,14 @@ retry: /* restore PCI space registers we know were reset */ dd_dev_info(dd, "%s: calling restore_pci_variables\n", __func__); - restore_pci_variables(dd); + ret = restore_pci_variables(dd); + if (ret) { + dd_dev_err(dd, "%s: Could not restore PCI variables\n", + __func__); + return_error = 1; + goto done; + } + /* restore firmware control */ write_csr(dd, MISC_CFG_FW_CTRL, 
fw_ctrl); @@ -1248,7 +1396,13 @@ retry: setextled(dd, 0); /* check for any per-lane errors */ - pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32); + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return_error = 1; + goto done; + } + dd_dev_info(dd, "%s: per-lane errors: 0x%x\n", __func__, reg32); /* extract status, look for our HFI */ -- cgit v1.2.3-59-g8ed1b From cb51c5d2cda855302910ab352f3d391c1a00aba0 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 24 Jul 2017 07:45:31 -0700 Subject: IB/hfi1: Fix bar0 mapping to use write combining When the debugpat kernel boot flag is turned on the following traces are printed: [ 1884.793168] x86/PAT: Overlap at 0x90000000-0x92000000 [ 1884.803510] x86/PAT: reserve_memtype added [mem 0x91200000-0x9127ffff], track uncached-minus, req write-combining, ret uncached-minus [ 1884.818167] hfi1 0000:05:00.0: hfi1_0: WC Remapped RcvArray: ffffc9000a980000 The ioremap_wc() clearly is not returning a write combining mapping due to an overlap where the RcvArray is mapped in an uncached mapping prior to creating the proposed write combining mapping. The patch replaces the single base register for uncached CSRs that used to overlap the RcvArray with two mappings. One, kregbase1, from the bar0 up to the RcvArray and another, kregbase2, from the end of the RcvArray to the pio send buffer space. A new dd field, base2_start, is used to convert the zero-based offset in the CSR routines to the correct kregbase1/kregbase2 mapping. A single direct write of the RcvArray CSRs is replaced with hfi1_put_tid() to ensure correct access using the new disjoint mapping. Additionally, the kregend field is deleted since it is only ever written. debugpat now shows the RcvArray as write combining: [ 35.688990] x86/PAT: reserve_memtype added [mem 0x91200000-0x9127ffff], track write-combining, req write-combining, ret write-combining To insulate from any potential issues with write combining, all writeq are now flushed in hfi1_put_tid() and rcv_array_wc_fill(). Reviewed-by: Mitko Haralanov Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 79 ++++++++++++++++++++++++------- drivers/infiniband/hw/hfi1/chip.h | 4 +- drivers/infiniband/hw/hfi1/driver.c | 4 +- drivers/infiniband/hw/hfi1/exp_rcv.h | 5 +- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- drivers/infiniband/hw/hfi1/hfi.h | 20 ++++---- drivers/infiniband/hw/hfi1/pcie.c | 58 ++++++++++++++++------- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 3 +- 8 files changed, 126 insertions(+), 49 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index efc84810e821..4fce173c8667 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1297,25 +1297,71 @@ CNTR_ELEM(#name, \ CNTR_SYNTH, \ access_ibp_##cntr) +/** + * hfi1_addr_from_offset - return addr for readq/writeq + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * + * This routine selects the appropriate base address + * based on the indicated offset.
+ */ +static inline void __iomem *hfi1_addr_from_offset( + const struct hfi1_devdata *dd, + u32 offset) +{ + if (offset >= dd->base2_start) + return dd->kregbase2 + (offset - dd->base2_start); + return dd->kregbase1 + offset; +} + +/** + * read_csr - read CSR at the indicated offset + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * + * Return: the value read or all FF's if there + * is no mapping + */ u64 read_csr(const struct hfi1_devdata *dd, u32 offset) { - if (dd->flags & HFI1_PRESENT) { - return readq((void __iomem *)dd->kregbase + offset); - } + if (dd->flags & HFI1_PRESENT) + return readq(hfi1_addr_from_offset(dd, offset)); return -1; } +/** + * write_csr - write CSR at the indicated offset + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * @value - value to write + */ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) { - if (dd->flags & HFI1_PRESENT) - writeq(value, (void __iomem *)dd->kregbase + offset); + if (dd->flags & HFI1_PRESENT) { + void __iomem *base = hfi1_addr_from_offset(dd, offset); + + /* avoid write to RcvArray */ + if (WARN_ON(offset >= RCV_ARRAY && offset < dd->base2_start)) + return; + writeq(value, base); + } } +/** + * get_csr_addr - return the iomem address for offset + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * + * Return: The iomem address to use in subsequent + * writeq/readq operations. + */ void __iomem *get_csr_addr( - struct hfi1_devdata *dd, + const struct hfi1_devdata *dd, u32 offset) { - return (void __iomem *)dd->kregbase + offset; + if (dd->flags & HFI1_PRESENT) + return hfi1_addr_from_offset(dd, offset); + return NULL; } static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr, @@ -9752,14 +9798,13 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order) { u64 reg; - void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc : - (dd->kregbase + RCV_ARRAY)); if (!(dd->flags & HFI1_PRESENT)) goto done; - if (type == PT_INVALID) { + if (type == PT_INVALID || type == PT_INVALID_FLUSH) { pa = 0; + order = 0; } else if (type > PT_INVALID) { dd_dev_err(dd, "unexpected receive array type %u for index %u, not handled\n", @@ -9773,13 +9818,14 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK) << RCV_ARRAY_RT_ADDR_SHIFT; - trace_hfi1_write_rcvarray(base + (index * 8), reg); - writeq(reg, base + (index * 8)); + trace_hfi1_write_rcvarray(dd->rcvarray_wc + (index * 8), reg); + writeq(reg, dd->rcvarray_wc + (index * 8)); - if (type == PT_EAGER) + if (type == PT_EAGER || type == PT_INVALID_FLUSH || (index & 3) == 3) /* - * Eager entries are written one-by-one so we have to push them - * after we write the entry.
+ * Eager entries are written and flushed + * + * Expected entries are flushed every 4 writes */ flush_wc(); done: @@ -13411,8 +13457,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) /* RcvArray */ for (i = 0; i < dd->chip_rcv_array_count; i++) - write_csr(dd, RCV_ARRAY + (8 * i), - RCV_ARRAY_RT_WRITE_ENABLE_SMASK); + hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0); /* RcvQPMapTable */ for (i = 0; i < 32; i++) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 3dab3156ba4a..d3622cb74b3f 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -605,11 +605,11 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 *data); int write_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 data); void __iomem *get_csr_addr( - struct hfi1_devdata *dd, + const struct hfi1_devdata *dd, u32 offset); static inline void __iomem *get_kctxt_csr_addr( - struct hfi1_devdata *dd, + const struct hfi1_devdata *dd, int ctxt, u32 offset0) { diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 26628cb61437..4a149cca4718 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -195,7 +195,7 @@ int hfi1_count_active_units(void) spin_lock_irqsave(&hfi1_devs_lock, flags); list_for_each_entry(dd, &hfi1_dev_list, list) { - if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase) + if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1) continue; for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; @@ -1282,7 +1282,7 @@ int hfi1_reset_device(int unit) dd_dev_info(dd, "Reset on unit %u requested\n", unit); - if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) { + if (!dd->kregbase1 || !(dd->flags & HFI1_PRESENT)) { dd_dev_info(dd, "Invalid unit number %u or not initialized or not present\n", unit); diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h index c7d02bcddded..08719047628a 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/exp_rcv.h @@ -137,8 +137,11 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) * Doing the WC fill writes only makes sense if the device is * present and the RcvArray has been mapped as WC memory. 
*/ - if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) + if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) { writeq(0, dd->rcvarray_wc + (index * 8)); + if ((index & 3) == 3) + flush_wc(); + } } static inline void tid_group_add_tail(struct tid_group *grp, diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index bbf80b1dd9d9..fbf52841aea4 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -181,7 +181,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) struct hfi1_devdata, user_cdev); - if (!((dd->flags & HFI1_PRESENT) && dd->kregbase)) + if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1)) return -EINVAL; if (!atomic_inc_not_zero(&dd->user_refcount)) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 62f843d7b099..2e5137b336ce 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -867,12 +867,15 @@ struct hfi1_devdata { struct device *diag_device; struct device *ui_device; - /* mem-mapped pointer to base of chip regs */ - u8 __iomem *kregbase; - /* end of mem-mapped chip space excluding sendbuf and user regs */ - u8 __iomem *kregend; - /* physical address of chip for io_remap, etc. */ + /* first mapping up to RcvArray */ + u8 __iomem *kregbase1; resource_size_t physaddr; + + /* second uncached mapping from RcvArray to pio send buffers */ + u8 __iomem *kregbase2; + /* for detecting offset above kregbase2 address */ + u32 base2_start; + /* Per VL data. Enough for all VLs but not all elements are set/used. */ struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; /* send context data */ @@ -1236,9 +1239,10 @@ static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) #define dc8051_ver_patch(a) ((a) & 0x0000ff) /* f_put_tid types */ -#define PT_EXPECTED 0 -#define PT_EAGER 1 -#define PT_INVALID 2 +#define PT_EXPECTED 0 +#define PT_EAGER 1 +#define PT_INVALID_FLUSH 2 +#define PT_INVALID 3 struct tid_rb_node; struct mmu_rb_node; diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 903ea45bf650..cc7be224095d 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -180,31 +180,47 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) return -EINVAL; } - dd->kregbase = ioremap_nocache(addr, TXE_PIO_SEND); - if (!dd->kregbase) + dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY); + if (!dd->kregbase1) { + dd_dev_err(dd, "UC mapping of kregbase1 failed\n"); return -ENOMEM; + } + dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY); + dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); + dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count); + dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8; + + dd->kregbase2 = ioremap_nocache( + addr + dd->base2_start, + TXE_PIO_SEND - dd->base2_start); + if (!dd->kregbase2) { + dd_dev_err(dd, "UC mapping of kregbase2 failed\n"); + goto nomem; + } + dd_dev_info(dd, "UC base2: %p for %x\n", dd->kregbase2, + TXE_PIO_SEND - dd->base2_start); dd->piobase = ioremap_wc(addr + TXE_PIO_SEND, TXE_PIO_SIZE); if (!dd->piobase) { - iounmap(dd->kregbase); - return -ENOMEM; + dd_dev_err(dd, "WC mapping of send buffers failed\n"); + goto nomem; } + dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE); - dd->flags |= HFI1_PRESENT; /* now register routines work */ - - dd->kregend = dd->kregbase + TXE_PIO_SEND; dd->physaddr = addr; /* used for io_remap, 
etc. */ /* - * Re-map the chip's RcvArray as write-combining to allow us + * Map the chip's RcvArray as write-combining to allow us to write an entire cacheline worth of entries in one shot. - * If this re-map fails, just continue - the RcvArray programming - * function will handle both cases. */ - dd->chip_rcv_array_count = read_csr(dd, RCV_ARRAY_CNT); dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY, dd->chip_rcv_array_count * 8); - dd_dev_info(dd, "WC Remapped RcvArray: %p\n", dd->rcvarray_wc); + if (!dd->rcvarray_wc) { + dd_dev_err(dd, "WC mapping of receive array failed\n"); + goto nomem; + } + dd_dev_info(dd, "WC RcvArray: %p for %x\n", + dd->rcvarray_wc, dd->chip_rcv_array_count * 8); /* * Save BARs and command to rewrite after device reset. */ @@ -253,10 +269,16 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) if (ret) goto read_error; + dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */ return 0; read_error: dd_dev_err(dd, "Unable to read from PCI config\n"); + goto bail_error; +nomem: + ret = -ENOMEM; +bail_error: + hfi1_pcie_ddcleanup(dd); return ret; } @@ -267,15 +289,19 @@ read_error: */ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd) { - u64 __iomem *base = (void __iomem *)dd->kregbase; - dd->flags &= ~HFI1_PRESENT; - dd->kregbase = NULL; - iounmap(base); + if (dd->kregbase1) + iounmap(dd->kregbase1); + dd->kregbase1 = NULL; + if (dd->kregbase2) + iounmap(dd->kregbase2); + dd->kregbase2 = NULL; if (dd->rcvarray_wc) iounmap(dd->rcvarray_wc); + dd->rcvarray_wc = NULL; if (dd->piobase) iounmap(dd->piobase); + dd->piobase = NULL; } /* return the PCIe link speed from the given link status */ diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 6318e6ca1b18..3baf71944471 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -814,12 +814,11 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) node->npages, node->mmu.addr, node->phys, node->dma_addr); - hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0); /* * Make sure device has seen the write before we unpin the * pages. */ - flush_wc(); + hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, PCI_DMA_FROMDEVICE); -- cgit v1.2.3-59-g8ed1b From bcad29137a9731bfa5e16d64bf8e8a71a268ac88 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 24 Jul 2017 07:45:37 -0700 Subject: IB/hfi1: Serve the most starved iowait entry first When an egress resource (SDMA descriptors, pio credits) is not available, a sending thread will be put on the resource's wait queue. When the resource becomes available again, up to a fixed number of sending threads can be awakened sequentially and removed from the wait queue, depending on the number of waiting threads and the number of free resources. Since each awakened sending thread will send as many packets as possible, it is highly likely that the first sending thread will consume all the egress resources. Subsequently, it will be put back to the end of the wait queue. Depending on the timing when the later sending threads wake up, they may not be able to send any packet and be again put back to the end of the wait queue sequentially, right behind the first sending thread. This starvation cycle continues until some sending threads exceed their retry limit and consequently fail.
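The fix that follows boils down to a head-versus-tail queueing policy plus a starve counter. As a minimal sketch (illustrative only; struct waiter and waiter_queue are hypothetical names, the driver's real helper is the iowait_queue() added in the diff below):

    #include <linux/list.h>
    #include <linux/types.h>

    struct waiter {
    	struct list_head list;
    	u8 starved_cnt;		/* consecutive waits with nothing sent */
    };

    static void waiter_queue(bool pkts_sent, struct waiter *w,
    			 struct list_head *wait_head)
    {
    	if (pkts_sent) {
    		/* made progress: go to the back, forget starvation */
    		w->starved_cnt = 0;
    		list_add_tail(&w->list, wait_head);
    	} else {
    		/* sent nothing: move up and remember the starvation */
    		w->starved_cnt++;
    		list_add(&w->list, wait_head);
    	}
    }

With an unconditional list_add_tail() for every waiter, the thread that drains the freed resources keeps re-queueing ahead of the threads it starved; biasing the queue position and serving the largest starved_cnt first breaks that cycle.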
This patch fixes the issue with two simple approaches: (1) Any starved sending thread will be put to the head of the wait queue while a served sending thread will be put to the tail; (2) The most starved sending thread will be served first. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/iowait.h | 70 +++++++++++++++++++++++++++++++++- drivers/infiniband/hw/hfi1/pio.c | 13 +++++-- drivers/infiniband/hw/hfi1/qp.c | 9 +++-- drivers/infiniband/hw/hfi1/ruc.c | 5 ++- drivers/infiniband/hw/hfi1/sdma.c | 34 ++++++++++++----- drivers/infiniband/hw/hfi1/sdma.h | 3 +- drivers/infiniband/hw/hfi1/user_sdma.c | 8 ++-- drivers/infiniband/hw/hfi1/verbs.c | 6 ++- drivers/infiniband/hw/hfi1/verbs.h | 1 + drivers/infiniband/hw/hfi1/vnic.h | 1 + drivers/infiniband/hw/hfi1/vnic_sdma.c | 14 +++++-- 11 files changed, 136 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index d9740ddea6f1..591697d85eed 100644 --- a/drivers/infiniband/hw/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h @@ -106,7 +106,9 @@ struct iowait { struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *tx, - unsigned seq); + uint seq, + bool pkts_sent + ); void (*wakeup)(struct iowait *wait, int reason); void (*sdma_drained)(struct iowait *wait); seqlock_t *lock; @@ -118,6 +120,7 @@ struct iowait { u32 count; u32 tx_limit; u32 tx_count; + u8 starved_cnt; }; #define SDMA_AVAIL_REASON 0 @@ -143,7 +146,8 @@ static inline void iowait_init( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *tx, - unsigned seq), + uint seq, + bool pkts_sent), void (*wakeup)(struct iowait *wait, int reason), void (*sdma_drained)(struct iowait *wait)) { @@ -305,4 +309,66 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait) return tx; } +/** + * iowait_queue - Put the iowait on a wait queue + * @pkts_sent: have some packets been sent before queuing? + * @w: the iowait struct + * @wait_head: the wait queue + * + * This function is called to insert an iowait struct into a + * wait queue after a resource (e.g., an sdma descriptor or pio + * buffer) runs out. + */ +static inline void iowait_queue(bool pkts_sent, struct iowait *w, + struct list_head *wait_head) +{ + /* + * To play fair, insert the iowait at the tail of the wait queue if it + * has already sent some packets; otherwise, put it at the head. + */ + if (pkts_sent) { + list_add_tail(&w->list, wait_head); + w->starved_cnt = 0; + } else { + list_add(&w->list, wait_head); + w->starved_cnt++; + } +} + +/** + * iowait_starve_clear - clear the wait queue's starve count + * @pkts_sent: have some packets been sent? + * @w: the iowait struct + * + * This function is called to clear the starve count. If no + * packets have been sent, the starve count will not be cleared. + */ +static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w) +{ + if (pkts_sent) + w->starved_cnt = 0; +} + +/** + * iowait_starve_find_max - Find the maximum of the starve count + * @w: the iowait struct + * @max: a variable containing the max starve count + * @idx: the index of the current iowait in an array + * @max_idx: a variable containing the array index for the + * iowait entry that has the max starve count + * + * This function is called to compare the starve count of a + * given iowait with the given max starve count.
The max starve + * count and the index will be updated if the iowait's starve + * count is larger. + */ +static inline void iowait_starve_find_max(struct iowait *w, u8 *max, + uint idx, uint *max_idx) +{ + if (w->starved_cnt > *max) { + *max = w->starved_cnt; + *max_idx = idx; + } +} + #endif diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index ed72b5aca139..adb6a4da6107 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1568,7 +1568,8 @@ static void sc_piobufavail(struct send_context *sc) struct rvt_qp *qp; struct hfi1_qp_priv *priv; unsigned long flags; - unsigned i, n = 0; + uint i, n = 0, max_idx = 0; + u8 max_starved_cnt = 0; if (dd->send_contexts[sc->sw_index].type != SC_KERNEL && dd->send_contexts[sc->sw_index].type != SC_VL15) @@ -1591,6 +1592,7 @@ static void sc_piobufavail(struct send_context *sc) priv = qp->priv; list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; + iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx); /* refcount held until actual wake up */ qps[n++] = qp; } @@ -1605,9 +1607,14 @@ static void sc_piobufavail(struct send_context *sc) } write_sequnlock_irqrestore(&dev->iowait_lock, flags); - for (i = 0; i < n; i++) - hfi1_qp_wakeup(qps[i], + /* Wake up the most starved one first */ + if (n) + hfi1_qp_wakeup(qps[max_idx], RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); + for (i = 0; i < n; i++) + if (i != max_idx) + hfi1_qp_wakeup(qps[i], + RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); } /* translate a send credit update to a bit code of reasons */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 806d166cf6ee..b801d8469956 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -68,7 +68,8 @@ static int iowait_sleep( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *stx, - unsigned seq); + unsigned int seq, + bool pkts_sent); static void iowait_wakeup(struct iowait *wait, int reason); static void iowait_sdma_drained(struct iowait *wait); static void qp_pio_drain(struct rvt_qp *qp); @@ -371,7 +372,8 @@ static int iowait_sleep( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *stx, - unsigned seq) + uint seq, + bool pkts_sent) { struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq); struct rvt_qp *qp; @@ -402,7 +404,8 @@ static int iowait_sleep( ibp->rvp.n_dmawait++; qp->s_flags |= RVT_S_WAIT_DMA_DESC; - list_add_tail(&priv->s_iowait.list, &sde->dmawait); + iowait_queue(pkts_sent, &priv->s_iowait, + &sde->dmawait); priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 9cf506a9a796..f13ddb273d77 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -811,6 +811,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, static bool schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { + ps->pkts_sent = true; + if (unlikely(time_after(jiffies, ps->timeout))) { if (!ps->in_thread || workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) { @@ -907,6 +909,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) ps.timeout = jiffies + ps.timeout_int; ps.cpu = priv->s_sde ?
priv->s_sde->cpu : cpumask_first(cpumask_of_node(ps.ppd->dd->node)); + ps.pkts_sent = false; /* ensure a pre-built packet is handled */ ps.s_txreq = get_waiting_verbs_txreq(qp); @@ -929,7 +932,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) spin_lock_irqsave(&qp->s_lock, ps.flags); } } while (make_req(qp, &ps)); - + iowait_starve_clear(ps.pkts_sent, &priv->s_iowait); spin_unlock_irqrestore(&qp->s_lock, ps.flags); } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index d82ff57214c5..71b4258545c4 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -246,7 +246,7 @@ static void __sdma_process_event( enum sdma_events event); static void dump_sdma_state(struct sdma_engine *sde); static void sdma_make_progress(struct sdma_engine *sde, u64 status); -static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail); +static void sdma_desc_avail(struct sdma_engine *sde, uint avail); static void sdma_flush_descq(struct sdma_engine *sde); /** @@ -1762,13 +1762,14 @@ retry: * * This is called with head_lock held. */ -static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail) +static void sdma_desc_avail(struct sdma_engine *sde, uint avail) { struct iowait *wait, *nw; struct iowait *waits[SDMA_WAIT_BATCH_SIZE]; - unsigned i, n = 0, seq; + uint i, n = 0, seq, max_idx = 0; struct sdma_txreq *stx; struct hfi1_ibdev *dev = &sde->dd->verbs_dev; + u8 max_starved_cnt = 0; #ifdef CONFIG_SDMA_VERBOSITY dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx, @@ -1803,6 +1804,9 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail) if (num_desc > avail) break; avail -= num_desc; + /* Find the most starved wait member */ + iowait_starve_find_max(wait, &max_starved_cnt, + n, &max_idx); list_del_init(&wait->list); waits[n++] = wait; } @@ -1811,8 +1815,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail) } } while (read_seqretry(&dev->iowait_lock, seq)); + /* Schedule the most starved one first */ + if (n) + waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON); + for (i = 0; i < n; i++) - waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON); + if (i != max_idx) + waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON); } /* head_lock must be held */ @@ -2349,7 +2358,8 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx) static int sdma_check_progress( struct sdma_engine *sde, struct iowait *wait, - struct sdma_txreq *tx) + struct sdma_txreq *tx, + bool pkts_sent) { int ret; @@ -2362,7 +2372,7 @@ static int sdma_check_progress( seq = raw_seqcount_begin( (const seqcount_t *)&sde->head_lock.seqcount); - ret = wait->sleep(sde, wait, tx, seq); + ret = wait->sleep(sde, wait, tx, seq, pkts_sent); if (ret == -EAGAIN) sde->desc_avail = sdma_descq_freecnt(sde); } else { @@ -2376,6 +2386,7 @@ * @sde: sdma engine to use * @wait: wait structure to use when full (may be NULL) * @tx: sdma_txreq to submit + * @pkts_sent: has any packet been sent yet? * * The call submits the tx into the ring. If an iowait structure is non-NULL * the packet will be queued to the list in wait.
@@ -2387,7 +2398,8 @@ static int sdma_check_progress( */ int sdma_send_txreq(struct sdma_engine *sde, struct iowait *wait, - struct sdma_txreq *tx) + struct sdma_txreq *tx, + bool pkts_sent) { int ret = 0; u16 tail; @@ -2429,7 +2441,7 @@ unlock_noconn: ret = -ECOMM; goto unlock; nodesc: - ret = sdma_check_progress(sde, wait, tx); + ret = sdma_check_progress(sde, wait, tx, pkts_sent); if (ret == -EAGAIN) { ret = 0; goto retry; @@ -2498,8 +2510,10 @@ retry: } update_tail: total_count = submit_count + flush_count; - if (wait) + if (wait) { iowait_sdma_add(wait, total_count); + iowait_starve_clear(submit_count > 0, wait); + } if (tail != INVALID_TAIL) sdma_update_tail(sde, tail); spin_unlock_irqrestore(&sde->tail_lock, flags); @@ -2527,7 +2541,7 @@ unlock_noconn: ret = -ECOMM; goto update_tail; nodesc: - ret = sdma_check_progress(sde, wait, tx); + ret = sdma_check_progress(sde, wait, tx, submit_count > 0); if (ret == -EAGAIN) { ret = 0; goto retry; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 64f10b8b5db8..107011d8613b 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -852,7 +852,8 @@ struct iowait; int sdma_send_txreq(struct sdma_engine *sde, struct iowait *wait, - struct sdma_txreq *tx); + struct sdma_txreq *tx, + bool pkts_sent); int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, struct list_head *tx_list, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8f7cfdd9e9ec..ea2993f93647 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -272,7 +272,8 @@ static int defer_packet_queue( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *txreq, - unsigned int seq); + uint seq, + bool pkts_sent); static void activate_packet_queue(struct iowait *wait, int reason); static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, unsigned long len); @@ -294,7 +295,8 @@ static int defer_packet_queue( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *txreq, - unsigned seq) + uint seq, + bool pkts_sent) { struct hfi1_user_sdma_pkt_q *pq = container_of(wait, struct hfi1_user_sdma_pkt_q, busy); @@ -314,7 +316,7 @@ static int defer_packet_queue( xchg(&pq->state, SDMA_PKT_Q_DEFERRED); write_seqlock(&dev->iowait_lock); if (list_empty(&pq->busy.list)) - list_add_tail(&pq->busy.list, &sde->dmawait); + iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); write_sequnlock(&dev->iowait_lock); return -EBUSY; eagain: diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 9c9ded643ed4..3ef6384eae40 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -864,7 +864,8 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (unlikely(ret)) goto bail_build; } - ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq); + ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq, + ps->pkts_sent); if (unlikely(ret < 0)) { if (ret == -ECOMM) goto bail_ecomm; @@ -921,7 +922,8 @@ static int pio_wait(struct rvt_qp *qp, dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); - list_add_tail(&priv->s_iowait.list, &sc->piowait); + iowait_queue(ps->pkts_sent, &priv->s_iowait, + &sc->piowait); priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); rvt_get_qp(qp); diff --git a/drivers/infiniband/hw/hfi1/verbs.h 
b/drivers/infiniband/hw/hfi1/verbs.h index fdf1e1fb880c..34267c7ef85a 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -143,6 +143,7 @@ struct hfi1_pkt_state { unsigned long timeout_int; int cpu; bool in_thread; + bool pkts_sent; }; #define HFI1_PSN_CREDIT 16 diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h index 4a621cde4abb..eec7c1424991 100644 --- a/drivers/infiniband/hw/hfi1/vnic.h +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -103,6 +103,7 @@ struct hfi1_vnic_sdma { struct sdma_txreq stx; unsigned int state; u8 q_idx; + bool pkts_sent; }; /** diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index 51a817d3aa14..7815d7405462 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -198,11 +198,16 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, goto free_desc; tx->retry_count = 0; - ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq); + ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq, + vnic_sdma->pkts_sent); /* When -ECOMM, sdma callback will be called with ABORT status */ if (unlikely(ret && unlikely(ret != -ECOMM))) goto free_desc; + if (!ret) { + vnic_sdma->pkts_sent = true; + iowait_starve_clear(vnic_sdma->pkts_sent, &vnic_sdma->wait); + } return ret; free_desc: @@ -211,6 +216,8 @@ free_desc: tx_err: if (ret != -EBUSY) dev_kfree_skb_any(skb); + else + vnic_sdma->pkts_sent = false; return ret; } @@ -225,7 +232,8 @@ tx_err: static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *txreq, - unsigned int seq) + uint seq, + bool pkts_sent) { struct hfi1_vnic_sdma *vnic_sdma = container_of(wait, struct hfi1_vnic_sdma, wait); @@ -239,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; write_seqlock(&dev->iowait_lock); if (list_empty(&vnic_sdma->wait.list)) - list_add_tail(&vnic_sdma->wait.list, &sde->dmawait); + iowait_queue(pkts_sent, wait, &sde->dmawait); write_sequnlock(&dev->iowait_lock); return -EBUSY; } -- cgit v1.2.3-59-g8ed1b From 42492011ab23f44c63dad0c7096492313dc207e3 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:45:43 -0700 Subject: IB/hfi1: Assign context does not clean up file descriptor correctly on error In the error path for context allocation, the file descriptor pointer should not point to a context when an error occurs. Clean up the appropriate references on error. Fixes: 62239fc6e554 ("IB/hfi1: Clean up on context initialization failure") Reviewed-by: Ira Weiny Signed-off-by: Michael J.
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 37 ++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index fbf52841aea4..d36e17722e6d 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -94,6 +94,7 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo); static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo); +static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt); static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, @@ -813,15 +814,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) uctxt->rcvnowait = 0; uctxt->pionowait = 0; uctxt->event_flags = 0; - - hfi1_stats.sps_ctxts--; - if (++dd->freectxts == dd->num_user_contexts) - aspm_enable_all(dd); - - /* _rcd_put() should be done after releasing mutex */ - dd->rcd[uctxt->ctxt] = NULL; mutex_unlock(&hfi1_mutex); - hfi1_rcd_put(uctxt); /* dd reference */ + + deallocate_ctxt(uctxt); done: mmdrop(fdata->mm); kobject_put(&dd->kobj); @@ -898,10 +893,9 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) if (!ret) ret = init_user_ctxt(fd); - if (ret) { + if (ret) clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); - hfi1_rcd_put(fd->uctxt); - } + } else if (!ret) { ret = setup_base_ctxt(fd); if (fd->uctxt->subctxt_cnt) { @@ -917,6 +911,14 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) &fd->uctxt->event_flags); wake_up(&fd->uctxt->wait); } + if (ret) + deallocate_ctxt(fd->uctxt); + } + + /* If an error occurred, clear the reference */ + if (ret && fd->uctxt) { + hfi1_rcd_put(fd->uctxt); + fd->uctxt = NULL; } return ret; @@ -1087,6 +1089,19 @@ ctxdata_free: return ret; } +static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt) +{ + mutex_lock(&hfi1_mutex); + hfi1_stats.sps_ctxts--; + if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts) + aspm_enable_all(uctxt->dd); + + /* _rcd_put() should be done after releasing mutex */ + uctxt->dd->rcd[uctxt->ctxt] = NULL; + mutex_unlock(&hfi1_mutex); + hfi1_rcd_put(uctxt); /* dd reference */ +} + static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo) { -- cgit v1.2.3-59-g8ed1b From 91d970abe8d756843eaac57da903bf27f834b091 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:45:49 -0700 Subject: IB/hfi1: Remove unused user context data members Several data members of the user context have become unused over time. Cleaning them up. Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 6 ------ drivers/infiniband/hw/hfi1/hfi.h | 24 ------------------------ drivers/infiniband/hw/hfi1/user_sdma.c | 11 ----------- drivers/infiniband/hw/hfi1/user_sdma.h | 1 - 4 files changed, 42 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index d36e17722e6d..e10f526d2dc1 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -809,10 +809,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); - uctxt->rcvwait_to = 0; - uctxt->piowait_to = 0; - uctxt->rcvnowait = 0; - uctxt->pionowait = 0; uctxt->event_flags = 0; mutex_unlock(&hfi1_mutex); @@ -1067,8 +1063,6 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)); uctxt->jkey = generate_jkey(current_uid()); - INIT_LIST_HEAD(&uctxt->sdma_queues); - spin_lock_init(&uctxt->sdma_qlock); hfi1_stats.sps_ctxts++; /* * Disable ASPM when there are open user/PSM contexts to avoid diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2e5137b336ce..7877ebc0b7ae 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -243,24 +243,10 @@ struct hfi1_ctxtdata { /* lock protecting all Expected TID data */ struct mutex exp_lock; - /* number of pio bufs for this ctxt (all procs, if shared) */ - u32 piocnt; - /* first pio buffer for this ctxt */ - u32 pio_base; - /* chip offset of PIO buffers for this ctxt */ - u32 piobufs; /* per-context configuration flags */ unsigned long flags; /* per-context event flags for fileops/intr communication */ unsigned long event_flags; - /* WAIT_RCV that timed out, no interrupt */ - u32 rcvwait_to; - /* WAIT_PIO that timed out, no interrupt */ - u32 piowait_to; - /* WAIT_RCV already happened, no wait */ - u32 rcvnowait; - /* WAIT_PIO already happened, no wait */ - u32 pionowait; /* total number of polled urgent packets */ u32 urgent; /* saved total number of polled urgent packets for poll edge trigger */ @@ -290,7 +276,6 @@ struct hfi1_ctxtdata { u8 redirect_seq_cnt; /* ctxt rcvhdrq head offset */ u32 head; - u32 pkt_count; /* QPs waiting for context processing */ struct list_head qp_wait_list; /* interrupt handling */ @@ -299,15 +284,6 @@ struct hfi1_ctxtdata { unsigned numa_id; /* numa node of this context */ /* verbs stats per CTX */ struct hfi1_opcode_stats_perctx *opstats; - /* - * This is the kernel thread that will keep making - * progress on the user sdma requests behind the scenes. - * There is one per context (shared contexts use the master's). 
- */ - struct task_struct *progress; - struct list_head sdma_queues; - /* protect sdma queues */ - spinlock_t sdma_qlock; /* Is ASPM interrupt supported for this context */ bool aspm_intr_supported; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index ea2993f93647..64d31eb9bd82 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -346,7 +346,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_devdata *dd; struct hfi1_user_sdma_comp_q *cq; struct hfi1_user_sdma_pkt_q *pq; - unsigned long flags; if (!uctxt || !fd) return -EBADF; @@ -360,7 +359,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, if (!pq) return -ENOMEM; - INIT_LIST_HEAD(&pq->list); pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; @@ -421,10 +419,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, fd->pq = pq; fd->cq = cq; - spin_lock_irqsave(&uctxt->sdma_qlock, flags); - list_add(&pq->list, &uctxt->sdma_queues); - spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); - return 0; pq_mmu_fail: @@ -447,7 +441,6 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq; - unsigned long flags; hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, uctxt->ctxt, fd->subctxt); @@ -455,10 +448,6 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) if (pq) { if (pq->handler) hfi1_mmu_rb_unregister(pq->handler); - spin_lock_irqsave(&uctxt->sdma_qlock, flags); - if (!list_empty(&pq->list)) - list_del_init(&pq->list); - spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); iowait_sdma_drain(&pq->busy); /* Wait until all requests have been freed. */ wait_event_interruptible( diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index e5b10aefe212..161fdfddbb30 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -56,7 +56,6 @@ extern uint extended_psn; struct hfi1_user_sdma_pkt_q { - struct list_head list; unsigned ctxt; u16 subctxt; u16 n_max_reqs; -- cgit v1.2.3-59-g8ed1b From e6f7622df177d594f11d93343c3dda7637c761e0 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:45:55 -0700 Subject: IB/hfi1: Size rcd array index correctly and consistently The array index for the rcd array is sized several different ways throughout the code. Use the user interface size (u16) as the standard size and update the necessary code to reflect this. u16 is large enough for the largest amount of supported contexts. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/aspm.h | 6 +++--- drivers/infiniband/hw/hfi1/chip.c | 10 +++++----- drivers/infiniband/hw/hfi1/chip.h | 8 ++++---- drivers/infiniband/hw/hfi1/driver.c | 15 ++++++++------- drivers/infiniband/hw/hfi1/file_ops.c | 6 +++--- drivers/infiniband/hw/hfi1/hfi.h | 4 ++-- drivers/infiniband/hw/hfi1/init.c | 9 +++++---- drivers/infiniband/hw/hfi1/trace_rx.h | 2 +- drivers/infiniband/hw/hfi1/user_sdma.h | 2 +- drivers/infiniband/hw/hfi1/vnic_main.c | 2 +- 10 files changed, 33 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 794e6814a531..3f9a071ccac3 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -237,7 +237,7 @@ static inline void aspm_disable_all(struct hfi1_devdata *dd) { struct hfi1_ctxtdata *rcd; unsigned long flags; - unsigned i; + u16 i; for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { rcd = dd->rcd[i]; @@ -256,7 +256,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd) { struct hfi1_ctxtdata *rcd; unsigned long flags; - unsigned i; + u16 i; aspm_enable(dd); @@ -284,7 +284,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) static inline void aspm_init(struct hfi1_devdata *dd) { - unsigned i; + u16 i; spin_lock_init(&dd->aspm_lock); dd->aspm_supported = aspm_hw_l1_supported(dd); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 4fce173c8667..06a79a268d2e 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6802,7 +6802,7 @@ static void rxe_freeze(struct hfi1_devdata *dd) static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) { u32 rcvmask; - int i; + u16 i; /* enable all kernel contexts */ for (i = 0; i < dd->num_rcv_contexts; i++) { @@ -11722,7 +11722,7 @@ static u32 encoded_size(u32 size) return 0x1; /* if invalid, go with the minimum size */ } -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) +void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, u16 ctxt) { struct hfi1_ctxtdata *rcd; u64 rcvctrl, reg; @@ -14542,7 +14542,7 @@ static void init_txe(struct hfi1_devdata *dd) write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE); } -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey) +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey) { struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; unsigned sctxt; @@ -14579,7 +14579,7 @@ done: return ret; } -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt) +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt) { struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; unsigned sctxt; @@ -14608,7 +14608,7 @@ done: return ret; } -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey) +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey) { struct hfi1_ctxtdata *rcd; unsigned sctxt; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index d3622cb74b3f..008c970412ec 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1352,14 +1352,14 @@ void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt); +void hfi1_rcvctrl(struct hfi1_devdata *dd, 
unsigned int op, u16 ctxt); u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey); -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt); -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey); +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey); +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt); +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey); int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 4a149cca4718..01235d4757a5 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -837,9 +837,9 @@ bail: return last; } -static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt) +static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) { - int i; + u16 i; /* * For dynamically allocated kernel contexts (like vnic) switch @@ -857,9 +857,9 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt) &handle_receive_interrupt_nodma_rtail; } -static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt) +static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt) { - int i; + u16 i; /* * For dynamically allocated kernel contexts (like vnic) switch @@ -879,7 +879,7 @@ static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt) void set_all_slowpath(struct hfi1_devdata *dd) { - int i; + u16 i; /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) { @@ -1068,7 +1068,7 @@ void receive_interrupt_work(struct work_struct *work) struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, linkstate_active_work); struct hfi1_devdata *dd = ppd->dd; - int i; + u16 i; /* Received non-SC15 packet implies neighbor_normal */ ppd->neighbor_normal = 1; @@ -1269,7 +1269,8 @@ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, */ int hfi1_reset_device(int unit) { - int ret, i; + int ret; + u16 i; struct hfi1_devdata *dd = hfi1_lookup(unit); struct hfi1_pportdata *ppd; unsigned long flags; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index e10f526d2dc1..c8f34bc6b620 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -927,7 +927,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo) { - int i; + u16 i; struct hfi1_devdata *dd = fd->dd; u16 subctxt; @@ -978,7 +978,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo) { struct hfi1_ctxtdata *uctxt; - unsigned int ctxt; + u16 ctxt; int ret, numa; if (dd->flags & HFI1_FROZEN) { @@ -1429,7 +1429,7 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) { struct hfi1_ctxtdata *uctxt; struct hfi1_devdata *dd = ppd->dd; - unsigned ctxt; + u16 ctxt; int ret = 0; unsigned long flags; diff 
--git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7877ebc0b7ae..2ce3fc58b61b 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -217,7 +217,7 @@ struct hfi1_ctxtdata { struct kref kref; /* Device context index */ - unsigned ctxt; + u16 ctxt; /* * non-zero if ctxt can be shared, and defines the maximum number of * sub-contexts for this device context. @@ -1264,7 +1264,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd); int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd); int hfi1_create_ctxts(struct hfi1_devdata *dd); -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, +struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, int numa); void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index dfdb4126ca05..818a4b87af60 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -131,7 +131,7 @@ unsigned long *hfi1_cpulist; */ int hfi1_create_ctxts(struct hfi1_devdata *dd) { - unsigned i; + u16 i; int ret; /* Control context has to be always 0 */ @@ -233,7 +233,7 @@ void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) /* * Common code for user and kernel context setup. */ -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, +struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, int numa) { struct hfi1_devdata *dd = ppd->dd; @@ -615,7 +615,7 @@ static int init_after_reset(struct hfi1_devdata *dd) static void enable_chip(struct hfi1_devdata *dd) { u32 rcvmask; - u32 i; + u16 i; /* enable PIO send */ pio_send_control(dd, PSC_GLOBAL_ENABLE); @@ -692,7 +692,8 @@ wq_error: int hfi1_init(struct hfi1_devdata *dd, int reinit) { int ret = 0, pidx, lastfail = 0; - unsigned i, len; + unsigned long len; + u16 i; struct hfi1_ctxtdata *rcd; struct hfi1_pportdata *ppd; diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 84929578cfe6..bebf0a8f214d 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -114,7 +114,7 @@ TRACE_EVENT(hfi1_rcvhdr, ); TRACE_EVENT(hfi1_receive_interrupt, - TP_PROTO(struct hfi1_devdata *dd, u32 ctxt), + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt), TP_ARGS(dd, ctxt), TP_STRUCT__entry(DD_DEV_ENTRY(dd) __field(u32, ctxt) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 161fdfddbb30..73c2455189d3 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -56,7 +56,7 @@ extern uint extended_psn; struct hfi1_user_sdma_pkt_q { - unsigned ctxt; + u16 ctxt; u16 subctxt; u16 n_max_reqs; atomic_t n_reqs; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 5a3f80ba9752..9bd274408f32 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -106,7 +106,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata **vnic_ctxt) { struct hfi1_ctxtdata *uctxt; - unsigned int ctxt; + u16 ctxt; int ret; if (dd->flags & HFI1_FROZEN) -- cgit v1.2.3-59-g8ed1b From 17573972f44d6293ed4fe561816b701241cb0847 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Ruhl" Date: Mon, 24 Jul 2017 07:46:01 -0700 Subject: IB/hfi1: Use context pointer rather than context index The hfi1__ctxt_key functions take a context index and look up the context based on that index. Since the context index is being retrieved from the context, this doesn't seem optimal. Pass the context pointer for use, rather than the context index. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 90 ++++++++++++++--------------------- drivers/infiniband/hw/hfi1/chip.h | 8 ++-- drivers/infiniband/hw/hfi1/file_ops.c | 6 +-- 3 files changed, 45 insertions(+), 59 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 06a79a268d2e..2f747023444b 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14542,99 +14542,86 @@ static void init_txe(struct hfi1_devdata *dd) write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE); } -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey) +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, + u16 jkey) { - struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; + if (!rcd || !rcd->sc) + return -EINVAL; + + hw_ctxt = rcd->sc->hw_context; reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */ ((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) << SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT); /* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */ if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY)) reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, reg); /* * Enable send-side J_KEY integrity check, unless this is A0 h/w */ if (!is_ax(dd)) { - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); } /* Enable J_KEY check on receive context. */ reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK | ((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) << RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT); - write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg); -done: - return ret; + write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, reg); + + return 0; } -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt) +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { - struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0); + if (!rcd || !rcd->sc) + return -EINVAL; + + hw_ctxt = rcd->sc->hw_context; + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, 0); /* * Disable send-side J_KEY integrity check, unless this is A0 h/w. * This check would not have been enabled for A0 h/w, see * set_ctxt_jkey(). 
*/ if (!is_ax(dd)) { - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); } /* Turn off the J_KEY on the receive side */ - write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0); -done: - return ret; + write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, 0); + + return 0; } -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey) +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, + u16 pkey) { - struct hfi1_ctxtdata *rcd; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (ctxt < dd->num_rcv_contexts) { - rcd = dd->rcd[ctxt]; - } else { - ret = -EINVAL; - goto done; - } - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; + if (!rcd || !rcd->sc) + return -EINVAL; + + hw_ctxt = rcd->sc->hw_context; reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) << SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg); - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg); + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; reg &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); -done: - return ret; + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); + + return 0; } int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt) @@ -14645,9 +14632,6 @@ int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt) if (!ctxt || !ctxt->sc) return -EINVAL; - if (ctxt->ctxt >= dd->num_rcv_contexts) - return -EINVAL; - hw_ctxt = ctxt->sc->hw_context; reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 008c970412ec..995105214953 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1357,9 +1357,11 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey); -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt); -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey); +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, + u16 jkey); +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt, + u16 pkey); int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c8f34bc6b620..51e6a6f0e10f 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -795,7 +795,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) 
HFI1_RCVCTRL_NO_RHQ_DROP_DIS | HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); /* Clear the context's J_KEY */ - hfi1_clear_ctxt_jkey(dd, uctxt->ctxt); + hfi1_clear_ctxt_jkey(dd, uctxt); /* * If a send context is allocated, reset context integrity * checks to default and disable the send context. @@ -1172,7 +1172,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt) clear_rcvhdrtail(uctxt); /* Setup J_KEY before enabling the context */ - hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey); + hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey); rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP)) @@ -1545,7 +1545,7 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey) } if (intable) - ret = hfi1_set_ctxt_pkey(dd, uctxt->ctxt, pkey); + ret = hfi1_set_ctxt_pkey(dd, uctxt, pkey); done: return ret; } -- cgit v1.2.3-59-g8ed1b From 2250563e2c935d6401a2203be4de4ca2cf0db183 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:06 -0700 Subject: IB/hfi1: Pass the context pointer rather than the index The hfi1_rcvctrl() function receives an index which it then converts to an rcd. Since most functions have the rcd, use that instead. Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 14 ++++++++------ drivers/infiniband/hw/hfi1/chip.h | 3 ++- drivers/infiniband/hw/hfi1/file_ops.c | 11 +++++------ drivers/infiniband/hw/hfi1/init.c | 14 +++++++------- drivers/infiniband/hw/hfi1/intr.c | 2 +- drivers/infiniband/hw/hfi1/vnic_main.c | 4 ++-- 6 files changed, 25 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2f747023444b..a62ef4acdca4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6790,7 +6790,7 @@ static void rxe_freeze(struct hfi1_devdata *dd) /* disable all receive contexts */ for (i = 0; i < dd->num_rcv_contexts; i++) - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, dd->rcd[i]); } /* @@ -6814,9 +6814,9 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) rcvmask = HFI1_RCVCTRL_CTXT_ENB; /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ - rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? + rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? 
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; - hfi1_rcvctrl(dd, rcvmask, i); + hfi1_rcvctrl(dd, rcvmask, rcd); } /* enable port */ @@ -11722,16 +11722,18 @@ static u32 encoded_size(u32 size) return 0x1; /* if invalid, go with the minimum size */ } -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, u16 ctxt) +void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, + struct hfi1_ctxtdata *rcd) { - struct hfi1_ctxtdata *rcd; u64 rcvctrl, reg; int did_enable = 0; + u16 ctxt; - rcd = dd->rcd[ctxt]; if (!rcd) return; + ctxt = rcd->ctxt; + hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op); rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL); diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 995105214953..bef6301d825d 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1352,7 +1352,8 @@ void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, u16 ctxt); +void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, + struct hfi1_ctxtdata *rcd); u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 51e6a6f0e10f..7be75e0d4f7e 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -389,8 +389,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sc_disable(sc); ret = sc_enable(sc); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, - uctxt->ctxt); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt); } else { ret = sc_restart(sc); } @@ -793,7 +792,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_ONE_PKT_EGR_DIS | HFI1_RCVCTRL_NO_RHQ_DROP_DIS | - HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); /* Clear the context's J_KEY */ hfi1_clear_ctxt_jkey(dd, uctxt); /* @@ -1198,7 +1197,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; else rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; - hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); } static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, @@ -1410,7 +1409,7 @@ static unsigned int poll_next(struct file *fp, spin_lock_irq(&dd->uctxt_lock); if (hdrqempty(uctxt)) { set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt); pollflag = 0; } else { pollflag = POLLIN | POLLRDNORM; @@ -1495,7 +1494,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, } else { rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS; } - hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt); + hfi1_rcvctrl(dd, rcvctrl_op, uctxt); /* always; new head should be equal to new tail; see above */ bail: return 0; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 818a4b87af60..7f5e4c7dca89 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -603,8 +603,8 @@ static int init_after_reset(struct hfi1_devdata *dd) */ for (i = 0; i < 
dd->num_rcv_contexts; i++) hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | - HFI1_RCVCTRL_INTRAVAIL_DIS | - HFI1_RCVCTRL_TAILUPD_DIS, i); + HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_TAILUPD_DIS, dd->rcd[i]); pio_send_control(dd, PSC_GLOBAL_DISABLE); for (i = 0; i < dd->num_send_contexts; i++) sc_disable(dd->send_contexts[i].sc); @@ -634,7 +634,7 @@ static void enable_chip(struct hfi1_devdata *dd) rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; if (HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, NODROP_EGR_FULL)) rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; - hfi1_rcvctrl(dd, rcvmask, i); + hfi1_rcvctrl(dd, rcvmask, dd->rcd[i]); sc_enable(dd->rcd[i]->sc); } } @@ -915,10 +915,10 @@ static void shutdown_device(struct hfi1_devdata *dd) ppd = dd->pport + pidx; for (i = 0; i < dd->num_rcv_contexts; i++) hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS | - HFI1_RCVCTRL_CTXT_DIS | - HFI1_RCVCTRL_INTRAVAIL_DIS | - HFI1_RCVCTRL_PKEY_DIS | - HFI1_RCVCTRL_ONE_PKT_EGR_DIS, i); + HFI1_RCVCTRL_CTXT_DIS | + HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_PKEY_DIS | + HFI1_RCVCTRL_ONE_PKT_EGR_DIS, dd->rcd[i]); /* * Gracefully stop all sends allowing any in progress to * trickle out first. diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 04a5082d5ac5..9469be9940e0 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -196,7 +196,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd) if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) { wake_up_interruptible(&rcd->wait); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd->ctxt); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd); } else if (test_and_clear_bit(HFI1_CTXT_WAITING_URG, &rcd->event_flags)) { rcd->urgent++; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 9bd274408f32..89b056489769 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -95,7 +95,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; - hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); uctxt->is_vnic = true; done: @@ -186,7 +186,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, HFI1_RCVCTRL_INTRAVAIL_DIS | HFI1_RCVCTRL_ONE_PKT_EGR_DIS | HFI1_RCVCTRL_NO_RHQ_DROP_DIS | - HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); /* * VNIC contexts are allocated from user context pool. * Release them back to user context pool. -- cgit v1.2.3-59-g8ed1b From bf90aadd630c2c9f7f965ba1e90d41b5b46db7c9 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:12 -0700 Subject: IB/hfi1: Send MAD traps until repressed A trap should be sent to the FM until the FM sends a repress message. This is in line with the IBTA 13.4.9. Add the ability to resend traps until a repress message is received. Reviewed-by: Dennis Dalessandro Reviewed-by: Michael N. Henry Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 373 ++++++++++++++++++++++++++++--------- drivers/infiniband/hw/hfi1/mad.h | 3 +- drivers/infiniband/hw/hfi1/verbs.c | 5 + include/rdma/rdma_vt.h | 17 ++ 4 files changed, 310 insertions(+), 88 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index a081a98d728a..0a3e2dfdf56e 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -59,6 +59,16 @@ #define OPA_LINK_WIDTH_RESET_OLD 0x0fff #define OPA_LINK_WIDTH_RESET 0xffff +struct trap_node { + struct list_head list; + struct opa_mad_notice_attr data; + __be64 tid; + int len; + u32 retry; + u8 in_use; + u8 repress; +}; + static int smp_length_check(u32 data_size, u32 request_len) { if (unlikely(request_len < data_size)) @@ -97,28 +107,156 @@ void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port) ib_dispatch_event(&event); } -static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) +/* + * If the port is down, clean up all pending traps. We need to be careful + * with the given trap, because it may be queued. + */ +static void cleanup_traps(struct hfi1_ibport *ibp, struct trap_node *trap) +{ + struct trap_node *node, *q; + unsigned long flags; + struct list_head trap_list; + int i; + + for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) { + spin_lock_irqsave(&ibp->rvp.lock, flags); + list_replace_init(&ibp->rvp.trap_lists[i].list, &trap_list); + ibp->rvp.trap_lists[i].list_len = 0; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + + /* + * Remove all items from the list, freeing all the non-given + * traps. + */ + list_for_each_entry_safe(node, q, &trap_list, list) { + list_del(&node->list); + if (node != trap) + kfree(node); + } + } + + /* + * If this wasn't on one of the lists it would not be freed. If it + * was on the list, it is now safe to free. + */ + kfree(trap); +} + +static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp, + struct trap_node *trap) +{ + struct trap_node *node; + struct trap_list *trap_list; + unsigned long flags; + unsigned long timeout; + int found = 0; + + /* + * Since the retry (handle timeout) does not remove a trap request + * from the list, all we have to do is compare the node. + */ + spin_lock_irqsave(&ibp->rvp.lock, flags); + trap_list = &ibp->rvp.trap_lists[trap->data.generic_type & 0x0F]; + + list_for_each_entry(node, &trap_list->list, list) { + if (node == trap) { + node->retry++; + found = 1; + break; + } + } + + /* If it is not on the list, add it, limited to RVT_MAX_TRAP_LEN. */ + if (!found) { + if (trap_list->list_len < RVT_MAX_TRAP_LEN) { + trap_list->list_len++; + list_add_tail(&trap->list, &trap_list->list); + } else { + pr_warn_ratelimited("hfi1: Maximum trap limit reached for 0x%0x traps\n", + trap->data.generic_type); + kfree(trap); + } + } + + /* + * Next check to see if there is a timer pending. If not, set it up + * and get the first trap from the list. + */ + node = NULL; + if (!timer_pending(&ibp->rvp.trap_timer)) { + /* + * o14-2 + * If the time out is set we have to wait until it expires + * before the trap can be sent. 
+ * This should be > RVT_TRAP_TIMEOUT + */ + timeout = (RVT_TRAP_TIMEOUT * + (1UL << ibp->rvp.subnet_timeout)) / 1000; + mod_timer(&ibp->rvp.trap_timer, + jiffies + usecs_to_jiffies(timeout)); + node = list_first_entry(&trap_list->list, struct trap_node, + list); + node->in_use = 1; + } + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + + return node; +} + +static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp, + struct opa_smp *smp) +{ + struct trap_list *trap_list; + struct trap_node *trap; + unsigned long flags; + int i; + + if (smp->attr_id != IB_SMP_ATTR_NOTICE) + return; + + spin_lock_irqsave(&ibp->rvp.lock, flags); + for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) { + trap_list = &ibp->rvp.trap_lists[i]; + trap = list_first_entry_or_null(&trap_list->list, + struct trap_node, list); + if (trap && trap->tid == smp->tid) { + if (trap->in_use) { + trap->repress = 1; + } else { + trap_list->list_len--; + list_del(&trap->list); + kfree(trap); + } + break; + } + } + spin_unlock_irqrestore(&ibp->rvp.lock, flags); +} + +static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap) { struct ib_mad_send_buf *send_buf; struct ib_mad_agent *agent; struct opa_smp *smp; - int ret; unsigned long flags; - unsigned long timeout; int pkey_idx; u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp; agent = ibp->rvp.send_agent; - if (!agent) + if (!agent) { + cleanup_traps(ibp, trap); return; + } /* o14-3.2.1 */ - if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) + if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) { + cleanup_traps(ibp, trap); return; + } - /* o14-2 */ - if (ibp->rvp.trap_timeout && time_before(jiffies, - ibp->rvp.trap_timeout)) + /* Add the trap to the list if necessary and see if we can send it */ + trap = check_and_add_trap(ibp, trap); + if (!trap) return; pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY); @@ -139,11 +277,21 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; smp->class_version = OPA_SM_CLASS_VERSION; smp->method = IB_MGMT_METHOD_TRAP; - ibp->rvp.tid++; - smp->tid = cpu_to_be64(ibp->rvp.tid); + + /* Only update the transaction ID for new traps (o13-5). */ + if (trap->tid == 0) { + ibp->rvp.tid++; + /* make sure that tid != 0 */ + if (ibp->rvp.tid == 0) + ibp->rvp.tid++; + trap->tid = cpu_to_be64(ibp->rvp.tid); + } + smp->tid = trap->tid; + smp->attr_id = IB_SMP_ATTR_NOTICE; /* o14-1: smp->mkey = 0; */ - memcpy(smp->route.lid.data, data, len); + + memcpy(smp->route.lid.data, &trap->data, trap->len); spin_lock_irqsave(&ibp->rvp.lock, flags); if (!ibp->rvp.sm_ah) { @@ -152,31 +300,72 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid); if (IS_ERR(ah)) { - ret = PTR_ERR(ah); - } else { - send_buf->ah = ah; - ibp->rvp.sm_ah = ibah_to_rvtah(ah); - ret = 0; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + return; } + send_buf->ah = ah; + ibp->rvp.sm_ah = ibah_to_rvtah(ah); } else { - ret = -EINVAL; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + return; } } else { send_buf->ah = &ibp->rvp.sm_ah->ibah; - ret = 0; } + + /* + * If the trap was repressed while things were getting set up, don't + * bother sending it. This could happen for a retry. 
+ */ + if (trap->repress) { + list_del(&trap->list); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + kfree(trap); + ib_free_send_mad(send_buf); + return; + } + + trap->in_use = 0; spin_unlock_irqrestore(&ibp->rvp.lock, flags); - if (!ret) - ret = ib_post_send_mad(send_buf, NULL); - if (!ret) { - /* 4.096 usec. */ - timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000; - ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout); - } else { + if (ib_post_send_mad(send_buf, NULL)) ib_free_send_mad(send_buf); - ibp->rvp.trap_timeout = 0; +} + +void hfi1_handle_trap_timer(unsigned long data) +{ + struct hfi1_ibport *ibp = (struct hfi1_ibport *)data; + struct trap_node *trap = NULL; + unsigned long flags; + int i; + + /* Find the trap with the highest priority */ + spin_lock_irqsave(&ibp->rvp.lock, flags); + for (i = 0; !trap && i < RVT_MAX_TRAP_LISTS; i++) { + trap = list_first_entry_or_null(&ibp->rvp.trap_lists[i].list, + struct trap_node, list); } + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + + if (trap) + send_trap(ibp, trap); +} + +static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid) +{ + struct trap_node *trap; + + trap = kzalloc(sizeof(*trap), GFP_ATOMIC); + if (!trap) + return NULL; + + INIT_LIST_HEAD(&trap->list); + trap->data.generic_type = type; + trap->data.prod_type_lsb = IB_NOTICE_PROD_CA; + trap->data.trap_num = trap_num; + trap->data.issuer_lid = cpu_to_be32(lid); + + return trap; } /* @@ -185,28 +374,29 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, u32 qp1, u32 qp2, u16 lid1, u16 lid2) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; u32 _lid1 = lid1; u32 _lid2 = lid2; - memset(&data, 0, sizeof(data)); ibp->rvp.n_pkt_drops++; ibp->rvp.pkey_violations++; + trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_P_KEY, + lid); + if (!trap) + return; + /* Send violation trap */ - data.generic_type = IB_NOTICE_TYPE_SECURITY; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_BAD_P_KEY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_257_258.lid1 = cpu_to_be32(_lid1); - data.ntc_257_258.lid2 = cpu_to_be32(_lid2); - data.ntc_257_258.key = cpu_to_be32(key); - data.ntc_257_258.sl = sl << 3; - data.ntc_257_258.qp1 = cpu_to_be32(qp1); - data.ntc_257_258.qp2 = cpu_to_be32(qp2); - - send_trap(ibp, &data, sizeof(data)); + trap->data.ntc_257_258.lid1 = cpu_to_be32(_lid1); + trap->data.ntc_257_258.lid2 = cpu_to_be32(_lid2); + trap->data.ntc_257_258.key = cpu_to_be32(key); + trap->data.ntc_257_258.sl = sl << 3; + trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1); + trap->data.ntc_257_258.qp2 = cpu_to_be32(qp2); + + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } /* @@ -215,34 +405,36 @@ void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_M_KEY, + lid); + if (!trap) + return; + /* Send violation trap */ - data.generic_type = IB_NOTICE_TYPE_SECURITY; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_BAD_M_KEY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_256.lid = data.issuer_lid; - data.ntc_256.method = mad->method; - data.ntc_256.attr_id = 
mad->attr_id; - data.ntc_256.attr_mod = mad->attr_mod; - data.ntc_256.mkey = mkey; + trap->data.ntc_256.lid = trap->data.issuer_lid; + trap->data.ntc_256.method = mad->method; + trap->data.ntc_256.attr_id = mad->attr_id; + trap->data.ntc_256.attr_mod = mad->attr_mod; + trap->data.ntc_256.mkey = mkey; if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - data.ntc_256.dr_slid = dr_slid; - data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE; - if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) { - data.ntc_256.dr_trunc_hop |= + trap->data.ntc_256.dr_slid = dr_slid; + trap->data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE; + if (hop_cnt > ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path)) { + trap->data.ntc_256.dr_trunc_hop |= IB_NOTICE_TRAP_DR_TRUNC; - hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path); + hop_cnt = ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path); } - data.ntc_256.dr_trunc_hop |= hop_cnt; - memcpy(data.ntc_256.dr_rtn_path, return_path, + trap->data.ntc_256.dr_trunc_hop |= hop_cnt; + memcpy(trap->data.ntc_256.dr_rtn_path, return_path, hop_cnt); } - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + + send_trap(ibp, trap); } /* @@ -250,23 +442,24 @@ static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, */ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) { - struct opa_mad_notice_attr data; + struct trap_node *trap; struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); struct hfi1_devdata *dd = dd_from_dev(verbs_dev); struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_INFO, + OPA_TRAP_CHANGE_CAPABILITY, + lid); + if (!trap) + return; - data.generic_type = IB_NOTICE_TYPE_INFO; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_CHANGE_CAPABILITY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_144.lid = data.issuer_lid; - data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); - data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags); + trap->data.ntc_144.lid = trap->data.issuer_lid; + trap->data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); + trap->data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags); - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } /* @@ -274,19 +467,19 @@ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) */ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_INFO, OPA_TRAP_CHANGE_SYSGUID, + lid); + if (!trap) + return; - data.generic_type = IB_NOTICE_TYPE_INFO; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_CHANGE_SYSGUID; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid; - data.ntc_145.lid = data.issuer_lid; + trap->data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid; + trap->data.ntc_145.lid = trap->data.issuer_lid; - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } /* @@ -294,20 +487,21 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp) */ void hfi1_node_desc_chg(struct hfi1_ibport *ibp) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_INFO, + OPA_TRAP_CHANGE_CAPABILITY, + lid); + 
if (!trap) + return; - data.generic_type = IB_NOTICE_TYPE_INFO; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_CHANGE_CAPABILITY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_144.lid = data.issuer_lid; - data.ntc_144.change_flags = + trap->data.ntc_144.lid = trap->data.issuer_lid; + trap->data.ntc_144.change_flags = cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG); - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am, @@ -4144,6 +4338,11 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, */ ret = IB_MAD_RESULT_SUCCESS; break; + case IB_MGMT_METHOD_TRAP_REPRESS: + subn_handle_opa_trap_repress(ibp, smp); + /* Always successful */ + ret = IB_MAD_RESULT_SUCCESS; + break; default: smp->status |= IB_SMP_UNSUP_METHOD; ret = reply((struct ib_mad_hdr *)smp); diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index a4e2506bd5ca..4c1245072093 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -428,5 +428,6 @@ struct sc2vlnt { COUNTER_MASK(1, 4)) void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port); +void hfi1_handle_trap_timer(unsigned long data); #endif /* _HFI1_MAD_H */ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 3ef6384eae40..dc51bf247006 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1535,6 +1535,11 @@ static void init_ibport(struct hfi1_pportdata *ppd) ibp->sc_to_sl[i] = i; } + for (i = 0; i < RVT_MAX_TRAP_LISTS ; i++) + INIT_LIST_HEAD(&ibp->rvp.trap_lists[i].list); + setup_timer(&ibp->rvp.trap_timer, hfi1_handle_trap_timer, + (unsigned long)ibp); + spin_lock_init(&ibp->rvp.lock); /* Set the prefix to the default value (see ch. 4.1.1) */ ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 22fb15ff5e8b..fdfac0fd2f82 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -57,11 +57,21 @@ #include #include #include +#include #include #include #define RVT_MAX_PKEY_VALUES 16 +#define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */ +#define RVT_MAX_TRAP_LISTS ((IB_NOTICE_TYPE_INFO & 0x0F) + 1) +#define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */ + +struct trap_list { + u32 list_len; + struct list_head list; +}; + struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -128,6 +138,13 @@ struct rvt_ibport { u16 *pkey_table; struct rvt_ah *sm_ah; + + /* + * Keep a list of traps that have not been repressed. They will be + * resent based on trap_timer. + */ + struct trap_list trap_lists[RVT_MAX_TRAP_LISTS]; + struct timer_list trap_timer; }; #define RVT_CQN_MAX 16 /* maximum length of cq name */ -- cgit v1.2.3-59-g8ed1b From 59ec8736956456b7429bbfd613b288928416a0f0 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Mon, 24 Jul 2017 07:46:18 -0700 Subject: IB/hfi1: Fix code consistency for if/else blocks in chip.c Code structure is not consistent for if/else blocks and break instructions in set_link_state for case HLS_UP_INIT. Physical state uses break in case of an error and if/else blocks for logical use cases. 
These blocks should be implemented consistently. Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index a62ef4acdca4..789dbc40ebd0 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10569,18 +10569,19 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) dd_dev_err(dd, "%s: logical state did not change to INIT\n", __func__); - } else { - /* clear old transient LINKINIT_REASON code */ - if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR) - ppd->linkinit_reason = - OPA_LINKINIT_REASON_LINKUP; + break; + } - /* enable the port */ - add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); + /* clear old transient LINKINIT_REASON code */ + if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR) + ppd->linkinit_reason = + OPA_LINKINIT_REASON_LINKUP; - handle_linkup_change(dd, 1); - ppd->host_link_state = HLS_UP_INIT; - } + /* enable the port */ + add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); + + handle_linkup_change(dd, 1); + ppd->host_link_state = HLS_UP_INIT; break; case HLS_UP_ARMED: if (ppd->host_link_state != HLS_UP_INIT) -- cgit v1.2.3-59-g8ed1b From a156abb3cf14112d3aa3f3a9ba880d48cf064cef Mon Sep 17 00:00:00 2001 From: "Byczkowski, Jakub" Date: Mon, 24 Jul 2017 07:46:24 -0700 Subject: IB/hfi1: Fix initialization failure for debug firmware Loading debug signed firmware fails if started immediately after a failed attempt to load production firmware. A short delay is required, so add about a 100us delay after an RSA check failure. Reviewed-by: Dennis Dalessandro Reviewed-by: Easwar Hariharan Reviewed-by: Mike Marciniszyn Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/firmware.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 4042c11b2742..83efd6bec657 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -615,6 +615,14 @@ retry: fw_fabric_serdes_name = ALT_FW_FABRIC_NAME; fw_sbus_name = ALT_FW_SBUS_NAME; fw_pcie_serdes_name = ALT_FW_PCIE_NAME; + + /* + * Add a delay before obtaining and loading debug firmware. + * Authorization will fail if the delay between firmware + * authorization events is shorter than 50us. Add 100us to + * make a delay time safe. + */ + usleep_range(100, 120); } if (fw_sbus_load) { -- cgit v1.2.3-59-g8ed1b From a618b7e40af2b2b751790d602ffa93800b594eca Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Mon, 24 Jul 2017 07:46:30 -0700 Subject: IB/hfi1: Move saving PCI values to a separate function During PCIe initialization some registers' values from PCI config space are saved in order to restore them later (i.e. after reset). Restoring those values is done by a function called restore_pci_variables, while saving them is done directly in hfi1_pcie_ddinit. Move the saving of values into a separate function that mirrors the restore functionality. 
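To make the resulting symmetry concrete, the pairing can be sketched as follows. This is an abbreviated illustration, not part of the patch: only BAR0 is shown (the remaining registers in the diff below follow the same pattern), and the restore body is a plausible reconstruction of the pre-existing restore_pci_variables(), which this patch does not quote.

/*
 * Sketch: every register saved before reset has a matching restore.
 * pci_read_config_dword()/pci_write_config_dword() are the standard
 * kernel config-space accessors.
 */
static int save_pci_variables(struct hfi1_devdata *dd)
{
	int ret;

	/* BAR1, ROM, PCI_COMMAND and the PCIe capability registers
	 * are saved the same way; see the diff below.
	 */
	ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
				    &dd->pcibar0);
	if (ret)
		goto error;
	return 0;
error:
	dd_dev_err(dd, "Unable to read from PCI config\n");
	return ret;
}

static int restore_pci_variables(struct hfi1_devdata *dd)
{
	/* Symmetric write-back of the saved values after device reset. */
	return pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
				      dd->pcibar0);
}

Keeping save and restore as sibling functions in pcie.c makes it easy to audit that the two register lists stay in sync.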
Reviewed-by: Jakub Byczkowski Reviewed-by: Mike Marciniszyn Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 5 ++ drivers/infiniband/hw/hfi1/hfi.h | 1 + drivers/infiniband/hw/hfi1/pcie.c | 110 ++++++++++++++++++++------------------ 3 files changed, 64 insertions(+), 52 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 789dbc40ebd0..4a4405ee9302 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14866,6 +14866,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret < 0) goto bail_free; + /* Save PCI space registers to rewrite after device reset */ + ret = save_pci_variables(dd); + if (ret < 0) + goto bail_cleanup; + /* verify that reads actually work, save revision for reset check */ dd->revision = read_csr(dd, CCE_REVISION); if (dd->revision == ~(u64)0) { diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2ce3fc58b61b..2d32c5c314a3 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1835,6 +1835,7 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *dd); int request_msix(struct hfi1_devdata *dd, u32 msireq); int restore_pci_variables(struct hfi1_devdata *dd); +int save_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); int get_platform_config_field(struct hfi1_devdata *dd, diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index cc7be224095d..82447b7cdda1 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -221,63 +221,11 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) } dd_dev_info(dd, "WC RcvArray: %p for %x\n", dd->rcvarray_wc, dd->chip_rcv_array_count * 8); - /* - * Save BARs and command to rewrite after device reset. 
- */ - - ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); - if (ret) - goto read_error; - - ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); - if (ret) - goto read_error; - - ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, - &dd->pcie_devctl); - if (ret) - goto read_error; - - ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, - &dd->pcie_lnkctl); - if (ret) - goto read_error; - - ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, - &dd->pcie_devctl2); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, - &dd->pci_lnkctl3); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); - if (ret) - goto read_error; dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */ return 0; - -read_error: - dd_dev_err(dd, "Unable to read from PCI config\n"); - goto bail_error; nomem: ret = -ENOMEM; -bail_error: hfi1_pcie_ddcleanup(dd); return ret; } @@ -484,6 +432,64 @@ error: return ret; } +/* Save BARs and command to rewrite after device reset */ +int save_pci_variables(struct hfi1_devdata *dd) +{ + int ret = 0; + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, + &dd->pcibar0); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, + &dd->pcibar1); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); + if (ret) + goto error; + + ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); + if (ret) + goto error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, + &dd->pcie_devctl); + if (ret) + goto error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, + &dd->pcie_lnkctl); + if (ret) + goto error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, + &dd->pcie_devctl2); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, + &dd->pci_lnkctl3); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); + if (ret) + goto error; + + return 0; + +error: + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; +} + /* * BIOS may not set PCIe bus-utilization parameters for best performance. * Check and optionally adjust them to maximize our throughput. -- cgit v1.2.3-59-g8ed1b From 5e2d6764a729bf8d43894b0368b0ae11a19485c0 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Mon, 24 Jul 2017 07:46:36 -0700 Subject: IB/hfi1: Verify port data VLs credits on transition to Armed There is a window where the FM can read the buffer control table and decide not to program buffers. When a port goes down, the code clears the table and if it is not programmed, posted SDMA descriptors will never complete due to no buffer credits. 
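The fix therefore has two cooperating halves, both visible in the diff below; condensed, with their placement noted in the comments:

/*
 * Condensed from the patch below; these are fragments, not a
 * standalone function.
 */

/* In handle_linkup_change(), on link down: clear HW details of the
 * previous connection so stale credit state cannot linger.
 */
ppd->actual_vls_operational = 0;
reset_link_credits(dd);

/* In set_link_state(), case HLS_UP_ARMED: refuse the transition until
 * the FM has programmed the buffer control table again.
 */
if (!data_vls_operational(ppd)) {
	dd_dev_err(dd, "%s: data VLs not operational\n", __func__);
	ret = -EINVAL;
	break;
}

Because actual_vls_operational is zeroed on every link down, a value left over from a previous link-up can no longer satisfy the check on the transition to Armed.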
Reviewed-by: Mike Marciniszyn Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 16 +++++++++++++++- drivers/infiniband/hw/hfi1/intr.c | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 4a4405ee9302..8f7ce74a15de 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10489,6 +10489,14 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, } } +/* + * Verify if BCT for data VLs is non-zero. + */ +static inline bool data_vls_operational(struct hfi1_pportdata *ppd) +{ + return !!ppd->actual_vls_operational; +} + /* * Change the physical and/or logical link state. * @@ -10587,6 +10595,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ppd->host_link_state != HLS_UP_INIT) goto unexpected; + if (!data_vls_operational(ppd)) { + dd_dev_err(dd, + "%s: data VLs not operational\n", __func__); + ret = -EINVAL; + break; + } + ppd->host_link_state = HLS_UP_ARMED; set_logical_state(dd, LSTATE_ARMED); ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000); @@ -14832,7 +14847,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, } ppd->vls_supported = num_vls; ppd->vls_operational = ppd->vls_supported; - ppd->actual_vls_operational = ppd->vls_supported; /* Set the default MTU. */ for (vl = 0; vl < num_vls; vl++) dd->vld[vl].mtu = hfi1_max_mtu; diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 9469be9940e0..96845dfed5c5 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -164,6 +164,7 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) ppd->linkup = 0; /* clear HW details of the previous connection */ + ppd->actual_vls_operational = 0; reset_link_credits(dd); /* freeze after a link down to guarantee a clean egress */ -- cgit v1.2.3-59-g8ed1b From f13a6e5e2e0192737c3bdbdb16c5cc0181cc86e5 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:42 -0700 Subject: IB/hfi1: Split copy_to_user data copy for better security A copy_to_user() call assumes that two members of a data structure are sequential. Since this may not always be true, separate the copies to ensure a safe copy. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7be75e0d4f7e..650c1e578775 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -268,12 +268,14 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, /* * Copy the number of tidlist entries we used * and the length of the buffer we registered. - * These fields are adjacent in the structure so - * we can copy them at the same time. 
*/ addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt) + + sizeof(tinfo.tidcnt))) + return -EFAULT; + + addr = arg + offsetof(struct hfi1_tid_info, length); + if (copy_to_user((void __user *)addr, &tinfo.length, sizeof(tinfo.length))) ret = -EFAULT; } -- cgit v1.2.3-59-g8ed1b From 5efd40cad4bf98f0d1f6b726d67e39decd85edc9 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Sat, 29 Jul 2017 08:43:20 -0700 Subject: IB/hfi1: Harden state transition to Armed and Active There is a window that allows other threads to read the state of 'host_link_state' as the new state before the actual hardware state is set. This patch closes the window by indicating a new state only after the hardware transition is complete. Reviewed-by: Mike Marciniszyn Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 8f7ce74a15de..eca31dd49cae 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10602,16 +10602,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; } - ppd->host_link_state = HLS_UP_ARMED; set_logical_state(dd, LSTATE_ARMED); ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000); if (ret) { - /* logical state didn't change, stay at init */ - ppd->host_link_state = HLS_UP_INIT; dd_dev_err(dd, "%s: logical state did not change to ARMED\n", __func__); + break; } + ppd->host_link_state = HLS_UP_ARMED; /* * The simulator does not currently implement SMA messages, * so neighbor_normal is not set. Set it here when we first @@ -10624,18 +10623,16 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ppd->host_link_state != HLS_UP_ARMED) goto unexpected; - ppd->host_link_state = HLS_UP_ACTIVE; set_logical_state(dd, LSTATE_ACTIVE); ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000); if (ret) { - /* logical state didn't change, stay at armed */ - ppd->host_link_state = HLS_UP_ARMED; dd_dev_err(dd, "%s: logical state did not change to ACTIVE\n", __func__); } else { /* tell all engines to go running */ sdma_all_running(dd); + ppd->host_link_state = HLS_UP_ACTIVE; /* Signal the IB layer that the port has went active */ event.device = &dd->verbs_dev.rdi.ibdev; -- cgit v1.2.3-59-g8ed1b From 96603ed865b3c5a9a0d1c9ed434661519d49fde3 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Sat, 29 Jul 2017 08:43:26 -0700 Subject: IB/hfi1: Do not enable disabled port on cable insert Fix an issue where a disabled port can be enabled by inserting a cable. The port should be explicitly enabled instead. Reviewed-by: Michael J. 
Ruhl Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index eca31dd49cae..bd594e7b0078 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9306,12 +9306,6 @@ int start_link(struct hfi1_pportdata *ppd) */ tune_serdes(ppd); - if (!ppd->link_enabled) { - dd_dev_info(ppd->dd, - "%s: stopping link start because link is disabled\n", - __func__); - return 0; - } if (!ppd->driver_link_ready) { dd_dev_info(ppd->dd, "%s: stopping link start because driver is not ready\n", @@ -9529,6 +9523,13 @@ void qsfp_event(struct work_struct *work) if (!qsfp_mod_present(ppd)) return; + if (ppd->host_link_state == HLS_DN_DISABLE) { + dd_dev_info(ppd->dd, + "%s: stopping link start because link is disabled\n", + __func__); + return; + } + /* * Turn DC back on after cable has been re-inserted. Up until * now, the DC has been in reset to save power. -- cgit v1.2.3-59-g8ed1b From e87473bc1b6c2cb08f1b760cfc8cd012822241a6 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Sat, 29 Jul 2017 08:43:32 -0700 Subject: IB/hfi1: Only set fd pointer when base context is completely initialized The allocate_ctxt() function adds the context to the fd data structure. Since the context is not completely initialized, this can cause confusion as to whether the context is valid or not. Move the fd reference from allocate_ctxt() to setup_base_ctxt(). Update the necessary functions to be aware of this move. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 55 ++++++++++++++++++------------- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 4 +-- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 3 +- drivers/infiniband/hw/hfi1/user_sdma.c | 4 +-- drivers/infiniband/hw/hfi1/user_sdma.h | 3 +- 5 files changed, 40 insertions(+), 29 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 650c1e578775..a0c13fa5babb 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -81,19 +81,22 @@ static u64 kvirt_to_phys(void *addr); static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo); -static int init_user_ctxt(struct hfi1_filedata *fd); +static int init_user_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); static void user_init(struct hfi1_ctxtdata *uctxt); static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, __u32 len); static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, __u32 len); -static int setup_base_ctxt(struct hfi1_filedata *fd); +static int setup_base_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); static int setup_subctxt(struct hfi1_ctxtdata *uctxt); static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo); static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, - struct hfi1_user_info *uinfo); + struct hfi1_user_info *uinfo, + struct hfi1_ctxtdata **cd); static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt); static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); @@ -759,7 +762,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) flush_wc(); /* drain user sdma queue */ - hfi1_user_sdma_free_queues(fdata); + hfi1_user_sdma_free_queues(fdata, uctxt); /* release the cpu */ hfi1_put_proc_affinity(fdata->rec_cpu_num); @@ -845,6 +848,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) { int ret; unsigned int swmajor, swminor; + struct hfi1_ctxtdata *uctxt = NULL; swmajor = uinfo->userversion >> 16; if (swmajor != HFI1_USER_SWMAJOR) @@ -870,7 +874,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) * couldn't find a sub context. */ if (!ret) - ret = allocate_ctxt(fd, fd->dd, uinfo); + ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt); mutex_unlock(&hfi1_mutex); @@ -888,28 +892,27 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) /* The only thing a sub context needs is the user_xxx stuff */ if (!ret) - ret = init_user_ctxt(fd); + ret = init_user_ctxt(fd, fd->uctxt); if (ret) clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); } else if (!ret) { - ret = setup_base_ctxt(fd); - if (fd->uctxt->subctxt_cnt) { + ret = setup_base_ctxt(fd, uctxt); + if (uctxt->subctxt_cnt) { /* If there is an error, set the failed bit. 
*/ if (ret) set_bit(HFI1_CTXT_BASE_FAILED, - &fd->uctxt->event_flags); + &uctxt->event_flags); /* * Base context is done, notify anybody using a * sub-context that is waiting for this completion */ - clear_bit(HFI1_CTXT_BASE_UNINIT, - &fd->uctxt->event_flags); - wake_up(&fd->uctxt->wait); + clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); + wake_up(&uctxt->wait); } if (ret) - deallocate_ctxt(fd->uctxt); + deallocate_ctxt(uctxt); } /* If an error occurred, clear the reference */ @@ -976,7 +979,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, } static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, - struct hfi1_user_info *uinfo) + struct hfi1_user_info *uinfo, + struct hfi1_ctxtdata **cd) { struct hfi1_ctxtdata *uctxt; u16 ctxt; @@ -1071,14 +1075,13 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, */ if (dd->freectxts-- == dd->num_user_contexts) aspm_disable_all(dd); - fd->uctxt = uctxt; - /* Count the reference for the fd */ - hfi1_rcd_get(uctxt); + *cd = uctxt; return 0; ctxdata_free: + *cd = NULL; dd->rcd[ctxt] = NULL; hfi1_rcd_put(uctxt); return ret; @@ -1243,23 +1246,25 @@ static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, return ret; } -static int init_user_ctxt(struct hfi1_filedata *fd) +static int init_user_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; int ret; ret = hfi1_user_sdma_alloc_queues(uctxt, fd); if (ret) return ret; - ret = hfi1_user_exp_rcv_init(fd); + ret = hfi1_user_exp_rcv_init(fd, uctxt); + if (ret) + hfi1_user_sdma_free_queues(fd, uctxt); return ret; } -static int setup_base_ctxt(struct hfi1_filedata *fd) +static int setup_base_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; int ret = 0; @@ -1284,12 +1289,16 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) if (ret) goto setup_failed; - ret = init_user_ctxt(fd); + ret = init_user_ctxt(fd, uctxt); if (ret) goto setup_failed; user_init(uctxt); + /* Now that the context is set up, the fd can get a reference. */ + fd->uctxt = uctxt; + hfi1_rcd_get(uctxt); + return 0; setup_failed: diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 3baf71944471..d9036ba3f4eb 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -104,9 +104,9 @@ static struct mmu_rb_ops tid_rb_ops = { * receive caching. This needs to be done after the context has * been configured with the eager/expected RcvEntry counts. 
*/ -int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) +int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; int ret = 0; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 1bdc61be53cb..6cbaa4cf4970 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -51,7 +51,8 @@ #include "exp_rcv.h" -int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd); +int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, struct hfi1_tid_info *tinfo); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 64d31eb9bd82..8f02707e9c95 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -437,9 +437,9 @@ pq_reqs_nomem: return ret; } -int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) +int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq; hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 73c2455189d3..84c199d3e003 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -81,7 +81,8 @@ struct hfi1_user_sdma_comp_q { int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_filedata *fd); -int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd); +int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, struct iovec *iovec, unsigned long dim, unsigned long *count); -- cgit v1.2.3-59-g8ed1b From c822652ea669f8f46e4e8e2fc0618f2f72103f14 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Sat, 29 Jul 2017 08:43:37 -0700 Subject: IB/hfi1: Disambiguate corruption and uninitialized error cases The error messages when checksum validation of the platform configuration fields populated into the ASIC scratch registers fails are ambiguous. Disambiguate them. 
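The shape of the fix, visible in the platform.c diff below, is to emit a distinct diagnostic at each detection point (no version programmed at all vs. a checksum mismatch) instead of one merged message at the caller. A minimal user-space sketch of that pattern, assuming an illustrative ones'-complement check and invented names rather than the driver's:

#include <stdio.h>
#include <stdint.h>

static int validate_scratch_checksum(uint16_t version, uint16_t stored,
                                     uint16_t computed)
{
        /* An all-zeroes power-on default carries no version: uninitialized. */
        if (!version) {
                fprintf(stderr, "config bitmap uninitialized\n");
                return 0;
        }
        /* Stored and computed halves must sum to the ones' complement. */
        if ((uint16_t)(stored + computed) != 0xFFFF) {
                fprintf(stderr, "config bitmap corrupted\n");
                return 0;
        }
        return 1;
}

Callers now only see pass/fail; the specific reason is already on the console by the time 0 is returned.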
Reviewed-by: Jakub Byczkowski Signed-off-by: Easwar Hariharan Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 41307e474525..5c38a24484ab 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -58,8 +58,13 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd) version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT; /* Prevent power on default of all zeroes from passing checksum */ - if (!version) + if (!version) { + dd_dev_err(dd, "%s: Config bitmap uninitialized\n", __func__); + dd_dev_err(dd, + "%s: Please update your BIOS to support active channels\n", + __func__); return 0; + } /* * ASIC scratch 0 only contains the checksum and bitmap version as @@ -84,6 +89,8 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd) if (checksum + temp_scratch == 0xFFFF) return 1; + + dd_dev_err(dd, "%s: Configuration bitmap corrupted\n", __func__); return 0; } @@ -144,11 +151,6 @@ void get_platform_config(struct hfi1_devdata *dd) save_platform_config_fields(dd); return; } - dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n", - __func__); - dd_dev_err(dd, - "%s: Please update your BIOS to support active channels\n", - __func__); } else { ret = eprom_read_platform_config(dd, (void **)&temp_platform_config, -- cgit v1.2.3-59-g8ed1b From 3ffea7d8cd9e3f8f96514ac499f2510ad2f31d11 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sat, 29 Jul 2017 08:43:43 -0700 Subject: IB/{rdmavt, hfi1, qib}: Fix panic with post receive and SGE compression The server side of qperf panics as follows: [242446.336860] IP: report_bug+0x64/0x10 [242446.341031] PGD 1c0c067 [242446.341032] P4D 1c0c067 [242446.343951] PUD 1c0d063 [242446.346870] PMD 8587ea067 [242446.349788] PTE 800000083e14016 [242446.352901] [242446.358352] Oops: 0003 [#1] SM [242446.437919] CPU: 1 PID: 7442 Comm: irq/92-hfi1_0 k Not tainted 4.12.0-mam-asm #1 [242446.446365] Hardware name: Intel Corporation S2600WT2/S2600WT2, BIOS SE5C610.86B.01.01.0018.C4.072020161249 07/20/201 [242446.458397] task: ffff8808392d2b80 task.stack: ffffc9000664000 [242446.465097] RIP: 0010:report_bug+0x64/0x10 [242446.469859] RSP: 0018:ffffc900066439c0 EFLAGS: 0001000 [242446.475784] RAX: ffffffffa06647e4 RBX: ffffffffa06461e1 RCX: 000000000000000 [242446.483840] RDX: 0000000000000907 RSI: ffffffffa0675040 RDI: ffffffffffff740 [242446.491897] RBP: ffffc900066439e0 R08: 0000000000000001 R09: 000000000000025 [242446.499953] R10: ffffffff81a253df R11: 0000000000000133 R12: ffffc90006643b3 [242446.508010] R13: ffffffffa065bbf0 R14: 00000000000001e5 R15: 000000000000000 [242446.516067] FS: 0000000000000000(0000) GS:ffff88085f640000(0000) knlGS:000000000000000 [242446.525191] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003 [242446.531698] CR2: ffffffffa06647ee CR3: 0000000001c09000 CR4: 00000000001406e [242446.539756] Call Trace [242446.542582] fixup_bug+0x2c/0x5 [242446.546277] do_trap+0x12b/0x18 [242446.549972] do_error_trap+0x89/0x11 [242446.554171] ? hfi1_copy_sge+0x271/0x2b0 [hfi1 [242446.559324] ? ttwu_do_wakeup+0x1e/0x14 [242446.563795] ? 
ttwu_do_activate+0x77/0x8 [242446.568363] do_invalid_op+0x20/0x3 [242446.572448] invalid_op+0x1e/0x3 [242446.576247] RIP: 0010:hfi1_copy_sge+0x271/0x2b0 [hfi1 [242446.582075] RSP: 0018:ffffc90006643be8 EFLAGS: 0001004 [242446.587999] RAX: 0000000000000000 RBX: ffff88083e0fa240 RCX: 000000000000000 [242446.596058] RDX: 0000000000000000 RSI: ffff880842508000 RDI: ffff88083e0fa24 [242446.604116] RBP: ffffc90006643c28 R08: 0000000000000000 R09: 000000000000000 [242446.612172] R10: ffffc90009473640 R11: 0000000000000133 R12: 000000000000000 [242446.620228] R13: 0000000000000000 R14: 0000000000002000 R15: ffff88084250800 [242446.628293] ? hfi1_copy_sge+0x1a1/0x2b0 [hfi1 [242446.633449] hfi1_rc_rcv+0x3da/0x1270 [hfi1 [242446.638312] ? sc_buffer_alloc+0x113/0x150 [hfi1 [242446.643662] hfi1_ib_rcv+0x1c9/0x2e0 [hfi1 [242446.648428] process_receive_ib+0x19a/0x270 [hfi1 [242446.653866] ? process_rcv_qp_work+0xd2/0x160 [hfi1 [242446.659505] handle_receive_interrupt_nodma_rtail+0x184/0x2e0 [hfi1 [242446.666693] ? irq_finalize_oneshot+0x100/0x10 [242446.671846] receive_context_thread+0x1b/0x140 [hfi1 [242446.677576] irq_thread_fn+0x1e/0x4 [242446.681659] irq_thread+0x13c/0x1b [242446.685646] ? irq_forced_thread_fn+0x60/0x6 [242446.690604] kthread+0x112/0x15 [242446.694298] ? irq_thread_check_affinity+0xe0/0xe [242446.699738] ? kthread_park+0x60/0x6 [242446.703919] ? do_syscall_64+0x67/0x15 [242446.708292] ret_from_fork+0x25/0x3 [242446.712374] Code: 63 78 04 44 0f b7 70 08 41 89 d0 4c 8d 2c 38 41 83 e0 01 f6 c2 02 74 17 66 45 85 c0 74 11 f6 c2 04 b9 01 00 00 00 75 bb 83 ca 04 <66> 89 50 0a 66 45 85 c0 74 52 0f b6 48 0b 41 0f b7 f6 4d 89 e0 [242446.733527] RIP: report_bug+0x64/0x100 RSP: ffffc900066439c [242446.739935] CR2: ffffffffa06647e [242446.743763] ---[ end trace 0e90a20d0aa494f7 ]-- The root cause is that the qib/hfi1 post receive path doesn't interpret the new return value from rvt_lkey_ok() properly, leading to an MR reference count underrun. Additionally, remove an unused argument in rvt_sge_adjacent() as well as an unneeded incr local in rvt_post_one_wr(). Fixes: Commit 14fe13fcd3af ("IB/rdmavt: Compress adjacent SGEs in rvt_lkey_ok()") Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/ruc.c | 6 ++++-- drivers/infiniband/hw/qib/qib_ruc.c | 6 ++++-- drivers/infiniband/sw/rdmavt/mr.c | 8 +++----- drivers/infiniband/sw/rdmavt/qp.c | 16 +++++++--------- 4 files changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index f13ddb273d77..4afa00f921f2 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -74,8 +74,10 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) if (wqe->sg_list[i].length == 0) continue; /* Check LKEY */ - if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + ret = rvt_lkey_ok(rkt, pd, j ?
&ss->sg_list[j - 1] : &ss->sge, + NULL, &wqe->sg_list[i], + IB_ACCESS_LOCAL_WRITE); + if (unlikely(ret <= 0)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 28528459a052..e6a42a8972f5 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -58,8 +58,10 @@ static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) if (wqe->sg_list[i].length == 0) continue; /* Check LKEY */ - if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, + NULL, &wqe->sg_list[i], + IB_ACCESS_LOCAL_WRITE); + if (unlikely(ret <= 0)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index ea95672d9675..1b3801f78e78 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -779,7 +779,6 @@ out: /** * rvt_sge_adjacent - is isge compressible - * @isge: outgoing internal SGE * @last_sge: last outgoing SGE written * @sge: SGE to check * @@ -787,8 +786,7 @@ out: * * Return: true if isge is adjacent to last sge */ -static inline bool rvt_sge_adjacent(struct rvt_sge *isge, - struct rvt_sge *last_sge, +static inline bool rvt_sge_adjacent(struct rvt_sge *last_sge, struct ib_sge *sge) { if (last_sge && sge->lkey == last_sge->mr->lkey && @@ -840,7 +838,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, if (pd->user) return -EINVAL; - if (rvt_sge_adjacent(isge, last_sge, sge)) + if (rvt_sge_adjacent(last_sge, sge)) return 0; rcu_read_lock(); mr = rcu_dereference(dev->dma_mr); @@ -857,7 +855,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = 0; goto ok; } - if (rvt_sge_adjacent(isge, last_sge, sge)) + if (rvt_sge_adjacent(last_sge, sge)) return 0; rcu_read_lock(); mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 740611e4692a..1878a97364aa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1613,7 +1613,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct rvt_pd *pd; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); u8 log_pmtu; - int ret, incr; + int ret; size_t cplen; bool reserved_op; int local_ops_delayed = 0; @@ -1695,14 +1695,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp, if (length == 0) continue; - incr = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge, - &wr->sg_list[i], acc); - if (unlikely(incr < 0)) - goto bail_lkey_error; + ret = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge, + &wr->sg_list[i], acc); + if (unlikely(ret < 0)) + goto bail_inval_free; wqe->length += length; - if (incr) + if (ret) last_sge = &wqe->sg_list[j]; - j += incr; + j += ret; } wqe->wr.num_sge = j; } @@ -1755,8 +1755,6 @@ static int rvt_post_one_wr(struct rvt_qp *qp, return 0; -bail_lkey_error: - ret = incr; bail_inval_free: /* release mr holds */ while (j) { -- cgit v1.2.3-59-g8ed1b From 71d47008ca1b2ab10e0432e72e572c7ce5d8d63b Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Sat, 29 Jul 2017 08:43:49 -0700 Subject: IB/hfi1: Create workqueue for link events Currently, link down interrupts queue link entries on a workqueue intended for sending events only. Create a workqueue for queuing link events. 
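The mechanism the diff below introduces is a second, dedicated workqueue per port. A condensed sketch of the allocation and queuing calls, with the per-port structure flattened into file-scope variables and names chosen for illustration only:

#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *link_wq;
static struct work_struct link_down_work;

static void link_down_fn(struct work_struct *work)
{
        /* Handle the link-down transition out of interrupt context. */
}

static int link_wq_setup(int unit)
{
        /* max_active = 1 keeps link events strictly serialized. */
        link_wq = alloc_workqueue("hfi_link_%d", WQ_SYSFS | WQ_MEM_RECLAIM |
                                  WQ_UNBOUND, 1, unit);
        if (!link_wq)
                return -ENOMEM;
        INIT_WORK(&link_down_work, link_down_fn);
        return 0;
}

static void on_link_interrupt(void)
{
        /* Interrupt handlers hand the work to the dedicated queue. */
        queue_work(link_wq, &link_down_work);
}

Keeping event work (hfi1_wq) and link work (link_wq) on separate queues means a flood of events can no longer delay a link transition.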
Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 20 ++++++++++---------- drivers/infiniband/hw/hfi1/driver.c | 2 +- drivers/infiniband/hw/hfi1/hfi.h | 1 + drivers/infiniband/hw/hfi1/init.c | 26 ++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/pio.c | 2 +- drivers/infiniband/hw/hfi1/sdma.c | 2 +- 6 files changed, 40 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index bd594e7b0078..8443d41c6e35 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -5545,7 +5545,7 @@ static void update_rcverr_timer(unsigned long opaque) set_link_down_reason( ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN); - queue_work(ppd->hfi1_wq, &ppd->link_bounce_work); + queue_work(ppd->link_wq, &ppd->link_bounce_work); } dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt; @@ -6100,7 +6100,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) * will not happen. We have to do it here * before turning the DC off. */ - queue_work(ppd->hfi1_wq, &ppd->link_down_work); + queue_work(ppd->link_wq, &ppd->link_down_work); } } else { dd_dev_info(dd, "%s: QSFP module inserted\n", @@ -6135,7 +6135,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) /* Schedule the QSFP work only if there is a cable attached. */ if (qsfp_mod_present(ppd)) - queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work); + queue_work(ppd->link_wq, &ppd->qsfp_info.qsfp_work); } static int request_host_lcb_access(struct hfi1_devdata *dd) @@ -7738,12 +7738,12 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) host_msg &= ~(u64)HOST_REQ_DONE; } if (host_msg & BC_SMA_MSG) { - queue_work(ppd->hfi1_wq, &ppd->sma_message_work); + queue_work(ppd->link_wq, &ppd->sma_message_work); host_msg &= ~(u64)BC_SMA_MSG; } if (host_msg & LINKUP_ACHIEVED) { dd_dev_info(dd, "8051: Link up\n"); - queue_work(ppd->hfi1_wq, &ppd->link_up_work); + queue_work(ppd->link_wq, &ppd->link_up_work); host_msg &= ~(u64)LINKUP_ACHIEVED; } if (host_msg & EXT_DEVICE_CFG_REQ) { @@ -7751,7 +7751,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) host_msg &= ~(u64)EXT_DEVICE_CFG_REQ; } if (host_msg & VERIFY_CAP_FRAME) { - queue_work(ppd->hfi1_wq, &ppd->link_vc_work); + queue_work(ppd->link_wq, &ppd->link_vc_work); host_msg &= ~(u64)VERIFY_CAP_FRAME; } if (host_msg & LINK_GOING_DOWN) { @@ -7766,7 +7766,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) host_msg &= ~(u64)LINK_GOING_DOWN; } if (host_msg & LINK_WIDTH_DOWNGRADED) { - queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work); + queue_work(ppd->link_wq, &ppd->link_downgrade_work); host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED; } if (host_msg) { @@ -7809,7 +7809,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) dd_dev_info(dd, "%s: not queuing link down\n", __func__); } else { - queue_work(ppd->hfi1_wq, &ppd->link_down_work); + queue_work(ppd->link_wq, &ppd->link_down_work); } } } @@ -8017,7 +8017,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) dd_dev_info_ratelimited(dd, "%s: PortErrorAction bounce\n", __func__); set_link_down_reason(ppd, lcl_reason, 0, lcl_reason); - queue_work(ppd->hfi1_wq, &ppd->link_bounce_work); + 
queue_work(ppd->link_wq, &ppd->link_bounce_work); } } @@ -9685,7 +9685,7 @@ static void try_start_link(struct hfi1_pportdata *ppd) "QSFP not responding, waiting and retrying %d\n", (int)ppd->qsfp_retry_count); ppd->qsfp_retry_count++; - queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work, + queue_delayed_work(ppd->link_wq, &ppd->start_link_work, msecs_to_jiffies(QSFP_RETRY_WAIT)); return; } diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 01235d4757a5..0b7ca0e05320 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -914,7 +914,7 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, return 0; } - queue_work(rcd->ppd->hfi1_wq, lsaw); + queue_work(rcd->ppd->link_wq, lsaw); return 1; } return 0; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2d32c5c314a3..ee6c389f9515 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -576,6 +576,7 @@ struct hfi1_pportdata { /* SendDMA related entries */ struct workqueue_struct *hfi1_wq; + struct workqueue_struct *link_wq; /* move out of interrupt context */ struct work_struct link_vc_work; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 7f5e4c7dca89..be027c9c880e 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -660,6 +660,20 @@ static int create_workqueues(struct hfi1_devdata *dd) if (!ppd->hfi1_wq) goto wq_error; } + if (!ppd->link_wq) { + /* + * Make the link workqueue single-threaded to enforce + * serialization. + */ + ppd->link_wq = + alloc_workqueue( + "hfi_link_%d_%d", + WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND, + 1, /* max_active */ + dd->unit, pidx); + if (!ppd->link_wq) + goto wq_error; + } } return 0; wq_error: @@ -670,6 +684,10 @@ wq_error: destroy_workqueue(ppd->hfi1_wq); ppd->hfi1_wq = NULL; } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } } return -ENOMEM; } @@ -954,6 +972,10 @@ static void shutdown_device(struct hfi1_devdata *dd) destroy_workqueue(ppd->hfi1_wq); ppd->hfi1_wq = NULL; } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } } sdma_exit(dd); } @@ -1575,6 +1597,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) destroy_workqueue(ppd->hfi1_wq); ppd->hfi1_wq = NULL; } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } } if (!j) hfi1_device_remove(dd); diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index adb6a4da6107..7108a4b5e94c 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1012,7 +1012,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", __func__, sc->sw_index, sc->hw_context, (u32)reg); - queue_work(dd->pport->hfi1_wq, + queue_work(dd->pport->link_wq, &dd->pport->link_bounce_work); break; } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 71b4258545c4..6781bcdb10b3 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -325,7 +325,7 @@ static void sdma_wait_for_packet_egress(struct sdma_engine *sde, /* timed out - bounce the link */ dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n", __func__, sde->this_idx, (u32)reg); - 
queue_work(dd->pport->hfi1_wq, + queue_work(dd->pport->link_wq, &dd->pport->link_bounce_work); break; } -- cgit v1.2.3-59-g8ed1b From 626c077c025f9da6dce809b5a8300ed44ef02b6f Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Sat, 29 Jul 2017 08:43:55 -0700 Subject: IB/hfi1: Prevent link down request double queuing When link interrupts occur, multiple link down requests could be queued up when only one is needed. This could get the hfi1 out of sync with its link partner during LNI. Only allow one link down request to be queued at any one time. Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 +++- drivers/infiniband/hw/hfi1/hfi.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 8443d41c6e35..a3af46cbbf8d 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7045,6 +7045,7 @@ void handle_link_down(struct work_struct *work) /* Go offline first, then deal with reading/writing through 8051 */ was_up = !!(ppd->host_link_state & HLS_UP); set_link_state(ppd, HLS_DN_OFFLINE); + xchg(&ppd->is_link_down_queued, 0); if (was_up) { lcl_reason = 0; @@ -7805,10 +7806,11 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) */ if ((ppd->host_link_state & (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) || - ppd->link_enabled == 0) { + ppd->link_enabled == 0 || ppd->is_link_down_queued) { dd_dev_info(dd, "%s: not queuing link down\n", __func__); } else { + xchg(&ppd->is_link_down_queued, 1); queue_work(ppd->link_wq, &ppd->link_down_work); } } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index ee6c389f9515..fb5f8394fbed 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -644,6 +644,7 @@ struct hfi1_pportdata { /* placeholders for IB MAD packet settings */ u8 overrun_threshold; u8 phy_error_threshold; + unsigned int is_link_down_queued; /* Used to override LED behavior for things like maintenance beaconing*/ /* -- cgit v1.2.3-59-g8ed1b From 913cc67159bc85a96d94df301ca39c1b2c540dca Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Sat, 29 Jul 2017 08:44:01 -0700 Subject: IB/hfi1: Always perform offline transition Always initiate an offline transition request when a link down occurs. The firmware will use this request to confirm that the driver has seen the link down message. A host version is set to indicate this driver behavior to the firmware. 
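The handshake itself lands in write_host_interface_version() in the diff below; the register update is an ordinary clear-then-set of a field inside a wider configuration frame. The same idiom as a stand-alone sketch, with illustrative shift/mask values:

#include <stdint.h>

#define VERSION_SHIFT 16
#define VERSION_MASK  0xffu

static uint32_t set_version_field(uint32_t frame, uint8_t version)
{
        /* Clear the old field, then OR in the new value at its shift. */
        frame &= ~((uint32_t)VERSION_MASK << VERSION_SHIFT);
        frame |= (uint32_t)version << VERSION_SHIFT;
        return frame;
}

Reading the frame, updating it with a helper like this, and writing it back mirrors the read_8051_config()/load_8051_config() sequence visible in the patch.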
Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 66 +++++++++++++++++------------------ drivers/infiniband/hw/hfi1/chip.h | 5 +++ drivers/infiniband/hw/hfi1/firmware.c | 8 +++++ 3 files changed, 46 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index a3af46cbbf8d..101fbbbc2522 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8832,6 +8832,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, & REMOTE_DEVICE_REV_MASK; } +int write_host_interface_version(struct hfi1_devdata *dd, u8 version) +{ + u32 frame; + u32 mask; + + mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT); + read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame); + /* Clear, then set field */ + frame &= ~mask; + frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT); + return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, + frame); +} + void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, u8 *ver_patch) { @@ -10262,49 +10276,35 @@ static void force_logical_link_state_down(struct hfi1_pportdata *ppd) static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) { struct hfi1_devdata *dd = ppd->dd; - u32 pstate, previous_state; + u32 previous_state; int ret; - int do_transition; - int do_wait; update_lcb_cache(dd); previous_state = ppd->host_link_state; ppd->host_link_state = HLS_GOING_OFFLINE; - pstate = read_physical_state(dd); - if (pstate == PLS_OFFLINE) { - do_transition = 0; /* in right state */ - do_wait = 0; /* ...no need to wait */ - } else if ((pstate & 0xf0) == PLS_OFFLINE) { - do_transition = 0; /* in an offline transient state */ - do_wait = 1; /* ...wait for it to settle */ - } else { - do_transition = 1; /* need to move to offline */ - do_wait = 1; /* ...will need to wait */ - } - if (do_transition) { - ret = set_physical_link_state(dd, - (rem_reason << 8) | PLS_OFFLINE); + /* start offline transition */ + ret = set_physical_link_state(dd, (rem_reason << 8) | PLS_OFFLINE); - if (ret != HCMD_SUCCESS) { - dd_dev_err(dd, - "Failed to transition to Offline link state, return %d\n", - ret); - return -EINVAL; - } - if (ppd->offline_disabled_reason == - HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)) - ppd->offline_disabled_reason = - HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); + if (ret != HCMD_SUCCESS) { + dd_dev_err(dd, + "Failed to transition to Offline link state, return %d\n", + ret); + return -EINVAL; } + if (ppd->offline_disabled_reason == + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)) + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); - if (do_wait) { - /* it can take a while for the link to go down */ - ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000); - if (ret < 0) - return ret; - } + /* + * Wait for offline transition. It can take a while for + * the link to go down. 
+ */ + ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000); + if (ret < 0) + return ret; /* * Now in charge of LCB - must be after the physical state is diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index bef6301d825d..6a0c691f1385 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -384,6 +384,7 @@ #define VERIFY_CAP_LOCAL_FABRIC 0x08 #define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09 #define LOCAL_DEVICE_ID 0x0a +#define RESERVED_REGISTERS 0x0b #define LOCAL_LNI_INFO 0x0c #define REMOTE_LNI_INFO 0x0d #define MISC_STATUS 0x0e @@ -506,6 +507,9 @@ #define DOWN_REMOTE_REASON_SHIFT 16 #define DOWN_REMOTE_REASON_MASK 0xff +#define HOST_INTERFACE_VERSION_SHIFT 16 +#define HOST_INTERFACE_VERSION_MASK 0xff + /* verify capability PHY power management bits */ #define PWRM_BER_CONTROL 0x1 #define PWRM_BANDWIDTH_CONTROL 0x2 @@ -704,6 +708,7 @@ int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result); /* chip.c */ void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, u8 *ver_patch); +int write_host_interface_version(struct hfi1_devdata *dd, u8 version); void read_guid(struct hfi1_devdata *dd); int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 83efd6bec657..885714b93557 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -69,6 +69,7 @@ #define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw" #define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw" #define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw" +#define HOST_INTERFACE_VERSION 1 static uint fw_8051_load = 1; static uint fw_fabric_serdes_load = 1; @@ -1087,6 +1088,13 @@ static int load_8051_firmware(struct hfi1_devdata *dd, dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", (int)ver_major, (int)ver_minor, (int)ver_patch); dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); + ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION); + if (ret != HCMD_SUCCESS) { + dd_dev_err(dd, + "Failed to set host interface version, return 0x%x\n", + ret); + return -EIO; + } return 0; } -- cgit v1.2.3-59-g8ed1b From 582faf3150f57b8364ac9d2aa731d7368ada7a4b Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:37:47 -0400 Subject: IB/core: Change port_attr.lid size from 16 to 32 bits lid field in struct ib_port_attr is increased to 32 bits. This enables core components to use larger LIDs if needed. The user ABI is unchanged and still returns 16-bit values when queried.
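The demotion back to the 16-bit user ABI is handled by OPA_TO_IB_UCAST_LID(), added to opa_addr.h later in this patch: a LID that no longer fits the IB unicast space maps to 0, the reserved LID. The same logic as a plain function, with the multicast base written in host byte order and an illustrative constant name:

#include <stdint.h>

#define IB_MCAST_LID_BASE_CPU 0xC000u /* 16-bit IB multicast LIDs start here */

static uint16_t opa_to_ib_ucast_lid(uint32_t lid)
{
        /* Extended LIDs have no 16-bit representation; return reserved 0. */
        return lid >= IB_MCAST_LID_BASE_CPU ? 0 : (uint16_t)lid;
}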
Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Doug Ledford --- drivers/infiniband/core/core_priv.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 5 ++++- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- drivers/infiniband/hw/mlx4/mad.c | 2 +- drivers/infiniband/hw/mthca/mthca_mad.c | 2 +- include/rdma/ib_verbs.h | 2 +- include/rdma/opa_addr.h | 3 ++- 7 files changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 11ae67514e13..6b54280530c9 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -38,6 +38,7 @@ #include #include +#include #include #include "mad_priv.h" diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2c98533a0203..eef2623406cc 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -275,8 +275,11 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; - resp.lid = attr.lid; resp.sm_lid = attr.sm_lid; + if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) + resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); + else + resp.lid = (u16)attr.lid; resp.lmc = attr.lmc; resp.max_vl_num = attr.max_vl_num; resp.sm_sl = attr.sm_sl; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index ea24230ea0d4..5a897b0106a9 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -528,7 +528,7 @@ static int set_guid_rec(struct ib_device *ibdev, memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); - guid_info_rec.lid = cpu_to_be16(attr.lid); + guid_info_rec.lid = cpu_to_be16((u16)attr.lid); guid_info_rec.block_num = index; memcpy(guid_info_rec.guid_info_list, rec_det->all_recs, diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 21d31cb1325f..00f057033cb9 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -860,7 +860,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) - prev_lid = pattr.lid; + prev_lid = (u16)pattr.lid; err = mlx4_MAD_IFC(to_mdev(ibdev), (mad_flags & IB_MAD_IGNORE_MKEY ? 
MLX4_MAD_IFC_IGNORE_MKEY : 0) | diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7df3db71777a..617531f1bfc6 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -256,7 +256,7 @@ int mthca_process_mad(struct ib_device *ibdev, in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) - prev_lid = pattr.lid; + prev_lid = (u16)pattr.lid; err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b5732432bb29..4fa94e69b1fc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -549,8 +549,8 @@ struct ib_port_attr { u32 bad_pkey_cntr; u32 qkey_viol_cntr; u16 pkey_tbl_len; - u16 lid; u16 sm_lid; + u32 lid; u8 lmc; u8 max_vl_num; u8 sm_sl; diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index eace28f1555d..46d0567fffea 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -50,7 +50,8 @@ #define OPA_SPECIAL_OUI (0x00066AULL) #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) - +#define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ ? 0 : x) /** * ib_is_opa_gid: Returns true if the top 24 bits of the gid * contains the OPA_STL_OUI identifier. This identifies that -- cgit v1.2.3-59-g8ed1b From 7db20ecd1d9700e2c240dee505162eb56ab55b5b Mon Sep 17 00:00:00 2001 From: "Hiatt, Don" Date: Thu, 8 Jun 2017 13:37:49 -0400 Subject: IB/core: Change wc.slid from 16 to 32 bits slid field in struct ib_wc is increased to 32 bits. This enables core components to use larger LIDs if needed. The user ABI is unchanged and still returns 16-bit values when queried.
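Consumers that still speak 16-bit IB SLIDs go through the two truncating helpers this patch adds to ib_verbs.h (visible further down in the diff). A user-space rendering of their intent, with htons() standing in for the kernel's cpu_to_be16():

#include <stdint.h>
#include <arpa/inet.h> /* htons() as a stand-in for cpu_to_be16() */

static uint16_t slid_cpu16(uint32_t slid)
{
        return (uint16_t)slid;          /* low 16 bits, CPU byte order */
}

static uint16_t slid_be16(uint32_t slid)
{
        return htons((uint16_t)slid);   /* low 16 bits, big-endian */
}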
Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 4 ++-- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 10 +++++++--- drivers/infiniband/hw/hfi1/mad.c | 2 +- drivers/infiniband/hw/mlx4/mad.c | 6 +++--- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mthca/mthca_cmd.c | 4 ++-- drivers/infiniband/hw/mthca/mthca_mad.c | 2 +- drivers/infiniband/sw/rdmavt/cq.c | 2 +- include/rdma/ib_verbs.h | 14 +++++++++++++- 10 files changed, 32 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2b4d613a3474..b39ee16aa479 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1703,7 +1703,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) { if (!cm_req_get_primary_subnet_local(req_msg)) { if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { - req_msg->primary_local_lid = cpu_to_be16(wc->slid); + req_msg->primary_local_lid = ib_slid_be16(wc->slid); cm_req_set_primary_sl(req_msg, wc->sl); } @@ -1713,7 +1713,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) if (!cm_req_get_alt_subnet_local(req_msg)) { if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { - req_msg->alt_local_lid = cpu_to_be16(wc->slid); + req_msg->alt_local_lid = ib_slid_be16(wc->slid); cm_req_set_alt_sl(req_msg, wc->sl); } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 36a6f5c8914c..ff3c67a7aaad 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -229,7 +229,7 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.status = 0; packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.lid = ib_slid_be16(mad_recv_wc->wc->slid); packet->mad.hdr.sl = mad_recv_wc->wc->sl; packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 01e2ff023980..eb0da3784bf4 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1190,7 +1190,8 @@ out: return ret ? 
ret : in_len; } -static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) +static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, + struct ib_wc *wc) { struct ib_uverbs_wc tmp; @@ -1204,7 +1205,10 @@ static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) tmp.src_qp = wc->src_qp; tmp.wc_flags = wc->wc_flags; tmp.pkey_index = wc->pkey_index; - tmp.slid = wc->slid; + if (rdma_cap_opa_ah(ib_dev, wc->port_num)) + tmp.slid = OPA_TO_IB_UCAST_LID(wc->slid); + else + tmp.slid = ib_slid_cpu16(wc->slid); tmp.sl = wc->sl; tmp.dlid_path_bits = wc->dlid_path_bits; tmp.port_num = wc->port_num; @@ -1248,7 +1252,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (!ret) break; - ret = copy_wc_to_user(data_ptr, &wc); + ret = copy_wc_to_user(ib_dev, data_ptr, &wc); if (ret) goto out_put; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 5977673a52d4..00ebc26cd187 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -3958,7 +3958,7 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp, const struct ib_wc *in_wc) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u16 slid = in_wc->slid; + u16 slid = ib_slid_cpu16(in_wc->slid); u16 pkey; if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys)) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 00f057033cb9..04fb44e7699e 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -169,7 +169,7 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, op_modifier |= 0x4; - in_modifier |= in_wc->slid << 16; + in_modifier |= ib_slid_cpu16(in_wc->slid) << 16; } err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier, @@ -625,7 +625,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2); } else { tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); - tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); + tun_mad->hdr.slid_mac_47_32 = ib_slid_be16(wc->slid); } ib_dma_sync_single_for_device(&dev->ib_dev, @@ -826,7 +826,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } } - slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { forward_trap(to_mdev(ibdev), port_num, in_mad); diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 95db929bdc34..cd2264ac88ae 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -78,7 +78,7 @@ static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, u16 slid; int err; - slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 9d83a53c0c67..e19ae0b9b439 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1921,7 +1921,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, (in_wc->wc_flags & IB_WC_GRH ? 
0x80 : 0); MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET); - MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET); + MTHCA_PUT(inbox, ib_slid_cpu16(in_wc->slid), MAD_IFC_RLID_OFFSET); MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); if (in_grh) @@ -1929,7 +1929,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, op_modifier |= 0x4; - in_modifier |= in_wc->slid << 16; + in_modifier |= ib_slid_cpu16(in_wc->slid) << 16; } err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 617531f1bfc6..a9caadab22cf 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -205,7 +205,7 @@ int mthca_process_mad(struct ib_device *ibdev, u16 *out_mad_pkey_index) { int err; - u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + u16 slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; const struct ib_mad *in_mad = (const struct ib_mad *)in; diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 0ae2ff8cf81e..0335a3df74d5 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -107,7 +107,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) wc->uqueue[head].src_qp = entry->src_qp; wc->uqueue[head].wc_flags = entry->wc_flags; wc->uqueue[head].pkey_index = entry->pkey_index; - wc->uqueue[head].slid = entry->slid; + wc->uqueue[head].slid = ib_slid_cpu16(entry->slid); wc->uqueue[head].sl = entry->sl; wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; wc->uqueue[head].port_num = entry->port_num; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 620535908118..7eaf7d2ab424 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -948,7 +948,7 @@ struct ib_wc { u32 src_qp; int wc_flags; u16 pkey_index; - u16 slid; + u32 slid; u8 sl; u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ @@ -3706,4 +3706,16 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, else return RDMA_AH_ATTR_TYPE_IB; } + +/* Return slid in 16bit CPU encoding */ +static inline u16 ib_slid_cpu16(u32 slid) +{ + return (u16)slid; +} + +/* Return slid in 16bit BE encoding */ +static inline u16 ib_slid_be16(u32 slid) +{ + return cpu_to_be16((u16)slid); +} #endif /* IB_VERBS_H */ -- cgit v1.2.3-59-g8ed1b From 40b24403f33ed8e9401b087e25f46d002fc8f396 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 13 Jul 2017 11:09:42 +0300 Subject: mlx5: support ->get_vector_affinity Simply refer to the generic affinity mask helper. 
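The interesting half is the consumer side: with this op wired up, a ULP can ask which CPUs service a given completion vector and place its queues accordingly. A hedged sketch of such a check, calling the op directly through struct ib_device (it assumes the device populated the new get_vector_affinity hook, as the mlx5 diff below does):

#include <linux/cpumask.h>
#include <rdma/ib_verbs.h>

static bool vector_serves_cpu(struct ib_device *dev, int comp_vector, int cpu)
{
        const struct cpumask *mask;

        if (!dev->get_vector_affinity)
                return false;
        mask = dev->get_vector_affinity(dev, comp_vector);
        return mask && cpumask_test_cpu(cpu, mask);
}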
Reviewed-by: Christoph Hellwig Acked-by: Leon Romanovsky Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a7f2e60085c4..a6d3bcfbf229 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3556,6 +3556,14 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, return netdev; } +const struct cpumask *mlx5_ib_get_vector_affinity(struct ib_device *ibdev, + int comp_vector) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + + return mlx5_get_vector_affinity(dev->mdev, comp_vector); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -3686,6 +3694,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; dev->ib_dev.get_dev_fw_str = get_dev_fw_str; + dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity; if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev; -- cgit v1.2.3-59-g8ed1b From 9abb0d1bbd9529c574eacd8586e2bf68d17966cd Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 27 Jun 2017 16:49:53 +0300 Subject: RDMA: Simplify get firmware interface There is a need to forward the FW version to a user space application through RDMA netlink. In order to make it safe, there is a need to declare a nla_policy and limit the size of the FW string. The new define IB_FW_VERSION_NAME_MAX will limit the size of the FW version string. That define was chosen to be equal to ETHTOOL_FWVERS_LEN, because many drivers are already indirectly limited by that value. The introduction of this define allows us to remove the string size from the get_fw_str() function signature.
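The simplified contract, sketched: every provider formats into a buffer of the agreed size, so the length no longer travels through the call chain. IB_FW_VERSION_NAME_MAX is redefined locally only to keep the snippet self-contained (in the tree it comes from ib_verbs.h as ETHTOOL_FWVERS_LEN, i.e. 32 bytes), and the version digits are made up:

#include <stdio.h>

#define IB_FW_VERSION_NAME_MAX 32 /* ETHTOOL_FWVERS_LEN */

static void get_dev_fw_str(char *str)
{
        /* Provider side: format into the fixed-size buffer, no length arg. */
        snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d", 16, 20, 1010);
}

static void show_fw_ver(void)
{
        char buf[IB_FW_VERSION_NAME_MAX];

        get_dev_fw_str(buf);
        printf("fw: %s\n", buf);
}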
Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 4 ++-- drivers/infiniband/core/sysfs.c | 4 ++-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 5 ++--- drivers/infiniband/hw/cxgb4/provider.c | 5 ++--- drivers/infiniband/hw/hfi1/verbs.c | 5 ++--- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 7 +++---- drivers/infiniband/hw/mlx4/main.c | 5 ++--- drivers/infiniband/hw/mlx5/main.c | 8 ++++---- drivers/infiniband/hw/mthca/mthca_provider.c | 5 ++--- drivers/infiniband/hw/nes/nes_verbs.c | 5 ++--- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 5 ++--- drivers/infiniband/hw/qedr/main.c | 5 ++--- drivers/infiniband/hw/usnic/usnic_ib_main.c | 6 ++---- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 5 ++--- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 3 +-- include/rdma/ib_verbs.h | 6 ++++-- 16 files changed, 36 insertions(+), 47 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 66b109bc6753..fbc92c649be8 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -336,10 +336,10 @@ static int read_port_immutable(struct ib_device *device) return 0; } -void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len) +void ib_get_device_fw_str(struct ib_device *dev, char *str) { if (dev->get_dev_fw_str) - dev->get_dev_fw_str(dev, str, str_len); + dev->get_dev_fw_str(dev, str); else str[0] = '\0'; } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 7ebe1ef23652..abc5ab581f82 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1210,8 +1210,8 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, { struct ib_device *dev = container_of(device, struct ib_device, dev); - ib_get_device_fw_str(dev, buf, PAGE_SIZE); - strlcat(buf, "\n", PAGE_SIZE); + ib_get_device_fw_str(dev, buf); + strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); return strlen(buf); } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 0cd0c1fa27d4..099e76f3758a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1336,8 +1336,7 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str, - size_t str_len) +static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) { struct iwch_dev *iwch_dev = to_iwch_dev(ibdev); struct ethtool_drvinfo info; @@ -1345,7 +1344,7 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str, pr_debug("%s dev 0x%p\n", __func__, iwch_dev); lldev->ethtool_ops->get_drvinfo(lldev, &info); - snprintf(str, str_len, "%s", info.fw_version); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } int iwch_register_device(struct iwch_dev *dev) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 0771e9a4d061..346e8334279a 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -517,14 +517,13 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_str(struct ib_device *dev, char *str, - size_t str_len) +static void get_dev_fw_str(struct ib_device *dev, char *str) { struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev); pr_debug("%s dev 0x%p\n", __func__, dev); - snprintf(str, str_len, "%u.%u.%u.%u", + 
snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u.%u", FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers), FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers), FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers), diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index dc51bf247006..c88c03c11555 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1561,14 +1561,13 @@ static void init_ibport(struct hfi1_pportdata *ppd) RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } -static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, - size_t str_len) +static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str) { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct hfi1_ibdev *dev = dev_from_rdi(rdi); u32 ver = dd_from_dev(dev)->dc8051_ver; - snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver), + snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u", dc8051_ver_maj(ver), dc8051_ver_min(ver), dc8051_ver_patch(ver)); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 02d871db7ca5..1aa411034a27 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2584,13 +2584,12 @@ static const char * const i40iw_hw_stat_names[] = { "iwRdmaInv" }; -static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str, - size_t str_len) +static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str) { u32 firmware_version = I40IW_FW_VERSION; - snprintf(str, str_len, "%u.%u", firmware_version, - (firmware_version & 0x000000ff)); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u", firmware_version, + (firmware_version & 0x000000ff)); } /** diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1f25a37eb056..c636842c5be0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2587,12 +2587,11 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_fw_ver_str(struct ib_device *device, char *str, - size_t str_len) +static void get_fw_ver_str(struct ib_device *device, char *str) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev); - snprintf(str, str_len, "%d.%d.%d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d", (int) (dev->dev->caps.fw_ver >> 32), (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, (int) dev->dev->caps.fw_ver & 0xffff); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 9279631d8da0..0a5a4e3fa66d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3285,13 +3285,13 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_str(struct ib_device *ibdev, char *str, - size_t str_len) +static void get_dev_fw_str(struct ib_device *ibdev, char *str) { struct mlx5_ib_dev *dev = container_of(ibdev, struct mlx5_ib_dev, ib_dev); - snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev), - fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d", + fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev), + fw_rev_sub(dev->mdev)); } static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index c197cd9b193f..eae9bffd45d4 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1178,12 
+1178,11 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_str(struct ib_device *device, char *str, - size_t str_len) +static void get_dev_fw_str(struct ib_device *device, char *str) { struct mthca_dev *dev = container_of(device, struct mthca_dev, ib_dev); - snprintf(str, str_len, "%d.%d.%d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d", (int) (dev->fw_ver >> 32), (int) (dev->fw_ver >> 16) & 0xffff, (int) dev->fw_ver & 0xffff); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 25dcd7573df9..c2943e39d2f9 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3672,15 +3672,14 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_str(struct ib_device *dev, char *str, - size_t str_len) +static void get_dev_fw_str(struct ib_device *dev, char *str) { struct nes_ib_device *nesibdev = container_of(dev, struct nes_ib_device, ibdev); struct nes_vnic *nesvnic = nesibdev->nesvnic; nes_debug(NES_DBG_INIT, "\n"); - snprintf(str, str_len, "%u.%u", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u", (nesvnic->nesdev->nesadapter->firmware_version >> 16), (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 757c65816295..fbfbd9e96147 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -107,12 +107,11 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_str(struct ib_device *device, char *str, - size_t str_len) +static void get_dev_fw_str(struct ib_device *device, char *str) { struct ocrdma_dev *dev = get_ocrdma_dev(device); - snprintf(str, str_len, "%s", &dev->attr.fw_ver[0]); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", &dev->attr.fw_ver[0]); } static int ocrdma_register_device(struct ocrdma_dev *dev) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 199b6edbef92..97d033f51dc9 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -68,13 +68,12 @@ static enum rdma_link_layer qedr_link_layer(struct ib_device *device, return IB_LINK_LAYER_ETHERNET; } -static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str, - size_t str_len) +static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str) { struct qedr_dev *qedr = get_qedr_dev(ibdev); u32 fw_ver = (u32)qedr->attr.fw_ver; - snprintf(str, str_len, "%d. %d. %d. %d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. 
%d", (fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF, (fw_ver >> 8) & 0xFF, fw_ver & 0xFF); } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index e69c8e476a2b..e86700f994cb 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -333,9 +333,7 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void usnic_get_dev_fw_str(struct ib_device *device, - char *str, - size_t str_len) +static void usnic_get_dev_fw_str(struct ib_device *device, char *str) { struct usnic_ib_dev *us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev); @@ -345,7 +343,7 @@ static void usnic_get_dev_fw_str(struct ib_device *device, us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); mutex_unlock(&us_ibdev->usdev_lock); - snprintf(str, str_len, "%s", info.fw_version); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } /* Start of PF discovery section */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index e76565280afa..7f29e4db28a1 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -102,12 +102,11 @@ static struct device_attribute *pvrdma_class_attributes[] = { &dev_attr_board_id }; -static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str, - size_t str_len) +static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str) { struct pvrdma_dev *dev = container_of(device, struct pvrdma_dev, ib_dev); - snprintf(str, str_len, "%d.%d.%d\n", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d\n", (int) (dev->dsr->caps.fw_ver >> 32), (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff, (int) dev->dsr->caps.fw_ver & 0xffff); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 7871379342f4..98e30b41e436 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -62,8 +62,7 @@ static void ipoib_get_drvinfo(struct net_device *netdev, { struct ipoib_dev_priv *priv = ipoib_priv(netdev); - ib_get_device_fw_str(priv->ca, drvinfo->fw_version, - sizeof(drvinfo->fw_version)); + ib_get_device_fw_str(priv->ca, drvinfo->fw_version); strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent), sizeof(drvinfo->bus_info)); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3391df5fdc9c..e0e87a1f66fb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -64,6 +64,8 @@ #include #include +#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN + extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; @@ -2307,7 +2309,7 @@ struct ib_device { * in fast paths. 
*/ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); - void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len); + void (*get_dev_fw_str)(struct ib_device *, char *str); }; struct ib_client { @@ -2343,7 +2345,7 @@ struct ib_client { struct ib_device *ib_alloc_device(size_t size); void ib_dealloc_device(struct ib_device *device); -void ib_get_device_fw_str(struct ib_device *device, char *str, size_t str_len); +void ib_get_device_fw_str(struct ib_device *device, char *str); int ib_register_device(struct ib_device *device, int (*port_callback)(struct ib_device *, -- cgit v1.2.3-59-g8ed1b From 8ccf098104dcc6fb9036d760f79016d5227a4437 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Mon, 14 Aug 2017 11:15:26 -0400 Subject: Revert "RDMA/hns: fix build regression" This reverts commit ecd840ff9b793ac60e3e6658414525535349a17b. --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index aeef2adf1a45..e493a61e14e1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -33,7 +33,6 @@ #ifndef _HNS_ROCE_DEVICE_H #define _HNS_ROCE_DEVICE_H -#include #include #define DRV_NAME "hns_roce" -- cgit v1.2.3-59-g8ed1b From 3e5f0881f17525e3b49835947a5e0cf2d681b1e2 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 25 Jul 2017 17:29:06 +0300 Subject: IB/hns: Avoid compile test under non 64bit environments The hns driver uses __raw_writeq which is only defined in 64BIT environments. Trying to compile the driver in a 32BIT environment results in errors. Only COMPILE_TEST when 64BIT is defined. Fixes: 7d1b6a678e0b ("IB/hns: Support compile test for hns RoCE driver") Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index cbe6b51394fd..61c93bbd230d 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,7 +1,7 @@ config INFINIBAND_HNS tristate "HNS RoCE Driver" depends on NET_VENDOR_HISILICON - depends on (ARM64 || COMPILE_TEST) && HNS && HNS_DSAF && HNS_ENET + depends on (ARM64 || (COMPILE_TEST && 64BIT)) && HNS && HNS_DSAF && HNS_ENET ---help--- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine is used in Hisilicon Hi1610 and more further ICT SoC. -- cgit v1.2.3-59-g8ed1b From 836daee2df01b79e9cf54b80b985aea6cff05f57 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sat, 10 Jun 2017 11:19:20 +0200 Subject: cxgb4: Remove some dead code This 'BUG_ON(!ep)' can never trigger because we have: if (!ep) return 0; just a few lines above. So it can be removed safely. 
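For illustration only, the dead check reduces to this standalone sketch (the function and names here are made up, not the driver's code):

	static int handle_close_sketch(void *ep)
	{
		if (!ep)
			return 0;

		/* ep is provably non-NULL from here on, so a
		 * BUG_ON(!ep) at this point can never fire. */
		return 1;
	}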
Signed-off-by: Christophe JAILLET Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e49b34c3b136..ceaa2fa54d32 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2871,7 +2871,6 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - BUG_ON(!ep); /* The cm_id may be null if we failed to connect */ mutex_lock(&ep->com.mutex); -- cgit v1.2.3-59-g8ed1b From 055bb279065f00205281956d2a01777d17d23c7c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 3 Jul 2017 10:23:47 +0100 Subject: IB/qib: fix spelling mistake: "failng" -> "failing" Trivial fix to spelling mistake in qib_dev_err error message Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 6c16ba1107ba..cce4ab468b29 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -399,7 +399,7 @@ static int loadtime_init(struct qib_devdata *dd) if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) { qib_dev_err(dd, - "Driver only handles version %d, chip swversion is %d (%llx), failng\n", + "Driver only handles version %d, chip swversion is %d (%llx), failing\n", QIB_CHIP_SWVERSION, (int)(dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & -- cgit v1.2.3-59-g8ed1b From a63aa5dbed4eda4bec97556f4f0d70b4247fc9d1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 13 Jul 2017 23:13:38 +0100 Subject: IB/hfi1: fix spelling mistake in variable name continious Trivial fix to spelling mistake, rename variable 'continious' to the correct spelling 'continuous' Signed-off-by: Colin Ian King Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 94b54850ec75..ebbc52d668e7 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7330,7 +7330,7 @@ void handle_verify_cap(struct work_struct *work) struct hfi1_devdata *dd = ppd->dd; u64 reg; u8 power_management; - u8 continious; + u8 continuous; u8 vcu; u8 vau; u8 z; @@ -7349,7 +7349,7 @@ void handle_verify_cap(struct work_struct *work) lcb_shutdown(dd, 0); adjust_lcb_for_fpga_serdes(dd); - read_vc_remote_phy(dd, &power_management, &continious); + read_vc_remote_phy(dd, &power_management, &continuous); read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf, &partner_supported_crc); read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths); @@ -7363,7 +7363,7 @@ void handle_verify_cap(struct work_struct *work) get_link_widths(dd, &active_tx, &active_rx); dd_dev_info(dd, "Peer PHY: power management 0x%x, continuous updates 0x%x\n", - (int)power_management, (int)continious); + (int)power_management, (int)continuous); dd_dev_info(dd, "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n", (int)vau, (int)z, (int)vcu, (int)vl15buf, -- cgit v1.2.3-59-g8ed1b From 24bb4d86887fd7730737b790bdb1b4f415b4b241 Mon Sep 17 00:00:00 
2001 From: Colin Ian King Date: Fri, 14 Jul 2017 08:30:10 +0100 Subject: RDMA/bnxt_re: fix spelling mistake: "Deallocte" -> "Deallocate" Trivial fix to spelling mistake in dev_err error message Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index f0e01b3ac711..36a88e32b4f2 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3410,7 +3410,7 @@ int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) &rdev->qplib_res.dpi_tbl, &uctx->dpi); if (rc) - dev_err(rdev_to_dev(rdev), "Deallocte HW DPI failed!"); + dev_err(rdev_to_dev(rdev), "Deallocate HW DPI failed!"); /* Don't fail, continue*/ uctx->dpi.dbr = NULL; } -- cgit v1.2.3-59-g8ed1b From 92d50fc1602ecef44babe411c475344e55e1cdd9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 19 Jul 2017 15:01:06 +0200 Subject: PCI/IB: add support for pci driver attribute groups Some drivers (specifically the nes IB driver), want to create a lot of sysfs driver attributes. Instead of open-coding the creation and removal of these files (and getting it wrong btw), it's a better idea to let the driver core handle all of this logic for us. So add a new field to the pci driver structure, **groups, that allows pci drivers to specify an attribute group list it wishes to have created when it is registered with the driver core. Big bonus is now the driver doesn't race with userspace when the sysfs files are created vs. when the kobject is announced, so any script/tool that actually wanted to use these files will not have to poll waiting for them to show up. 
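For reference, a converted driver now only declares its attributes and points .groups at them. This is a minimal sketch with made-up names (mydrv_*; the id table, probe and remove callbacks are assumed to be defined elsewhere, and <linux/device.h> plus <linux/pci.h> are needed), not the exact nes conversion below:

	static ssize_t version_show(struct device_driver *ddp, char *buf)
	{
		return sprintf(buf, "1.0\n");
	}
	static DRIVER_ATTR_RO(version);

	static struct attribute *mydrv_attrs[] = {
		&driver_attr_version.attr,
		NULL,
	};
	ATTRIBUTE_GROUPS(mydrv);

	static struct pci_driver mydrv_pci_driver = {
		.name     = "mydrv",
		.id_table = mydrv_pci_table,
		.probe    = mydrv_probe,
		.remove   = mydrv_remove,
		.groups   = mydrv_groups,	/* driver core creates/removes the files */
	};

With this, the files exist before the driver's kobject is announced to userspace, which is what closes the race described above.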
Cc: Faisal Latif Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes.c | 67 ++++++++++++++--------------------------- drivers/pci/pci-driver.c | 1 + include/linux/pci.h | 1 + 3 files changed, 25 insertions(+), 44 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index a30aa6527f7e..84162e9ae5c0 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -808,13 +808,6 @@ static void nes_remove(struct pci_dev *pcidev) } -static struct pci_driver nes_pci_driver = { - .name = DRV_NAME, - .id_table = nes_pci_table, - .probe = nes_probe, - .remove = nes_remove, -}; - static ssize_t adapter_show(struct device_driver *ddp, char *buf) { unsigned int devfn = 0xffffffff; @@ -1156,35 +1149,29 @@ static DRIVER_ATTR_RW(idx_addr); static DRIVER_ATTR_RW(idx_data); static DRIVER_ATTR_RW(wqm_quanta); -static int nes_create_driver_sysfs(struct pci_driver *drv) -{ - int error; - error = driver_create_file(&drv->driver, &driver_attr_adapter); - error |= driver_create_file(&drv->driver, &driver_attr_eeprom_cmd); - error |= driver_create_file(&drv->driver, &driver_attr_eeprom_data); - error |= driver_create_file(&drv->driver, &driver_attr_flash_cmd); - error |= driver_create_file(&drv->driver, &driver_attr_flash_data); - error |= driver_create_file(&drv->driver, &driver_attr_nonidx_addr); - error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data); - error |= driver_create_file(&drv->driver, &driver_attr_idx_addr); - error |= driver_create_file(&drv->driver, &driver_attr_idx_data); - error |= driver_create_file(&drv->driver, &driver_attr_wqm_quanta); - return error; -} +static struct attribute *nes_attrs[] = { + &driver_attr_adapter.attr, + &driver_attr_eeprom_cmd.attr, + &driver_attr_eeprom_data.attr, + &driver_attr_flash_cmd.attr, + &driver_attr_flash_data.attr, + &driver_attr_nonidx_addr.attr, + &driver_attr_nonidx_data.attr, + &driver_attr_idx_addr.attr, + &driver_attr_idx_data.attr, + &driver_attr_wqm_quanta.attr, + NULL, +}; +ATTRIBUTE_GROUPS(nes); + +static struct pci_driver nes_pci_driver = { + .name = DRV_NAME, + .id_table = nes_pci_table, + .probe = nes_probe, + .remove = nes_remove, + .groups = nes_groups, +}; -static void nes_remove_driver_sysfs(struct pci_driver *drv) -{ - driver_remove_file(&drv->driver, &driver_attr_adapter); - driver_remove_file(&drv->driver, &driver_attr_eeprom_cmd); - driver_remove_file(&drv->driver, &driver_attr_eeprom_data); - driver_remove_file(&drv->driver, &driver_attr_flash_cmd); - driver_remove_file(&drv->driver, &driver_attr_flash_data); - driver_remove_file(&drv->driver, &driver_attr_nonidx_addr); - driver_remove_file(&drv->driver, &driver_attr_nonidx_data); - driver_remove_file(&drv->driver, &driver_attr_idx_addr); - driver_remove_file(&drv->driver, &driver_attr_idx_data); - driver_remove_file(&drv->driver, &driver_attr_wqm_quanta); -} /** * nes_init_module - module initialization entry point @@ -1192,20 +1179,13 @@ static void nes_remove_driver_sysfs(struct pci_driver *drv) static int __init nes_init_module(void) { int retval; - int retval1; retval = nes_cm_start(); if (retval) { printk(KERN_ERR PFX "Unable to start NetEffect iWARP CM.\n"); return retval; } - retval = pci_register_driver(&nes_pci_driver); - if (retval >= 0) { - retval1 = nes_create_driver_sysfs(&nes_pci_driver); - if (retval1 < 0) - printk(KERN_ERR PFX 
"Unable to create NetEffect sys files.\n"); - } - return retval; + return pci_register_driver(&nes_pci_driver); } @@ -1215,7 +1195,6 @@ static int __init nes_init_module(void) static void __exit nes_exit_module(void) { nes_cm_stop(); - nes_remove_driver_sysfs(&nes_pci_driver); pci_unregister_driver(&nes_pci_driver); } diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d51e8738f9c2..4450feaf5c00 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1307,6 +1307,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, drv->driver.bus = &pci_bus_type; drv->driver.owner = owner; drv->driver.mod_name = mod_name; + drv->driver.groups = drv->groups; spin_lock_init(&drv->dynids.lock); INIT_LIST_HEAD(&drv->dynids.list); diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..c421af60f2df 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -729,6 +729,7 @@ struct pci_driver { void (*shutdown) (struct pci_dev *dev); int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */ const struct pci_error_handlers *err_handler; + const struct attribute_group **groups; struct device_driver driver; struct pci_dynids dynids; }; -- cgit v1.2.3-59-g8ed1b From e0604df111e57773f45c50d143fedb0374bfe916 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 16 Jul 2017 12:00:44 +0530 Subject: infiniband: mthca: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 13067 805 4 13876 3634 infiniband/hw/mthca/mthca_main.o File size After adding 'const': text data bss dec hex filename 13419 453 4 13876 3634 infiniband/hw/mthca/mthca_main.o Signed-off-by: Arvind Yadav Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mthca/mthca_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index c309e5c96383..5695cda52a07 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1162,7 +1162,7 @@ static void mthca_remove_one(struct pci_dev *pdev) mutex_unlock(&mthca_device_mutex); } -static struct pci_device_id mthca_pci_table[] = { +static const struct pci_device_id mthca_pci_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR), .driver_data = TAVOR }, { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_TAVOR), -- cgit v1.2.3-59-g8ed1b From edb566913b9591910a0d1dafd53e5d4de3212b55 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 16 Jul 2017 12:00:45 +0530 Subject: infiniband: nes: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. 
File size before: text data bss dec hex filename 10429 780 33 11242 2bea drivers/infiniband/hw/nes/nes.o File size After adding 'const': text data bss dec hex filename 10541 668 33 11242 2bea drivers/infiniband/hw/nes/nes.o Signed-off-by: Arvind Yadav Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 84162e9ae5c0..b00eac1b899f 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -102,7 +102,7 @@ static unsigned int ee_flsh_adapter; static unsigned int sysfs_nonidx_addr; static unsigned int sysfs_idx_addr; -static struct pci_device_id nes_pci_table[] = { +static const struct pci_device_id nes_pci_table[] = { { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020), }, { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR), }, {0} -- cgit v1.2.3-59-g8ed1b From 7806def081b2185a488fb31bd1deb4f5a3fe4765 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 16 Jul 2017 12:00:46 +0530 Subject: infiniband: pvrdma: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 10774 1872 8 12654 316e infiniband/hw/vmw_pvrdma/pvrdma_main.o File size After adding 'const': text data bss dec hex filename 10838 1808 8 12654 316e infiniband/hw/vmw_pvrdma/pvrdma_main.o Signed-off-by: Arvind Yadav Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 34ebc7615411..2b8353070925 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -1078,7 +1078,7 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); } -static struct pci_device_id pvrdma_pci_table[] = { +static const struct pci_device_id pvrdma_pci_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), }, { 0 }, }; -- cgit v1.2.3-59-g8ed1b From 02654b5ae1a45c0e31060816231086685cfcd841 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sun, 16 Jul 2017 13:09:23 +0200 Subject: i40iw: Simplify code Axe a few lines of code and re-use existing error handling path to avoid code duplication. 
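The result is the usual kernel goto-unwind idiom, roughly as follows (step_one(), step_two() and cleanup() are stubs standing in for the real i40iw calls, not actual driver functions):

	static int step_one(void) { return 0; }
	static int step_two(void) { return 0; }
	static void cleanup(void) { }

	static int setup_example(void)
	{
		int status;

		status = step_one();
		if (status)
			goto error;

		status = step_two();
		if (status)
			goto error;

		return 0;

	error:
		cleanup();	/* single cleanup site for every failure path */
		return status;
	}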
Signed-off-by: Christophe JAILLET Acked-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_pble.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index c87ba1617087..540aab5e502d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -269,10 +269,8 @@ static enum i40iw_status_code add_bp_pages(struct i40iw_sc_dev *dev, status = i40iw_add_sd_table_entry(dev->hw, hmc_info, info->idx.sd_idx, I40IW_SD_TYPE_PAGED, I40IW_HMC_DIRECT_BP_SIZE); - if (status) { - i40iw_free_vmalloc_mem(dev->hw, chunk); - return status; - } + if (status) + goto error; if (!dev->is_pf) { status = i40iw_vchnl_vf_add_hmc_objs(dev, I40IW_HMC_IW_PBLE, fpm_to_idx(pble_rsrc, @@ -280,8 +278,7 @@ static enum i40iw_status_code add_bp_pages(struct i40iw_sc_dev *dev, (info->pages << PBLE_512_SHIFT)); if (status) { i40iw_pr_err("allocate PBLEs in the PF. Error %i\n", status); - i40iw_free_vmalloc_mem(dev->hw, chunk); - return status; + goto error; } } addr = chunk->vaddr; -- cgit v1.2.3-59-g8ed1b From 83fb1c89e7ee5bb16397b294ccfbd65a9a22e402 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 19 Jul 2017 13:55:26 -0500 Subject: i40iw: Fixes for static checker warnings Remove NULL check for cm_node->listener in i40iw_accept as listener is always present at this point. Remove the check for cm_node->accept_pend and related code in i40iw_cm_event_connected as the cm_node in this context is only pertinent to active node and cm_node->accept_pend is always 0. This fixes the following smatch warnings, drivers/infiniband/hw/i40iw/i40iw_cm.c:3691 i40iw_accept() error: we previously assumed 'cm_node->listener' could be null drivers/infiniband/hw/i40iw/i40iw_cm.c:4061 i40iw_cm_event_connected() error: we previously assumed 'cm_node->listener' could be null Reported-by: Dan Carpenter Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 5a2fa743676c..a2b135039e5a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3687,8 +3687,6 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->accelerated = 1; if (cm_node->accept_pend) { - if (!cm_node->listener) - i40iw_pr_err("cm_node->listener NULL for passive node\n"); atomic_dec(&cm_node->listener->pend_accepts_cnt); cm_node->accept_pend = 0; } @@ -4056,12 +4054,7 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event) i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); cm_node->accelerated = 1; - if (cm_node->accept_pend) { - if (!cm_node->listener) - i40iw_pr_err("listener is null for passive node\n"); - atomic_dec(&cm_node->listener->pend_accepts_cnt); - cm_node->accept_pend = 0; - } + return; error: -- cgit v1.2.3-59-g8ed1b From 5a5a3d0cfe6b3c99585d7763cd966ec1654bc4e3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 21 Jul 2017 23:19:33 +0100 Subject: i40iw: fix spelling mistake: "allloc_buf" -> "alloc_buf" Trivial fix to spelling mistake in i40iw_debug message and also split up a couple of lines that are too long and cause checkpatch warnings Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- 
drivers/infiniband/hw/i40iw/i40iw_puda.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 71050c5d29a0..40c3137a7325 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -949,14 +949,16 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi, ret = i40iw_puda_qp_create(rsrc); } if (ret) { - i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error qp_create\n", __func__); + i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error qp_create\n", + __func__); goto error; } rsrc->completion = PUDA_QP_CREATED; ret = i40iw_puda_allocbufs(rsrc, info->tx_buf_cnt + info->rq_size); if (ret) { - i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error allloc_buf\n", __func__); + i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error alloc_buf\n", + __func__); goto error; } -- cgit v1.2.3-59-g8ed1b From 238f43a79799c76a266e89eb123376d36f24c3e5 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Wed, 2 Aug 2017 15:31:29 +0530 Subject: IB/qib: add const to bin_attribute structures Add const to bin_attribute structures as they are only passed to the functions sysfs_{remove/create}_bin_file. The arguments passed are of type const, so declare the structures to be const. Done using Coccinelle. @m disable optional_qualifier@ identifier s; position p; @@ static struct bin_attribute s@p={...}; @okay1@ position p; identifier m.s; @@ ( sysfs_create_bin_file(...,&s@p,...) | sysfs_remove_bin_file(...,&s@p,...) ) @bad@ position p!={m.p,okay1.p}; identifier m.s; @@ s@p @change depends on !bad disable optional_qualifier@ identifier m.s; @@ static +const struct bin_attribute s={...}; Signed-off-by: Bhumika Goyal Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index fe4cf5e4acec..ca2638d8f35e 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -247,7 +247,7 @@ static struct kobj_type qib_port_cc_ktype = { .release = qib_port_release, }; -static struct bin_attribute cc_table_bin_attr = { +static const struct bin_attribute cc_table_bin_attr = { .attr = {.name = "cc_table_bin", .mode = 0444}, .read = read_cc_table_bin, .size = PAGE_SIZE, @@ -286,7 +286,7 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, return count; } -static struct bin_attribute cc_setting_bin_attr = { +static const struct bin_attribute cc_setting_bin_attr = { .attr = {.name = "cc_settings_bin", .mode = 0444}, .read = read_cc_setting_bin, .size = PAGE_SIZE, -- cgit v1.2.3-59-g8ed1b From 9d6fd7aca149e2c95915447f76b125dc2123db45 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Wed, 2 Aug 2017 15:31:30 +0530 Subject: IB/hfi1: add const to bin_attribute structures Add const to bin_attribute structures as they are only passed to the functions sysfs_{remove/create}_bin_file. The arguments passed are of type const, so declare the structures to be const. Done using Coccinelle. @m disable optional_qualifier@ identifier s; position p; @@ static struct bin_attribute s@p={...}; @okay1@ position p; identifier m.s; @@ ( sysfs_create_bin_file(...,&s@p,...) | sysfs_remove_bin_file(...,&s@p,...) 
) @bad@ position p!={m.p,okay1.p}; identifier m.s; @@ s@p @change depends on !bad disable optional_qualifier@ identifier m.s; @@ static +const struct bin_attribute s={...}; Signed-off-by: Bhumika Goyal Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 2f3bbcac1e34..6d2702ef34ac 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -95,7 +95,7 @@ static void port_release(struct kobject *kobj) /* nothing to do since memory is freed by hfi1_free_devdata() */ } -static struct bin_attribute cc_table_bin_attr = { +static const struct bin_attribute cc_table_bin_attr = { .attr = {.name = "cc_table_bin", .mode = 0444}, .read = read_cc_table_bin, .size = PAGE_SIZE, @@ -137,7 +137,7 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, return count; } -static struct bin_attribute cc_setting_bin_attr = { +static const struct bin_attribute cc_setting_bin_attr = { .attr = {.name = "cc_settings_bin", .mode = 0444}, .read = read_cc_setting_bin, .size = PAGE_SIZE, -- cgit v1.2.3-59-g8ed1b From 62ede7779904bc75bdd84f1ff0016113956ce3b4 Mon Sep 17 00:00:00 2001 From: "Hiatt, Don" Date: Mon, 14 Aug 2017 14:17:43 -0400 Subject: Add OPA extended LID support This patch series primarily increases sizes of variables that hold lid values from 16 to 32 bits. Additionally, it adds a check in the IB mad stack to verify a properly formatted MAD when OPA extended LIDs are used. Signed-off-by: Don Hiatt Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 4 ++-- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 11 ++++++----- drivers/infiniband/hw/hfi1/mad.c | 2 +- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- drivers/infiniband/hw/mlx4/mad.c | 8 ++++---- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mthca/mthca_cmd.c | 4 ++-- drivers/infiniband/hw/mthca/mthca_mad.c | 4 ++-- drivers/infiniband/sw/rdmavt/cq.c | 2 +- include/rdma/ib_verbs.h | 26 ++++++++++++++++++++------ 11 files changed, 41 insertions(+), 26 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 7a389697e2ec..fa3b0a428195 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1770,7 +1770,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) { if (!cm_req_get_primary_subnet_local(req_msg)) { if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { - req_msg->primary_local_lid = ib_slid_be16(wc->slid); + req_msg->primary_local_lid = ib_lid_be16(wc->slid); cm_req_set_primary_sl(req_msg, wc->sl); } @@ -1780,7 +1780,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) if (!cm_req_get_alt_subnet_local(req_msg)) { if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { - req_msg->alt_local_lid = ib_slid_be16(wc->slid); + req_msg->alt_local_lid = ib_lid_be16(wc->slid); cm_req_set_alt_sl(req_msg, wc->sl); } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index ff3c67a7aaad..c1696e6084b2 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -229,7 +229,7 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.status = 0; packet->mad.hdr.length = hdr_size(file) + 
mad_recv_wc->mad_len; packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.hdr.lid = ib_slid_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.lid = ib_lid_be16(mad_recv_wc->wc->slid); packet->mad.hdr.sl = mad_recv_wc->wc->sl; packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 39a0f1dc84e4..a21881e22bad 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -275,12 +275,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; + if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) { - resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); + resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid); } else { - resp.lid = (u16)attr.lid; - resp.sm_lid = (u16)attr.sm_lid; + resp.lid = ib_lid_cpu16(attr.lid); + resp.sm_lid = ib_lid_cpu16(attr.sm_lid); } resp.lmc = attr.lmc; resp.max_vl_num = attr.max_vl_num; @@ -1206,9 +1207,9 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, tmp.wc_flags = wc->wc_flags; tmp.pkey_index = wc->pkey_index; if (rdma_cap_opa_ah(ib_dev, wc->port_num)) - tmp.slid = OPA_TO_IB_UCAST_LID(wc->slid); + tmp.slid = OPA_TO_IB_UCAST_LID(wc->slid); else - tmp.slid = ib_slid_cpu16(wc->slid); + tmp.slid = ib_lid_cpu16(wc->slid); tmp.sl = wc->sl; tmp.dlid_path_bits = wc->dlid_path_bits; tmp.port_num = wc->port_num; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 8daa3a5f7e95..11be4d19e607 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -4216,7 +4216,7 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp, const struct ib_wc *in_wc) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u16 slid = ib_slid_cpu16(in_wc->slid); + u16 slid = ib_lid_cpu16(in_wc->slid); u16 pkey; if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys)) diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 5a897b0106a9..0e4f60cfc59d 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -528,7 +528,7 @@ static int set_guid_rec(struct ib_device *ibdev, memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); - guid_info_rec.lid = cpu_to_be16((u16)attr.lid); + guid_info_rec.lid = ib_lid_be16(attr.lid); guid_info_rec.block_num = index; memcpy(guid_info_rec.guid_info_list, rec_det->all_recs, diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 04fb44e7699e..0793a21d76f4 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -169,7 +169,7 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, op_modifier |= 0x4; - in_modifier |= ib_slid_cpu16(in_wc->slid) << 16; + in_modifier |= ib_lid_cpu16(in_wc->slid) << 16; } err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier, @@ -625,7 +625,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2); } else { tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); - tun_mad->hdr.slid_mac_47_32 = ib_slid_be16(wc->slid); + tun_mad->hdr.slid_mac_47_32 = ib_lid_be16(wc->slid); } ib_dma_sync_single_for_device(&dev->ib_dev, @@ -826,7 +826,7 @@ static int 
ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } } - slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); + slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { forward_trap(to_mdev(ibdev), port_num, in_mad); @@ -860,7 +860,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) - prev_lid = (u16)pattr.lid; + prev_lid = ib_lid_cpu16(pattr.lid); err = mlx4_MAD_IFC(to_mdev(ibdev), (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) | diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index cd2264ac88ae..18cfe5bf0fa3 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -78,7 +78,7 @@ static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, u16 slid; int err; - slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); + slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index e19ae0b9b439..d0f062fc2a4b 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1921,7 +1921,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET); - MTHCA_PUT(inbox, ib_slid_cpu16(in_wc->slid), MAD_IFC_RLID_OFFSET); + MTHCA_PUT(inbox, ib_lid_cpu16(in_wc->slid), MAD_IFC_RLID_OFFSET); MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); if (in_grh) @@ -1929,7 +1929,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, op_modifier |= 0x4; - in_modifier |= ib_slid_cpu16(in_wc->slid) << 16; + in_modifier |= ib_lid_cpu16(in_wc->slid) << 16; } err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index a9caadab22cf..093f7755c843 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -205,7 +205,7 @@ int mthca_process_mad(struct ib_device *ibdev, u16 *out_mad_pkey_index) { int err; - u16 slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); + u16 slid = in_wc ? 
ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; const struct ib_mad *in_mad = (const struct ib_mad *)in; @@ -256,7 +256,7 @@ int mthca_process_mad(struct ib_device *ibdev, in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) - prev_lid = (u16)pattr.lid; + prev_lid = ib_lid_cpu16(pattr.lid); err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 0335a3df74d5..97d71e49c092 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -107,7 +107,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) wc->uqueue[head].src_qp = entry->src_qp; wc->uqueue[head].wc_flags = entry->wc_flags; wc->uqueue[head].pkey_index = entry->pkey_index; - wc->uqueue[head].slid = ib_slid_cpu16(entry->slid); + wc->uqueue[head].slid = ib_lid_cpu16(entry->slid); wc->uqueue[head].sl = entry->sl; wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; wc->uqueue[head].port_num = entry->port_num; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4db4ad56ace6..70a183179224 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3724,16 +3724,30 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, return RDMA_AH_ATTR_TYPE_IB; } -/* Return slid in 16bit CPU encoding */ -static inline u16 ib_slid_cpu16(u32 slid) +/** + * ib_lid_cpu16 - Return lid in 16bit CPU encoding. + * In the current implementation the only way to get + * get the 32bit lid is from other sources for OPA. + * For IB, lids will always be 16bits so cast the + * value accordingly. + * + * @lid: A 32bit LID + */ +static inline u16 ib_lid_cpu16(u32 lid) { - return (u16)slid; + WARN_ON_ONCE(lid & 0xFFFF0000); + return (u16)lid; } -/* Return slid in 16bit BE encoding */ -static inline u16 ib_slid_be16(u32 slid) +/** + * ib_lid_be16 - Return lid in 16bit BE encoding. + * + * @lid: A 32bit LID + */ +static inline __be16 ib_lid_be16(u32 lid) { - return cpu_to_be16((u16)slid); + WARN_ON_ONCE(lid & 0xFFFF0000); + return cpu_to_be16((u16)lid); } /** -- cgit v1.2.3-59-g8ed1b From 6a5df91baf2528e584bf4493c30bbafe2db74c9e Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 2 Aug 2017 01:46:18 -0700 Subject: RDMA/bnxt_re: Allocate multiple notification queues Enables multiple interrupt vectors. The driver requests the maximum number of MSI-X vectors based on the number of online CPUs and creates up to 9 MSI-X vectors (1 for the control path and 8 for the data path). A tasklet is created for each of these vectors. NQs are assigned to CQs in a round-robin fashion. This patch also adds an IRQ affinity hint for the MSI-X vector of each NQ.
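The spreading logic itself is just an atomic counter taken modulo the number of data-path vectors; condensed from the ib_verbs.c hunk below (one vector is reserved for the control path, hence num_msix - 1):

	nq_alloc_cnt = atomic_inc_return(&rdev->nq_alloc_cnt);
	nq = &rdev->nq[nq_alloc_cnt % (rdev->num_msix - 1)];
	cq->qplib_cq.cnq_hw_ring_id = nq->ring_id;
	cq->qplib_cq.nq = nq;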
Signed-off-by: Ray Jui Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 5 +- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 17 +++-- drivers/infiniband/hw/bnxt_re/main.c | 106 +++++++++++++++++++------------ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 21 +++++- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 4 +- 5 files changed, 104 insertions(+), 49 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 85527532c49d..b3ad37fec578 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -85,7 +85,7 @@ struct bnxt_re_sqp_entries { }; #define BNXT_RE_MIN_MSIX 2 -#define BNXT_RE_MAX_MSIX 16 +#define BNXT_RE_MAX_MSIX 9 #define BNXT_RE_AEQ_IDX 0 #define BNXT_RE_NQ_IDX 1 @@ -116,7 +116,7 @@ struct bnxt_re_dev { struct bnxt_qplib_rcfw rcfw; /* NQ */ - struct bnxt_qplib_nq nq; + struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX]; /* Device Resources */ struct bnxt_qplib_dev_attr dev_attr; @@ -140,6 +140,7 @@ struct bnxt_re_dev { struct bnxt_re_qp *qp1_sqp; struct bnxt_re_ah *sqp_ah; struct bnxt_re_sqp_entries sqp_tbl[1024]; + atomic_t nq_alloc_cnt; }; #define to_bnxt_re_dev(ptr, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 58461ee3773d..24e2785774b8 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2290,6 +2290,7 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq) struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); struct bnxt_re_dev *rdev = cq->rdev; int rc; + struct bnxt_qplib_nq *nq = cq->qplib_cq.nq; rc = bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); if (rc) { @@ -2304,7 +2305,7 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq) kfree(cq); } atomic_dec(&rdev->cq_count); - rdev->nq.budget--; + nq->budget--; return 0; } @@ -2318,6 +2319,8 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, struct bnxt_re_cq *cq = NULL; int rc, entries; int cqe = attr->cqe; + struct bnxt_qplib_nq *nq = NULL; + unsigned int nq_alloc_cnt; /* Validate CQ fields */ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) { @@ -2369,9 +2372,15 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, cq->qplib_cq.sghead = NULL; cq->qplib_cq.nmap = 0; } + /* + * Allocating the NQ in a round robin fashion. nq_alloc_cnt is a + * used for getting the NQ index. 
+ */ + nq_alloc_cnt = atomic_inc_return(&rdev->nq_alloc_cnt); + nq = &rdev->nq[nq_alloc_cnt % (rdev->num_msix - 1)]; cq->qplib_cq.max_wqe = entries; - cq->qplib_cq.cnq_hw_ring_id = rdev->nq.ring_id; - cq->qplib_cq.nq = &rdev->nq; + cq->qplib_cq.cnq_hw_ring_id = nq->ring_id; + cq->qplib_cq.nq = nq; rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq); if (rc) { @@ -2381,7 +2390,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, cq->ib_cq.cqe = entries; cq->cq_period = cq->qplib_cq.period; - rdev->nq.budget++; + nq->budget++; atomic_inc(&rdev->cq_count); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 2f71123e0a9f..5b78d8fc28dc 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -161,7 +161,7 @@ static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait) static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) { - int rc = 0, num_msix_want = BNXT_RE_MIN_MSIX, num_msix_got; + int rc = 0, num_msix_want = BNXT_RE_MAX_MSIX, num_msix_got; struct bnxt_en_dev *en_dev; if (!rdev) @@ -169,6 +169,8 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) en_dev = rdev->en_dev; + num_msix_want = min_t(u32, BNXT_RE_MAX_MSIX, num_online_cpus()); + rtnl_lock(); num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP, rdev->msix_entries, @@ -651,8 +653,12 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) { - if (rdev->nq.hwq.max_elements) - bnxt_qplib_disable_nq(&rdev->nq); + int i; + + if (rdev->nq[0].hwq.max_elements) { + for (i = 1; i < rdev->num_msix; i++) + bnxt_qplib_disable_nq(&rdev->nq[i - 1]); + } if (rdev->qplib_res.rcfw) bnxt_qplib_cleanup_res(&rdev->qplib_res); @@ -660,31 +666,41 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) static int bnxt_re_init_res(struct bnxt_re_dev *rdev) { - int rc = 0; + int rc = 0, i; bnxt_qplib_init_res(&rdev->qplib_res); - if (rdev->msix_entries[BNXT_RE_NQ_IDX].vector <= 0) - return -EINVAL; + for (i = 1; i < rdev->num_msix ; i++) { + rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], + i - 1, rdev->msix_entries[i].vector, + rdev->msix_entries[i].db_offset, + &bnxt_re_cqn_handler, NULL); - rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq, - rdev->msix_entries[BNXT_RE_NQ_IDX].vector, - rdev->msix_entries[BNXT_RE_NQ_IDX].db_offset, - &bnxt_re_cqn_handler, - NULL); + if (rc) { + dev_err(rdev_to_dev(rdev), + "Failed to enable NQ with rc = 0x%x", rc); + goto fail; + } + } + return 0; +fail: + return rc; +} - if (rc) - dev_err(rdev_to_dev(rdev), "Failed to enable NQ: %#x", rc); +static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev, bool lock_wait) +{ + int i; - return rc; + for (i = 0; i < rdev->num_msix - 1; i++) { + bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, lock_wait); + bnxt_qplib_free_nq(&rdev->nq[i]); + } } static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait) { - if (rdev->nq.hwq.max_elements) { - bnxt_re_net_ring_free(rdev, rdev->nq.ring_id, lock_wait); - bnxt_qplib_free_nq(&rdev->nq); - } + bnxt_re_free_nq_res(rdev, lock_wait); + if (rdev->qplib_res.dpi_tbl.max) { bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &rdev->qplib_res.dpi_tbl, @@ -698,7 +714,7 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait) static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) { - int rc = 0; + int rc = 0, i; /* Configure and allocate resources for qplib */ rdev->qplib_res.rcfw = &rdev->rcfw; @@ -715,30 
+731,42 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) &rdev->dpi_privileged, rdev); if (rc) - goto fail; + goto dealloc_res; - rdev->nq.hwq.max_elements = BNXT_RE_MAX_CQ_COUNT + - BNXT_RE_MAX_SRQC_COUNT + 2; - rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq); - if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to allocate NQ memory: %#x", rc); - goto fail; - } - rc = bnxt_re_net_ring_alloc - (rdev, rdev->nq.hwq.pbl[PBL_LVL_0].pg_map_arr, - rdev->nq.hwq.pbl[rdev->nq.hwq.level].pg_count, - HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_NQE_MAX_CNT - 1, - rdev->msix_entries[BNXT_RE_NQ_IDX].ring_idx, - &rdev->nq.ring_id); - if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to allocate NQ ring: %#x", rc); - goto free_nq; + for (i = 0; i < rdev->num_msix - 1; i++) { + rdev->nq[i].hwq.max_elements = BNXT_RE_MAX_CQ_COUNT + + BNXT_RE_MAX_SRQC_COUNT + 2; + rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq[i]); + if (rc) { + dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x", + i, rc); + goto dealloc_dpi; + } + rc = bnxt_re_net_ring_alloc + (rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr, + rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count, + HWRM_RING_ALLOC_CMPL, + BNXT_QPLIB_NQE_MAX_CNT - 1, + rdev->msix_entries[i + 1].ring_idx, + &rdev->nq[i].ring_id); + if (rc) { + dev_err(rdev_to_dev(rdev), + "Failed to allocate NQ fw id with rc = 0x%x", + rc); + goto free_nq; + } } return 0; free_nq: - bnxt_qplib_free_nq(&rdev->nq); + for (i = 0; i < rdev->num_msix - 1; i++) + bnxt_qplib_free_nq(&rdev->nq[i]); +dealloc_dpi: + bnxt_qplib_dealloc_dpi(&rdev->qplib_res, + &rdev->qplib_res.dpi_tbl, + &rdev->dpi_privileged); +dealloc_res: + bnxt_qplib_free_res(&rdev->qplib_res); + fail: rdev->qplib_res.rcfw = NULL; return rc; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 31e15f31bba5..e8afc47f8949 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -365,6 +365,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) tasklet_kill(&nq->worker); if (nq->requested) { + irq_set_affinity_hint(nq->vector, NULL); free_irq(nq->vector, nq); nq->requested = false; } @@ -378,7 +379,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) } int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, - int msix_vector, int bar_reg_offset, + int nq_idx, int msix_vector, int bar_reg_offset, int (*cqn_handler)(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *), int (*srqn_handler)(struct bnxt_qplib_nq *nq, @@ -402,13 +403,25 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, goto fail; nq->requested = false; - rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, "bnxt_qplib_nq", nq); + memset(nq->name, 0, 32); + sprintf(nq->name, "bnxt_qplib_nq-%d", nq_idx); + rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, nq->name, nq); if (rc) { dev_err(&nq->pdev->dev, "Failed to request IRQ for NQ: %#x", rc); bnxt_qplib_disable_nq(nq); goto fail; } + + cpumask_clear(&nq->mask); + cpumask_set_cpu(nq_idx, &nq->mask); + rc = irq_set_affinity_hint(nq->vector, &nq->mask); + if (rc) { + dev_warn(&nq->pdev->dev, + "QPLIB: set affinity failed; vector: %d nq_idx: %d\n", + nq->vector, nq_idx); + } + nq->requested = true; nq->bar_reg = NQ_CONS_PCI_BAR_REGION; nq->bar_reg_off = bar_reg_offset; @@ -432,8 +445,10 @@ fail: void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq) { - if (nq->hwq.max_elements) + if (nq->hwq.max_elements) { bnxt_qplib_free_hwq(nq->pdev, &nq->hwq); + nq->hwq.max_elements = 0; + 
} } int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 23a26d5cbe18..8ead70ca1c1d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -407,6 +407,7 @@ struct bnxt_qplib_nq { struct pci_dev *pdev; int vector; + cpumask_t mask; int budget; bool requested; struct tasklet_struct worker; @@ -425,6 +426,7 @@ struct bnxt_qplib_nq { void *srq, u8 event); struct workqueue_struct *cqn_wq; + char name[32]; }; struct bnxt_qplib_nq_work { @@ -435,7 +437,7 @@ struct bnxt_qplib_nq_work { void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq); int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, - int msix_vector, int bar_reg_offset, + int nq_idx, int msix_vector, int bar_reg_offset, int (*cqn_handler)(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq), int (*srqn_handler)(struct bnxt_qplib_nq *nq, -- cgit v1.2.3-59-g8ed1b From 225937d6ccff3ba49b7065672ce35f21465aaeb9 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 2 Aug 2017 01:46:19 -0700 Subject: RDMA/bnxt_re: Implement the alloc/get_hw_stats callback Expose HW counters using the get_hw_stats callback Signed-off-by: Somnath Kotur Signed-off-by: Selvin Xavier Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/Makefile | 2 +- drivers/infiniband/hw/bnxt_re/hw_counters.c | 114 ++++++++++++++++++++++++++++ drivers/infiniband/hw/bnxt_re/hw_counters.h | 62 +++++++++++++++ drivers/infiniband/hw/bnxt_re/main.c | 4 + 4 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/bnxt_re/hw_counters.c create mode 100644 drivers/infiniband/hw/bnxt_re/hw_counters.h (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/Makefile b/drivers/infiniband/hw/bnxt_re/Makefile index 036f84efbc73..afbaa0e20670 100644 --- a/drivers/infiniband/hw/bnxt_re/Makefile +++ b/drivers/infiniband/hw/bnxt_re/Makefile @@ -3,4 +3,4 @@ ccflags-y := -Idrivers/net/ethernet/broadcom/bnxt obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o bnxt_re-y := main.o ib_verbs.o \ qplib_res.o qplib_rcfw.o \ - qplib_sp.o qplib_fp.o + qplib_sp.o qplib_fp.o hw_counters.o diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c new file mode 100644 index 000000000000..7b28219eba46 --- /dev/null +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -0,0 +1,114 @@ +/* + * Broadcom NetXtreme-E RoCE driver. + * + * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Description: Statistics + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "bnxt_ulp.h" +#include "roce_hsi.h" +#include "qplib_res.h" +#include "qplib_sp.h" +#include "qplib_fp.h" +#include "qplib_rcfw.h" +#include "bnxt_re.h" +#include "hw_counters.h" + +static const char * const bnxt_re_stat_name[] = { + [BNXT_RE_ACTIVE_QP] = "active_qps", + [BNXT_RE_ACTIVE_SRQ] = "active_srqs", + [BNXT_RE_ACTIVE_CQ] = "active_cqs", + [BNXT_RE_ACTIVE_MR] = "active_mrs", + [BNXT_RE_ACTIVE_MW] = "active_mws", + [BNXT_RE_RX_PKTS] = "rx_pkts", + [BNXT_RE_RX_BYTES] = "rx_bytes", + [BNXT_RE_TX_PKTS] = "tx_pkts", + [BNXT_RE_TX_BYTES] = "tx_bytes", + [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors" +}; + +int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port, int index) +{ + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); + struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma; + + if (!port || !stats) + return -EINVAL; + + stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&rdev->qp_count); + stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&rdev->srq_count); + stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&rdev->cq_count); + stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&rdev->mr_count); + stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&rdev->mw_count); + if (bnxt_re_stats) { + stats->value[BNXT_RE_RECOVERABLE_ERRORS] = + le64_to_cpu(bnxt_re_stats->tx_bcast_pkts); + stats->value[BNXT_RE_RX_PKTS] = + le64_to_cpu(bnxt_re_stats->rx_ucast_pkts); + stats->value[BNXT_RE_RX_BYTES] = + le64_to_cpu(bnxt_re_stats->rx_ucast_bytes); + stats->value[BNXT_RE_TX_PKTS] = + le64_to_cpu(bnxt_re_stats->tx_ucast_pkts); + stats->value[BNXT_RE_TX_BYTES] = + le64_to_cpu(bnxt_re_stats->tx_ucast_bytes); + } + return ARRAY_SIZE(bnxt_re_stat_name); +} + +struct rdma_hw_stats *bnxt_re_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + BUILD_BUG_ON(ARRAY_SIZE(bnxt_re_stat_name) != BNXT_RE_NUM_COUNTERS); + /* We support only per port stats */ + if (!port_num) + return NULL; + + return rdma_alloc_hw_stats_struct(bnxt_re_stat_name, + ARRAY_SIZE(bnxt_re_stat_name), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h new file mode 100644 index 000000000000..be0dc0093b58 --- /dev/null +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -0,0 +1,62 @@ +/* + * Broadcom NetXtreme-E RoCE driver. + * + * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Description: Statistics (header) + * + */ + +#ifndef __BNXT_RE_HW_STATS_H__ +#define __BNXT_RE_HW_STATS_H__ + +enum bnxt_re_hw_stats { + BNXT_RE_ACTIVE_QP, + BNXT_RE_ACTIVE_SRQ, + BNXT_RE_ACTIVE_CQ, + BNXT_RE_ACTIVE_MR, + BNXT_RE_ACTIVE_MW, + BNXT_RE_RX_PKTS, + BNXT_RE_RX_BYTES, + BNXT_RE_TX_PKTS, + BNXT_RE_TX_BYTES, + BNXT_RE_RECOVERABLE_ERRORS, + BNXT_RE_NUM_COUNTERS +}; + +struct rdma_hw_stats *bnxt_re_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num); +int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port, int index); +#endif /* __BNXT_RE_HW_STATS_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 5b78d8fc28dc..82d1cbc27aee 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -64,6 +64,8 @@ #include "ib_verbs.h" #include #include "bnxt.h" +#include "hw_counters.h" + static char version[] = BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n"; @@ -513,6 +515,8 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->alloc_ucontext = bnxt_re_alloc_ucontext; ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext; ibdev->mmap = bnxt_re_mmap; + ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats; + ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats; return ib_register_device(ibdev, NULL); } -- cgit v1.2.3-59-g8ed1b From 11880a5512ac51b9326507df9acc42a8bd13e2b7 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 22 Aug 2017 13:46:56 +0200 Subject: IB/mthca: Replace PCI pool old API The PCI pool API is deprecated. This commit replaces the PCI pool old API by the appropriate function with the DMA pool API. 
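The mapping is one-to-one; only the create call changes shape, taking a struct device instead of the pci_dev. Sketch of the correspondence used below (pool, buf and dma_handle are illustrative locals; <linux/dmapool.h> provides the API):

	/* was: pci_pool_create("mthca_av", dev->pdev, size, align, 0) */
	pool = dma_pool_create("mthca_av", &dev->pdev->dev,
			       MTHCA_AV_SIZE, MTHCA_AV_SIZE, 0);
	buf = dma_pool_zalloc(pool, GFP_ATOMIC, &dma_handle);	/* was pci_pool_zalloc() */
	dma_pool_free(pool, buf, dma_handle);			/* was pci_pool_free() */
	dma_pool_destroy(pool);					/* was pci_pool_destroy() */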
Signed-off-by: Romain Perier Acked-by: Peter Senna Tschudin Tested-by: Peter Senna Tschudin Acked-by: Doug Ledford Tested-by: Doug Ledford Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mthca/mthca_av.c | 10 +++++----- drivers/infiniband/hw/mthca/mthca_cmd.c | 8 ++++---- drivers/infiniband/hw/mthca/mthca_dev.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 2aec9908c40a..e7f6223e9c60 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -186,7 +186,7 @@ int mthca_create_ah(struct mthca_dev *dev, on_hca_fail: if (ah->type == MTHCA_AH_PCI_POOL) { - ah->av = pci_pool_zalloc(dev->av_table.pool, + ah->av = dma_pool_zalloc(dev->av_table.pool, GFP_ATOMIC, &ah->avdma); if (!ah->av) return -ENOMEM; @@ -250,7 +250,7 @@ int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah) break; case MTHCA_AH_PCI_POOL: - pci_pool_free(dev->av_table.pool, ah->av, ah->avdma); + dma_pool_free(dev->av_table.pool, ah->av, ah->avdma); break; case MTHCA_AH_KMALLOC: @@ -340,7 +340,7 @@ int mthca_init_av_table(struct mthca_dev *dev) if (err) return err; - dev->av_table.pool = pci_pool_create("mthca_av", dev->pdev, + dev->av_table.pool = dma_pool_create("mthca_av", &dev->pdev->dev, MTHCA_AV_SIZE, MTHCA_AV_SIZE, 0); if (!dev->av_table.pool) @@ -360,7 +360,7 @@ int mthca_init_av_table(struct mthca_dev *dev) return 0; out_free_pool: - pci_pool_destroy(dev->av_table.pool); + dma_pool_destroy(dev->av_table.pool); out_free_alloc: mthca_alloc_cleanup(&dev->av_table.alloc); @@ -374,6 +374,6 @@ void mthca_cleanup_av_table(struct mthca_dev *dev) if (dev->av_table.av_map) iounmap(dev->av_table.av_map); - pci_pool_destroy(dev->av_table.pool); + dma_pool_destroy(dev->av_table.pool); mthca_alloc_cleanup(&dev->av_table.alloc); } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index d0f062fc2a4b..a1900d2a371b 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -538,7 +538,7 @@ int mthca_cmd_init(struct mthca_dev *dev) return -ENOMEM; } - dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev, + dev->cmd.pool = dma_pool_create("mthca_cmd", &dev->pdev->dev, MTHCA_MAILBOX_SIZE, MTHCA_MAILBOX_SIZE, 0); if (!dev->cmd.pool) { @@ -551,7 +551,7 @@ int mthca_cmd_init(struct mthca_dev *dev) void mthca_cmd_cleanup(struct mthca_dev *dev) { - pci_pool_destroy(dev->cmd.pool); + dma_pool_destroy(dev->cmd.pool); iounmap(dev->hcr); if (dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS) iounmap(dev->cmd.dbell_map); @@ -621,7 +621,7 @@ struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, if (!mailbox) return ERR_PTR(-ENOMEM); - mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma); + mailbox->buf = dma_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma); if (!mailbox->buf) { kfree(mailbox); return ERR_PTR(-ENOMEM); @@ -635,7 +635,7 @@ void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox) if (!mailbox) return; - pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + dma_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); kfree(mailbox); } diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index ec7da9a474cd..5508afbf1c67 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -118,7 +118,7 @@ enum { }; struct mthca_cmd { 
- struct pci_pool *pool; + struct dma_pool *pool; struct mutex hcr_mutex; struct semaphore poll_sem; struct semaphore event_sem; @@ -263,7 +263,7 @@ struct mthca_qp_table { }; struct mthca_av_table { - struct pci_pool *pool; + struct dma_pool *pool; int num_ddr_avs; u64 ddr_av_base; void __iomem *av_map; -- cgit v1.2.3-59-g8ed1b From e093111ddb6c786e32b882108c1c08ef83d781f4 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Tue, 27 Jun 2017 17:04:42 +0300 Subject: IB/core: Fix input len in multiple user verbs Most user verbs pass user data to the kernel with the inclusion of the ib_uverbs_cmd_hdr structure. This is problematic because the vendor has no idea whether the verb was called via a legacy or an extended verb. Also, the inconsistency between the verbs is confusing. Fixes: 565197dd8fb1 ("IB/core: Extend ib_uverbs_create_cq") Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 70 ++++++++++++++++------------ drivers/infiniband/hw/mlx5/cq.c | 6 +-- drivers/infiniband/hw/mlx5/main.c | 11 ++--- drivers/infiniband/hw/mthca/mthca_provider.c | 2 +- 4 files changed, 46 insertions(+), 43 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a21881e22bad..7ea5a3bb5a04 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -91,9 +91,10 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err; } - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); if (ret) @@ -319,9 +320,10 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); uobj = uobj_alloc(uobj_get_type(pd), file->ucontext); if (IS_ERR(uobj)) @@ -488,9 +490,10 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); mutex_lock(&file->device->xrcd_tree_mutex); @@ -652,9 +655,10 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; @@ -746,7 +750,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, INIT_UDATA(&udata, buf + sizeof(cmd), (unsigned
long) cmd.response + sizeof(resp), - in_len - sizeof(cmd), out_len - sizeof(resp)); + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) return -EINVAL; @@ -1086,7 +1091,8 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, INIT_UDATA(&uhw, buf + sizeof(cmd), (unsigned long)cmd.response + sizeof(resp), - in_len - sizeof(cmd), out_len - sizeof(resp)); + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; @@ -1167,9 +1173,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); if (!cq) @@ -1742,9 +1749,10 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp), file->ucontext); @@ -2055,7 +2063,8 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, return -EOPNOTSUPP; INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL, - in_len - sizeof(cmd.base), out_len); + in_len - sizeof(cmd.base) - sizeof(struct ib_uverbs_cmd_hdr), + out_len); ret = modify_qp(file, &cmd, &udata); if (ret) @@ -2561,7 +2570,8 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, INIT_UDATA(&udata, buf + sizeof(cmd), (unsigned long)cmd.response + sizeof(resp), - in_len - sizeof(cmd), out_len - sizeof(resp)); + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); uobj = uobj_alloc(uobj_get_type(ah), file->ucontext); if (IS_ERR(uobj)) @@ -3625,10 +3635,10 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, xcmd.max_sge = cmd.max_sge; xcmd.srq_limit = cmd.srq_limit; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); if (ret) @@ -3652,10 +3662,10 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); if (ret) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index a384d72ea3cd..c155df465c44 100644 --- 
a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -751,10 +751,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, void *cqc; int err; - ucmdlen = - (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < - sizeof(ucmd)) ? (sizeof(ucmd) - - sizeof(ucmd.reserved)) : sizeof(ucmd); + ucmdlen = udata->inlen < sizeof(ucmd) ? + (sizeof(ucmd) - sizeof(ucmd.reserved)) : sizeof(ucmd); if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index cde7d1ce4a3c..e2fffdc43c86 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1273,7 +1273,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct mlx5_bfreg_info *bfregi; int ver; int err; - size_t reqlen; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); bool lib_uar_4k; @@ -1281,18 +1280,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (!dev->ib_active) return ERR_PTR(-EAGAIN); - if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr)) - return ERR_PTR(-EINVAL); - - reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); - if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) + if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) ver = 0; - else if (reqlen >= min_req_v2) + else if (udata->inlen >= min_req_v2) ver = 2; else return ERR_PTR(-EINVAL); - err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req))); + err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); if (err) return ERR_PTR(err); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index eae9bffd45d4..6fee7795d1c8 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -914,7 +914,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int err = 0; int write_mtt_size; - if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) { + if (udata->inlen < sizeof ucmd) { if (!to_mucontext(pd->uobject->context)->reg_mr_warned) { mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n", current->comm); -- cgit v1.2.3-59-g8ed1b From ecdb19f4b513033e6f2c4326cd5b81e04393e5e1 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Fri, 4 Aug 2017 13:52:13 -0700 Subject: IB/hfi1: Revert egress pkey check enforcement Current code has some serious flaws. Disarm the flag pending an appropriate patch. 
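For reference, a minimal sketch of how such a per-direction enforcement bit typically gates the egress check; egress_pkey_check_sketch() and pkey_in_table() are hypothetical names used only for illustration, not the driver's actual helpers:

/*
 * Illustrative sketch only: clearing HFI1_PART_ENFORCE_OUT (as this
 * patch does) makes the egress check a no-op while leaving ingress
 * checking, gated by HFI1_PART_ENFORCE_IN, untouched.
 * egress_pkey_check_sketch() and pkey_in_table() are hypothetical.
 */
static int egress_pkey_check_sketch(struct hfi1_pportdata *ppd, u16 pkey)
{
	/* enforcement disarmed: always pass */
	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
		return 0;

	/* hypothetical lookup against the port's P_Key table */
	if (pkey_in_table(ppd, pkey))
		return 0;

	return -EINVAL;	/* packet would be dropped on egress */
}
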
Fixes: 53526500f301 ("IB/hfi1: Permanently enable P_Key checking in HFI") Cc: stable@vger.kernel.org Reviewed-by: Mike Marciniszyn Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index be027c9c880e..da7cd5bd27ca 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -519,7 +519,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY; ppd->part_enforce |= HFI1_PART_ENFORCE_IN; - ppd->part_enforce |= HFI1_PART_ENFORCE_OUT; if (loopback) { hfi1_early_err(&pdev->dev, -- cgit v1.2.3-59-g8ed1b From 02a222c7f6c8d17b5fb6803ca453fcd9d5a3853d Mon Sep 17 00:00:00 2001 From: "Byczkowski, Jakub" Date: Fri, 4 Aug 2017 13:52:26 -0700 Subject: IB/hfi1: Remove lstate from hfi1_pportdata Do not track logical state separately from host_link_state. Deduce logical state from host_link_state when required. Transitions in set_link_state and goto_offline already make sure host_link_state reflects hardware's logical state properly. Reviewed-by: Ira Weiny Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 52 +++++++++++++++++++-------------------- drivers/infiniband/hw/hfi1/chip.h | 3 +-- drivers/infiniband/hw/hfi1/hfi.h | 17 ------------- drivers/infiniband/hw/hfi1/mad.c | 2 +- 4 files changed, 28 insertions(+), 46 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index fdeec8abb6f1..2320b8fe9dbc 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1065,6 +1065,7 @@ static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, static int read_idle_sma(struct hfi1_devdata *dd, u64 *data); static int thermal_init(struct hfi1_devdata *dd); +static void update_statusp(struct hfi1_pportdata *ppd, u32 state); static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, @@ -10261,8 +10262,10 @@ static void force_logical_link_state_down(struct hfi1_pportdata *ppd) write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0); write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0); - /* call again to adjust ppd->statusp, if needed */ - get_logical_state(ppd); + /* adjust ppd->statusp, if needed */ + update_statusp(ppd, IB_PORT_DOWN); + + dd_dev_info(ppd->dd, "logical state forced to LINK_DOWN\n"); } /* @@ -10458,11 +10461,11 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd) } /* - * driver_logical_state - convert the driver's notion of a port's + * driver_lstate - convert the driver's notion of a port's * state (an HLS_*) into a logical state (a IB_PORT_*). Return -1 * (converted to a u32) to indicate error. */ -u32 driver_logical_state(struct hfi1_pportdata *ppd) +u32 driver_lstate(struct hfi1_pportdata *ppd) { if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN)) return IB_PORT_DOWN; @@ -12651,20 +12654,8 @@ const char *opa_pstate_name(u32 pstate) return "unknown"; } -/* - * Read the hardware link state and set the driver's cached value of it. - * Return the (new) current value. 
- */ -u32 get_logical_state(struct hfi1_pportdata *ppd) +static void update_statusp(struct hfi1_pportdata *ppd, u32 state) { - u32 new_state; - - new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd)); - if (new_state != ppd->lstate) { - dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n", - opa_lstate_name(new_state), new_state); - ppd->lstate = new_state; - } /* * Set port status flags in the page mapped into userspace * memory. Do it here to ensure a reliable state - this is @@ -12674,7 +12665,7 @@ u32 get_logical_state(struct hfi1_pportdata *ppd) * function. */ if (ppd->statusp) { - switch (ppd->lstate) { + switch (state) { case IB_PORT_DOWN: case IB_PORT_INIT: *ppd->statusp &= ~(HFI1_STATUS_IB_CONF | @@ -12688,10 +12679,9 @@ u32 get_logical_state(struct hfi1_pportdata *ppd) break; } } - return ppd->lstate; } -/** +/* * wait_logical_linkstate - wait for an IB link state change to occur * @ppd: port device * @state: the state to wait for @@ -12705,18 +12695,29 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs) { unsigned long timeout; + u32 new_state; timeout = jiffies + msecs_to_jiffies(msecs); while (1) { - if (get_logical_state(ppd) == state) - return 0; - if (time_after(jiffies, timeout)) + new_state = chip_to_opa_lstate(ppd->dd, + read_logical_state(ppd->dd)); + if (new_state == state) break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for link state 0x%x\n", + state); + return -ETIMEDOUT; + } msleep(20); } - dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state); - return -ETIMEDOUT; + update_statusp(ppd, state); + dd_dev_info(ppd->dd, + "logical state changed to %s (0x%x)\n", + opa_lstate_name(state), + state); + return 0; } /* @@ -14855,7 +14856,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, * Set the initial values to reasonable default, will be set * for real when link is up. 
*/ - ppd->lstate = IB_PORT_DOWN; ppd->overrun_threshold = 0x4; ppd->phy_error_threshold = 0xf; ppd->port_crc_mode_enabled = link_crc_mask; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 6a0c691f1385..f7083025fb04 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -748,12 +748,11 @@ int is_ax(struct hfi1_devdata *dd); int is_bx(struct hfi1_devdata *dd); u32 read_physical_state(struct hfi1_devdata *dd); u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate); -u32 get_logical_state(struct hfi1_pportdata *ppd); void cache_physical_state(struct hfi1_pportdata *ppd); const char *opa_lstate_name(u32 lstate); const char *opa_pstate_name(u32 pstate); u32 driver_physical_state(struct hfi1_pportdata *ppd); -u32 driver_logical_state(struct hfi1_pportdata *ppd); +u32 driver_lstate(struct hfi1_pportdata *ppd); int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok); int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index fb5f8394fbed..e66e8f96ceab 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -591,8 +591,6 @@ struct hfi1_pportdata { struct mutex hls_lock; u32 host_link_state; - u32 lstate; /* logical link state */ - /* these are the "32 bit" regs */ u32 ibmtu; /* The MTU programmed for this unit */ @@ -1296,21 +1294,6 @@ static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) int hfi1_reset_device(int); -/* return the driver's idea of the logical OPA port state */ -static inline u32 driver_lstate(struct hfi1_pportdata *ppd) -{ - /* - * The driver does some processing from the time the logical - * link state is at INIT to the time the SM can be notified - * as such. Return IB_PORT_DOWN until the software state - * is ready. - */ - if (ppd->lstate == IB_PORT_INIT && !(ppd->host_link_state & HLS_UP)) - return IB_PORT_DOWN; - else - return ppd->lstate; -} - /* return the driver's idea of the physical OPA port state */ static inline u32 driver_pstate(struct hfi1_pportdata *ppd) { diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 11be4d19e607..cd1f6f841f34 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1117,7 +1117,7 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd, u32 logical_new, u32 physical_new) { u32 physical_old = driver_physical_state(ppd); - u32 logical_old = driver_logical_state(ppd); + u32 logical_old = driver_lstate(ppd); int ret, logical_allowed, physical_allowed; ret = logical_transition_allowed(logical_old, logical_new); -- cgit v1.2.3-59-g8ed1b From 64a296f579303322ebec9edae09cf87240b1ad78 Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Fri, 4 Aug 2017 13:52:32 -0700 Subject: IB/hfi1: Use host_link_state to read state when DC is shut down When the DC is shut down (e.g. by disconnecting the cable), the driver should use host_link_state to get the port's current physical state. The physical state is normally read from the DC's CSRs, and while the DC is shut down those registers are not updated when the state changes.
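A hypothetical caller, sketched here only to illustrate the intended behavior of the hunk below (report_pstate_sketch() is not a function in the driver):

/*
 * With the dd->dc_shutdown test added to driver_pstate(), a state
 * query issued while the DC is powered down reports the cached
 * host_link_state instead of stale DC CSR contents.
 */
static void report_pstate_sketch(struct hfi1_pportdata *ppd)
{
	u32 pstate = driver_pstate(ppd);

	dd_dev_info(ppd->dd, "physical state 0x%x\n", pstate);
}
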
Reviewed-by: Jakub Byczkowski Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index e66e8f96ceab..728ed457e447 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1297,6 +1297,13 @@ int hfi1_reset_device(int); /* return the driver's idea of the physical OPA port state */ static inline u32 driver_pstate(struct hfi1_pportdata *ppd) { + /* + * When DC is shut down and state is changed, its CSRs are not + * impacted, therefore host_link_state should be used to get + * current physical state. + */ + if (ppd->dd->dc_shutdown) + return driver_physical_state(ppd); /* * The driver does some processing from the time the physical * link state is at LINKUP to the time the SM can be notified -- cgit v1.2.3-59-g8ed1b From f2a3bc00a03c2cc9caa40c8867de973fd4e48c6a Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 4 Aug 2017 13:52:38 -0700 Subject: IB/hfi1: Protect context array set/clear with spinlock The rcd array can be accessed from user context or during interrupts. Protecting this with a mutex isn't a good idea because the mutex should not be used from an IRQ. Protect the allocation and freeing of rcd array elements with a spinlock. Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 10 +- drivers/infiniband/hw/hfi1/file_ops.c | 171 ++++++++++++---------------- drivers/infiniband/hw/hfi1/hfi.h | 8 +- drivers/infiniband/hw/hfi1/init.c | 199 +++++++++++++++++++++++---------- drivers/infiniband/hw/hfi1/vnic_main.c | 22 +--- 5 files changed, 229 insertions(+), 181 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2320b8fe9dbc..2cdf09dfcb55 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -15054,10 +15054,16 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; - ret = hfi1_create_ctxts(dd); + ret = hfi1_create_kctxts(dd); if (ret) goto bail_cleanup; + /* + * Initialize aspm, to be done after gen3 transition and setting up + * contexts and before enabling interrupts + */ + aspm_init(dd); + dd->rcvhdrsize = DEFAULT_RCVHDRSIZE; /* * rcd[0] is guaranteed to be valid by this point. 
Also, all @@ -15076,7 +15082,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, goto bail_cleanup; } - /* use contexts created by hfi1_create_ctxts */ + /* use contexts created by hfi1_create_kctxts */ ret = set_up_interrupts(dd); if (ret) goto bail_cleanup; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index a0c13fa5babb..7361366d80e4 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -79,8 +79,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma); static u64 kvirt_to_phys(void *addr); static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); -static int init_subctxts(struct hfi1_ctxtdata *uctxt, - const struct hfi1_user_info *uinfo); +static void init_subctxts(struct hfi1_ctxtdata *uctxt, + const struct hfi1_user_info *uinfo); static int init_user_ctxt(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); static void user_init(struct hfi1_ctxtdata *uctxt); @@ -758,7 +758,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) goto done; hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt); - mutex_lock(&hfi1_mutex); flush_wc(); /* drain user sdma queue */ @@ -778,6 +777,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; *ev = 0; + mutex_lock(&hfi1_mutex); __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); fdata->uctxt = NULL; hfi1_rcd_put(uctxt); /* fdata reference */ @@ -844,6 +844,38 @@ static u64 kvirt_to_phys(void *addr) return paddr; } +static int complete_subctxt(struct hfi1_filedata *fd) +{ + int ret; + + /* + * sub-context info can only be set up after the base context + * has been completed. + */ + ret = wait_event_interruptible( + fd->uctxt->wait, + !test_bit(HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags)); + + if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) + ret = -ENOMEM; + + /* The only thing a sub context needs is the user_xxx stuff */ + if (!ret) { + fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id); + ret = init_user_ctxt(fd, fd->uctxt); + } + + if (ret) { + hfi1_rcd_put(fd->uctxt); + fd->uctxt = NULL; + mutex_lock(&hfi1_mutex); + __clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); + mutex_unlock(&hfi1_mutex); + } + + return ret; +} + static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) { int ret; @@ -854,24 +886,25 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) if (swmajor != HFI1_USER_SWMAJOR) return -ENODEV; + if (uinfo->subctxt_cnt > HFI1_MAX_SHARED_CTXTS) + return -EINVAL; + swminor = uinfo->userversion & 0xffff; + /* + * Acquire the mutex to protect against multiple creations of what + * could be a shared base context. + */ mutex_lock(&hfi1_mutex); /* - * Get a sub context if necessary. + * Get a sub context if available (fd->uctxt will be set). * ret < 0 error, 0 no context, 1 sub-context found */ - ret = 0; - if (uinfo->subctxt_cnt) { - ret = find_sub_ctxt(fd, uinfo); - if (ret > 0) - fd->rec_cpu_num = - hfi1_get_proc_affinity(fd->uctxt->numa_id); - } + ret = find_sub_ctxt(fd, uinfo); /* - * Allocate a base context if context sharing is not required or we - * couldn't find a sub context. + * Allocate a base context if context sharing is not required or a + * sub context wasn't found. 
*/ if (!ret) ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt); @@ -879,31 +912,10 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) mutex_unlock(&hfi1_mutex); /* Depending on the context type, do the appropriate init */ - if (ret > 0) { - /* - * sub-context info can only be set up after the base - * context has been completed. - */ - ret = wait_event_interruptible(fd->uctxt->wait, !test_bit( - HFI1_CTXT_BASE_UNINIT, - &fd->uctxt->event_flags)); - if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) - ret = -ENOMEM; - - /* The only thing a sub context needs is the user_xxx stuff */ - if (!ret) - ret = init_user_ctxt(fd, fd->uctxt); - - if (ret) - clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); - - } else if (!ret) { + switch (ret) { + case 0: ret = setup_base_ctxt(fd, uctxt); if (uctxt->subctxt_cnt) { - /* If there is an error, set the failed bit. */ - if (ret) - set_bit(HFI1_CTXT_BASE_FAILED, - &uctxt->event_flags); /* * Base context is done, notify anybody using a * sub-context that is waiting for this completion @@ -911,14 +923,12 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); wake_up(&uctxt->wait); } - if (ret) - deallocate_ctxt(uctxt); - } - - /* If an error occurred, clear the reference */ - if (ret && fd->uctxt) { - hfi1_rcd_put(fd->uctxt); - fd->uctxt = NULL; + break; + case 1: + ret = complete_subctxt(fd); + break; + default: + break; } return ret; @@ -926,7 +936,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) /* * The hfi1_mutex must be held when this function is called. It is - * necessary to ensure serialized access to the bitmask in_use_ctxts. + * necessary to ensure serialized creation of shared contexts. */ static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo) @@ -935,6 +945,9 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd = fd->dd; u16 subctxt; + if (!uinfo->subctxt_cnt) + return 0; + for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { struct hfi1_ctxtdata *uctxt = dd->rcd[i]; @@ -983,7 +996,6 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_ctxtdata **cd) { struct hfi1_ctxtdata *uctxt; - u16 ctxt; int ret, numa; if (dd->flags & HFI1_FROZEN) { @@ -997,22 +1009,9 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, return -EIO; } - /* - * This check is sort of redundant to the next EBUSY error. It would - * also indicate an inconsistancy in the driver if this value was - * zero, but there were still contexts available. - */ if (!dd->freectxts) return -EBUSY; - for (ctxt = dd->first_dyn_alloc_ctxt; - ctxt < dd->num_rcv_contexts; ctxt++) - if (!dd->rcd[ctxt]) - break; - - if (ctxt == dd->num_rcv_contexts) - return -EBUSY; - /* * If we don't have a NUMA node requested, preference is towards * device NUMA node. 
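The calling convention this patch converges on is worth spelling out; below is a minimal sketch with a hypothetical caller (create_one_ctxt_sketch() does not exist in the driver), assuming the hfi1_create_ctxtdata()/hfi1_free_ctxt() signatures introduced later in this patch:

/*
 * Hypothetical caller: the context index is now chosen inside
 * hfi1_create_ctxtdata() under dd->uctxt_lock, so callers only test
 * the return code and release with hfi1_free_ctxt() on failure.
 */
static int create_one_ctxt_sketch(struct hfi1_pportdata *ppd, int numa)
{
	struct hfi1_ctxtdata *uctxt;
	int ret;

	ret = hfi1_create_ctxtdata(ppd, numa, &uctxt);
	if (ret < 0)
		return ret;	/* e.g. -EBUSY when the rcd array is full */

	/* ... per-context setup would go here ... */

	hfi1_free_ctxt(ppd->dd, uctxt);	/* clears the array slot, drops the kref */
	return 0;
}
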
@@ -1022,11 +1021,10 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, numa = cpu_to_node(fd->rec_cpu_num); else numa = numa_node_id(); - uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa); - if (!uctxt) { - dd_dev_err(dd, - "Unable to allocate ctxtdata memory, failing open\n"); - return -ENOMEM; + ret = hfi1_create_ctxtdata(dd->pport, numa, &uctxt); + if (ret < 0) { + dd_dev_err(dd, "user ctxtdata allocation failed\n"); + return ret; } hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)", uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num, @@ -1035,8 +1033,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, /* * Allocate and enable a PIO send context. */ - uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, - uctxt->dd->node); + uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, dd->node); if (!uctxt->sc) { ret = -ENOMEM; goto ctxdata_free; @@ -1048,20 +1045,13 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, goto ctxdata_free; /* - * Setup sub context resources if the user-level has requested + * Setup sub context information if the user-level has requested * sub contexts. * This has to be done here so the rest of the sub-contexts find the - * proper master. + * proper base context. */ - if (uinfo->subctxt_cnt) { - ret = init_subctxts(uctxt, uinfo); - /* - * On error, we don't need to disable and de-allocate the - * send context because it will be done during file close - */ - if (ret) - goto ctxdata_free; - } + if (uinfo->subctxt_cnt) + init_subctxts(uctxt, uinfo); uctxt->userversion = uinfo->userversion; uctxt->flags = hfi1_cap_mask; /* save current flag state */ init_waitqueue_head(&uctxt->wait); @@ -1081,9 +1071,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, return 0; ctxdata_free: - *cd = NULL; - dd->rcd[ctxt] = NULL; - hfi1_rcd_put(uctxt); + hfi1_free_ctxt(dd, uctxt); return ret; } @@ -1093,28 +1081,17 @@ static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt) hfi1_stats.sps_ctxts--; if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts) aspm_enable_all(uctxt->dd); - - /* _rcd_put() should be done after releasing mutex */ - uctxt->dd->rcd[uctxt->ctxt] = NULL; mutex_unlock(&hfi1_mutex); - hfi1_rcd_put(uctxt); /* dd reference */ + + hfi1_free_ctxt(uctxt->dd, uctxt); } -static int init_subctxts(struct hfi1_ctxtdata *uctxt, - const struct hfi1_user_info *uinfo) +static void init_subctxts(struct hfi1_ctxtdata *uctxt, + const struct hfi1_user_info *uinfo) { - u16 num_subctxts; - - num_subctxts = uinfo->subctxt_cnt; - if (num_subctxts > HFI1_MAX_SHARED_CTXTS) - return -EINVAL; - uctxt->subctxt_cnt = uinfo->subctxt_cnt; uctxt->subctxt_id = uinfo->subctxt_id; - uctxt->redirect_seq_cnt = 1; set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); - - return 0; } static int setup_subctxt(struct hfi1_ctxtdata *uctxt) @@ -1302,8 +1279,8 @@ static int setup_base_ctxt(struct hfi1_filedata *fd, return 0; setup_failed: - /* Call _free_ctxtdata, not _rcd_put(). We still need the context. 
*/ - hfi1_free_ctxtdata(dd, uctxt); + set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags); + deallocate_ctxt(uctxt); return ret; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 728ed457e447..bb003ff1df96 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -273,7 +273,6 @@ struct hfi1_ctxtdata { u16 poll_type; /* receive packet sequence counter */ u8 seq_cnt; - u8 redirect_seq_cnt; /* ctxt rcvhdrq head offset */ u32 head; /* QPs waiting for context processing */ @@ -1263,9 +1262,10 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd); int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd); -int hfi1_create_ctxts(struct hfi1_devdata *dd); -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, - int numa); +int hfi1_create_kctxts(struct hfi1_devdata *dd); +int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, + struct hfi1_ctxtdata **rcd); +void hfi1_free_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index da7cd5bd27ca..23f0bbc9c436 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -126,71 +126,67 @@ static struct idr hfi1_unit_table; u32 hfi1_cpulist_count; unsigned long *hfi1_cpulist; -/* - * Common code for creating the receive context array. - */ -int hfi1_create_ctxts(struct hfi1_devdata *dd) +static int hfi1_create_kctxt(struct hfi1_devdata *dd, + struct hfi1_pportdata *ppd) { - u16 i; + struct hfi1_ctxtdata *rcd; int ret; /* Control context has to be always 0 */ BUILD_BUG_ON(HFI1_CTRL_CTXT != 0); + ret = hfi1_create_ctxtdata(ppd, dd->node, &rcd); + if (ret < 0) { + dd_dev_err(dd, "Kernel receive context allocation failed\n"); + return ret; + } + + /* + * Set up the kernel context flags here and now because they use + * default values for all receive side memories. User contexts will + * be handled as they are created. 
+ */ + rcd->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | + HFI1_CAP_KGET(NODROP_RHQ_FULL) | + HFI1_CAP_KGET(NODROP_EGR_FULL) | + HFI1_CAP_KGET(DMA_RTAIL); + + /* Control context must use DMA_RTAIL */ + if (rcd->ctxt == HFI1_CTRL_CTXT) + rcd->flags |= HFI1_CAP_DMA_RTAIL; + rcd->seq_cnt = 1; + + rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node); + if (!rcd->sc) { + dd_dev_err(dd, "Kernel send context allocation failed\n"); + return -ENOMEM; + } + hfi1_init_ctxt(rcd->sc); + + return 0; +} + +/* + * Create the receive context array and one or more kernel contexts + */ +int hfi1_create_kctxts(struct hfi1_devdata *dd) +{ + u16 i; + int ret; + dd->rcd = kzalloc_node(dd->num_rcv_contexts * sizeof(*dd->rcd), GFP_KERNEL, dd->node); if (!dd->rcd) - goto nomem; + return -ENOMEM; - /* create one or more kernel contexts */ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { - struct hfi1_pportdata *ppd; - struct hfi1_ctxtdata *rcd; - - ppd = dd->pport + (i % dd->num_pports); - - /* dd->rcd[i] gets assigned inside the callee */ - rcd = hfi1_create_ctxtdata(ppd, i, dd->node); - if (!rcd) { - dd_dev_err(dd, - "Unable to allocate kernel receive context, failing\n"); - goto nomem; - } - /* - * Set up the kernel context flags here and now because they - * use default values for all receive side memories. User - * contexts will be handled as they are created. - */ - rcd->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | - HFI1_CAP_KGET(NODROP_RHQ_FULL) | - HFI1_CAP_KGET(NODROP_EGR_FULL) | - HFI1_CAP_KGET(DMA_RTAIL); - - /* Control context must use DMA_RTAIL */ - if (rcd->ctxt == HFI1_CTRL_CTXT) - rcd->flags |= HFI1_CAP_DMA_RTAIL; - rcd->seq_cnt = 1; - - rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node); - if (!rcd->sc) { - dd_dev_err(dd, - "Unable to allocate kernel send context, failing\n"); - goto nomem; - } - - hfi1_init_ctxt(rcd->sc); + ret = hfi1_create_kctxt(dd, dd->pport); + if (ret) + goto bail; } - /* - * Initialize aspm, to be done after gen3 transition and setting up - * contexts and before enabling interrupts - */ - aspm_init(dd); - return 0; -nomem: - ret = -ENOMEM; - +bail: for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) hfi1_rcd_put(dd->rcd[i]); @@ -208,6 +204,11 @@ static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) kref_init(&rcd->kref); } +/** + * hfi1_rcd_free - When reference is zero clean up. + * @kref: pointer to an initialized rcd data structure + * + */ static void hfi1_rcd_free(struct kref *kref) { struct hfi1_ctxtdata *rcd = @@ -217,6 +218,12 @@ static void hfi1_rcd_free(struct kref *kref) kfree(rcd); } +/** + * hfi1_rcd_put - decrement reference for rcd + * @rcd: pointer to an initialized rcd data structure + * + * Use this to put a reference after the init. + */ int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) { if (rcd) @@ -225,16 +232,58 @@ int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) return 0; } +/** + * hfi1_rcd_get - increment reference for rcd + * @rcd: pointer to an initialized rcd data structure + * + * Use this to get a reference after the init. + */ void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) { kref_get(&rcd->kref); } +/** + * allocate_rcd_index - allocate an rcd index from the rcd array + * @dd: pointer to a valid devdata structure + * @rcd: rcd data structure to assign + * @index: pointer to index that is allocated + * + * Find an empty index in the rcd array, and assign the given rcd to it. + * If the array is full, we are EBUSY. 
+ * + */ +static int allocate_rcd_index(struct hfi1_devdata *dd, + struct hfi1_ctxtdata *rcd, u16 *index) +{ + unsigned long flags; + u16 ctxt; + + spin_lock_irqsave(&dd->uctxt_lock, flags); + for (ctxt = 0; ctxt < dd->num_rcv_contexts; ctxt++) + if (!dd->rcd[ctxt]) + break; + + if (ctxt < dd->num_rcv_contexts) { + rcd->ctxt = ctxt; + dd->rcd[ctxt] = rcd; + hfi1_rcd_init(rcd); + } + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + if (ctxt >= dd->num_rcv_contexts) + return -EBUSY; + + *index = ctxt; + + return 0; +} + /* * Common code for user and kernel context setup. */ -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, - int numa) +int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, + struct hfi1_ctxtdata **context) { struct hfi1_devdata *dd = ppd->dd; struct hfi1_ctxtdata *rcd; @@ -248,9 +297,18 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa); if (rcd) { u32 rcvtids, max_entries; + u16 ctxt; + int ret; hfi1_cdbg(PROC, "setting up context %u\n", ctxt); + ret = allocate_rcd_index(dd, rcd, &ctxt); + if (ret) { + *context = NULL; + kfree(rcd); + return ret; + } + INIT_LIST_HEAD(&rcd->qp_wait_list); hfi1_exp_tid_group_init(&rcd->tid_group_list); hfi1_exp_tid_group_init(&rcd->tid_used_list); @@ -258,8 +316,6 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, rcd->ppd = ppd; rcd->dd = dd; __set_bit(0, rcd->in_use_ctxts); - rcd->ctxt = ctxt; - dd->rcd[ctxt] = rcd; rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; @@ -363,15 +419,34 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, goto bail; } - hfi1_rcd_init(rcd); + *context = rcd; + return 0; } - return rcd; + bail: - dd->rcd[ctxt] = NULL; - kfree(rcd->egrbufs.rcvtids); - kfree(rcd->egrbufs.buffers); - kfree(rcd); - return NULL; + *context = NULL; + hfi1_free_ctxt(dd, rcd); + return -ENOMEM; +} + +/** + * hfi1_free_ctxt + * @dd: Pointer to a valid device + * @rcd: pointer to an initialized rcd data structure + * + * This is the "free" to match the _create_ctxtdata (alloc) function. + * This is the final "put" for the kref.
+ */ +void hfi1_free_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) +{ + unsigned long flags; + + if (rcd) { + spin_lock_irqsave(&dd->uctxt_lock, flags); + dd->rcd[rcd->ctxt] = NULL; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + hfi1_rcd_put(rcd); + } } /* diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 89b056489769..c91456c16cdb 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -106,22 +106,13 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata **vnic_ctxt) { struct hfi1_ctxtdata *uctxt; - u16 ctxt; int ret; if (dd->flags & HFI1_FROZEN) return -EIO; - for (ctxt = dd->first_dyn_alloc_ctxt; - ctxt < dd->num_rcv_contexts; ctxt++) - if (!dd->rcd[ctxt]) - break; - - if (ctxt == dd->num_rcv_contexts) - return -EBUSY; - - uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node); - if (!uctxt) { + ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt); + if (ret < 0) { dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); return -ENOMEM; } @@ -156,11 +147,10 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, return ret; bail: /* - * hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will + * hfi1_free_ctxt() will call hfi1_free_ctxtdata(), which will * release send_context structure if uctxt->sc is not null */ - dd->rcd[uctxt->ctxt] = NULL; - hfi1_rcd_put(uctxt); + hfi1_free_ctxt(dd, uctxt); dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); return ret; } @@ -201,14 +191,14 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; spin_unlock_irqrestore(&dd->uctxt_lock, flags); - dd->rcd[uctxt->ctxt] = NULL; uctxt->event_flags = 0; hfi1_clear_tids(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); hfi1_stats.sps_ctxts--; - hfi1_rcd_put(uctxt); + + hfi1_free_ctxt(dd, uctxt); } void hfi1_vnic_setup(struct hfi1_devdata *dd) -- cgit v1.2.3-59-g8ed1b From d295dbeb2a0c93364444e76b3bb30f587a823e0e Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 4 Aug 2017 13:52:44 -0700 Subject: IB/hfi1: User context locking is inconsistent A mix of mutexes and spinlocks protects receive context (rcd/uctxt) information, and they are not used consistently. Use the mutex to protect device receive context information only. Use the spinlock to protect sub context information only. Protect access to items in the rcd array with a spinlock and reference count. Remove spinlock around dd->rcd array cleanup. Since interrupts are disabled and cleaned up before this point, this lock is not useful. Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J.
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/aspm.h | 35 ++++--- drivers/infiniband/hw/hfi1/chip.c | 30 ++++-- drivers/infiniband/hw/hfi1/debugfs.c | 32 ++++-- drivers/infiniband/hw/hfi1/driver.c | 71 ++++++++----- drivers/infiniband/hw/hfi1/file_ops.c | 185 +++++++++++++++++++++------------ drivers/infiniband/hw/hfi1/hfi.h | 6 +- drivers/infiniband/hw/hfi1/init.c | 134 +++++++++++++++--------- drivers/infiniband/hw/hfi1/trace_rx.h | 12 +-- drivers/infiniband/hw/hfi1/vnic_main.c | 12 +-- 9 files changed, 326 insertions(+), 191 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 3f9a071ccac3..522b40ed9937 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -240,11 +240,14 @@ static inline void aspm_disable_all(struct hfi1_devdata *dd) u16 i; for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = dd->rcd[i]; - del_timer_sync(&rcd->aspm_timer); - spin_lock_irqsave(&rcd->aspm_lock, flags); - rcd->aspm_intr_enable = false; - spin_unlock_irqrestore(&rcd->aspm_lock, flags); + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) { + del_timer_sync(&rcd->aspm_timer); + spin_lock_irqsave(&rcd->aspm_lock, flags); + rcd->aspm_intr_enable = false; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); + hfi1_rcd_put(rcd); + } } aspm_disable(dd); @@ -264,11 +267,14 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd) return; for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = dd->rcd[i]; - spin_lock_irqsave(&rcd->aspm_lock, flags); - rcd->aspm_intr_enable = true; - rcd->aspm_enabled = true; - spin_unlock_irqrestore(&rcd->aspm_lock, flags); + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) { + spin_lock_irqsave(&rcd->aspm_lock, flags); + rcd->aspm_intr_enable = true; + rcd->aspm_enabled = true; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); + hfi1_rcd_put(rcd); + } } } @@ -284,13 +290,18 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) static inline void aspm_init(struct hfi1_devdata *dd) { + struct hfi1_ctxtdata *rcd; u16 i; spin_lock_init(&dd->aspm_lock); dd->aspm_supported = aspm_hw_l1_supported(dd); - for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) - aspm_ctx_init(dd->rcd[i]); + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + aspm_ctx_init(rcd); + hfi1_rcd_put(rcd); + } /* Start with ASPM disabled */ aspm_hw_set_l1_ent_latency(dd); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2cdf09dfcb55..1446c16bc8a8 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6785,13 +6785,17 @@ static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze) static void rxe_freeze(struct hfi1_devdata *dd) { int i; + struct hfi1_ctxtdata *rcd; /* disable port */ clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); /* disable all receive contexts */ - for (i = 0; i < dd->num_rcv_contexts; i++) - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, dd->rcd[i]); + for (i = 0; i < dd->num_rcv_contexts; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, rcd); + hfi1_rcd_put(rcd); + } } /* @@ -6804,20 +6808,23 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) { u32 rcvmask; u16 i; + struct hfi1_ctxtdata *rcd; /* enable all kernel contexts */ for (i = 0; i < dd->num_rcv_contexts; i++) { - struct hfi1_ctxtdata *rcd = dd->rcd[i]; + rcd = hfi1_rcd_get_by_index(dd, i); 
/* Ensure all non-user contexts(including vnic) are enabled */ - if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) + if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) { + hfi1_rcd_put(rcd); continue; - + } rcvmask = HFI1_RCVCTRL_CTXT_ENB; /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; hfi1_rcvctrl(dd, rcvmask, rcd); + hfi1_rcd_put(rcd); } /* enable port */ @@ -8104,7 +8111,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) char *err_detail; if (likely(source < dd->num_rcv_contexts)) { - rcd = dd->rcd[source]; + rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { /* Check for non-user contexts, including vnic */ if ((source < dd->first_dyn_alloc_ctxt) || @@ -8112,6 +8119,8 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) rcd->do_interrupt(rcd, 0); else handle_user_interrupt(rcd); + + hfi1_rcd_put(rcd); return; /* OK */ } /* received an interrupt, but no rcd */ @@ -8133,12 +8142,14 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) char *err_detail; if (likely(source < dd->num_rcv_contexts)) { - rcd = dd->rcd[source]; + rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { /* only pay attention to user urgent interrupts */ if ((source >= dd->first_dyn_alloc_ctxt) && (!rcd->sc || (rcd->sc->type == SC_USER))) handle_user_interrupt(rcd); + + hfi1_rcd_put(rcd); return; /* OK */ } /* received an interrupt, but no rcd */ @@ -8343,7 +8354,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data) int disposition; int present; - trace_hfi1_receive_interrupt(dd, rcd->ctxt); + trace_hfi1_receive_interrupt(dd, rcd); this_cpu_inc(*dd->int_counter); aspm_ctx_disable(rcd); @@ -13030,7 +13041,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) me->type = IRQ_SDMA; } else if (first_rx <= i && i < last_rx) { idx = i - first_rx; - rcd = dd->rcd[idx]; + rcd = hfi1_rcd_get_by_index(dd, idx); if (rcd) { /* * Set the interrupt register and mask for this @@ -13049,6 +13060,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) remap_intr(dd, IS_RCVAVAIL_START + idx, i); me->type = IRQ_RCVCTXT; rcd->msix_intr = i; + hfi1_rcd_put(rcd); } } else { /* not in our expected range - complain, then diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index e9fa3c293e42..550119c6f1ee 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -173,12 +173,15 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v) u64 n_packets = 0, n_bytes = 0; struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; struct hfi1_devdata *dd = dd_from_dev(ibd); + struct hfi1_ctxtdata *rcd; for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { - if (!dd->rcd[j]) - continue; - n_packets += dd->rcd[j]->opstats->stats[i].n_packets; - n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + rcd = hfi1_rcd_get_by_index(dd, j); + if (rcd) { + n_packets += rcd->opstats->stats[i].n_packets; + n_bytes += rcd->opstats->stats[i].n_bytes; + } + hfi1_rcd_put(rcd); } if (!n_packets && !n_bytes) return SEQ_SKIP; @@ -231,6 +234,7 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v) u64 n_packets = 0; struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; struct hfi1_devdata *dd = dd_from_dev(ibd); + struct hfi1_ctxtdata *rcd; if (v == SEQ_START_TOKEN) { seq_puts(s, "Ctx:npkts\n"); @@ -240,11 +244,14 @@ static int _ctx_stats_seq_show(struct 
seq_file *s, void *v) spos = v; i = *spos; - if (!dd->rcd[i]) + rcd = hfi1_rcd_get_by_index(dd, i); + if (!rcd) return SEQ_SKIP; - for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++) - n_packets += dd->rcd[i]->opstats->stats[j].n_packets; + for (j = 0; j < ARRAY_SIZE(rcd->opstats->stats); j++) + n_packets += rcd->opstats->stats[j].n_packets; + + hfi1_rcd_put(rcd); if (!n_packets) return SEQ_SKIP; @@ -1098,12 +1105,15 @@ static int _fault_stats_seq_show(struct seq_file *s, void *v) u64 n_packets = 0, n_bytes = 0; struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; struct hfi1_devdata *dd = dd_from_dev(ibd); + struct hfi1_ctxtdata *rcd; for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { - if (!dd->rcd[j]) - continue; - n_packets += dd->rcd[j]->opstats->stats[i].n_packets; - n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + rcd = hfi1_rcd_get_by_index(dd, j); + if (rcd) { + n_packets += rcd->opstats->stats[i].n_packets; + n_bytes += rcd->opstats->stats[i].n_bytes; + } + hfi1_rcd_put(rcd); } if (!n_packets && !n_bytes) return SEQ_SKIP; diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 0b7ca0e05320..14f2a00c13c2 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -839,6 +839,7 @@ bail: static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) { + struct hfi1_ctxtdata *rcd; u16 i; /* @@ -847,18 +848,27 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) * interrupt handler for all statically allocated kernel contexts. */ if (ctxt >= dd->first_dyn_alloc_ctxt) { - dd->rcd[ctxt]->do_interrupt = - &handle_receive_interrupt_nodma_rtail; + rcd = hfi1_rcd_get_by_index(dd, ctxt); + if (rcd) { + rcd->do_interrupt = + &handle_receive_interrupt_nodma_rtail; + hfi1_rcd_put(rcd); + } return; } - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) - dd->rcd[i]->do_interrupt = - &handle_receive_interrupt_nodma_rtail; + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + rcd->do_interrupt = + &handle_receive_interrupt_nodma_rtail; + hfi1_rcd_put(rcd); + } } static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt) { + struct hfi1_ctxtdata *rcd; u16 i; /* @@ -867,27 +877,39 @@ static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt) * interrupt handler for all statically allocated kernel contexts. 
*/ if (ctxt >= dd->first_dyn_alloc_ctxt) { - dd->rcd[ctxt]->do_interrupt = - &handle_receive_interrupt_dma_rtail; + rcd = hfi1_rcd_get_by_index(dd, ctxt); + if (rcd) { + rcd->do_interrupt = + &handle_receive_interrupt_dma_rtail; + hfi1_rcd_put(rcd); + } return; } - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) - dd->rcd[i]->do_interrupt = - &handle_receive_interrupt_dma_rtail; + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + rcd->do_interrupt = + &handle_receive_interrupt_dma_rtail; + hfi1_rcd_put(rcd); + } } void set_all_slowpath(struct hfi1_devdata *dd) { + struct hfi1_ctxtdata *rcd; u16 i; /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) { - struct hfi1_ctxtdata *rcd = dd->rcd[i]; - + rcd = hfi1_rcd_get_by_index(dd, i); + if (!rcd) + continue; if ((i < dd->first_dyn_alloc_ctxt) || - (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL))) + (rcd->sc && (rcd->sc->type == SC_KERNEL))) { rcd->do_interrupt = &handle_receive_interrupt; + } + hfi1_rcd_put(rcd); } } @@ -1068,6 +1090,7 @@ void receive_interrupt_work(struct work_struct *work) struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, linkstate_active_work); struct hfi1_devdata *dd = ppd->dd; + struct hfi1_ctxtdata *rcd; u16 i; /* Received non-SC15 packet implies neighbor_normal */ @@ -1078,8 +1101,12 @@ void receive_interrupt_work(struct work_struct *work) * Interrupt all statically allocated kernel contexts that could * have had an interrupt during auto activation. */ - for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) - force_recv_intr(dd->rcd[i]); + for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + force_recv_intr(rcd); + hfi1_rcd_put(rcd); + } } /* @@ -1270,10 +1297,8 @@ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, int hfi1_reset_device(int unit) { int ret; - u16 i; struct hfi1_devdata *dd = hfi1_lookup(unit); struct hfi1_pportdata *ppd; - unsigned long flags; int pidx; if (!dd) { @@ -1291,17 +1316,15 @@ int hfi1_reset_device(int unit) goto bail; } - spin_lock_irqsave(&dd->uctxt_lock, flags); + /* If there are any user/vnic contexts, we cannot reset */ + mutex_lock(&hfi1_mutex); if (dd->rcd) - for (i = dd->first_dyn_alloc_ctxt; - i < dd->num_rcv_contexts; i++) { - if (!dd->rcd[i]) - continue; - spin_unlock_irqrestore(&dd->uctxt_lock, flags); + if (hfi1_stats.sps_ctxts) { + mutex_unlock(&hfi1_mutex); ret = -EBUSY; goto bail; } - spin_unlock_irqrestore(&dd->uctxt_lock, flags); + mutex_unlock(&hfi1_mutex); for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7361366d80e4..ab8eb2bf48d8 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -757,7 +757,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) if (!uctxt) goto done; - hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt); + hfi1_cdbg(PROC, "closing ctxt %u:%u", uctxt->ctxt, fdata->subctxt); flush_wc(); /* drain user sdma queue */ @@ -769,6 +769,13 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) /* clean up rcv side */ hfi1_user_exp_rcv_free(fdata); + /* + * fdata->uctxt is used in the above cleanup. It is not ready to be + * removed until here. 
+ */ + fdata->uctxt = NULL; + hfi1_rcd_put(uctxt); + /* * Clear any left over, unhandled events so the next process that * gets this context doesn't get confused. @@ -777,16 +784,14 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; *ev = 0; - mutex_lock(&hfi1_mutex); + spin_lock_irqsave(&dd->uctxt_lock, flags); __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); - fdata->uctxt = NULL; - hfi1_rcd_put(uctxt); /* fdata reference */ if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { - mutex_unlock(&hfi1_mutex); + spin_unlock_irqrestore(&dd->uctxt_lock, flags); goto done; } + spin_unlock_irqrestore(&dd->uctxt_lock, flags); - spin_lock_irqsave(&dd->uctxt_lock, flags); /* * Disable receive context and interrupt available, reset all * RcvCtxtCtrl bits to default values. @@ -808,13 +813,11 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) set_pio_integrity(uctxt->sc); sc_disable(uctxt->sc); } - spin_unlock_irqrestore(&dd->uctxt_lock, flags); hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); uctxt->event_flags = 0; - mutex_unlock(&hfi1_mutex); deallocate_ctxt(uctxt); done: @@ -844,9 +847,22 @@ static u64 kvirt_to_phys(void *addr) return paddr; } +/** + * complete_subctxt + * @fd: valid filedata pointer + * + * Sub-context info can only be set up after the base context + * has been completed. This is indicated by the clearing of the + * HFI1_CTXT_BASE_UINIT bit. + * + * Wait for the bit to be cleared, and then complete the subcontext + * initialization. + * + */ static int complete_subctxt(struct hfi1_filedata *fd) { int ret; + unsigned long flags; /* * sub-context info can only be set up after the base context @@ -859,7 +875,7 @@ static int complete_subctxt(struct hfi1_filedata *fd) if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) ret = -ENOMEM; - /* The only thing a sub context needs is the user_xxx stuff */ + /* Finish the sub-context init */ if (!ret) { fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id); ret = init_user_ctxt(fd, fd->uctxt); @@ -868,9 +884,9 @@ static int complete_subctxt(struct hfi1_filedata *fd) if (ret) { hfi1_rcd_put(fd->uctxt); fd->uctxt = NULL; - mutex_lock(&hfi1_mutex); + spin_lock_irqsave(&fd->dd->uctxt_lock, flags); __clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); - mutex_unlock(&hfi1_mutex); + spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags); } return ret; @@ -911,14 +927,15 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) mutex_unlock(&hfi1_mutex); - /* Depending on the context type, do the appropriate init */ + /* Depending on the context type, finish the appropriate init */ switch (ret) { case 0: ret = setup_base_ctxt(fd, uctxt); if (uctxt->subctxt_cnt) { /* - * Base context is done, notify anybody using a - * sub-context that is waiting for this completion + * Base context is done (successfully or not), notify + * anybody using a sub-context that is waiting for + * this completion. */ clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); wake_up(&uctxt->wait); @@ -934,58 +951,97 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) return ret; } -/* - * The hfi1_mutex must be held when this function is called. It is - * necessary to ensure serialized creation of shared contexts. +/** + * match_ctxt + * @fd: valid filedata pointer + * @uinfo: user info to compare base context with + * @uctxt: context to compare uinfo to. 
+ * + * Compare the given context with the given information to see if it + * can be used for a sub context. */ -static int find_sub_ctxt(struct hfi1_filedata *fd, - const struct hfi1_user_info *uinfo) +static int match_ctxt(struct hfi1_filedata *fd, + const struct hfi1_user_info *uinfo, + struct hfi1_ctxtdata *uctxt) { - u16 i; struct hfi1_devdata *dd = fd->dd; + unsigned long flags; u16 subctxt; - if (!uinfo->subctxt_cnt) + /* Skip dynamically allocated kernel contexts */ + if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) return 0; - for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { - struct hfi1_ctxtdata *uctxt = dd->rcd[i]; + /* Skip ctxt if it doesn't match the requested one */ + if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) || + uctxt->jkey != generate_jkey(current_uid()) || + uctxt->subctxt_id != uinfo->subctxt_id || + uctxt->subctxt_cnt != uinfo->subctxt_cnt) + return 0; - /* Skip ctxts which are not yet open */ - if (!uctxt || - bitmap_empty(uctxt->in_use_ctxts, - HFI1_MAX_SHARED_CTXTS)) - continue; + /* Verify the sharing process matches the base */ + if (uctxt->userversion != uinfo->userversion) + return -EINVAL; - /* Skip dynamically allocted kernel contexts */ - if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) - continue; + /* Find an unused sub context */ + spin_lock_irqsave(&dd->uctxt_lock, flags); + if (bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { + /* context is being closed, do not use */ + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + return 0; + } - /* Skip ctxt if it doesn't match the requested one */ - if (memcmp(uctxt->uuid, uinfo->uuid, - sizeof(uctxt->uuid)) || - uctxt->jkey != generate_jkey(current_uid()) || - uctxt->subctxt_id != uinfo->subctxt_id || - uctxt->subctxt_cnt != uinfo->subctxt_cnt) - continue; + subctxt = find_first_zero_bit(uctxt->in_use_ctxts, + HFI1_MAX_SHARED_CTXTS); + if (subctxt >= uctxt->subctxt_cnt) { + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + return -EBUSY; + } - /* Verify the sharing process matches the master */ - if (uctxt->userversion != uinfo->userversion) - return -EINVAL; + fd->subctxt = subctxt; + __set_bit(fd->subctxt, uctxt->in_use_ctxts); + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + fd->uctxt = uctxt; + hfi1_rcd_get(uctxt); - /* Find an unused context */ - subctxt = find_first_zero_bit(uctxt->in_use_ctxts, - HFI1_MAX_SHARED_CTXTS); - if (subctxt >= uctxt->subctxt_cnt) - return -EBUSY; + return 1; +} - fd->uctxt = uctxt; - fd->subctxt = subctxt; +/** + * find_sub_ctxt + * @fd: valid filedata pointer + * @uinfo: matching info to use to find a possible context to share. + * + * The hfi1_mutex must be held when this function is called. It is + * necessary to ensure serialized creation of shared contexts. 
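+ * match_ctxt() takes dd->uctxt_lock inside that mutex while the + * in_use_ctxts bitmap is checked and a sub context slot is claimed.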
+ * + * Return: + * 0 No sub-context found + * 1 Subcontext found and allocated + * errno EINVAL (incorrect parameters) + * EBUSY (all sub contexts in use) + */ +static int find_sub_ctxt(struct hfi1_filedata *fd, + const struct hfi1_user_info *uinfo) +{ + struct hfi1_ctxtdata *uctxt; + struct hfi1_devdata *dd = fd->dd; + u16 i; + int ret; - hfi1_rcd_get(uctxt); - __set_bit(fd->subctxt, uctxt->in_use_ctxts); + if (!uinfo->subctxt_cnt) + return 0; - return 1; + for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { + uctxt = hfi1_rcd_get_by_index(dd, i); + if (uctxt) { + ret = match_ctxt(fd, uinfo, uctxt); + hfi1_rcd_put(uctxt); + /* value of != 0 will return */ + if (ret) + return ret; + } } return 0; @@ -993,7 +1049,7 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo, - struct hfi1_ctxtdata **cd) + struct hfi1_ctxtdata **rcd) { struct hfi1_ctxtdata *uctxt; int ret, numa; @@ -1066,12 +1122,12 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, if (dd->freectxts-- == dd->num_user_contexts) aspm_disable_all(dd); - *cd = uctxt; + *rcd = uctxt; return 0; ctxdata_free: - hfi1_free_ctxt(dd, uctxt); + hfi1_free_ctxt(uctxt); return ret; } @@ -1083,7 +1139,7 @@ static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt) aspm_enable_all(uctxt->dd); mutex_unlock(&hfi1_mutex); - hfi1_free_ctxt(uctxt->dd, uctxt); + hfi1_free_ctxt(uctxt); } static void init_subctxts(struct hfi1_ctxtdata *uctxt, @@ -1279,8 +1335,10 @@ static int setup_base_ctxt(struct hfi1_filedata *fd, return 0; setup_failed: + /* Set the failed bit so sub-context init can do the right thing */ set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags); deallocate_ctxt(uctxt); + return ret; } @@ -1417,18 +1475,13 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) struct hfi1_ctxtdata *uctxt; struct hfi1_devdata *dd = ppd->dd; u16 ctxt; - int ret = 0; - unsigned long flags; - if (!dd->events) { - ret = -EINVAL; - goto done; - } + if (!dd->events) + return -EINVAL; - spin_lock_irqsave(&dd->uctxt_lock, flags); for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts; ctxt++) { - uctxt = dd->rcd[ctxt]; + uctxt = hfi1_rcd_get_by_index(dd, ctxt); if (uctxt) { unsigned long *evs = dd->events + (uctxt->ctxt - dd->first_dyn_alloc_ctxt) * @@ -1441,11 +1494,11 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) set_bit(evtbit, evs); for (i = 1; i < uctxt->subctxt_cnt; i++) set_bit(evtbit, evs + i); + hfi1_rcd_put(uctxt); } } - spin_unlock_irqrestore(&dd->uctxt_lock, flags); -done: - return ret; + + return 0; } /** diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index bb003ff1df96..fa9160f68bb7 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -938,8 +938,7 @@ struct hfi1_devdata { u64 __iomem *egrtidbase; spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ - /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */ - spinlock_t uctxt_lock; /* rcd and user context changes */ + spinlock_t uctxt_lock; /* protect rcd changes */ struct mutex dc8051_lock; /* exclusive access to 8051 */ struct workqueue_struct *update_cntr_wq; struct work_struct update_cntr_work; @@ -1265,12 +1264,13 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd); int hfi1_create_kctxts(struct hfi1_devdata *dd); int hfi1_create_ctxtdata(struct 
hfi1_pportdata *ppd, int numa, struct hfi1_ctxtdata **rcd); -void hfi1_free_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); +void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd); void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); int hfi1_rcd_put(struct hfi1_ctxtdata *rcd); void hfi1_rcd_get(struct hfi1_ctxtdata *rcd); +struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt); int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 23f0bbc9c436..fba77001c3a7 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -188,7 +188,7 @@ int hfi1_create_kctxts(struct hfi1_devdata *dd) return 0; bail: for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) - hfi1_rcd_put(dd->rcd[i]); + hfi1_free_ctxt(dd->rcd[i]); /* All the contexts should be freed, free the array */ kfree(dd->rcd); @@ -197,7 +197,7 @@ bail: } /* - * Helper routines for the receive context reference count (rcd and uctxt) + * Helper routines for the receive context reference count (rcd and uctxt). */ static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) { @@ -211,10 +211,16 @@ static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) */ static void hfi1_rcd_free(struct kref *kref) { + unsigned long flags; struct hfi1_ctxtdata *rcd = container_of(kref, struct hfi1_ctxtdata, kref); hfi1_free_ctxtdata(rcd->dd, rcd); + + spin_lock_irqsave(&rcd->dd->uctxt_lock, flags); + rcd->dd->rcd[rcd->ctxt] = NULL; + spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags); + kfree(rcd); } @@ -253,7 +259,7 @@ void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) * If the array is full, we are EBUSY. * */ -static u16 allocate_rcd_index(struct hfi1_devdata *dd, +static int allocate_rcd_index(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, u16 *index) { unsigned long flags; @@ -279,8 +285,36 @@ static u16 allocate_rcd_index(struct hfi1_devdata *dd, return 0; } +/** + * hfi1_rcd_get_by_index + * @dd: pointer to a valid devdata structure + * @ctxt: the index of a possible rcd + * + * We need to protect access to the rcd array. If access is needed to + * one or more indices, get the protecting spinlock and then increment the + * kref. + * + * The caller is responsible for making the _put(). + * + */ +struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt) +{ + unsigned long flags; + struct hfi1_ctxtdata *rcd = NULL; + + spin_lock_irqsave(&dd->uctxt_lock, flags); + if (dd->rcd[ctxt]) { + rcd = dd->rcd[ctxt]; + hfi1_rcd_get(rcd); + } + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + return rcd; +} + /* - * Common code for user and kernel context setup. + * Common code for user and kernel context create and setup. + * NOTE: the initial kref is done here (hfi1_rcd_init()). 
*/ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, struct hfi1_ctxtdata **context) @@ -300,8 +334,6 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, u16 ctxt; int ret; - hfi1_cdbg(PROC, "setting up context %u\n", ctxt); - ret = allocate_rcd_index(dd, rcd, &ctxt); if (ret) { *context = NULL; @@ -321,6 +353,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, mutex_init(&rcd->exp_lock); + hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt); + /* * Calculate the context's RcvArray entry starting point. * We do this here because we have to take into account all @@ -425,28 +459,23 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, bail: *context = NULL; - hfi1_free_ctxt(dd, rcd); + hfi1_free_ctxt(rcd); return -ENOMEM; } /** * hfi1_free_ctxt - * @dd: Pointer to a valid device * @rcd: pointer to an initialized rcd data structure * - * This is the "free" to match the _create_ctxtdata (alloc) function. - * This is the final "put" for the kref. + * This wrapper is the free function that matches hfi1_create_ctxtdata(). + * When a context is done being used (kernel or user), this function is called + * for the "final" put to match the kref init from hfi1_create_ctxtdata(). + * Other users of the context do a get/put sequence to make sure that the + * structure isn't removed while in use. */ -void hfi1_free_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) +void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd) { - unsigned long flags; - - if (rcd) { - spin_lock_irqsave(&dd->uctxt_lock, flags); - dd->rcd[rcd->ctxt] = NULL; - spin_unlock_irqrestore(&dd->uctxt_lock, flags); - hfi1_rcd_put(rcd); - } + hfi1_rcd_put(rcd); } /* @@ -669,16 +698,19 @@ static int loadtime_init(struct hfi1_devdata *dd) static int init_after_reset(struct hfi1_devdata *dd) { int i; - + struct hfi1_ctxtdata *rcd; /* * Ensure chip does no sends or receives, tail updates, or * pioavail updates while we re-initialize. This is mostly * for the driver data structures, not chip registers. */ - for (i = 0; i < dd->num_rcv_contexts; i++) + for (i = 0; i < dd->num_rcv_contexts; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS | - HFI1_RCVCTRL_TAILUPD_DIS, dd->rcd[i]); + HFI1_RCVCTRL_TAILUPD_DIS, rcd); + hfi1_rcd_put(rcd); + } pio_send_control(dd, PSC_GLOBAL_DISABLE); for (i = 0; i < dd->num_send_contexts; i++) sc_disable(dd->send_contexts[i].sc); @@ -688,6 +720,7 @@ static int init_after_reset(struct hfi1_devdata *dd) static void enable_chip(struct hfi1_devdata *dd) { + struct hfi1_ctxtdata *rcd; u32 rcvmask; u16 i; @@ -699,17 +732,21 @@ static void enable_chip(struct hfi1_devdata *dd) * Other ctxts done as user opens and initializes them. */ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (!rcd) + continue; rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB; rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? 
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; - if (!HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, MULTI_PKT_EGR)) + if (!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) rcvmask |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; - if (HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, NODROP_RHQ_FULL)) + if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_RHQ_FULL)) rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; - if (HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, NODROP_EGR_FULL)) + if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL)) rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; - hfi1_rcvctrl(dd, rcvmask, dd->rcd[i]); - sc_enable(dd->rcd[i]->sc); + hfi1_rcvctrl(dd, rcvmask, rcd); + sc_enable(rcd->sc); + hfi1_rcd_put(rcd); } } @@ -854,7 +891,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) * existing, and re-allocate. * Need to re-create rest of ctxt 0 ctxtdata as well. */ - rcd = dd->rcd[i]; + rcd = hfi1_rcd_get_by_index(dd, i); if (!rcd) continue; @@ -868,6 +905,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); ret = lastfail; } + hfi1_rcd_put(rcd); } /* Allocate enough memory for user event notification. */ @@ -987,6 +1025,7 @@ static void stop_timers(struct hfi1_devdata *dd) static void shutdown_device(struct hfi1_devdata *dd) { struct hfi1_pportdata *ppd; + struct hfi1_ctxtdata *rcd; unsigned pidx; int i; @@ -1005,12 +1044,15 @@ static void shutdown_device(struct hfi1_devdata *dd) for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; - for (i = 0; i < dd->num_rcv_contexts; i++) + for (i = 0; i < dd->num_rcv_contexts; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS | HFI1_RCVCTRL_PKEY_DIS | - HFI1_RCVCTRL_ONE_PKT_EGR_DIS, dd->rcd[i]); + HFI1_RCVCTRL_ONE_PKT_EGR_DIS, rcd); + hfi1_rcd_put(rcd); + } /* * Gracefully stop all sends allowing any in progress to * trickle out first. @@ -1450,8 +1492,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd) { int ctxt; int pidx; - struct hfi1_ctxtdata **tmp; - unsigned long flags; /* users can't do anything more with chip */ for (pidx = 0; pidx < dd->num_pports; ++pidx) { @@ -1476,18 +1516,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd) free_credit_return(dd); - /* - * Free any resources still in use (usually just kernel contexts) - * at unload; we do for ctxtcnt, because that's what we allocate. - * We acquire lock to be really paranoid that rcd isn't being - * accessed from some interrupt-related code (that should not happen, - * but best to be sure). - */ - spin_lock_irqsave(&dd->uctxt_lock, flags); - tmp = dd->rcd; - dd->rcd = NULL; - spin_unlock_irqrestore(&dd->uctxt_lock, flags); - if (dd->rcvhdrtail_dummy_kvaddr) { dma_free_coherent(&dd->pcidev->dev, sizeof(u64), (void *)dd->rcvhdrtail_dummy_kvaddr, @@ -1495,16 +1523,22 @@ static void cleanup_device_data(struct hfi1_devdata *dd) dd->rcvhdrtail_dummy_kvaddr = NULL; } - for (ctxt = 0; tmp && ctxt < dd->num_rcv_contexts; ctxt++) { - struct hfi1_ctxtdata *rcd = tmp[ctxt]; + /* + * Free any resources still in use (usually just kernel contexts) + * at unload; we do for ctxtcnt, because that's what we allocate. 
+ */ + for (ctxt = 0; dd->rcd && ctxt < dd->num_rcv_contexts; ctxt++) { + struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; - tmp[ctxt] = NULL; /* debugging paranoia */ if (rcd) { hfi1_clear_tids(rcd); - hfi1_rcd_put(rcd); + hfi1_free_ctxt(rcd); } } - kfree(tmp); + + kfree(dd->rcd); + dd->rcd = NULL; + free_pio_map(dd); /* must follow rcv context free - need to remove rcv's hooks */ for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++) diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index bebf0a8f214d..f9909d240dcc 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -114,24 +114,24 @@ TRACE_EVENT(hfi1_rcvhdr, ); TRACE_EVENT(hfi1_receive_interrupt, - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt), - TP_ARGS(dd, ctxt), + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd), + TP_ARGS(dd, rcd), TP_STRUCT__entry(DD_DEV_ENTRY(dd) __field(u32, ctxt) __field(u8, slow_path) __field(u8, dma_rtail) ), TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - if (dd->rcd[ctxt]->do_interrupt == + __entry->ctxt = rcd->ctxt; + if (rcd->do_interrupt == &handle_receive_interrupt) { __entry->slow_path = 1; __entry->dma_rtail = 0xFF; - } else if (dd->rcd[ctxt]->do_interrupt == + } else if (rcd->do_interrupt == &handle_receive_interrupt_dma_rtail){ __entry->dma_rtail = 1; __entry->slow_path = 0; - } else if (dd->rcd[ctxt]->do_interrupt == + } else if (rcd->do_interrupt == &handle_receive_interrupt_nodma_rtail) { __entry->dma_rtail = 0; __entry->slow_path = 0; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index c91456c16cdb..2917a238a343 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -146,11 +146,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, return ret; bail: - /* - * hfi1_free_ctxt() will call hfi1_free_ctxtdata(), which will - * release send_context structure if uctxt->sc is not null - */ - hfi1_free_ctxt(dd, uctxt); + hfi1_free_ctxt(uctxt); dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); return ret; } @@ -158,15 +154,12 @@ bail: static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) { - unsigned long flags; - dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); flush_wc(); if (dd->num_msix_entries) hfi1_reset_vnic_msix_info(uctxt); - spin_lock_irqsave(&dd->uctxt_lock, flags); /* * Disable receive context and interrupt available, reset all * RcvCtxtCtrl bits to default values. @@ -189,7 +182,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, sc_disable(uctxt->sc); dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; - spin_unlock_irqrestore(&dd->uctxt_lock, flags); uctxt->event_flags = 0; @@ -198,7 +190,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, hfi1_stats.sps_ctxts--; - hfi1_free_ctxt(dd, uctxt); + hfi1_free_ctxt(uctxt); } void hfi1_vnic_setup(struct hfi1_devdata *dd) -- cgit v1.2.3-59-g8ed1b From 13c19222889daf91da36b7fb63b5d5d9ce89b377 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:53:51 -0700 Subject: IB/rdmavt, hfi1, qib: Modify check_ah() to account for extended LIDs rvt_check_ah() delegates lid verification to underlying driver. 
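As a quick editorial illustration of the two boundaries involved (a standalone sketch, not driver code; it assumes OPA_LID_PERMISSIVE is the all-ones 32-bit LID and takes the usual IB multicast base of 0xC000, neither of which is spelled out in this series):

	#include <stdio.h>
	#include <stdint.h>

	#define OPA_MCAST_NR 0x4	/* number of top bits set */

	/* mirrors opa_get_mcast_base() from opa_addr.h below */
	static uint32_t opa_get_mcast_base(uint32_t nr_top_bits)
	{
		return 0xFFFFFFFFu << (32 - nr_top_bits);
	}

	int main(void)
	{
		printf("IB multicast base:  0x%x\n", 0xC000);
		printf("OPA multicast base: 0x%x\n",
		       opa_get_mcast_base(OPA_MCAST_NR));
		return 0;
	}

This prints 0xc000 and 0xf0000000, the thresholds the 9B and extended checks below compare a dlid against.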
Underlying driver uses different conditions to check for dlid depending on whether the device supports extended LIDs Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/common.h | 9 --------- drivers/infiniband/hw/hfi1/mad.c | 5 +++-- drivers/infiniband/hw/hfi1/verbs.c | 5 +++++ drivers/infiniband/hw/qib/qib_verbs.c | 9 +++++++++ drivers/infiniband/sw/rdmavt/ah.c | 10 ---------- drivers/infiniband/sw/rdmavt/qp.c | 29 +++++++++++++++++++++++------ include/rdma/opa_addr.h | 18 ++++++++++++++++++ 7 files changed, 58 insertions(+), 27 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index ba9ab971ced9..aa416ef93c1a 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -333,15 +333,6 @@ struct diag_pkt { #define DEFAULT_P_KEY LIM_MGMT_P_KEY -/** - * 0xF8 - 4 bits of multicast range and 1 bit for collective range - * Example: For 24 bit LID space, - * Multicast range: 0xF00000 to 0xF7FFFF - * Collective range: 0xF80000 to 0xFFFFFE - */ -#define HFI1_MCAST_NR 0x4 /* Number of top bits set */ -#define HFI1_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */ - #define HFI1_PSM_IOC_BASE_SEQ 0x0 static inline __u64 rhf_to_cpu(const __le32 *rbuf) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index cd1f6f841f34..21fadb4b510c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -46,6 +46,7 @@ */ #include +#include #define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \ / (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16))) @@ -905,8 +906,8 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, pi->buffer_units = cpu_to_be32(buffer_units); pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags); - pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7) - << 3 | (HFI1_MCAST_NR & 0x7)); + pi->collectivemask_multicastmask = ((OPA_COLLECTIVE_NR & 0x7) + << 3 | (OPA_MCAST_NR & 0x7)); /* HFI supports a replay buffer 128 LTPs in size */ pi->replay_depth.buffer = 0x80; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index c88c03c11555..97ca42beb023 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -53,6 +53,7 @@ #include #include #include +#include #include "hfi.h" #include "common.h" @@ -1461,6 +1462,10 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) struct hfi1_devdata *dd; u8 sc5; + if (hfi1_check_mcast(rdma_ah_get_dlid(ah_attr)) && + !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) + return -EINVAL; + /* test the mapping for validity */ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index ac42dce7e281..9d92aeb8d9a1 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1341,6 +1341,15 @@ int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) if (rdma_ah_get_sl(ah_attr) > 15) return -EINVAL; + if (rdma_ah_get_dlid(ah_attr) == 0) + return -EINVAL; + if (rdma_ah_get_dlid(ah_attr) >= + be16_to_cpu(IB_MULTICAST_LID_BASE) && + rdma_ah_get_dlid(ah_attr) != + be16_to_cpu(IB_LID_PERMISSIVE) && + !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) + return -EINVAL; + return 
0; } diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index a96d4aa80ae8..ba3639a0d77c 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -66,8 +66,6 @@ int rvt_check_ah(struct ib_device *ibdev, int port_num = rdma_ah_get_port_num(ah_attr); struct ib_port_attr port_attr; struct rvt_dev_info *rdi = ib_to_rvt(ibdev); - enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); - u32 dlid = rdma_ah_get_dlid(ah_attr); u8 ah_flags = rdma_ah_get_ah_flags(ah_attr); u8 static_rate = rdma_ah_get_static_rate(ah_attr); @@ -83,14 +81,6 @@ int rvt_check_ah(struct ib_device *ibdev, if ((ah_flags & IB_AH_GRH) && rdma_ah_read_grh(ah_attr)->sgid_index >= port_attr.gid_tbl_len) return -EINVAL; - if (link != IB_LINK_LAYER_ETHERNET) { - if (dlid == 0) - return -EINVAL; - if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) && - dlid != be16_to_cpu(IB_LID_PERMISSIVE) && - !(ah_flags & IB_AH_GRH)) - return -EINVAL; - } if (rdi->driver_f.check_ah) return rdi->driver_f.check_ah(ibdev, ah_attr); return 0; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index eb0c3d60c584..6f6525d24a2f 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "qp.h" #include "vt.h" #include "trace.h" @@ -1066,6 +1067,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mig = 0; int pmtu = 0; /* for gcc warning only */ enum rdma_link_layer link; + int opa_ah; link = rdma_port_get_link_layer(ibqp->device, qp->port_num); @@ -1076,6 +1078,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num); if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, link)) @@ -1086,17 +1089,31 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto inval; if (attr_mask & IB_QP_AV) { - if (rdma_ah_get_dlid(&attr->ah_attr) >= - be16_to_cpu(IB_MULTICAST_LID_BASE)) - goto inval; + if (opa_ah) { + if (rdma_ah_get_dlid(&attr->ah_attr) >= + opa_get_mcast_base(OPA_MCAST_NR)) + goto inval; + } else { + if (rdma_ah_get_dlid(&attr->ah_attr) >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) + goto inval; + } + if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr)) goto inval; } if (attr_mask & IB_QP_ALT_PATH) { - if (rdma_ah_get_dlid(&attr->alt_ah_attr) >= - be16_to_cpu(IB_MULTICAST_LID_BASE)) - goto inval; + if (opa_ah) { + if (rdma_ah_get_dlid(&attr->alt_ah_attr) >= + opa_get_mcast_base(OPA_MCAST_NR)) + goto inval; + } else { + if (rdma_ah_get_dlid(&attr->alt_ah_attr) >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) + goto inval; + } + if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) goto inval; if (attr->alt_pkey_index >= rvt_get_npkeys(rdi)) diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 9b5e642cf550..8d3ad4ecbea1 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -48,10 +48,21 @@ #ifndef OPA_ADDR_H #define OPA_ADDR_H +#include + #define OPA_SPECIAL_OUI (0x00066AULL) #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) #define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ ? 
0 : x) +/** + * 0xF8 - 4 bits of multicast range and 1 bit for collective range + * Example: For 24 bit LID space, + * Multicast range: 0xF00000 to 0xF7FFFF + * Collective range: 0xF80000 to 0xFFFFFE + */ +#define OPA_MCAST_NR 0x4 /* Number of top bits set */ +#define OPA_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */ + /** * ib_is_opa_gid: Returns true if the top 24 bits of the gid * contains the OPA_STL_OUI identifier. This identifies that @@ -95,4 +106,11 @@ static inline bool opa_is_extended_lid(u32 dlid, u32 slid) else return false; } + +/* Get multicast lid base */ +static inline u32 opa_get_mcast_base(u32 nr_top_bits) +{ + return (be32_to_cpu(OPA_LID_PERMISSIVE) << (32 - nr_top_bits)); +} + #endif /* OPA_ADDR_H */ -- cgit v1.2.3-59-g8ed1b From 72c07e2b671eda1cf3e8ebabc664f542f673b997 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:53:58 -0700 Subject: IB/hfi1: Add support to receive 16B bypass packets We introduce a struct hfi1_16b_header to support 16B headers. 16B bypass packets are received by the driver and processed similar to 9B packets. Add basic support to handle 16B packets. Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 6 ++ drivers/infiniband/hw/hfi1/common.h | 1 + drivers/infiniband/hw/hfi1/driver.c | 127 ++++++++++++++++++++++++++++---- drivers/infiniband/hw/hfi1/hfi.h | 131 ++++++++++++++++++++++++++++++++- drivers/infiniband/hw/hfi1/rc.c | 2 +- drivers/infiniband/hw/hfi1/uc.c | 2 +- drivers/infiniband/hw/hfi1/ud.c | 4 +- drivers/infiniband/hw/hfi1/verbs.c | 17 +++-- drivers/infiniband/hw/hfi1/verbs.h | 13 ++++ drivers/infiniband/hw/hfi1/vnic.h | 15 ---- drivers/infiniband/hw/hfi1/vnic_main.c | 4 +- include/rdma/opa_vnic.h | 3 - 12 files changed, 274 insertions(+), 51 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 1446c16bc8a8..ee1324cce25a 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14468,6 +14468,7 @@ void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd) static void init_rxe(struct hfi1_devdata *dd) { struct rsm_map_table *rmt; + u64 val; /* enable all receive errors */ write_csr(dd, RCV_ERR_MASK, ~0ull); @@ -14492,6 +14493,11 @@ static void init_rxe(struct hfi1_devdata *dd) * (64 bytes). Max_Payload_Size is possibly modified upward in * tune_pcie_caps() which is called after this routine. */ + + /* Have 16 bytes (4DW) of bypass header available in header queue */ + val = read_csr(dd, RCV_BYPASS); + val |= (4ull << 16); + write_csr(dd, RCV_BYPASS, val); } static void init_other(struct hfi1_devdata *dd) diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index aa416ef93c1a..3e27794ec750 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -327,6 +327,7 @@ struct diag_pkt { /* misc. 
*/ #define SC15_PACKET 0xF #define SIZE_OF_CRC 1 +#define SIZE_OF_LT 1 #define LIM_MGMT_P_KEY 0x7FFF #define FULL_MGMT_P_KEY 0xFFFF diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 14f2a00c13c2..5280d82c344e 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -237,6 +237,13 @@ static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd, return (struct ib_header *)hfi1_get_header(dd, rhf_addr); } +static inline struct hfi1_16b_header + *hfi1_get_16B_header(struct hfi1_devdata *dd, + __le32 *rhf_addr) +{ + return (struct hfi1_16b_header *)hfi1_get_header(dd, rhf_addr); +} + /* * Validate and encode a given RcvArray Buffer size. * The function will check whether the given size falls within @@ -925,6 +932,11 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, packet->rhf_addr); sc = hfi1_9B_get_sc5(hdr, packet->rhf); + } else if (etype == RHF_RCV_TYPE_BYPASS) { + struct hfi1_16b_header *hdr = hfi1_get_16B_header( + packet->rcd->dd, + packet->rhf_addr); + sc = hfi1_16B_get_sc(hdr); } if (sc != SC15_PACKET) { int hwstate = driver_lstate(rcd->ppd); @@ -1386,9 +1398,14 @@ static int hfi1_setup_9B_packet(struct hfi1_packet *packet) } /* Query commonly used fields from packet header */ + packet->payload = packet->ebuf; packet->opcode = ib_bth_get_opcode(packet->ohdr); packet->slid = ib_get_slid(hdr); packet->dlid = ib_get_dlid(hdr); + if (unlikely((packet->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (packet->dlid != be16_to_cpu(IB_LID_PERMISSIVE)))) + packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) - + be16_to_cpu(IB_MULTICAST_LID_BASE); packet->sl = ib_get_sl(hdr); packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf); packet->pad = ib_bth_get_pad(packet->ohdr); @@ -1402,6 +1419,73 @@ drop: return -EINVAL; } +static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) +{ + /* + * Bypass packets have a different header/payload split + * compared to an IB packet. + * Current split is set such that 16 bytes of the actual + * header are in the header buffer and the remainder is in + * the eager buffer. We chose 16 since the hfi1 driver only + * supports 16B bypass packets and we will be able to + * receive the entire LRH with such a split. 
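+ * (This matches init_rxe(), which programs RCV_BYPASS so that + * 4 DWORDs of bypass header land in the header queue.)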
+ */ + + struct hfi1_ctxtdata *rcd = packet->rcd; + struct hfi1_pportdata *ppd = rcd->ppd; + struct hfi1_ibport *ibp = &ppd->ibport_data; + u8 l4; + u8 grh_len; + + packet->hdr = (struct hfi1_16b_header *) + hfi1_get_16B_header(packet->rcd->dd, + packet->rhf_addr); + packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; + + l4 = hfi1_16B_get_l4(packet->hdr); + if (l4 == OPA_16B_L4_IB_LOCAL) { + grh_len = 0; + packet->ohdr = packet->ebuf; + packet->grh = NULL; + } else if (l4 == OPA_16B_L4_IB_GLOBAL) { + u32 vtf; + + grh_len = sizeof(struct ib_grh); + packet->ohdr = packet->ebuf + grh_len; + packet->grh = packet->ebuf; + if (packet->grh->next_hdr != IB_GRH_NEXT_HDR) + goto drop; + vtf = be32_to_cpu(packet->grh->version_tclass_flow); + if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) + goto drop; + } else { + goto drop; + } + + /* Query commonly used fields from packet header */ + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->hlen = hdr_len_by_opcode[packet->opcode] + 8 + grh_len; + packet->payload = packet->ebuf + packet->hlen - (4 * sizeof(u32)); + packet->slid = hfi1_16B_get_slid(packet->hdr); + packet->dlid = hfi1_16B_get_dlid(packet->hdr); + if (unlikely(hfi1_is_16B_mcast(packet->dlid))) + packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) - + opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), + 16B); + packet->sc = hfi1_16B_get_sc(packet->hdr); + packet->sl = ibp->sc_to_sl[packet->sc]; + packet->pad = hfi1_16B_bth_get_pad(packet->ohdr); + packet->extra_byte = SIZE_OF_LT; + packet->fecn = hfi1_16B_get_fecn(packet->hdr); + packet->becn = hfi1_16B_get_becn(packet->hdr); + + return 0; +drop: + hfi1_cdbg(PKT, "%s: packet dropped\n", __func__); + ibp->rvp.n_pkt_drops++; + return -EINVAL; +} + void handle_eflags(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; @@ -1464,8 +1548,8 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) if (packet->rcd->is_vnic) return true; - if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) && - (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR)) + if ((hfi1_16B_get_l2(packet->ebuf) == OPA_16B_L2_TYPE) && + (hfi1_16B_get_l4(packet->ebuf) == OPA_16B_L4_ETHR)) return true; return false; @@ -1475,25 +1559,38 @@ int process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; - if (unlikely(rhf_err_flags(packet->rhf))) { - handle_eflags(packet); - } else if (hfi1_is_vnic_packet(packet)) { + if (hfi1_is_vnic_packet(packet)) { hfi1_vnic_bypass_rcv(packet); return RHF_RCV_CONTINUE; } - dd_dev_err(dd, "Unsupported bypass packet. Dropping\n"); - incr_cntr64(&dd->sw_rcv_bypass_packet_errors); - if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) { - u64 *flits = packet->ebuf; + if (hfi1_setup_bypass_packet(packet)) + return RHF_RCV_CONTINUE; + + if (unlikely(rhf_err_flags(packet->rhf))) { + handle_eflags(packet); + return RHF_RCV_CONTINUE; + } - if (flits && !(packet->rhf & RHF_LEN_ERR)) { - dd->err_info_rcvport.packet_flit1 = flits[0]; - dd->err_info_rcvport.packet_flit2 = - packet->tlen > sizeof(flits[0]) ? flits[1] : 0; + if (hfi1_16B_get_l2(packet->hdr) == 0x2) { + hfi1_16B_rcv(packet); + } else { + dd_dev_err(dd, + "Bypass packets other than 16B are not supported in normal operation. 
Dropping\n"); + incr_cntr64(&dd->sw_rcv_bypass_packet_errors); + if (!(dd->err_info_rcvport.status_and_code & + OPA_EI_STATUS_SMASK)) { + u64 *flits = packet->ebuf; + + if (flits && !(packet->rhf & RHF_LEN_ERR)) { + dd->err_info_rcvport.packet_flit1 = flits[0]; + dd->err_info_rcvport.packet_flit2 = + packet->tlen > sizeof(flits[0]) ? + flits[1] : 0; + } + dd->err_info_rcvport.status_and_code |= + (OPA_EI_STATUS_SMASK | BAD_L2_ERR); } - dd->err_info_rcvport.status_and_code |= - (OPA_EI_STATUS_SMASK | BAD_L2_ERR); } return RHF_RCV_CONTINUE; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index fa9160f68bb7..dbbad760cad4 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -325,6 +326,7 @@ struct hfi1_ctxtdata { struct hfi1_packet { void *ebuf; void *hdr; + void *payload; struct hfi1_ctxtdata *rcd; __le32 *rhf_addr; struct rvt_qp *qp; @@ -351,6 +353,83 @@ struct hfi1_packet { bool fecn; }; +/* + * OPA 16B Header + */ +#define OPA_16B_L4_MASK 0xFFull +#define OPA_16B_SC_MASK 0x1F00000ull +#define OPA_16B_SC_SHIFT 20 +#define OPA_16B_LID_MASK 0xFFFFFull +#define OPA_16B_DLID_MASK 0xF000ull +#define OPA_16B_DLID_SHIFT 20 +#define OPA_16B_DLID_HIGH_SHIFT 12 +#define OPA_16B_SLID_MASK 0xF00ull +#define OPA_16B_SLID_SHIFT 20 +#define OPA_16B_SLID_HIGH_SHIFT 8 +#define OPA_16B_BECN_MASK 0x80000000ull +#define OPA_16B_BECN_SHIFT 31 +#define OPA_16B_FECN_MASK 0x10000000ull +#define OPA_16B_FECN_SHIFT 28 +#define OPA_16B_L2_MASK 0x60000000ull +#define OPA_16B_L2_SHIFT 29 + +/* + * OPA 16B L2/L4 Encodings + */ +#define OPA_16B_L2_TYPE 0x02 +#define OPA_16B_L4_IB_LOCAL 0x09 +#define OPA_16B_L4_IB_GLOBAL 0x0A +#define OPA_16B_L4_ETHR OPA_VNIC_L4_ETHR + +static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr) +{ + return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK); +} + +static inline u8 hfi1_16B_get_sc(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[1] & OPA_16B_SC_MASK) >> OPA_16B_SC_SHIFT); +} + +static inline u32 hfi1_16B_get_dlid(struct hfi1_16b_header *hdr) +{ + return (u32)((hdr->lrh[1] & OPA_16B_LID_MASK) | + (((hdr->lrh[2] & OPA_16B_DLID_MASK) >> + OPA_16B_DLID_HIGH_SHIFT) << OPA_16B_DLID_SHIFT)); +} + +static inline u32 hfi1_16B_get_slid(struct hfi1_16b_header *hdr) +{ + return (u32)((hdr->lrh[0] & OPA_16B_LID_MASK) | + (((hdr->lrh[2] & OPA_16B_SLID_MASK) >> + OPA_16B_SLID_HIGH_SHIFT) << OPA_16B_SLID_SHIFT)); +} + +static inline u8 hfi1_16B_get_becn(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[0] & OPA_16B_BECN_MASK) >> OPA_16B_BECN_SHIFT); +} + +static inline u8 hfi1_16B_get_fecn(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[1] & OPA_16B_FECN_MASK) >> OPA_16B_FECN_SHIFT); +} + +static inline u8 hfi1_16B_get_l2(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[1] & OPA_16B_L2_MASK) >> OPA_16B_L2_SHIFT); +} + +/* + * BTH + */ +#define OPA_16B_BTH_PAD_MASK 7 +static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_PAD_SHIFT) & + OPA_16B_BTH_PAD_MASK); +} + struct rvt_sge_state; /* @@ -2084,11 +2163,55 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); /* * hfi1_check_mcast- Check if the given lid is - * in the IB multicast range. + * in the OPA multicast range. + * + * The LID might either reside in ah.dlid or might be + * in the GRH of the address handle as DGID if extended + * addresses are in use. 
*/ -static inline bool hfi1_check_mcast(u16 lid) +static inline bool hfi1_check_mcast(u32 lid) +{ + return ((lid >= opa_get_mcast_base(OPA_MCAST_NR)) && + (lid != be32_to_cpu(OPA_LID_PERMISSIVE))); +} + +#define opa_get_lid(lid, format) \ + __opa_get_lid(lid, OPA_PORT_PACKET_FORMAT_##format) + +/* Convert a lid to a specific lid space */ +static inline u32 __opa_get_lid(u32 lid, u8 format) +{ + bool is_mcast = hfi1_check_mcast(lid); + + switch (format) { + case OPA_PORT_PACKET_FORMAT_8B: + case OPA_PORT_PACKET_FORMAT_10B: + if (is_mcast) + return (lid - opa_get_mcast_base(OPA_MCAST_NR) + + 0xF0000); + return lid & 0xFFFFF; + case OPA_PORT_PACKET_FORMAT_16B: + if (is_mcast) + return (lid - opa_get_mcast_base(OPA_MCAST_NR) + + 0xF00000); + return lid & 0xFFFFFF; + case OPA_PORT_PACKET_FORMAT_9B: + if (is_mcast) + return (lid - + opa_get_mcast_base(OPA_MCAST_NR) + + be16_to_cpu(IB_MULTICAST_LID_BASE)); + else + return lid & 0xFFFF; + default: + return lid; + } +} + +/* Return true if the given lid is the OPA 16B multicast range */ +static inline bool hfi1_is_16B_mcast(u32 lid) { - return ((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (lid != be16_to_cpu(IB_LID_PERMISSIVE))); + return ((lid >= + opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 16B)) && + (lid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))); } #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index baa67bf0772b..cf74a56e20e5 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1916,7 +1916,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void hfi1_rc_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - void *data = packet->ebuf; + void *data = packet->payload; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = rcd_to_iport(rcd); diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 76c2451a53d7..366f7b9517fe 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -297,7 +297,7 @@ bail_no_tx: void hfi1_uc_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); - void *data = packet->ebuf; + void *data = packet->payload; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct ib_other_headers *ohdr = packet->ohdr; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 6bf7a1b08491..dcf8c14c6d0e 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -667,11 +667,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_header *hdr = packet->hdr; - void *data = packet->ebuf; + void *data = packet->payload; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); - u32 bth1; u8 sl_from_sc; u8 extra_bytes = packet->pad; u8 opcode = packet->opcode; @@ -679,7 +678,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u32 dlid = packet->dlid; u32 slid = packet->slid; - bth1 = be32_to_cpu(ohdr->bth[1]); qkey = ib_get_qkey(ohdr); src_qp = ib_get_sqpn(ohdr); pkey = ib_bth_get_pkey(ohdr); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 97ca42beb023..ebddab1a06f4 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -571,7 +571,7 @@ static inline void hfi1_handle_packet(struct 
hfi1_packet *packet, goto drop; mcast = rvt_mcast_find(&ibp->rvp, &packet->grh->dgid, - packet->dlid); + opa_get_lid(packet->dlid, 9B)); if (!mcast) goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { @@ -627,14 +627,17 @@ drop: void hfi1_ib_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - bool is_mcast = false; - if (unlikely(hfi1_check_mcast(packet->dlid))) - is_mcast = true; + trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); + hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid)); +} + +void hfi1_16B_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; - trace_input_ibhdr(rcd->dd, packet, - !!(packet->rhf & RHF_DC_INFO_SMASK)); - hfi1_handle_packet(packet, is_mcast); + trace_input_ibhdr(rcd->dd, packet, false); + hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid)); } /* diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 34267c7ef85a..590aab270f98 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -104,6 +104,17 @@ enum { HFI1_HAS_GRH = (1 << 0), }; +struct hfi1_16b_header { + u32 lrh[4]; + union { + struct { + struct ib_grh grh; + struct ib_other_headers oth; + } l; + struct ib_other_headers oth; + } u; +} __packed; + struct hfi1_ahg_info { u32 ahgdesc[2]; u16 tx_flags; @@ -378,6 +389,8 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *); void hfi1_ib_rcv(struct hfi1_packet *packet); +void hfi1_16B_rcv(struct hfi1_packet *packet); + unsigned hfi1_get_npkeys(struct hfi1_devdata *); int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h index eec7c1424991..5ae781514e32 100644 --- a/drivers/infiniband/hw/hfi1/vnic.h +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -54,21 +54,6 @@ #define HFI1_VNIC_MAX_TXQ 16 #define HFI1_VNIC_MAX_PAD 12 -/* L2 header definitions */ -#define HFI1_L2_TYPE_OFFSET 0x7 -#define HFI1_L2_TYPE_SHFT 0x5 -#define HFI1_L2_TYPE_MASK 0x3 - -#define HFI1_GET_L2_TYPE(hdr) \ - ((*((u8 *)(hdr) + HFI1_L2_TYPE_OFFSET) >> HFI1_L2_TYPE_SHFT) & \ - HFI1_L2_TYPE_MASK) - -/* L4 type definitions */ -#define HFI1_L4_TYPE_OFFSET 8 - -#define HFI1_GET_L4_TYPE(data) \ - (*((u8 *)(data) + HFI1_L4_TYPE_OFFSET)) - /* L4 header definitions */ #define HFI1_VNIC_L4_HDR_OFFSET OPA_VNIC_L2_HDR_LEN diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 2917a238a343..f419cbb05928 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -564,8 +564,8 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) int l4_type, vesw_id = -1; u8 q_idx; - l4_type = HFI1_GET_L4_TYPE(packet->ebuf); - if (likely(l4_type == OPA_VNIC_L4_ETHR)) { + l4_type = hfi1_16B_get_l4(packet->ebuf); + if (likely(l4_type == OPA_16B_L4_ETHR)) { vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h index 39d6890616a6..0c07a70bd7f6 100644 --- a/include/rdma/opa_vnic.h +++ b/include/rdma/opa_vnic.h @@ -54,9 +54,6 @@ #include -/* VNIC uses 16B header format */ -#define OPA_VNIC_L2_TYPE 0x2 - /* 16 header bytes + 2 reserved bytes */ #define OPA_VNIC_L2_HDR_LEN (16 + 2) -- cgit v1.2.3-59-g8ed1b From 30e07416cf48801f127019c1dfece8039f1da8e2 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:04 -0700 Subject: IB/hfi1: Add support to send 16B bypass packets We 
introduce struct hfi1_opa_header as a union of ib (9B) and 16B headers. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 33 ++++++++++++++++++------------- drivers/infiniband/hw/hfi1/ruc.c | 17 ++++++++-------- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 19 +++++++++--------- drivers/infiniband/hw/hfi1/uc.c | 4 ++-- drivers/infiniband/hw/hfi1/ud.c | 25 ++++++++++++----------- drivers/infiniband/hw/hfi1/verbs.c | 24 +++++++++++++++++++--- drivers/infiniband/hw/hfi1/verbs.h | 22 ++++++++++----------- 7 files changed, 84 insertions(+), 60 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index cf74a56e20e5..e3dbf6d45afe 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -273,9 +273,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (IS_ERR(ps->s_txreq)) goto bail_no_tx; - ohdr = &ps->s_txreq->phdr.hdr.u.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && @@ -724,7 +724,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, u32 vl, plen; struct send_context *sc; struct pio_buf *pbuf; - struct ib_header hdr; + struct hfi1_opa_header opah; + struct ib_header *hdr; struct ib_other_headers *ohdr; unsigned long flags; @@ -741,16 +742,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, goto queue_ack; /* Construct the header */ + opah.hdr_type = 0; + hdr = &opah.ibh; + /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */ hwords = 6; if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { - hwords += hfi1_make_grh(ibp, &hdr.u.l.grh, + hwords += hfi1_make_grh(ibp, &hdr->u.l.grh, rdma_ah_read_grh(&qp->remote_ah_attr), hwords, 0); - ohdr = &hdr.u.l.oth; + ohdr = &hdr->u.l.oth; lrh0 = HFI1_LRH_GRH; } else { - ohdr = &hdr.u.oth; + ohdr = &hdr->u.oth; lrh0 = HFI1_LRH_BTH; } /* read pkey_index w/o lock (its atomic) */ @@ -768,11 +772,11 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4; - hdr.lrh[0] = cpu_to_be16(lrh0); - hdr.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); - hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(ppd->lid | - rdma_ah_get_path_bits(&qp->remote_ah_attr)); + hdr->lrh[0] = cpu_to_be16(lrh0); + hdr->lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); + hdr->lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); + hdr->lrh[3] = cpu_to_be16(ppd->lid | + rdma_ah_get_path_bits(&qp->remote_ah_attr)); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT); @@ -799,10 +803,10 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, } trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &hdr, ib_is_sc5(sc5)); + &opah, ib_is_sc5(sc5)); /* write the pbc and data */ - ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); + ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, hdr, hwords); return; @@ -985,9 +989,10 @@ static void 
reset_sending_psn(struct rvt_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) +void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) { struct ib_other_headers *ohdr; + struct ib_header *hdr = &opah->ibh; struct rvt_swqe *wqe; u32 opcode; u32 psn; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 4afa00f921f2..e30c64fcce67 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -668,7 +668,8 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, return sizeof(struct ib_grh) / sizeof(u32); } -#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4) +#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, \ + hdr.ibh.u.oth.bth[2]) / 4) /** * build_ahg - create ahg in s_ahg @@ -743,8 +744,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { qp->s_hdrwords += hfi1_make_grh(ibp, - &ps->s_txreq->phdr.hdr.u.l.grh, - rdma_ah_read_grh(&qp->remote_ah_attr), + &ps->s_txreq->phdr.hdr.ibh.u.l.grh, + &qp->remote_ah_attr.grh, qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; middle = 0; @@ -773,14 +774,14 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, build_ahg(qp, bth2); else qp->s_flags &= ~RVT_S_AHG_VALID; - ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.lrh[1] = + ps->s_txreq->phdr.hdr.ibh.lrh[0] = cpu_to_be16(lrh0); + ps->s_txreq->phdr.hdr.ibh.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); - ps->s_txreq->phdr.hdr.lrh[2] = + ps->s_txreq->phdr.hdr.ibh.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - ps->s_txreq->phdr.hdr.lrh[3] = + ps->s_txreq->phdr.hdr.ibh.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | - rdma_ah_get_path_bits(&qp->remote_ah_attr)); + rdma_ah_get_path_bits(&qp->remote_ah_attr)); bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 0f2d2da057ec..73240255ffc5 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -213,9 +213,9 @@ DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr, DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr, + struct hfi1_opa_header *opah, bool sc5), - TP_ARGS(dd, hdr, sc5), + TP_ARGS(dd, opah, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) __field(u8, lnh) @@ -238,10 +238,11 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, __field(u32, psn) /* extended headers */ __dynamic_array(u8, ehdrs, - hfi1_trace_ib_hdr_len(hdr)) + hfi1_trace_ib_hdr_len(&opah->ibh)) ), TP_fast_assign( struct ib_other_headers *ohdr; + struct ib_header *hdr = &opah->ibh; DD_DEV_ASSIGN(dd); @@ -294,18 +295,18 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, DEFINE_EVENT(hfi1_output_ibhdr_template, pio_output_ibhdr, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr, bool sc5), - TP_ARGS(dd, hdr, sc5)); + struct hfi1_opa_header *opah, bool sc5), + TP_ARGS(dd, opah, sc5)); DEFINE_EVENT(hfi1_output_ibhdr_template, ack_output_ibhdr, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr, bool sc5), - TP_ARGS(dd, hdr, sc5)); + struct hfi1_opa_header *opah, bool sc5), + TP_ARGS(dd, opah, sc5)); 
DEFINE_EVENT(hfi1_output_ibhdr_template, sdma_output_ibhdr, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr, bool sc5), - TP_ARGS(dd, hdr, sc5)); + struct hfi1_opa_header *opah, bool sc5), + TP_ARGS(dd, opah, sc5)); #endif /* __HFI1_TRACE_IBHDRS_H */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 366f7b9517fe..e0bb766ae36c 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -93,9 +93,9 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto done_free_tx; } - ohdr = &ps->s_txreq->phdr.hdr.u.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; /* Get the next send request. */ wqe = rvt_get_swqe_ptr(qp, qp->s_cur); diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index dcf8c14c6d0e..2af993cbe5af 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -357,12 +357,13 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { /* Header size in 32-bit words. */ - qp->s_hdrwords += hfi1_make_grh(ibp, - &ps->s_txreq->phdr.hdr.u.l.grh, - rdma_ah_read_grh(ah_attr), - qp->s_hdrwords, nwords); + qp->s_hdrwords += + hfi1_make_grh(ibp, + &ps->s_txreq->phdr.hdr.ibh.u.l.grh, + rdma_ah_read_grh(ah_attr), + qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; - ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; /* * Don't worry about sending to locally attached multicast * QPs. It is unspecified by the spec. what happens. @@ -370,7 +371,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } else { /* Header size in 32-bit words. 
*/ lrh0 = HFI1_LRH_BTH; - ohdr = &ps->s_txreq->phdr.hdr.u.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_hdrwords++; @@ -392,21 +393,21 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ps->s_txreq->sde = priv->s_sde; priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); ps->s_txreq->psc = priv->s_sendcontext; - ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.lrh[1] = + ps->s_txreq->phdr.hdr.ibh.lrh[0] = cpu_to_be16(lrh0); + ps->s_txreq->phdr.hdr.ibh.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(ah_attr)); - ps->s_txreq->phdr.hdr.lrh[2] = + ps->s_txreq->phdr.hdr.ibh.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); if (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) { - ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE; + ps->s_txreq->phdr.hdr.ibh.lrh[3] = IB_LID_PERMISSIVE; } else { lid = ppd->lid; if (lid) { lid |= rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1); - ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid); + ps->s_txreq->phdr.hdr.ibh.lrh[3] = cpu_to_be16(lid); } else { - ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE; + ps->s_txreq->phdr.hdr.ibh.lrh[3] = IB_LID_PERMISSIVE; } } if (wqe->wr.send_flags & IB_SEND_SOLICITED) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index ebddab1a06f4..4aec805c645d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -506,6 +506,24 @@ again: } } +static u8 get_opcode(struct hfi1_opa_header *hdr) +{ + struct ib_other_headers *ohdr; + + if (hdr->hdr_type) { + if (hfi1_16B_get_l4(&hdr->opah) == OPA_16B_L4_IB_LOCAL) + ohdr = &hdr->opah.u.oth; + else + ohdr = &hdr->opah.u.l.oth; + } else { + if (ib_get_lnh(&hdr->ibh) == HFI1_LRH_BTH) + ohdr = &hdr->ibh.u.oth; + else + ohdr = &hdr->ibh.u.l.oth; + } + return ib_bth_get_opcode(ohdr); +} + /* * Make sure the QP is ready and able to accept the given opcode. 
*/ @@ -686,7 +704,7 @@ static void verbs_sdma_complete( if (tx->wqe) { hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); } else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct ib_header *hdr; + struct hfi1_opa_header *hdr; hdr = &tx->phdr.hdr; hfi1_rc_send_complete(qp, hdr); @@ -1175,7 +1193,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct ib_header *h = &tx->phdr.hdr; + struct hfi1_opa_header *h = &tx->phdr.hdr; if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) return dd->process_pio_send; @@ -1221,7 +1239,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) int ret; u8 lnh; - hdr = &ps->s_txreq->phdr.hdr; + hdr = &ps->s_txreq->phdr.hdr.ibh; /* locate the pkey within the headers */ lnh = ib_get_lnh(hdr); if (lnh == HFI1_LRH_GRH) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 590aab270f98..20224100cbc5 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -115,6 +115,14 @@ struct hfi1_16b_header { } u; } __packed; +struct hfi1_opa_header { + union { + struct ib_header ibh; /* 9B header */ + struct hfi1_16b_header opah; /* 16B header */ + }; + u8 hdr_type; /* 9B or 16B */ +} __packed; + struct hfi1_ahg_info { u32 ahgdesc[2]; u16 tx_flags; @@ -124,7 +132,7 @@ struct hfi1_ahg_info { struct hfi1_sdma_header { __le64 pbc; - struct ib_header hdr; + struct hfi1_opa_header hdr; } __packed; /* @@ -326,7 +334,7 @@ u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); -void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); +void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah); void hfi1_ud_rcv(struct hfi1_packet *packet); @@ -347,16 +355,6 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); extern const u32 rc_only_opcode; extern const u32 uc_only_opcode; -static inline u8 get_opcode(struct ib_header *h) -{ - u16 lnh = be16_to_cpu(h->lrh[0]) & 3; - - if (lnh == IB_LNH_IBA_LOCAL) - return be32_to_cpu(h->u.oth.bth[0]) >> 24; - else - return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; -} - int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, -- cgit v1.2.3-59-g8ed1b From 5786adf3fde7aa22a68eedac9c59e40da76ffbfb Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:10 -0700 Subject: IB/hfi1: Add support to process 16B header errors Enhance hdr_rcverr() to also handle errors during 16B bypass packet receive. 
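For reference, the 16B mask/shift layout these checks rely on can be exercised in isolation. The sketch below is editorial only; it reuses the OPA_16B_* constants added to hfi.h in this series with a made-up LRH:

	#include <stdio.h>
	#include <stdint.h>

	#define OPA_16B_LID_MASK	0xFFFFFull
	#define OPA_16B_DLID_MASK	0xF000ull
	#define OPA_16B_DLID_SHIFT	20
	#define OPA_16B_DLID_HIGH_SHIFT	12
	#define OPA_16B_PKEY_MASK	0xFFFF0000ull
	#define OPA_16B_PKEY_SHIFT	16

	int main(void)
	{
		/* hypothetical lrh[1]/lrh[2] words, chosen only to exercise the masks */
		uint32_t lrh1 = 0x000ABCDE;	/* DLID[19:0] */
		uint32_t lrh2 = 0x8001F000;	/* pkey 0x8001, DLID[23:20] = 0xF */
		uint32_t dlid = (lrh1 & OPA_16B_LID_MASK) |
				(((lrh2 & OPA_16B_DLID_MASK) >>
				  OPA_16B_DLID_HIGH_SHIFT) << OPA_16B_DLID_SHIFT);
		uint16_t pkey = (lrh2 & OPA_16B_PKEY_MASK) >> OPA_16B_PKEY_SHIFT;

		printf("dlid 0x%x pkey 0x%x\n", dlid, pkey);	/* 0xfabcde 0x8001 */
		return 0;
	}

This mirrors hfi1_16B_get_dlid() from the earlier 16B receive patch and the hfi1_16B_get_pkey() helper this patch adds.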
Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 58 ++++++++++++++++++++++++++++++------- drivers/infiniband/hw/hfi1/hfi.h | 13 +++++++-- drivers/infiniband/hw/hfi1/ruc.c | 31 +++++++++++++------- drivers/infiniband/hw/hfi1/ud.c | 3 +- drivers/infiniband/hw/hfi1/verbs.c | 39 ++++++++++++++++++++----- drivers/infiniband/hw/hfi1/verbs.h | 1 + 6 files changed, 112 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 5280d82c344e..ae6a90d2a31c 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -269,8 +269,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, { struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); - u8 lnh = ib_get_lnh(rhdr); - bool has_grh = false; + u32 mlid_base; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct hfi1_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -278,14 +277,20 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) return; - if (lnh == HFI1_LRH_BTH) { - packet->ohdr = &rhdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { - has_grh = true; - packet->ohdr = &rhdr->u.l.oth; - packet->grh = &rhdr->u.l.grh; - } else { + if (packet->etype == RHF_RCV_TYPE_BYPASS) { goto drop; + } else { + u8 lnh = ib_get_lnh(rhdr); + + mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE); + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &rhdr->u.oth; + } else if (lnh == HFI1_LRH_GRH) { + packet->ohdr = &rhdr->u.l.oth; + packet->grh = &rhdr->u.l.grh; + } else { + goto drop; + } } if (packet->rhf & RHF_TID_ERR) { @@ -293,14 +298,13 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ u32 dlid = ib_get_dlid(rhdr); u32 qp_num; - u32 mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE); /* Sanity check packet */ if (tlen < 24) goto drop; /* Check for GRH */ - if (has_grh) { + if (packet->grh) { u32 vtf; struct ib_grh *grh = packet->grh; @@ -1370,6 +1374,35 @@ static inline void hfi1_setup_ib_header(struct hfi1_packet *packet) packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; } +static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet) +{ + struct hfi1_pportdata *ppd = packet->rcd->ppd; + + /* slid and dlid cannot be 0 */ + if ((!packet->slid) || (!packet->dlid)) + return -EINVAL; + + /* Compare port lid with incoming packet dlid */ + if ((!(hfi1_is_16B_mcast(packet->dlid))) && + (packet->dlid != + opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) { + if (packet->dlid != ppd->lid) + return -EINVAL; + } + + /* No multicast packets with SC15 */ + if ((hfi1_is_16B_mcast(packet->dlid)) && (packet->sc == 0xF)) + return -EINVAL; + + /* Packets with permissive DLID always on SC15 */ + if ((packet->dlid == opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), + 16B)) && + (packet->sc != 0xF)) + return -EINVAL; + + return 0; +} + static int hfi1_setup_9B_packet(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); @@ -1479,6 +1512,9 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) packet->fecn = hfi1_16B_get_fecn(packet->hdr); packet->becn = hfi1_16B_get_becn(packet->hdr); + if (hfi1_bypass_ingress_pkt_check(packet)) + goto drop; + return 0; drop: hfi1_cdbg(PKT, 
"%s: packet dropped\n", __func__); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index dbbad760cad4..ee19660ca2fa 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -372,6 +372,10 @@ struct hfi1_packet { #define OPA_16B_FECN_SHIFT 28 #define OPA_16B_L2_MASK 0x60000000ull #define OPA_16B_L2_SHIFT 29 +#define OPA_16B_PKEY_MASK 0xFFFF0000ull +#define OPA_16B_PKEY_SHIFT 16 +#define OPA_16B_LEN_MASK 0x7FF00000ull +#define OPA_16B_LEN_SHIFT 20 /* * OPA 16B L2/L4 Encodings @@ -420,6 +424,11 @@ static inline u8 hfi1_16B_get_l2(struct hfi1_16b_header *hdr) return (u8)((hdr->lrh[1] & OPA_16B_L2_MASK) >> OPA_16B_L2_SHIFT); } +static inline u16 hfi1_16B_get_pkey(struct hfi1_16b_header *hdr) +{ + return (u16)((hdr->lrh[2] & OPA_16B_PKEY_MASK) >> OPA_16B_PKEY_SHIFT); +} + /* * BTH */ @@ -1597,9 +1606,9 @@ static void ingress_pkey_table_fail(struct hfi1_pportdata *ppd, u16 pkey, * by HW and rcv_pkey_check function should be called instead. */ static inline int ingress_pkey_check(struct hfi1_pportdata *ppd, u16 pkey, - u8 sc5, u8 idx, u16 slid) + u8 sc5, u8 idx, u32 slid, bool force) { - if (!(ppd->part_enforce & HFI1_PART_ENFORCE_IN)) + if (!(force) && !(ppd->part_enforce & HFI1_PART_ENFORCE_IN)) return 0; /* If SC15, pkey[0:14] must be 0x7fff */ diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index e30c64fcce67..d252f8f2207a 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -227,15 +227,23 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) u32 sl = packet->sl; int migrated; u32 bth0, bth1; + u16 pkey; bth0 = be32_to_cpu(packet->ohdr->bth[0]); bth1 = be32_to_cpu(packet->ohdr->bth[1]); - migrated = bth0 & IB_BTH_MIG_REQ; + if (packet->etype == RHF_RCV_TYPE_BYPASS) { + pkey = hfi1_16B_get_pkey(packet->hdr); + migrated = bth1 & OPA_BTH_MIG_REQ; + } else { + pkey = ib_bth_get_pkey(packet->ohdr); + migrated = bth0 & IB_BTH_MIG_REQ; + } if (qp->s_mig_state == IB_MIG_ARMED && migrated) { if (!packet->grh) { - if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) & - IB_AH_GRH) + if ((rdma_ah_get_ah_flags(&qp->alt_ah_attr) & + IB_AH_GRH) && + (packet->etype != RHF_RCV_TYPE_BYPASS)) return 1; } else { const struct ib_global_route *grh; @@ -254,10 +262,10 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) grh->dgid.global.interface_id)) return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey, sc5, slid))) { - hfi1_bad_pkey(ibp, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num, + slid, dlid); return 1; } /* Validate the SLID. See Ch. 
9.6.1.5 and 17.2.8 */ @@ -270,8 +278,9 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) spin_unlock_irqrestore(&qp->s_lock, flags); } else { if (!packet->grh) { - if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & - IB_AH_GRH) + if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & + IB_AH_GRH) && + (packet->etype != RHF_RCV_TYPE_BYPASS)) return 1; } else { const struct ib_global_route *grh; @@ -290,10 +299,10 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) grh->dgid.global.interface_id)) return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey, sc5, slid))) { - hfi1_bad_pkey(ibp, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num, + slid, dlid); return 1; } /* Validate the SLID. See Ch. 9.6.1.5 */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 2af993cbe5af..b708376b67da 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -109,7 +109,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); if (unlikely(ingress_pkey_check(ppd, pkey, sc5, - qp->s_pkey_index, slid))) { + qp->s_pkey_index, + slid, false))) { hfi1_bad_pkey(ibp, pkey, rdma_ah_get_sl(ah_attr), sqp->ibqp.qp_num, qp->ibqp.qp_num, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 4aec805c645d..0b1556fed47e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -568,6 +568,24 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) return pbc; } +static int hfi1_do_pkey_check(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; + struct hfi1_pportdata *ppd = rcd->ppd; + struct hfi1_16b_header *hdr = packet->hdr; + u16 pkey; + + /* Pkey check needed only for bypass packets */ + if (packet->etype != RHF_RCV_TYPE_BYPASS) + return 0; + + /* Perform pkey check */ + pkey = hfi1_16B_get_pkey(hdr); + return ingress_pkey_check(ppd, pkey, packet->sc, + packet->qp->s_pkey_index, + packet->slid, true); +} + static inline void hfi1_handle_packet(struct hfi1_packet *packet, bool is_mcast) { @@ -594,6 +612,8 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; + if (hfi1_do_pkey_check(packet)) + goto drop; spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -613,15 +633,16 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, qp_num = ib_bth_get_qpn(packet->ohdr); rcu_read_lock(); packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); - if (!packet->qp) { - rcu_read_unlock(); - goto drop; - } + if (!packet->qp) + goto unlock_drop; + + if (hfi1_do_pkey_check(packet)) + goto unlock_drop; + if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode, - true))) { - rcu_read_unlock(); - goto drop; - } + true))) + goto unlock_drop; + spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -632,6 +653,8 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, rcu_read_unlock(); } return; +unlock_drop: + rcu_read_unlock(); drop: ibp->rvp.n_pkt_drops++; } diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 20224100cbc5..68577a0c922b 100644 --- 
a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -95,6 +95,7 @@ struct hfi1_packet; #define HFI1_VENDOR_IPG cpu_to_be16(0xFFA0) #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) +#define OPA_BTH_MIG_REQ BIT(31) #define RC_OP(x) IB_OPCODE_RC_##x #define UC_OP(x) IB_OPCODE_UC_##x -- cgit v1.2.3-59-g8ed1b From d98bb7f7e6fa29d45008370084d5cabac7ac69ed Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:16 -0700 Subject: IB/hfi1: Determine 9B/16B L2 header type based on Address handle When address handle attributes are initialized, the LIDs are transformed to be in the 32 bit LID space. When constructing the header, hfi1 driver will look at the LID to determine the packet header to be created. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/sa_query.c | 21 +++++--- drivers/infiniband/core/uverbs_cmd.c | 3 ++ drivers/infiniband/hw/hfi1/hfi.h | 92 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/qp.c | 28 +++++++++++ drivers/infiniband/hw/hfi1/verbs.c | 12 +++++ drivers/infiniband/hw/hfi1/verbs.h | 1 + include/rdma/ib_verbs.h | 15 ++++++ include/rdma/opa_addr.h | 4 +- 8 files changed, 168 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index da29e2863c84..0179b21bad34 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "sa.h" #include "core_priv.h" @@ -1239,6 +1240,11 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, ah_attr->type = rdma_ah_find_type(device, port_num); rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec))); + + if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) && + (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))) + rdma_ah_set_make_grd(ah_attr, true); + rdma_ah_set_sl(ah_attr, rec->sl); rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) & get_src_path_mask(device, port_num)); @@ -2288,12 +2294,15 @@ static void update_sm_ah(struct work_struct *work) rdma_ah_set_sl(&ah_attr, port_attr.sm_sl); rdma_ah_set_port_num(&ah_attr, port->port_num); if (port_attr.grh_required) { - rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); - - rdma_ah_set_subnet_prefix(&ah_attr, - cpu_to_be64(port_attr.subnet_prefix)); - rdma_ah_set_interface_id(&ah_attr, - cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); + if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA) { + rdma_ah_set_make_grd(&ah_attr, true); + } else { + rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); + rdma_ah_set_subnet_prefix(&ah_attr, + cpu_to_be64(port_attr.subnet_prefix)); + rdma_ah_set_interface_id(&ah_attr, + cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); + } } new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 7ea5a3bb5a04..dc7d773a96ec 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2009,6 +2009,7 @@ static int modify_qp(struct ib_uverbs_file *file, rdma_ah_set_static_rate(&attr->ah_attr, cmd->base.dest.static_rate); rdma_ah_set_port_num(&attr->ah_attr, cmd->base.dest.port_num); + rdma_ah_set_make_grd(&attr->ah_attr, false); attr->alt_ah_attr.type = rdma_ah_find_type(qp->device, cmd->base.dest.port_num); @@ -2032,6 +2033,7 @@ static int modify_qp(struct 
ib_uverbs_file *file, cmd->base.alt_dest.static_rate); rdma_ah_set_port_num(&attr->alt_ah_attr, cmd->base.alt_dest.port_num); + rdma_ah_set_make_grd(&attr->alt_ah_attr, false); ret = ib_modify_qp_with_udata(qp, attr, modify_qp_mask(qp->qp_type, @@ -2584,6 +2586,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, } attr.type = rdma_ah_find_type(ib_dev, cmd.attr.port_num); + rdma_ah_set_make_grd(&attr, false); rdma_ah_set_dlid(&attr, cmd.attr.dlid); rdma_ah_set_sl(&attr, cmd.attr.sl); rdma_ah_set_path_bits(&attr, cmd.attr.src_path_bits); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index ee19660ca2fa..cec9590870ba 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -70,6 +70,7 @@ #include #include #include +#include #include "chip_registers.h" #include "common.h" @@ -353,6 +354,10 @@ struct hfi1_packet { bool fecn; }; +/* Packet types */ +#define HFI1_PKT_TYPE_9B 0 +#define HFI1_PKT_TYPE_16B 1 + /* * OPA 16B Header */ @@ -2170,6 +2175,31 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) +static inline void hfi1_update_ah_attr(struct ib_device *ibdev, + struct rdma_ah_attr *attr) +{ + struct hfi1_pportdata *ppd; + struct hfi1_ibport *ibp; + u32 dlid = rdma_ah_get_dlid(attr); + + /* + * Kernel clients may not have set up GRH information. + * Set that here. + */ + ibp = to_iport(ibdev, rdma_ah_get_port_num(attr)); + ppd = ppd_from_ibp(ibp); + if ((((dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) || + (ppd->lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))) && + (dlid != be32_to_cpu(OPA_LID_PERMISSIVE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)) && + (!(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))) || + (rdma_ah_get_make_grd(attr))) { + rdma_ah_set_ah_flags(attr, IB_AH_GRH); + rdma_ah_set_interface_id(attr, OPA_MAKE_ID(dlid)); + rdma_ah_set_subnet_prefix(attr, ibp->rvp.gid_prefix); + } +} + /* * hfi1_check_mcast - Check if the given lid is * in the OPA multicast range. @@ -2223,4 +2253,66 @@ static inline bool hfi1_is_16B_mcast(u32 lid) opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 16B)) && (lid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))); } + +static inline void hfi1_make_opa_lid(struct rdma_ah_attr *attr) +{ + const struct ib_global_route *grh = rdma_ah_read_grh(attr); + u32 dlid = rdma_ah_get_dlid(attr); + + /* Modify ah_attr.dlid to be in the 32 bit LID space. 
+ * This is how the address will be laid out: + * Assuming MCAST_NR to be 4, + * 32 bit permissive LID = 0xFFFFFFFF + * Multicast LID range = 0xFFFFFFFE to 0xF0000000 + * Unicast LID range = 0xEFFFFFFF to 1 + * Invalid LID = 0 + */ + if (ib_is_opa_gid(&grh->dgid)) + dlid = opa_get_lid_from_gid(&grh->dgid); + else if ((dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)) && + (dlid != be32_to_cpu(OPA_LID_PERMISSIVE))) + dlid = dlid - be16_to_cpu(IB_MULTICAST_LID_BASE) + + opa_get_mcast_base(OPA_MCAST_NR); + else if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) + dlid = be32_to_cpu(OPA_LID_PERMISSIVE); + + rdma_ah_set_dlid(attr, dlid); +} + +static inline u8 hfi1_get_packet_type(u32 lid) +{ + /* 9B if lid > 0xF0000000 */ + if (lid >= opa_get_mcast_base(OPA_MCAST_NR)) + return HFI1_PKT_TYPE_9B; + + /* 16B if lid > 0xC000 */ + if (lid >= opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 9B)) + return HFI1_PKT_TYPE_16B; + + return HFI1_PKT_TYPE_9B; +} + +static inline bool hfi1_get_hdr_type(u32 lid, struct rdma_ah_attr *attr) +{ + /* + * If there was an incoming 16B packet with permissive + * LIDs, OPA GIDs would have been programmed when those + * packets were received. A 16B packet will have to + * be sent in response to that packet. Return a 16B + * header type if that's the case. + */ + if (rdma_ah_get_dlid(attr) == be32_to_cpu(OPA_LID_PERMISSIVE)) + return (ib_is_opa_gid(&rdma_ah_read_grh(attr)->dgid)) ? + HFI1_PKT_TYPE_16B : HFI1_PKT_TYPE_9B; + + /* + * Return a 16B header type if either the destination + * or source lid is extended. + */ + if (hfi1_get_packet_type(rdma_ah_get_dlid(attr)) == HFI1_PKT_TYPE_16B) + return HFI1_PKT_TYPE_16B; + + return hfi1_get_packet_type(lid); +} #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index b801d8469956..0fca6dfe8d9f 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -232,6 +232,31 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, return 0; } +/* + * qp_set_16b - Set the hdr_type based on whether the slid or the + * dlid in the connection is extended. Only applicable for RC and UC + * QPs. 
UD QPs determine this on the fly from the ah in the wqe + */ +static inline void qp_set_16b(struct rvt_qp *qp) +{ + struct hfi1_pportdata *ppd; + struct hfi1_ibport *ibp; + struct hfi1_qp_priv *priv = qp->priv; + + /* Update ah_attr to account for extended LIDs */ + hfi1_update_ah_attr(qp->ibqp.device, &qp->remote_ah_attr); + + /* Create 32 bit LIDs */ + hfi1_make_opa_lid(&qp->remote_ah_attr); + + if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) + return; + + ibp = to_iport(qp->ibqp.device, qp->port_num); + ppd = ppd_from_ibp(ibp); + priv->hdr_type = hfi1_get_hdr_type(ppd->lid, &qp->remote_ah_attr); +} + void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -242,6 +267,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); + qp_set_16b(qp); } if (attr_mask & IB_QP_PATH_MIG_STATE && @@ -251,6 +277,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); + qp_set_16b(qp); } } @@ -751,6 +778,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp) qp->s_flags |= RVT_S_AHG_CLEAR; priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); + qp_set_16b(qp); ev.device = qp->ibqp.device; ev.element.qp = &qp->ibqp; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 0b1556fed47e..18b27276f202 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1421,6 +1421,15 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_2048); + /* + * sm_lid of 0xFFFF needs special handling so that it can + * be differentiated from a permissive LID of 0xFFFF. 
+ * We set the grh_required flag here so the SA can program + * the DGID in the address handle appropriately + */ + if (props->sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)) + props->grh_required = true; + return 0; } @@ -1528,6 +1537,7 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev, struct hfi1_pportdata *ppd; struct hfi1_devdata *dd; u8 sc5; + struct rdma_ah_attr *attr = &ah->attr; /* * Do not trust reading anything from rvt_ah at this point as it is not @@ -1537,6 +1547,8 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev, ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)]; + hfi1_update_ah_attr(ibdev, attr); + hfi1_make_opa_lid(attr); dd = dd_from_ppd(ppd); ah->vl = sc_to_vlt(dd, sc5); if (ah->vl < num_vls || ah->vl == 15) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 68577a0c922b..d3dd0c01b8f6 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -147,6 +147,7 @@ struct hfi1_qp_priv { u8 s_sc; /* SC[0..4] for next packet */ struct iowait s_iowait; struct rvt_qp *owner; + u8 hdr_type; /* 9B or 16B */ }; /* diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 70a183179224..8f263930c56f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -864,6 +864,7 @@ struct roce_ah_attr { struct opa_ah_attr { u32 dlid; u8 src_path_bits; + bool make_grd; }; struct rdma_ah_attr { @@ -3625,6 +3626,20 @@ static inline u8 rdma_ah_get_path_bits(const struct rdma_ah_attr *attr) return 0; } +static inline void rdma_ah_set_make_grd(struct rdma_ah_attr *attr, + bool make_grd) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_OPA) + attr->opa.make_grd = make_grd; +} + +static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_OPA) + return attr->opa.make_grd; + return false; +} + static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u8 port_num) { attr->port_num = port_num; diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 8d3ad4ecbea1..9ae126fb8648 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -71,7 +71,7 @@ * * @gid: The Global identifier */ -static inline bool ib_is_opa_gid(union ib_gid *gid) +static inline bool ib_is_opa_gid(const union ib_gid *gid) { return ((be64_to_cpu(gid->global.interface_id) >> 40) == OPA_SPECIAL_OUI); @@ -84,7 +84,7 @@ static inline bool ib_is_opa_gid(union ib_gid *gid) * * @gid: The Global identifier */ -static inline u32 opa_get_lid_from_gid(union ib_gid *gid) +static inline u32 opa_get_lid_from_gid(const union ib_gid *gid) { return be64_to_cpu(gid->global.interface_id) & 0xFFFFFFFF; } -- cgit v1.2.3-59-g8ed1b From 88733e3b845024cb2324a68469a4a25fdd9c0a6c Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:23 -0700 Subject: IB/hfi1: Add 16B UD support Add 16B bypass packet support for UD traffic types. 
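As a rough illustration of the dispatch pattern this patch introduces (a per-header-type builder table, mirroring hfi1_make_ud_req_tbl in the diff below), here is a minimal standalone sketch; every name and type in it is an illustrative stand-in rather than the driver's API:

/*
 * Sketch: select a 9B or 16B UD header builder through a function
 * table indexed by the QP's header type.
 */
#include <stdio.h>

#define PKT_TYPE_9B  0
#define PKT_TYPE_16B 1

struct pkt_state {
	unsigned int hdrwords; /* header size in dwords */
};

typedef void (*make_req_fn)(struct pkt_state *ps);

static void make_ud_req_9b(struct pkt_state *ps)
{
	ps->hdrwords = 7; /* LRH+BTH+DETH = (8+12+8)/4 */
	printf("9B UD header, %u dwords\n", ps->hdrwords);
}

static void make_ud_req_16b(struct pkt_state *ps)
{
	ps->hdrwords = 9; /* 16B LRH+BTH+DETH = (16+12+8)/4 */
	printf("16B UD header, %u dwords\n", ps->hdrwords);
}

static const make_req_fn make_ud_req_tbl[2] = {
	[PKT_TYPE_9B]  = make_ud_req_9b,
	[PKT_TYPE_16B] = make_ud_req_16b,
};

int main(void)
{
	struct pkt_state ps = { 0 };
	int hdr_type = PKT_TYPE_16B; /* normally derived from the AH LIDs */

	make_ud_req_tbl[hdr_type](&ps);
	return 0;
}

Keeping both builders behind one signature lets the common UD request path stay format-agnostic and pick the builder from the QP's hdr_type at send time.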
Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 35 +-- drivers/infiniband/hw/hfi1/hfi.h | 117 +++++++++- drivers/infiniband/hw/hfi1/mad.c | 8 +- drivers/infiniband/hw/hfi1/ruc.c | 4 +- drivers/infiniband/hw/hfi1/ud.c | 421 +++++++++++++++++++++++++++--------- drivers/infiniband/hw/hfi1/verbs.h | 2 +- include/rdma/opa_addr.h | 1 + 7 files changed, 457 insertions(+), 131 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index ae6a90d2a31c..fc7085d6cf3f 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -437,23 +437,33 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct ib_header *hdr = pkt->hdr; struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = pkt->grh; u32 rqpn = 0, bth1; - u16 rlid, dlid = ib_get_dlid(hdr); - u8 sc, svc_type; + u16 pkey, rlid, dlid = ib_get_dlid(pkt->hdr); + u8 hdr_type, sc, svc_type; bool is_mcast = false; + if (pkt->etype == RHF_RCV_TYPE_BYPASS) { + is_mcast = hfi1_is_16B_mcast(dlid); + pkey = hfi1_16B_get_pkey(pkt->hdr); + sc = hfi1_16B_get_sc(pkt->hdr); + hdr_type = HFI1_PKT_TYPE_16B; + } else { + is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); + pkey = ib_bth_get_pkey(ohdr); + sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf); + hdr_type = HFI1_PKT_TYPE_9B; + } + switch (qp->ibqp.qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: - rlid = ib_get_slid(hdr); - rqpn = ib_get_sqpn(ohdr); + rlid = ib_get_slid(pkt->hdr); + rqpn = ib_get_sqpn(pkt->ohdr); svc_type = IB_CC_SVCTYPE_UD; - is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); break; case IB_QPT_UC: rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); @@ -469,14 +479,11 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, return; } - sc = hfi1_9B_get_sc5(hdr, pkt->rhf); - bth1 = be32_to_cpu(ohdr->bth[1]); - if (do_cnp && (bth1 & IB_FECN_SMASK)) { - u16 pkey = ib_bth_get_pkey(ohdr); - - return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); - } + /* Call appropriate CNP handler */ + if (do_cnp && (bth1 & IB_FECN_SMASK)) + hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey, + dlid, rlid, sc, grh); if (!is_mcast && (bth1 & IB_BECN_SMASK)) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index cec9590870ba..7e21192da8e1 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -831,6 +831,10 @@ struct hfi1_pportdata { typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); typedef void (*opcode_handler)(struct hfi1_packet *packet); +typedef void (*hfi1_make_req)(struct rvt_qp *qp, + struct hfi1_pkt_state *ps, + struct rvt_swqe *wqe); + /* return values for the RHF receive functions */ #define RHF_RCV_CONTINUE 0 /* keep going */ @@ -1373,6 +1377,13 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd); void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd); extern const struct pci_device_id hfi1_pci_tbl[]; +void hfi1_make_ud_req_9B(struct rvt_qp *qp, + struct hfi1_pkt_state *ps, + struct rvt_swqe *wqe); + +void hfi1_make_ud_req_16B(struct rvt_qp *qp, + struct 
hfi1_pkt_state *ps, + struct rvt_swqe *wqe); /* receive packet handler dispositions */ #define RCV_PKT_OK 0x0 /* keep going */ @@ -1507,6 +1518,18 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); +void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, + u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u8 sc5, const struct ib_grh *old_grh); +typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp, + u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u8 sc5, const struct ib_grh *old_grh); + +/* We support only two types - 9B and 16B for now */ +static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { + [HFI1_PKT_TYPE_9B] = &return_cnp, + [HFI1_PKT_TYPE_16B] = &return_cnp_16B +}; #define PKEY_CHECK_INVALID -1 int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth, u8 sc5, int8_t s_pkey_index); @@ -1747,12 +1770,22 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { struct ib_other_headers *ohdr = pkt->ohdr; - u32 bth1; - bth1 = be32_to_cpu(ohdr->bth[1]); - if (unlikely(bth1 & (IB_BECN_SMASK | IB_FECN_SMASK))) { + u32 bth1; + bool becn = false; + bool fecn = false; + + if (pkt->etype == RHF_RCV_TYPE_BYPASS) { + fecn = hfi1_16B_get_fecn(pkt->hdr); + becn = hfi1_16B_get_becn(pkt->hdr); + } else { + bth1 = be32_to_cpu(ohdr->bth[1]); + fecn = bth1 & IB_FECN_SMASK; + becn = bth1 & IB_BECN_SMASK; + } + if (unlikely(fecn || becn)) { hfi1_process_ecn_slowpath(qp, pkt, do_cnp); - return !!(bth1 & IB_FECN_SMASK); + return fecn; } return false; } @@ -2315,4 +2348,80 @@ static inline bool hfi1_get_hdr_type(u32 lid, struct rdma_ah_attr *attr) return hfi1_get_packet_type(lid); } + +static inline void hfi1_make_ext_grh(struct hfi1_packet *packet, + struct ib_grh *grh, u32 slid, + u32 dlid) +{ + struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + + if (!ibp) + return; + + grh->hop_limit = 1; + grh->sgid.global.subnet_prefix = ibp->rvp.gid_prefix; + if (slid == opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B)) + grh->sgid.global.interface_id = + OPA_MAKE_ID(be32_to_cpu(OPA_LID_PERMISSIVE)); + else + grh->sgid.global.interface_id = OPA_MAKE_ID(slid); + + /* + * Upper layers (like mad) may compare the dgid in the + * wc that is obtained here with the sgid_index in + * the wr. Since sgid_index in wr is always 0 for + * extended lids, set the dgid here to the default + * IB gid. 
+ */ + grh->dgid.global.subnet_prefix = ibp->rvp.gid_prefix; + grh->dgid.global.interface_id = + cpu_to_be64(ppd->guids[HFI1_PORT_GUID_INDEX]); +} + +static inline int hfi1_get_16b_padding(u32 hdr_size, u32 payload) +{ + return -(hdr_size + payload + (SIZE_OF_CRC << 2) + + SIZE_OF_LT) & 0x7; +} + +static inline void hfi1_make_ib_hdr(struct ib_header *hdr, + u16 lrh0, u16 len, + u16 dlid, u16 slid) +{ + hdr->lrh[0] = cpu_to_be16(lrh0); + hdr->lrh[1] = cpu_to_be16(dlid); + hdr->lrh[2] = cpu_to_be16(len); + hdr->lrh[3] = cpu_to_be16(slid); +} + +static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr, + u32 slid, u32 dlid, + u16 len, u16 pkey, + u8 becn, u8 fecn, u8 l4, + u8 sc) +{ + u32 lrh0 = 0; + u32 lrh1 = 0x40000000; + u32 lrh2 = 0; + u32 lrh3 = 0; + + lrh0 = (lrh0 & ~OPA_16B_BECN_MASK) | (becn << OPA_16B_BECN_SHIFT); + lrh0 = (lrh0 & ~OPA_16B_LEN_MASK) | (len << OPA_16B_LEN_SHIFT); + lrh0 = (lrh0 & ~OPA_16B_LID_MASK) | (slid & OPA_16B_LID_MASK); + lrh1 = (lrh1 & ~OPA_16B_FECN_MASK) | (fecn << OPA_16B_FECN_SHIFT); + lrh1 = (lrh1 & ~OPA_16B_SC_MASK) | (sc << OPA_16B_SC_SHIFT); + lrh1 = (lrh1 & ~OPA_16B_LID_MASK) | (dlid & OPA_16B_LID_MASK); + lrh2 = (lrh2 & ~OPA_16B_SLID_MASK) | + ((slid >> OPA_16B_SLID_SHIFT) << OPA_16B_SLID_HIGH_SHIFT); + lrh2 = (lrh2 & ~OPA_16B_DLID_MASK) | + ((dlid >> OPA_16B_DLID_SHIFT) << OPA_16B_DLID_HIGH_SHIFT); + lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | (pkey << OPA_16B_PKEY_SHIFT); + lrh2 = (lrh2 & ~OPA_16B_L4_MASK) | l4; + + hdr->lrh[0] = lrh0; + hdr->lrh[1] = lrh1; + hdr->lrh[2] = lrh2; + hdr->lrh[3] = lrh3; +} #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 21fadb4b510c..1509bc6b76d8 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -373,12 +373,10 @@ static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid) * Send a bad P_Key trap (ch. 14.3.8). */ void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2) + u32 qp1, u32 qp2, u32 lid1, u32 lid2) { struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; - u32 _lid1 = lid1; - u32 _lid2 = lid2; ibp->rvp.n_pkt_drops++; ibp->rvp.pkey_violations++; @@ -389,8 +387,8 @@ void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, return; /* Send violation trap */ - trap->data.ntc_257_258.lid1 = cpu_to_be32(_lid1); - trap->data.ntc_257_258.lid2 = cpu_to_be32(_lid2); + trap->data.ntc_257_258.lid1 = cpu_to_be32(lid1); + trap->data.ntc_257_258.lid2 = cpu_to_be32(lid2); trap->data.ntc_257_258.key = cpu_to_be32(key); trap->data.ntc_257_258.sl = sl << 3; trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index d252f8f2207a..6839bfae933c 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -649,7 +649,7 @@ done: * @ibp: a pointer to the IB port * @hdr: a pointer to the GRH header being constructed * @grh: the global route address to send to - * @hwords: the number of 32 bit words of header being sent + * @hwords: size of header after grh being sent in dwords * @nwords: the number of 32 bit words of data being sent * * Return the size of the header in 32 bit words. 
@@ -661,7 +661,7 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) | (grh->traffic_class << IB_GRH_TCLASS_SHIFT) | (grh->flow_label << IB_GRH_FLOW_SHIFT)); - hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2); + hdr->paylen = cpu_to_be16((hwords + nwords) << 2); /* next_hdr is defined by C8-7 in ch. 8.4.1 */ hdr->next_hdr = IB_GRH_NEXT_HDR; hdr->hop_limit = grh->hop_limit; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index b708376b67da..2ba74fdd6f15 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -53,6 +53,12 @@ #include "verbs_txreq.h" #include "qp.h" +/* We support only two types - 9B and 16B for now */ +static const hfi1_make_req hfi1_make_ud_req_tbl[2] = { + [HFI1_PKT_TYPE_9B] = &hfi1_make_ud_req_9B, + [HFI1_PKT_TYPE_16B] = &hfi1_make_ud_req_16B +}; + /** * ud_loopback - handle send on loopback QPs * @sqp: the sending QP @@ -67,6 +73,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) { struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); struct hfi1_pportdata *ppd; + struct hfi1_qp_priv *priv = sqp->priv; struct rvt_qp *qp; struct rdma_ah_attr *ah_attr; unsigned long flags; @@ -102,7 +109,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (qp->ibqp.qp_num > 1) { u16 pkey; - u16 slid; + u32 slid; u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index); @@ -176,9 +183,33 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { struct ib_grh grh; - const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr); + struct ib_global_route grd = *(rdma_ah_read_grh(ah_attr)); + + /* + * For loopback packets with extended LIDs, the + * sgid_index in the GRH is 0 and the dgid is + * OPA GID of the sender. While creating a response + * to the loopback packet, IB core creates the new + * sgid_index from the DGID and that will be the + * OPA_GID_INDEX. The new dgid is from the sgid + * index and that will be in the IB GID format. + * + * We now have a case where the sent packet had a + * different sgid_index and dgid compared to the + * one that was received in response. + * + * Fix this inconsistency. 
+ */ + if (priv->hdr_type == HFI1_PKT_TYPE_16B) { + if (grd.sgid_index == 0) + grd.sgid_index = OPA_GID_INDEX; - hfi1_make_grh(ibp, &grh, grd, 0, 0); + if (ib_is_opa_gid(&grd.dgid)) + grd.dgid.global.interface_id = + cpu_to_be64(ppd->guids[HFI1_PORT_GUID_INDEX]); + } + + hfi1_make_grh(ibp, &grh, &grd, 0, 0); hfi1_copy_sge(&qp->r_sge, &grh, sizeof(grh), true, false); wc.wc_flags |= IB_WC_GRH; @@ -235,7 +266,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) wc.pkey_index = 0; } wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1)); + ((1 << ppd->lmc) - 1)); /* Check for loopback when the port lid is not set */ if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI) wc.slid = be16_to_cpu(IB_LID_PERMISSIVE); @@ -252,6 +283,183 @@ drop: rcu_read_unlock(); } +static void hfi1_make_bth_deth(struct rvt_qp *qp, struct rvt_swqe *wqe, + struct ib_other_headers *ohdr, + u16 *pkey, u32 extra_bytes, bool bypass) +{ + u32 bth0; + struct hfi1_ibport *ibp; + + ibp = to_iport(qp->ibqp.device, qp->port_num); + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; + bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; + } else { + bth0 = IB_OPCODE_UD_SEND_ONLY << 24; + } + + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + bth0 |= extra_bytes << 20; + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) + *pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); + else + *pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); + if (!bypass) + bth0 |= *pkey; + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); + ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn)); + /* + * Qkeys with the high order bit set mean use the + * qkey from the QP context instead of the WR (see 10.2.5). + */ + ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? + qp->qkey : wqe->ud_wr.remote_qkey); + ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); +} + +void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, + struct rvt_swqe *wqe) +{ + u32 nwords, extra_bytes; + u16 len, slid, dlid, pkey; + u16 lrh0 = 0; + u8 sc5; + struct hfi1_qp_priv *priv = qp->priv; + struct ib_other_headers *ohdr; + struct rdma_ah_attr *ah_attr; + struct hfi1_pportdata *ppd; + struct hfi1_ibport *ibp; + struct ib_grh *grh; + + ibp = to_iport(qp->ibqp.device, qp->port_num); + ppd = ppd_from_ibp(ibp); + ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; + + extra_bytes = -wqe->length & 3; + nwords = ((wqe->length + extra_bytes) >> 2) + SIZE_OF_CRC; + /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */ + qp->s_hdrwords = 7; + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + qp->s_hdrwords++; + + if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { + grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh; + qp->s_hdrwords += hfi1_make_grh(ibp, grh, + rdma_ah_read_grh(ah_attr), + qp->s_hdrwords - 2, nwords); + lrh0 = HFI1_LRH_GRH; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; + } else { + lrh0 = HFI1_LRH_BTH; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; + } + + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; + lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4; + if (qp->ibqp.qp_type == IB_QPT_SMI) { + lrh0 |= 0xF000; /* Set VL (see ch. 
13.5.3.1) */ + priv->s_sc = 0xf; + } else { + lrh0 |= (sc5 & 0xf) << 12; + priv->s_sc = sc5; + } + + dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B); + if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { + slid = be16_to_cpu(IB_LID_PERMISSIVE); + } else { + u16 lid = (u16)ppd->lid; + + if (lid) { + lid |= rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1); + slid = lid; + } else { + slid = be16_to_cpu(IB_LID_PERMISSIVE); + } + } + hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, false); + len = qp->s_hdrwords + nwords; + + /* Setup the packet */ + ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_9B; + hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh, + lrh0, len, dlid, slid); +} + +void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, + struct rvt_swqe *wqe) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct ib_other_headers *ohdr; + struct rdma_ah_attr *ah_attr; + struct hfi1_pportdata *ppd; + struct hfi1_ibport *ibp; + u32 dlid, slid, nwords, extra_bytes; + u16 len, pkey; + u8 l4, sc5; + + ibp = to_iport(qp->ibqp.device, qp->port_num); + ppd = ppd_from_ibp(ibp); + ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; + /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */ + qp->s_hdrwords = 9; + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + qp->s_hdrwords++; + + /* SW provides space for CRC and LT for bypass packets. */ + extra_bytes = hfi1_get_16b_padding((qp->s_hdrwords << 2), + wqe->length); + nwords = ((wqe->length + extra_bytes + SIZE_OF_LT) >> 2) + SIZE_OF_CRC; + + if ((rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) && + hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) { + struct ib_grh *grh; + struct ib_global_route *grd = rdma_ah_retrieve_grh(ah_attr); + /* + * Ensure OPA GIDs are transformed to IB gids + * before creating the GRH. + */ + if (grd->sgid_index == OPA_GID_INDEX) { + dd_dev_warn(ppd->dd, "Bad sgid_index. 
sgid_index: %d\n", + grd->sgid_index); + grd->sgid_index = 0; + } + grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh; + qp->s_hdrwords += hfi1_make_grh(ibp, grh, grd, + qp->s_hdrwords - 4, nwords); + ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth; + l4 = OPA_16B_L4_IB_GLOBAL; + } else { + ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth; + l4 = OPA_16B_L4_IB_LOCAL; + } + + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; + if (qp->ibqp.qp_type == IB_QPT_SMI) + priv->s_sc = 0xf; + else + priv->s_sc = sc5; + + dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 16B); + if (!ppd->lid) + slid = be32_to_cpu(OPA_LID_PERMISSIVE); + else + slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1)); + + hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true); + /* Convert dwords to flits */ + len = (qp->s_hdrwords + nwords) >> 1; + + /* Setup the packet */ + ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_16B; + hfi1_make_16b_hdr(&ps->s_txreq->phdr.hdr.opah, + slid, dlid, len, pkey, 0, 0, l4, priv->s_sc); +} + /** * hfi1_make_ud_req - construct a UD request packet * @qp: the QP @@ -263,18 +471,12 @@ drop: int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; - struct ib_other_headers *ohdr; struct rdma_ah_attr *ah_attr; struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; struct rvt_swqe *wqe; - u32 nwords; - u32 extra_bytes; - u32 bth0; - u16 lrh0; - u16 lid; int next_cur; - u8 sc5; + u32 lid; ps->s_txreq = get_txreq(ps->dev, qp); if (IS_ERR(ps->s_txreq)) @@ -311,13 +513,14 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; - if (rdma_ah_get_dlid(ah_attr) < be16_to_cpu(IB_MULTICAST_LID_BASE) || - rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) { + priv->hdr_type = hfi1_get_hdr_type(ppd->lid, ah_attr); + if ((!hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) || + (rdma_ah_get_dlid(ah_attr) == be32_to_cpu(OPA_LID_PERMISSIVE))) { lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1); if (unlikely(!loopback && - (lid == ppd->lid || - (lid == be16_to_cpu(IB_LID_PERMISSIVE) && - qp->ibqp.qp_type == IB_QPT_GSI)))) { + ((lid == ppd->lid) || + ((lid == be32_to_cpu(OPA_LID_PERMISSIVE)) && + (qp->ibqp.qp_type == IB_QPT_GSI))))) { unsigned long tflags = ps->flags; /* * If DMAs are in progress, we can't generate @@ -341,11 +544,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } qp->s_cur = next_cur; - extra_bytes = -wqe->length & 3; - nwords = (wqe->length + extra_bytes) >> 2; - - /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ - qp->s_hdrwords = 7; ps->s_txreq->s_cur_size = wqe->length; ps->s_txreq->ss = &qp->s_sge; qp->s_srate = rdma_ah_get_static_rate(ah_attr); @@ -356,78 +554,12 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_sge.num_sge = wqe->wr.num_sge; qp->s_sge.total_len = wqe->length; - if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { - /* Header size in 32-bit words. */ - qp->s_hdrwords += - hfi1_make_grh(ibp, - &ps->s_txreq->phdr.hdr.ibh.u.l.grh, - rdma_ah_read_grh(ah_attr), - qp->s_hdrwords, nwords); - lrh0 = HFI1_LRH_GRH; - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; - /* - * Don't worry about sending to locally attached multicast - * QPs. It is unspecified by the spec. what happens. - */ - } else { - /* Header size in 32-bit words. 
*/ - lrh0 = HFI1_LRH_BTH; - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; - } - if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { - qp->s_hdrwords++; - ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; - bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; - } else { - bth0 = IB_OPCODE_UD_SEND_ONLY << 24; - } - sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; - lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4; - if (qp->ibqp.qp_type == IB_QPT_SMI) { - lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ - priv->s_sc = 0xf; - } else { - lrh0 |= (sc5 & 0xf) << 12; - priv->s_sc = sc5; - } + /* Make the appropriate header */ + hfi1_make_ud_req_tbl[priv->hdr_type](qp, ps, qp->s_wqe); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); ps->s_txreq->sde = priv->s_sde; priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); ps->s_txreq->psc = priv->s_sendcontext; - ps->s_txreq->phdr.hdr.ibh.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.ibh.lrh[1] = - cpu_to_be16(rdma_ah_get_dlid(ah_attr)); - ps->s_txreq->phdr.hdr.ibh.lrh[2] = - cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - if (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) { - ps->s_txreq->phdr.hdr.ibh.lrh[3] = IB_LID_PERMISSIVE; - } else { - lid = ppd->lid; - if (lid) { - lid |= rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1); - ps->s_txreq->phdr.hdr.ibh.lrh[3] = cpu_to_be16(lid); - } else { - ps->s_txreq->phdr.hdr.ibh.lrh[3] = IB_LID_PERMISSIVE; - } - } - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= IB_BTH_SOLICITED; - bth0 |= extra_bytes << 20; - if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) - bth0 |= hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); - else - bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); - ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); - ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn)); - /* - * Qkeys with the high order bit set mean use the - * qkey from the QP context instead of the WR (see 10.2.5). - */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? - qp->qkey : wqe->ud_wr.remote_qkey); - ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); /* disarm any ahg */ priv->s_ahg->ahgcount = 0; priv->s_ahg->ahgidx = 0; @@ -497,6 +629,64 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey) return -1; } +void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, + u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u8 sc5, const struct ib_grh *old_grh) +{ + u64 pbc, pbc_flags = 0; + u32 bth0, plen, vl, hwords = 7; + u16 len; + u8 l4; + struct hfi1_16b_header hdr; + struct ib_other_headers *ohdr; + struct pio_buf *pbuf; + struct send_context *ctxt = qp_to_send_context(qp, sc5); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u32 nwords; + + /* Populate length */ + nwords = ((hfi1_get_16b_padding(hwords << 2, 0) + + SIZE_OF_LT) >> 2) + SIZE_OF_CRC; + if (old_grh) { + struct ib_grh *grh = &hdr.u.l.grh; + + grh->version_tclass_flow = old_grh->version_tclass_flow; + grh->paylen = cpu_to_be16((hwords - 4 + nwords) << 2); + grh->hop_limit = 0xff; + grh->sgid = old_grh->dgid; + grh->dgid = old_grh->sgid; + ohdr = &hdr.u.l.oth; + l4 = OPA_16B_L4_IB_GLOBAL; + hwords += sizeof(struct ib_grh) / sizeof(u32); + } else { + ohdr = &hdr.u.oth; + l4 = OPA_16B_L4_IB_LOCAL; + } + + /* BIT 16 to 19 is TVER. 
Bit 20 to 22 is pad cnt */ + bth0 = (IB_OPCODE_CNP << 24) | (1 << 16) | + (hfi1_get_16b_padding(hwords << 2, 0) << 20); + ohdr->bth[0] = cpu_to_be32(bth0); + + ohdr->bth[1] = cpu_to_be32(remote_qpn); + ohdr->bth[2] = 0; /* PSN 0 */ + + /* Convert dwords to flits */ + len = (hwords + nwords) >> 1; + hfi1_make_16b_hdr(&hdr, slid, dlid, len, pkey, 1, 0, l4, sc5); + + plen = 2 /* PBC */ + hwords + nwords; + pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; + vl = sc_to_vlt(ppd->dd, sc5); + pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); + if (ctxt) { + pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); + if (pbuf) + ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, + &hdr, hwords); + } +} + void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh) @@ -535,11 +725,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT)); ohdr->bth[2] = 0; /* PSN 0 */ - hdr.lrh[0] = cpu_to_be16(lrh0); - hdr.lrh[1] = cpu_to_be16(dlid); - hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(slid); - + hfi1_make_ib_hdr(&hdr, lrh0, hwords + SIZE_OF_CRC, dlid, slid); plen = 2 /* PBC */ + hwords; pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); vl = sc_to_vlt(ppd->dd, sc5); @@ -672,18 +858,33 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) void *data = packet->payload; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; - u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); + u8 sc5 = packet->sc; u8 sl_from_sc; - u8 extra_bytes = packet->pad; u8 opcode = packet->opcode; u8 sl = packet->sl; u32 dlid = packet->dlid; u32 slid = packet->slid; + u8 extra_bytes; + bool dlid_is_permissive; + bool slid_is_permissive; + extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2); qkey = ib_get_qkey(ohdr); src_qp = ib_get_sqpn(ohdr); - pkey = ib_bth_get_pkey(ohdr); - extra_bytes += (SIZE_OF_CRC << 2); + + if (packet->etype == RHF_RCV_TYPE_BYPASS) { + u32 permissive_lid = + opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B); + + pkey = hfi1_16B_get_pkey(packet->hdr); + dlid_is_permissive = (dlid == permissive_lid); + slid_is_permissive = (slid == permissive_lid); + } else { + hdr = packet->hdr; + pkey = ib_bth_get_pkey(ohdr); + dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE)); + slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE)); + } sl_from_sc = ibp->sc_to_sl[sc5]; process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); @@ -701,8 +902,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1). 
*/ if (qp->ibqp.qp_num) { - if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE || - hdr->lrh[3] == IB_LID_PERMISSIVE)) + if (unlikely(dlid_is_permissive || slid_is_permissive)) goto drop; if (qp->ibqp.qp_num > 1) { if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) { @@ -740,8 +940,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (tlen > 2048) goto drop; - if ((hdr->lrh[1] == IB_LID_PERMISSIVE || - hdr->lrh[3] == IB_LID_PERMISSIVE) && + if ((dlid_is_permissive || slid_is_permissive) && smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) goto drop; @@ -794,7 +993,18 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) goto drop; } if (packet->grh) { - hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh, + hfi1_copy_sge(&qp->r_sge, packet->grh, + sizeof(struct ib_grh), true, false); + wc.wc_flags |= IB_WC_GRH; + } else if (packet->etype == RHF_RCV_TYPE_BYPASS) { + struct ib_grh grh; + /* + * Assuming we only created 16B on the send side + * if we want to use large LIDs, since GRH was stripped + * out when creating 16B, add back the GRH here. + */ + hfi1_make_ext_grh(packet, &grh, slid, dlid); + hfi1_copy_sge(&qp->r_sge, &grh, sizeof(struct ib_grh), true, false); wc.wc_flags |= IB_WC_GRH; } else { @@ -827,14 +1037,15 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } else { wc.pkey_index = 0; } - + if (slid_is_permissive) + slid = be32_to_cpu(OPA_LID_PERMISSIVE); wc.slid = slid; wc.sl = sl_from_sc; /* * Save the LMC lower bits if the destination LID is a unicast LID. */ - wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 : + wc.dlid_path_bits = hfi1_check_mcast(dlid) ? 0 : dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index d3dd0c01b8f6..4928ee4f92c1 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -259,7 +259,7 @@ static inline int hfi1_send_ok(struct rvt_qp *qp) * This must be called with s_lock held. */ void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2); + u32 qp1, u32 qp2, u32 lid1, u32 lid2); void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num); void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 9ae126fb8648..e6e90f18e6d5 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -54,6 +54,7 @@ #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) #define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ ? 0 : x) +#define OPA_GID_INDEX 0x1 /** * 0xF8 - 4 bits of multicast range and 1 bit for collective range * Example: For 24 bit LID space, -- cgit v1.2.3-59-g8ed1b From 863cf89d472fe7a61305b06de84b9ed2dea02611 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:29 -0700 Subject: IB/hfi1: Add 16B trace support Add trace support to 16B bypass packets during send and receive. Sample input header trace: -0 [000] d.h. 
271742.509477: input_ibhdr: [0000:05:00.0] (16B) len:24 sc:0 dlid:0xf0000b slid:0x10002 age:0 becn:0 fecn:0 l4:10 rc:0 sc:0 pkey:0x8001 entropy:0x0000 op:0x65,UD_SEND_ONLY_WITH_IMMEDIATE se:0 m:1 pad:3 tver:0 qpn:0xffffff a:0 psn:0x00000001 hlen:248 deth qkey 0x01234567 sqpn 0x000004 Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 25 +++ drivers/infiniband/hw/hfi1/trace.c | 153 ++++++++++++- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 351 ++++++++++++++++++++---------- 3 files changed, 406 insertions(+), 123 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7e21192da8e1..b07f42cfa5bf 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -381,6 +381,11 @@ struct hfi1_packet { #define OPA_16B_PKEY_SHIFT 16 #define OPA_16B_LEN_MASK 0x7FF00000ull #define OPA_16B_LEN_SHIFT 20 +#define OPA_16B_RC_MASK 0xE000000ull +#define OPA_16B_RC_SHIFT 25 +#define OPA_16B_AGE_MASK 0xFF0000ull +#define OPA_16B_AGE_SHIFT 16 +#define OPA_16B_ENTROPY_MASK 0xFFFFull /* * OPA 16B L2/L4 Encodings @@ -434,6 +439,26 @@ static inline u16 hfi1_16B_get_pkey(struct hfi1_16b_header *hdr) return (u16)((hdr->lrh[2] & OPA_16B_PKEY_MASK) >> OPA_16B_PKEY_SHIFT); } +static inline u8 hfi1_16B_get_rc(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[1] & OPA_16B_RC_MASK) >> OPA_16B_RC_SHIFT); +} + +static inline u8 hfi1_16B_get_age(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[3] & OPA_16B_AGE_MASK) >> OPA_16B_AGE_SHIFT); +} + +static inline u16 hfi1_16B_get_len(struct hfi1_16b_header *hdr) +{ + return (u16)((hdr->lrh[0] & OPA_16B_LEN_MASK) >> OPA_16B_LEN_SHIFT); +} + +static inline u16 hfi1_16B_get_entropy(struct hfi1_16b_header *hdr) +{ + return (u16)(hdr->lrh[3] & OPA_16B_ENTROPY_MASK); +} + /* * BTH */ diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index b80b74d0c252..9938bb983ce6 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -47,7 +47,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr) +static u8 __get_ib_hdr_len(struct ib_header *hdr) { struct ib_other_headers *ohdr; u8 opcode; @@ -61,9 +61,60 @@ u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr) 0 : hdr_len_by_opcode[opcode] - (12 + 8); } +static u8 __get_16b_hdr_len(struct hfi1_16b_header *hdr) +{ + struct ib_other_headers *ohdr; + u8 opcode; + + if (hfi1_16B_get_l4(hdr) == OPA_16B_L4_IB_LOCAL) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + opcode = ib_bth_get_opcode(ohdr); + return hdr_len_by_opcode[opcode] == 0 ? 
+ 0 : hdr_len_by_opcode[opcode] - (12 + 8 + 8); +} + +u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet) +{ + if (packet->etype != RHF_RCV_TYPE_BYPASS) + return __get_ib_hdr_len(packet->hdr); + else + return __get_16b_hdr_len(packet->hdr); +} + +u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opa_hdr) +{ + if (!opa_hdr->hdr_type) + return __get_ib_hdr_len(&opa_hdr->ibh); + else + return __get_16b_hdr_len(&opa_hdr->opah); +} + const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet) { - return "IB"; + if (packet->etype != RHF_RCV_TYPE_BYPASS) + return "IB"; + + switch (hfi1_16B_get_l2(packet->hdr)) { + case 0: + return "0"; + case 1: + return "1"; + case 2: + return "16B"; + case 3: + return "9B"; + } + return ""; +} + +const char *hfi1_trace_get_packet_type_str(u8 l4) +{ + if (l4) + return "16B"; + else + return "9B"; } #define IMM_PRN "imm:%d" @@ -89,10 +140,10 @@ static const char *parse_syndrome(u8 syndrome) return ""; } -void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, - u8 *ack, u8 *becn, u8 *fecn, u8 *mig, - u8 *se, u8 *pad, u8 *opcode, u8 *tver, - u16 *pkey, u32 *psn, u32 *qpn) +void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn) { *ack = ib_bth_get_ackreq(ohdr); *becn = ib_bth_get_becn(ohdr); @@ -107,8 +158,22 @@ void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, *qpn = ib_bth_get_qpn(ohdr); } +void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *mig, u8 *opcode, + u8 *pad, u8 *se, u8 *tver, + u32 *psn, u32 *qpn) +{ + *ack = ib_bth_get_ackreq(ohdr); + *mig = ib_bth_get_migreq(ohdr); + *opcode = ib_bth_get_opcode(ohdr); + *pad = ib_bth_get_pad(ohdr); + *se = ib_bth_get_se(ohdr); + *tver = ib_bth_get_tver(ohdr); + *psn = ib_bth_get_psn(ohdr); + *qpn = ib_bth_get_qpn(ohdr); +} + void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, - struct ib_other_headers **ohdr, u8 *lnh, u8 *lver, u8 *sl, u8 *sc, u16 *len, u32 *dlid, u32 *slid) { @@ -119,11 +184,79 @@ void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, *len = ib_get_len(hdr); *dlid = ib_get_dlid(hdr); *slid = ib_get_slid(hdr); +} + +void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr, + u8 *age, u8 *becn, u8 *fecn, + u8 *l4, u8 *rc, u8 *sc, + u16 *entropy, u16 *len, u16 *pkey, + u32 *dlid, u32 *slid) +{ + *age = hfi1_16B_get_age(hdr); + *becn = hfi1_16B_get_becn(hdr); + *fecn = hfi1_16B_get_fecn(hdr); + *l4 = hfi1_16B_get_l4(hdr); + *rc = hfi1_16B_get_rc(hdr); + *sc = hfi1_16B_get_sc(hdr); + *entropy = hfi1_16B_get_entropy(hdr); + *len = hfi1_16B_get_len(hdr); + *pkey = hfi1_16B_get_pkey(hdr); + *dlid = hfi1_16B_get_dlid(hdr); + *slid = hfi1_16B_get_slid(hdr); +} + +#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x " +#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d" +#define LRH_16B_PRN "age:%d becn:%d fecn:%d l4:%d " \ + "rc:%d sc:%d pkey:0x%.4x entropy:0x%.4x" +const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, + u8 age, u8 becn, u8 fecn, u8 l4, + u8 lnh, const char *lnh_name, u8 lver, + u8 rc, u8 sc, u8 sl, u16 entropy, + u16 len, u16 pkey, u32 dlid, u32 slid) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, LRH_PRN, len, sc, dlid, slid); + + if (bypass) + trace_seq_printf(p, LRH_16B_PRN, + age, becn, fecn, l4, rc, sc, pkey, entropy); - if (*lnh == HFI1_LRH_BTH) - *ohdr = &hdr->u.oth; else - *ohdr = &hdr->u.l.oth; + trace_seq_printf(p, LRH_9B_PRN, + lnh, lnh_name, lver, sl); + trace_seq_putc(p, 
0); + + return ret; +} + +#define BTH_9B_PRN \ + "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \ + "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x" +#define BTH_16B_PRN \ + "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d " \ + "qpn:0x%.6x a:%d psn:0x%.8x" +const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, + u8 ack, u8 becn, u8 fecn, u8 mig, + u8 se, u8 pad, u8 opcode, const char *opname, + u8 tver, u16 pkey, u32 psn, u32 qpn) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if (bypass) + trace_seq_printf(p, BTH_16B_PRN, + opcode, opname, + se, mig, pad, tver, qpn, ack, psn); + + else + trace_seq_printf(p, BTH_9B_PRN, + opcode, opname, + se, mig, pad, tver, pkey, fecn, becn, + qpn, ack, psn); + trace_seq_putc(p, 0); + + return ret; } const char *parse_everbs_hdrs( diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 73240255ffc5..6721f84dafa5 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -95,17 +95,39 @@ __print_symbolic(opcode, \ ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ ib_opcode_name(CNP)) +u8 ibhdr_exhdr_len(struct ib_header *hdr); const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); -u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr); +u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah); +u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet); +const char *hfi1_trace_get_packet_type_str(u8 l4); const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet); -void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, - u8 *ack, u8 *becn, u8 *fecn, u8 *mig, - u8 *se, u8 *pad, u8 *opcode, u8 *tver, - u16 *pkey, u32 *psn, u32 *qpn); +void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn); void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, - struct ib_other_headers **ohdr, u8 *lnh, u8 *lver, u8 *sl, u8 *sc, u16 *len, u32 *dlid, u32 *slid); +void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *mig, u8 *opcode, + u8 *pad, u8 *se, u8 *tver, + u32 *psn, u32 *qpn); +void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr, + u8 *age, u8 *becn, u8 *fecn, + u8 *l4, u8 *rc, u8 *sc, + u16 *entropy, u16 *len, u16 *pkey, + u32 *dlid, u32 *slid); + +const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, + u8 age, u8 becn, u8 fecn, u8 l4, + u8 lnh, const char *lnh_name, u8 lver, + u8 rc, u8 sc, u8 sl, u16 entropy, + u16 len, u16 pkey, u32 dlid, u32 slid); + +const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, + u8 ack, u8 becn, u8 fecn, u8 mig, + u8 se, u8 pad, u8 opcode, const char *opname, + u8 tver, u16 pkey, u32 psn, u32 qpn); #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) @@ -114,13 +136,8 @@ void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, __print_symbolic(lrh, \ lrh_name(LRH_BTH), \ lrh_name(LRH_GRH)) - -#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x" -#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d " -#define BTH_PRN \ - "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \ - "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x" -#define EHDR_PRN "hlen:%d %s" +#define PKT_ENTRY(pkt) __string(ptype, hfi1_trace_get_packet_str(packet)) +#define PKT_ASSIGN(pkt) __assign_str(ptype, hfi1_trace_get_packet_str(packet)) DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, @@ -129,75 +146,125 @@ 
DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_ARGS(dd, packet, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) + PKT_ENTRY(packet) + __field(bool, bypass) + __field(u8, ack) + __field(u8, age) + __field(u8, becn) + __field(u8, fecn) + __field(u8, l4) __field(u8, lnh) __field(u8, lver) - __field(u8, sl) - __field(u16, len) - __field(u32, dlid) - __field(u8, sc) - __field(u32, slid) - __field(u8, opcode) - __field(u8, se) __field(u8, mig) + __field(u8, opcode) __field(u8, pad) + __field(u8, rc) + __field(u8, sc) + __field(u8, se) + __field(u8, sl) __field(u8, tver) + __field(u16, entropy) + __field(u16, len) __field(u16, pkey) - __field(u8, fecn) - __field(u8, becn) - __field(u32, qpn) - __field(u8, ack) + __field(u32, dlid) __field(u32, psn) + __field(u32, qpn) + __field(u32, slid) /* extended headers */ __dynamic_array(u8, ehdrs, - hfi1_trace_ib_hdr_len(packet->hdr)) + hfi1_trace_packet_hdr_len(packet)) ), TP_fast_assign( - struct ib_other_headers *ohdr; + DD_DEV_ASSIGN(dd); + PKT_ASSIGN(packet); - DD_DEV_ASSIGN(dd); + if (packet->etype == RHF_RCV_TYPE_BYPASS) { + __entry->bypass = true; + hfi1_trace_parse_16b_hdr(packet->hdr, + &__entry->age, + &__entry->becn, + &__entry->fecn, + &__entry->l4, + &__entry->rc, + &__entry->sc, + &__entry->entropy, + &__entry->len, + &__entry->pkey, + &__entry->dlid, + &__entry->slid); - hfi1_trace_parse_9b_hdr(packet->hdr, sc5, - &ohdr, - &__entry->lnh, - &__entry->lver, - &__entry->sl, - &__entry->sc, - &__entry->len, - &__entry->dlid, - &__entry->slid); + hfi1_trace_parse_16b_bth(packet->ohdr, + &__entry->ack, + &__entry->mig, + &__entry->opcode, + &__entry->pad, + &__entry->se, + &__entry->tver, + &__entry->psn, + &__entry->qpn); + } else { + __entry->bypass = false; + hfi1_trace_parse_9b_hdr(packet->hdr, sc5, + &__entry->lnh, + &__entry->lver, + &__entry->sl, + &__entry->sc, + &__entry->len, + &__entry->dlid, + &__entry->slid); - hfi1_trace_parse_bth(ohdr, &__entry->ack, - &__entry->becn, &__entry->fecn, - &__entry->mig, &__entry->se, - &__entry->pad, &__entry->opcode, - &__entry->tver, &__entry->pkey, - &__entry->psn, &__entry->qpn); - /* extended headers */ - memcpy(__get_dynamic_array(ehdrs), &ohdr->u, - __get_dynamic_array_len(ehdrs)); + hfi1_trace_parse_9b_bth(packet->ohdr, + &__entry->ack, + &__entry->becn, + &__entry->fecn, + &__entry->mig, + &__entry->se, + &__entry->pad, + &__entry->opcode, + &__entry->tver, + &__entry->pkey, + &__entry->psn, + &__entry->qpn); + } + /* extended headers */ + memcpy(__get_dynamic_array(ehdrs), + &packet->ohdr->u, + __get_dynamic_array_len(ehdrs)); ), - TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " - BTH_PRN " " EHDR_PRN, + TP_printk("[%s] (%s) %s %s hlen:%d %s", __get_str(dev), - __entry->len, - __entry->sc, - __entry->dlid, - __entry->slid, - __entry->lnh, show_lnh(__entry->lnh), - __entry->lver, - __entry->sl, - /* BTH */ - __entry->opcode, show_ib_opcode(__entry->opcode), - __entry->se, - __entry->mig, - __entry->pad, - __entry->tver, - __entry->pkey, - __entry->fecn, - __entry->becn, - __entry->qpn, - __entry->ack, - __entry->psn, + __get_str(ptype), + hfi1_trace_fmt_lrh(p, + __entry->bypass, + __entry->age, + __entry->becn, + __entry->fecn, + __entry->l4, + __entry->lnh, + show_lnh(__entry->lnh), + __entry->lver, + __entry->rc, + __entry->sc, + __entry->sl, + __entry->entropy, + __entry->len, + __entry->pkey, + __entry->dlid, + __entry->slid), + hfi1_trace_fmt_bth(p, + __entry->bypass, + __entry->ack, + __entry->becn, + __entry->fecn, + __entry->mig, + __entry->se, + __entry->pad, + __entry->opcode, + 
show_ib_opcode(__entry->opcode), + __entry->tver, + __entry->pkey, + __entry->psn, + __entry->qpn), /* extended headers */ __get_dynamic_array_len(ehdrs), __parse_ib_ehdrs( @@ -213,78 +280,136 @@ DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr, DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct hfi1_opa_header *opah, - bool sc5), + struct hfi1_opa_header *opah, bool sc5), TP_ARGS(dd, opah, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) + __field(bool, bypass) + __field(u8, ack) + __field(u8, age) + __field(u8, becn) + __field(u8, fecn) + __field(u8, l4) __field(u8, lnh) __field(u8, lver) - __field(u8, sl) - __field(u16, len) - __field(u32, dlid) - __field(u8, sc) - __field(u32, slid) - __field(u8, opcode) - __field(u8, se) __field(u8, mig) + __field(u8, opcode) __field(u8, pad) + __field(u8, rc) + __field(u8, sc) + __field(u8, se) + __field(u8, sl) __field(u8, tver) + __field(u16, entropy) + __field(u16, len) __field(u16, pkey) - __field(u8, fecn) - __field(u8, becn) - __field(u32, qpn) - __field(u8, ack) + __field(u32, dlid) __field(u32, psn) + __field(u32, qpn) + __field(u32, slid) /* extended headers */ __dynamic_array(u8, ehdrs, - hfi1_trace_ib_hdr_len(&opah->ibh)) + hfi1_trace_opa_hdr_len(opah)) ), TP_fast_assign( struct ib_other_headers *ohdr; - struct ib_header *hdr = &opah->ibh; DD_DEV_ASSIGN(dd); - hfi1_trace_parse_9b_hdr(hdr, sc5, - &ohdr, &__entry->lnh, - &__entry->lver, &__entry->sl, - &__entry->sc, &__entry->len, - &__entry->dlid, &__entry->slid); + if (opah->hdr_type) { + __entry->bypass = true; + hfi1_trace_parse_16b_hdr(&opah->opah, + &__entry->age, + &__entry->becn, + &__entry->fecn, + &__entry->l4, + &__entry->rc, + &__entry->sc, + &__entry->entropy, + &__entry->len, + &__entry->pkey, + &__entry->dlid, + &__entry->slid); - hfi1_trace_parse_bth(ohdr, &__entry->ack, - &__entry->becn, &__entry->fecn, - &__entry->mig, &__entry->se, - &__entry->pad, &__entry->opcode, - &__entry->tver, &__entry->pkey, - &__entry->psn, &__entry->qpn); + if (__entry->l4 == OPA_16B_L4_IB_LOCAL) + ohdr = &opah->opah.u.oth; + else + ohdr = &opah->opah.u.l.oth; + hfi1_trace_parse_16b_bth(ohdr, + &__entry->ack, + &__entry->mig, + &__entry->opcode, + &__entry->pad, + &__entry->se, + &__entry->tver, + &__entry->psn, + &__entry->qpn); + } else { + __entry->bypass = false; + hfi1_trace_parse_9b_hdr(&opah->ibh, sc5, + &__entry->lnh, + &__entry->lver, + &__entry->sl, + &__entry->sc, + &__entry->len, + &__entry->dlid, + &__entry->slid); + if (__entry->lnh == HFI1_LRH_BTH) + ohdr = &opah->ibh.u.oth; + else + ohdr = &opah->ibh.u.l.oth; + hfi1_trace_parse_9b_bth(ohdr, + &__entry->ack, + &__entry->becn, + &__entry->fecn, + &__entry->mig, + &__entry->se, + &__entry->pad, + &__entry->opcode, + &__entry->tver, + &__entry->pkey, + &__entry->psn, + &__entry->qpn); + } /* extended headers */ memcpy(__get_dynamic_array(ehdrs), &ohdr->u, __get_dynamic_array_len(ehdrs)); ), - TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " - BTH_PRN " " EHDR_PRN, + TP_printk("[%s] (%s) %s %s hlen:%d %s", __get_str(dev), - __entry->len, - __entry->sc, - __entry->dlid, - __entry->slid, - __entry->lnh, show_lnh(__entry->lnh), - __entry->lver, - __entry->sl, - /* BTH */ - __entry->opcode, show_ib_opcode(__entry->opcode), - __entry->se, - __entry->mig, - __entry->pad, - __entry->tver, - __entry->pkey, - __entry->fecn, - __entry->becn, - __entry->qpn, - __entry->ack, - __entry->psn, + hfi1_trace_get_packet_type_str(__entry->l4), + hfi1_trace_fmt_lrh(p, + __entry->bypass, + __entry->age, + __entry->becn, +
__entry->fecn, + __entry->l4, + __entry->lnh, + show_lnh(__entry->lnh), + __entry->lver, + __entry->rc, + __entry->sc, + __entry->sl, + __entry->entropy, + __entry->len, + __entry->pkey, + __entry->dlid, + __entry->slid), + hfi1_trace_fmt_bth(p, + __entry->bypass, + __entry->ack, + __entry->becn, + __entry->fecn, + __entry->mig, + __entry->se, + __entry->pad, + __entry->opcode, + show_ib_opcode(__entry->opcode), + __entry->tver, + __entry->pkey, + __entry->psn, + __entry->qpn), /* extended headers */ __get_dynamic_array_len(ehdrs), __parse_ib_ehdrs( -- cgit v1.2.3-59-g8ed1b From 51e658f5dd362cc8666f3f5ec1986660e3e51047 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Fri, 4 Aug 2017 13:54:35 -0700 Subject: IB/rdmavt, hfi1, qib: Enhance rdmavt and hfi1 to use 32 bit lids Increase the LID used in the hfi1 driver to 32 bits. qib continues to use 16-bit LIDs. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 12 +++-- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/mad.c | 91 ++++++++++++++++++++++++++++++++----- drivers/infiniband/hw/hfi1/verbs.c | 23 +--------- drivers/infiniband/hw/hfi1/verbs.h | 2 - drivers/infiniband/hw/qib/qib_mad.c | 4 +- include/rdma/rdma_vt.h | 2 +- 7 files changed, 93 insertions(+), 43 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index ee1324cce25a..cbda37386982 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10067,10 +10067,16 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) struct hfi1_devdata *dd = ppd->dd; u32 mask = ~((1U << ppd->lmc) - 1); u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1); + u32 lid; + /* + * Program 0 in CSR if port lid is extended. This prevents + * 9B packets being sent out for large lids. + */ + lid = (ppd->lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) ?
0 : ppd->lid; c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK); - c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK) + c1 |= ((lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK) << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) | ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK) << DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT); @@ -10081,7 +10087,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) */ sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) << SEND_CTXT_CHECK_SLID_MASK_SHIFT) | - (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) << + (((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) << SEND_CTXT_CHECK_SLID_VALUE_SHIFT); for (i = 0; i < dd->chip_send_contexts; i++) { @@ -10091,7 +10097,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) } /* Now we have to do the same thing for the sdma engines */ - sdma_update_lmc(dd, mask, ppd->lid); + sdma_update_lmc(dd, mask, lid); } static const char *state_completed_string(u32 completed) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b07f42cfa5bf..52cae1146b80 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -718,7 +718,7 @@ struct hfi1_pportdata { u32 ibmaxlen; u32 current_egress_rate; /* units [10^6 bits/sec] */ /* LID programmed for this instance */ - u16 lid; + u32 lid; /* list of pkeys programmed; 0 if not set */ u16 pkeys[MAX_PKEY_VALUES]; u16 link_width_supported; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 1509bc6b76d8..cdcb4d021480 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -234,6 +234,61 @@ static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp, spin_unlock_irqrestore(&ibp->rvp.lock, flags); } +static void hfi1_update_sm_ah_attr(struct hfi1_ibport *ibp, + struct rdma_ah_attr *attr, u32 dlid) +{ + rdma_ah_set_dlid(attr, dlid); + rdma_ah_set_port_num(attr, ppd_from_ibp(ibp)->port); + if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { + struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); + + rdma_ah_set_ah_flags(attr, IB_AH_GRH); + grh->sgid_index = 0; + grh->hop_limit = 1; + grh->dgid.global.subnet_prefix = + ibp->rvp.gid_prefix; + grh->dgid.global.interface_id = OPA_MAKE_ID(dlid); + } +} + +static int hfi1_modify_qp0_ah(struct hfi1_ibport *ibp, + struct rvt_ah *ah, u32 dlid) +{ + struct rdma_ah_attr attr; + struct rvt_qp *qp0; + int ret = -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.type = ah->ibah.type; + hfi1_update_sm_ah_attr(ibp, &attr, dlid); + rcu_read_lock(); + qp0 = rcu_dereference(ibp->rvp.qp[0]); + if (qp0) + ret = rdma_modify_ah(&ah->ibah, &attr); + rcu_read_unlock(); + return ret; +} + +static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid) +{ + struct rdma_ah_attr attr; + struct ib_ah *ah = ERR_PTR(-EINVAL); + struct rvt_qp *qp0; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct hfi1_devdata *dd = dd_from_ppd(ppd); + u8 port_num = ppd->port; + + memset(&attr, 0, sizeof(attr)); + attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num); + hfi1_update_sm_ah_attr(ibp, &attr, dlid); + rcu_read_lock(); + qp0 = rcu_dereference(ibp->rvp.qp[0]); + if (qp0) + ah = rdma_create_ah(qp0->ibqp.pd, &attr); + rcu_read_unlock(); + return ah; +} + static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap) { struct ib_mad_send_buf *send_buf; @@ -1283,8 +1338,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct hfi1_ibport *ibp; u8 
clientrereg; unsigned long flags; - u32 smlid, opa_lid; /* tmp vars to hold LID values */ - u16 lid; + u32 smlid; + u32 lid; u8 ls_old, ls_new, ps_new; u8 vls; u8 msl; @@ -1301,22 +1356,20 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - opa_lid = be32_to_cpu(pi->lid); - if (opa_lid & 0xFFFF0000) { - pr_warn("OPA_PortInfo lid out of range: %X\n", opa_lid); + lid = be32_to_cpu(pi->lid); + if (lid & 0xFF000000) { + pr_warn("OPA_PortInfo lid out of range: %X\n", lid); smp->status |= IB_SMP_INVALID_FIELD; goto get_only; } - lid = (u16)(opa_lid & 0x0000FFFF); smlid = be32_to_cpu(pi->sm_lid); - if (smlid & 0xFFFF0000) { + if (smlid & 0xFF000000) { pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid); smp->status |= IB_SMP_INVALID_FIELD; goto get_only; } - smlid &= 0x0000FFFF; clientrereg = (pi->clientrereg_subnettimeout & OPA_PI_MASK_CLIENT_REREGISTER); @@ -1331,12 +1384,16 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ls_old = driver_lstate(ppd); ibp->rvp.mkey = pi->mkey; - ibp->rvp.gid_prefix = pi->subnet_prefix; + if (ibp->rvp.gid_prefix != pi->subnet_prefix) { + ibp->rvp.gid_prefix = pi->subnet_prefix; + event.event = IB_EVENT_GID_CHANGE; + ib_dispatch_event(&event); + } ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period); /* Must be a valid unicast LID address. */ if ((lid == 0 && ls_old > IB_PORT_INIT) || - lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { + (hfi1_is_16B_mcast(lid))) { smp->status |= IB_SMP_INVALID_FIELD; pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n", lid); @@ -1349,6 +1406,16 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC); event.event = IB_EVENT_LID_CHANGE; ib_dispatch_event(&event); + + if (HFI1_PORT_GUID_INDEX + 1 < HFI1_GUIDS_PER_PORT) { + /* Manufacture GID from LID to support extended + * addresses + */ + ppd->guids[HFI1_PORT_GUID_INDEX + 1] = + be64_to_cpu(OPA_MAKE_ID(lid)); + event.event = IB_EVENT_GID_CHANGE; + ib_dispatch_event(&event); + } } msl = pi->smsl & OPA_PI_MASK_SMSL; @@ -1359,7 +1426,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, /* Must be a valid unicast LID address. */ if ((smlid == 0 && ls_old > IB_PORT_INIT) || - smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { + (hfi1_is_16B_mcast(smlid))) { smp->status |= IB_SMP_INVALID_FIELD; pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid); } else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) { @@ -1367,7 +1434,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, spin_lock_irqsave(&ibp->rvp.lock, flags); if (ibp->rvp.sm_ah) { if (smlid != ibp->rvp.sm_lid) - rdma_ah_set_dlid(&ibp->rvp.sm_ah->attr, smlid); + hfi1_modify_qp0_ah(ibp, ibp->rvp.sm_ah, smlid); if (msl != ibp->rvp.sm_sl) rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl); } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 18b27276f202..83565e5f46d0 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1394,7 +1394,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); struct hfi1_devdata *dd = dd_from_dev(verbs_dev); struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; - u16 lid = ppd->lid; + u32 lid = ppd->lid; /* props being zeroed by the caller, avoid zeroing it here */ props->lid = lid ? 
lid : 0; @@ -1555,27 +1555,6 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev, ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu); } -struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid) -{ - struct rdma_ah_attr attr; - struct ib_ah *ah = ERR_PTR(-EINVAL); - struct rvt_qp *qp0; - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ppd(ppd); - u8 port_num = ppd->port; - - memset(&attr, 0, sizeof(attr)); - attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num); - rdma_ah_set_dlid(&attr, dlid); - rdma_ah_set_port_num(&attr, ppd_from_ibp(ibp)->port); - rcu_read_lock(); - qp0 = rcu_dereference(ibp->rvp.qp[0]); - if (qp0) - ah = rdma_create_ah(qp0->ibqp.pd, &attr); - rcu_read_unlock(); - return ah; -} - /** * hfi1_get_npkeys - return the size of the PKEY table for context 0 * @dd: the hfi1_ib device diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 4928ee4f92c1..ab1618e32d9c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -334,8 +334,6 @@ void hfi1_rc_hdrerr( u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); -struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); - void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah); void hfi1_ud_rcv(struct hfi1_packet *packet); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 6b9b43b944e3..549c71971a1f 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -105,7 +105,7 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { struct ib_ah *ah; - ah = qib_create_qp0_ah(ibp, ibp->rvp.sm_lid); + ah = qib_create_qp0_ah(ibp, (u16)ibp->rvp.sm_lid); if (IS_ERR(ah)) ret = PTR_ERR(ah); else { @@ -496,7 +496,7 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, pip->mkey = ibp->rvp.mkey; pip->gid_prefix = ibp->rvp.gid_prefix; pip->lid = cpu_to_be16(ppd->lid); - pip->sm_lid = cpu_to_be16(ibp->rvp.sm_lid); + pip->sm_lid = cpu_to_be16((u16)ibp->rvp.sm_lid); pip->cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); /* pip->diag_code; */ pip->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index fdfac0fd2f82..1d94f3c264ba 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -91,7 +91,7 @@ struct rvt_ibport { __be16 pma_counter_select[5]; u16 pma_tag; u16 mkey_lease_period; - u16 sm_lid; + u32 sm_lid; u8 sm_sl; u8 mkeyprot; u8 subnet_timeout; -- cgit v1.2.3-59-g8ed1b From 5b6cabb0db772042906cdc0fc235fe2a4f5a6000 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:41 -0700 Subject: IB/hfi1: Add 16B RC/UC support Add 16B bypass packet support for RC/UC traffic types. 
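For reference, the header-size bookkeeping this patch threads through the RC/UC request and ACK builders can be sketched as follows. This is an illustrative, stand-alone user-space C sketch, not code from the patch; the enum and helper names are made up for the example. A 9B packet carries an 8-byte LRH, a 16B packet a 16-byte LRH, and both carry a 12-byte BTH, which is where the hwords values of 5 and 7 (32-bit words) used in hfi1_make_rc_req()/hfi1_make_uc_req() come from:

#include <stdio.h>

/* hypothetical names, for illustration only */
enum hdr_type { PKT_9B, PKT_16B };

/*
 * 32-bit header words for LRH+BTH, mirroring the comments in the
 * patch: 9B: (8 + 12) / 4 = 5 words, 16B: (16 + 12) / 4 = 7 words.
 */
static unsigned int lrh_bth_hwords(enum hdr_type t)
{
	unsigned int lrh_bytes = (t == PKT_9B) ? 8 : 16;

	return (lrh_bytes + 12) / 4;	/* BTH is 12 bytes for both */
}

int main(void)
{
	printf("9B: %u hwords, 16B: %u hwords\n",
	       lrh_bth_hwords(PKT_9B), lrh_bth_hwords(PKT_16B));
	return 0;
}

The same arithmetic gives the ACK header sizes used in the hunks below: LRH+BTH+AETH is (8+12+4)/4 = 6 words for 9B and (16+12+4)/4 = 8 words for 16B.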
Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 4 +- drivers/infiniband/hw/hfi1/rc.c | 390 ++++++++++++++++++++++++------------- drivers/infiniband/hw/hfi1/ruc.c | 201 ++++++++++++++----- drivers/infiniband/hw/hfi1/uc.c | 40 ++-- drivers/infiniband/hw/hfi1/verbs.h | 3 +- 5 files changed, 445 insertions(+), 193 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 52cae1146b80..1dfbf16c2ca9 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -459,6 +459,8 @@ static inline u16 hfi1_16B_get_entropy(struct hfi1_16b_header *hdr) return (u16)(hdr->lrh[3] & OPA_16B_ENTROPY_MASK); } +#define OPA_16B_MAKE_QW(low_dw, high_dw) (((u64)(high_dw) << 32) | (low_dw)) + /* * BTH */ @@ -1538,7 +1540,7 @@ static inline u32 egress_cycles(u32 len, u32 rate) } void set_link_ipg(struct hfi1_pportdata *ppd); -void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, +void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn, u32 rqpn, u8 svc_type); void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, u8 sc5, diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index e3dbf6d45afe..99defcc0ce45 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -100,8 +100,12 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) goto bail; - /* header size in 32-bit words LRH+BTH = (8+12)/4. */ - hwords = 5; + if (priv->hdr_type == HFI1_PKT_TYPE_9B) + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + else + /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */ + hwords = 7; switch (qp->s_ack_state) { case OP(RDMA_READ_RESPONSE_LAST): @@ -258,8 +262,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) struct ib_other_headers *ohdr; struct rvt_sge_state *ss; struct rvt_swqe *wqe; - /* header size in 32-bit words LRH+BTH = (8+12)/4. */ - u32 hwords = 5; + u32 hwords; u32 len; u32 bth0 = 0; u32 bth2; @@ -273,9 +276,23 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (IS_ERR(ps->s_txreq)) goto bail_no_tx; - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; - if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; + ps->s_txreq->phdr.hdr.hdr_type = priv->hdr_type; + if (priv->hdr_type == HFI1_PKT_TYPE_9B) { + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; + else + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; + } else { + /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */ + hwords = 7; + if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) && + (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr)))) + ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth; + else + ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth; + } /* Sending responses has higher priority over sending requests. 
*/ if ((qp->s_flags & RVT_S_RESP_PENDING) && @@ -703,6 +720,154 @@ bail_no_tx: return 0; } +static inline void hfi1_make_bth_aeth(struct rvt_qp *qp, + struct ib_other_headers *ohdr, + u32 bth0, u32 bth1) +{ + if (qp->r_nak_state) + ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) | + (qp->r_nak_state << + IB_AETH_CREDIT_SHIFT)); + else + ohdr->u.aeth = rvt_compute_aeth(qp); + + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(bth1 | qp->remote_qpn); + ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn)); +} + +static inline void hfi1_queue_rc_ack(struct rvt_qp *qp, bool is_fecn) +{ + struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) + goto unlock; + this_cpu_inc(*ibp->rvp.rc_qacks); + qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + if (is_fecn) + qp->s_flags |= RVT_S_ECN; + + /* Schedule the send tasklet. */ + hfi1_schedule_send(qp); +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); +} + +static inline void hfi1_make_rc_ack_9B(struct rvt_qp *qp, + struct hfi1_opa_header *opa_hdr, + u8 sc5, bool is_fecn, + u64 *pbc_flags, u32 *hwords, + u32 *nwords) +{ + struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct ib_header *hdr = &opa_hdr->ibh; + struct ib_other_headers *ohdr; + u16 lrh0 = HFI1_LRH_BTH; + u16 pkey; + u32 bth0, bth1; + + opa_hdr->hdr_type = HFI1_PKT_TYPE_9B; + ohdr = &hdr->u.oth; + /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */ + *hwords = 6; + + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { + *hwords += hfi1_make_grh(ibp, &hdr->u.l.grh, + rdma_ah_read_grh(&qp->remote_ah_attr), + *hwords - 2, SIZE_OF_CRC); + ohdr = &hdr->u.l.oth; + lrh0 = HFI1_LRH_GRH; + } + /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ + *pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT); + + /* read pkey_index w/o lock (its atomic) */ + pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); + + lrh0 |= (sc5 & IB_SC_MASK) << IB_SC_SHIFT | + (rdma_ah_get_sl(&qp->remote_ah_attr) & IB_SL_MASK) << + IB_SL_SHIFT; + + hfi1_make_ib_hdr(hdr, lrh0, *hwords + SIZE_OF_CRC, + opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B), + ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr)); + + bth0 = pkey | (OP(ACKNOWLEDGE) << 24); + if (qp->s_mig_state == IB_MIG_MIGRATED) + bth0 |= IB_BTH_MIG_REQ; + bth1 = (!!is_fecn) << IB_BECN_SHIFT; + hfi1_make_bth_aeth(qp, ohdr, bth0, bth1); +} + +static inline void hfi1_make_rc_ack_16B(struct rvt_qp *qp, + struct hfi1_opa_header *opa_hdr, + u8 sc5, bool is_fecn, + u64 *pbc_flags, u32 *hwords, + u32 *nwords) +{ + struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct hfi1_16b_header *hdr = &opa_hdr->opah; + struct ib_other_headers *ohdr; + u32 bth0, bth1; + u16 len, pkey; + u8 becn = !!is_fecn; + u8 l4 = OPA_16B_L4_IB_LOCAL; + u8 extra_bytes; + + opa_hdr->hdr_type = HFI1_PKT_TYPE_16B; + ohdr = &hdr->u.oth; + /* header size in 32-bit words 16B LRH+BTH+AETH = (16+12+4)/4 */ + *hwords = 8; + extra_bytes = hfi1_get_16b_padding(*hwords << 2, 0); + *nwords = SIZE_OF_CRC + ((extra_bytes + SIZE_OF_LT) >> 2); + + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) && + hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) { + *hwords += hfi1_make_grh(ibp, 
&hdr->u.l.grh, + rdma_ah_read_grh(&qp->remote_ah_attr), + *hwords - 4, *nwords); + ohdr = &hdr->u.l.oth; + l4 = OPA_16B_L4_IB_GLOBAL; + } + *pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; + + /* read pkey_index w/o lock (its atomic) */ + pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); + + /* Convert dwords to flits */ + len = (*hwords + *nwords) >> 1; + + hfi1_make_16b_hdr(hdr, + ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr), + opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), + 16B), + len, pkey, becn, 0, l4, sc5); + + bth0 = pkey | (OP(ACKNOWLEDGE) << 24); + bth0 |= extra_bytes << 20; + if (qp->s_mig_state == IB_MIG_MIGRATED) + bth1 = OPA_BTH_MIG_REQ; + hfi1_make_bth_aeth(qp, ohdr, bth0, bth1); +} + +typedef void (*hfi1_make_rc_ack)(struct rvt_qp *qp, + struct hfi1_opa_header *opa_hdr, + u8 sc5, bool is_fecn, + u64 *pbc_flags, u32 *hwords, + u32 *nwords); + +/* We support only two types - 9B and 16B for now */ +static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = { + [HFI1_PKT_TYPE_9B] = &hfi1_make_rc_ack_9B, + [HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B +}; + /** * hfi1_send_rc_ack - Construct an ACK packet and send it * @qp: a pointer to the QP @@ -711,87 +876,48 @@ bail_no_tx: * Note that RDMA reads and atomics are handled in the * send side QP state and send engine. */ -void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, - int is_fecn) +void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, + struct rvt_qp *qp, bool is_fecn) { struct hfi1_ibport *ibp = rcd_to_iport(rcd); + struct hfi1_qp_priv *priv = qp->priv; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; u64 pbc, pbc_flags = 0; - u16 lrh0; - u16 sc5; - u32 bth0; - u32 hwords; - u32 vl, plen; - struct send_context *sc; + u32 hwords = 0; + u32 nwords = 0; + u32 plen; struct pio_buf *pbuf; - struct hfi1_opa_header opah; - struct ib_header *hdr; - struct ib_other_headers *ohdr; - unsigned long flags; + struct hfi1_opa_header opa_hdr; /* clear the defer count */ qp->r_adefered = 0; /* Don't send ACK or NAK if a RDMA read or atomic is pending. 
*/ - if (qp->s_flags & RVT_S_RESP_PENDING) - goto queue_ack; + if (qp->s_flags & RVT_S_RESP_PENDING) { + hfi1_queue_rc_ack(qp, is_fecn); + return; + } /* Ensure s_rdma_ack_cnt changes are committed */ smp_read_barrier_depends(); - if (qp->s_rdma_ack_cnt) - goto queue_ack; - - /* Construct the header */ - opah.hdr_type = 0; - hdr = &opah.ibh; - - /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */ - hwords = 6; - if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { - hwords += hfi1_make_grh(ibp, &hdr->u.l.grh, - rdma_ah_read_grh(&qp->remote_ah_attr), - hwords, 0); - ohdr = &hdr->u.l.oth; - lrh0 = HFI1_LRH_GRH; - } else { - ohdr = &hdr->u.oth; - lrh0 = HFI1_LRH_BTH; + if (qp->s_rdma_ack_cnt) { + hfi1_queue_rc_ack(qp, is_fecn); + return; } - /* read pkey_index w/o lock (its atomic) */ - bth0 = hfi1_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24); - if (qp->s_mig_state == IB_MIG_MIGRATED) - bth0 |= IB_BTH_MIG_REQ; - if (qp->r_nak_state) - ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) | - (qp->r_nak_state << - IB_AETH_CREDIT_SHIFT)); - else - ohdr->u.aeth = rvt_compute_aeth(qp); - sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; - /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) - & 0xf) << 4; - hdr->lrh[0] = cpu_to_be16(lrh0); - hdr->lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); - hdr->lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr->lrh[3] = cpu_to_be16(ppd->lid | - rdma_ah_get_path_bits(&qp->remote_ah_attr)); - ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); - ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT); - ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn)); /* Don't try to send ACKs if the link isn't ACTIVE */ if (driver_lstate(ppd) != IB_PORT_ACTIVE) return; - sc = rcd->sc; - plen = 2 /* PBC */ + hwords; - vl = sc_to_vlt(ppd->dd, sc5); - pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); + /* Make the appropriate header */ + hfi1_make_rc_ack_tbl[priv->hdr_type](qp, &opa_hdr, sc5, is_fecn, + &pbc_flags, &hwords, &nwords); - pbuf = sc_buffer_alloc(sc, plen, NULL, NULL); + plen = 2 /* PBC */ + hwords + nwords; + pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, + sc_to_vlt(ppd->dd, sc5), plen); + pbuf = sc_buffer_alloc(rcd->sc, plen, NULL, NULL); if (!pbuf) { /* * We have no room to send at the moment. Pass @@ -799,32 +925,18 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, * so that when enough buffer space becomes available, * the ACK is sent ahead of other outgoing packets. */ - goto queue_ack; + hfi1_queue_rc_ack(qp, is_fecn); + return; } - trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &opah, ib_is_sc5(sc5)); + &opa_hdr, ib_is_sc5(sc5)); /* write the pbc and data */ - ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, hdr, hwords); - + ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, + (priv->hdr_type == HFI1_PKT_TYPE_9B ? + (void *)&opa_hdr.ibh : + (void *)&opa_hdr.opah), hwords); return; - -queue_ack: - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) - goto unlock; - this_cpu_inc(*ibp->rvp.rc_qacks); - qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; - if (is_fecn) - qp->s_flags |= RVT_S_ECN; - - /* Schedule the send engine. 
*/ - hfi1_schedule_send(qp); -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); } /** @@ -992,8 +1104,10 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) { struct ib_other_headers *ohdr; - struct ib_header *hdr = &opah->ibh; + struct hfi1_qp_priv *priv = qp->priv; struct rvt_swqe *wqe; + struct ib_header *hdr = NULL; + struct hfi1_16b_header *hdr_16b = NULL; u32 opcode; u32 psn; @@ -1002,10 +1116,22 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) return; /* Find out where the BTH is */ - if (ib_get_lnh(hdr) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; + if (priv->hdr_type == HFI1_PKT_TYPE_9B) { + hdr = &opah->ibh; + if (ib_get_lnh(hdr) == HFI1_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + } else { + u8 l4; + + hdr_16b = &opah->opah; + l4 = hfi1_16B_get_l4(hdr_16b); + if (l4 == OPA_16B_L4_IB_LOCAL) + ohdr = &hdr_16b->u.oth; + else + ohdr = &hdr_16b->u.l.oth; + } opcode = ib_bth_get_opcode(ohdr); if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && @@ -1405,36 +1531,34 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, /** * rc_rcv_resp - process an incoming RC response packet - * @ibp: the port this packet came in on - * @ohdr: the other headers for this packet - * @data: the packet data - * @tlen: the packet length - * @qp: the QP for this packet - * @opcode: the opcode for this packet - * @psn: the packet sequence number for this packet - * @hdrsize: the header length - * @pmtu: the path MTU + * @packet: data packet information * * This is called from hfi1_rc_rcv() to process an incoming RC response * packet for the given QP. * Called at interrupt level. */ -static void rc_rcv_resp(struct hfi1_ibport *ibp, - struct ib_other_headers *ohdr, - void *data, u32 tlen, struct rvt_qp *qp, - u32 opcode, u32 psn, u32 hdrsize, u32 pmtu, - struct hfi1_ctxtdata *rcd) +static void rc_rcv_resp(struct hfi1_packet *packet) { + struct hfi1_ctxtdata *rcd = packet->rcd; + void *data = packet->payload; + u32 tlen = packet->tlen; + struct rvt_qp *qp = packet->qp; + struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct ib_other_headers *ohdr = packet->ohdr; struct rvt_swqe *wqe; enum ib_wc_status status; unsigned long flags; int diff; - u32 pad; - u32 aeth; u64 val; + u32 aeth; + u32 psn = ib_bth_get_psn(packet->ohdr); + u32 pmtu = qp->pmtu; + u16 hdrsize = packet->hlen; + u8 opcode = packet->opcode; + u8 pad = packet->pad; + u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); spin_lock_irqsave(&qp->s_lock, flags); - trace_hfi1_ack(qp, psn); /* Ignore invalid responses. */ @@ -1500,7 +1624,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; read_middle: - if (unlikely(tlen != (hdrsize + pmtu + 4))) + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) goto ack_len_err; if (unlikely(pmtu >= qp->s_rdma_read_len)) goto ack_len_err; @@ -1532,13 +1656,11 @@ read_middle: aeth = be32_to_cpu(ohdr->u.aeth); if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd)) goto ack_done; - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* * Check that the data size is >= 0 && <= pmtu. * Remember to account for ICRC (4). 
*/ - if (unlikely(tlen < (hdrsize + pad + 4))) + if (unlikely(tlen < (hdrsize + extra_bytes))) goto ack_len_err; /* * If this is a response to a resent RDMA read, we @@ -1556,16 +1678,14 @@ read_middle: goto ack_seq_err; if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* * Check that the data size is >= 1 && <= pmtu. * Remember to account for ICRC (4). */ - if (unlikely(tlen <= (hdrsize + pad + 4))) + if (unlikely(tlen <= (hdrsize + extra_bytes))) goto ack_len_err; read_last: - tlen -= hdrsize + pad + 4; + tlen -= hdrsize + extra_bytes; if (unlikely(tlen != qp->s_rdma_read_len)) goto ack_len_err; aeth = be32_to_cpu(ohdr->u.aeth); @@ -1850,7 +1970,7 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, spin_unlock_irqrestore(&ppd->cc_log_lock, flags); } -void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, +void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn, u32 rqpn, u8 svc_type) { struct cca_timer *cca_timer; @@ -1907,12 +2027,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, /** * hfi1_rc_rcv - process an incoming RC packet - * @rcd: the context pointer - * @hdr: the header of this packet - * @rcv_flags: flags relevant to rcv processing - * @data: the packet data - * @tlen: the packet length - * @qp: the QP for this packet + * @packet: data packet information * * This is called from qp_rcv() to process an incoming RC packet * for the given QP. @@ -1926,10 +2041,10 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct ib_other_headers *ohdr = packet->ohdr; - u32 bth0; + u32 bth0 = be32_to_cpu(ohdr->bth[0]); u32 opcode = packet->opcode; u32 hdrsize = packet->hlen; - u32 psn; + u32 psn = ib_bth_get_psn(packet->ohdr); u32 pad = packet->pad; struct ib_wc wc; u32 pmtu = qp->pmtu; @@ -1940,15 +2055,14 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) bool is_fecn = false; bool copy_last = false; u32 rkey; + u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); lockdep_assert_held(&qp->r_lock); - bth0 = be32_to_cpu(ohdr->bth[0]); if (hfi1_ruc_check_hdr(ibp, packet)) return; is_fecn = process_ecn(qp, packet, false); - psn = ib_bth_get_psn(ohdr); /* * Process responses (ACKs) before anything else. Note that the @@ -1958,8 +2072,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) */ if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) { - rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn, - hdrsize, pmtu, rcd); + rc_rcv_resp(packet); if (is_fecn) goto send_ack; return; @@ -2026,7 +2139,12 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) case OP(RDMA_WRITE_MIDDLE): send_middle: /* Check for invalid length PMTU or posted rwqe len. */ - if (unlikely(tlen != (hdrsize + pmtu + 4))) + /* + * There will be no padding for 9B packet but 16B packets + * will come in with some padding since we always add + * CRC and LT bytes which will need to be flit aligned + */ + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) goto nack_inv; qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) @@ -2080,10 +2198,10 @@ no_immediate_data: send_last: /* Check for invalid length. */ /* LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) + if (unlikely(tlen < (hdrsize + extra_bytes))) goto nack_inv; - /* Don't count the CRC. 
*/ - tlen -= (hdrsize + pad + 4); + /* Don't count the CRC(and padding and LT byte for 16B). */ + tlen -= (hdrsize + extra_bytes); wc.byte_len = tlen + qp->r_rcv_len; if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 6839bfae933c..b3291f0fde9a 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -735,73 +735,186 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) } } -void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, - u32 bth0, u32 bth2, int middle, - struct hfi1_pkt_state *ps) +static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, + struct ib_other_headers *ohdr, + u32 bth0, u32 bth1, u32 bth2) +{ + bth1 |= qp->remote_qpn; + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(bth1); + ohdr->bth[2] = cpu_to_be32(bth2); +} + +static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, + struct ib_other_headers *ohdr, + u32 bth0, u32 bth2, int middle, + struct hfi1_pkt_state *ps) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct hfi1_ibport *ibp = ps->ibp; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u32 bth1 = 0; + u32 slid; + u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); + u8 l4 = OPA_16B_L4_IB_LOCAL; + u8 extra_bytes = hfi1_get_16b_padding((qp->s_hdrwords << 2), + ps->s_txreq->s_cur_size); + u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size + + extra_bytes + SIZE_OF_LT) >> 2); + u8 becn = 0; + + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) && + hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) { + struct ib_grh *grh; + struct ib_global_route *grd = + rdma_ah_retrieve_grh(&qp->remote_ah_attr); + int hdrwords; + + /* + * Ensure OPA GIDs are transformed to IB gids + * before creating the GRH. + */ + if (grd->sgid_index == OPA_GID_INDEX) + grd->sgid_index = 0; + grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh; + l4 = OPA_16B_L4_IB_GLOBAL; + hdrwords = qp->s_hdrwords - 4; + qp->s_hdrwords += hfi1_make_grh(ibp, grh, grd, + hdrwords, nwords); + middle = 0; + } + + if (qp->s_mig_state == IB_MIG_MIGRATED) + bth1 |= OPA_BTH_MIG_REQ; + else + middle = 0; + + if (middle) + build_ahg(qp, bth2); + else + qp->s_flags &= ~RVT_S_AHG_VALID; + + bth0 |= pkey; + bth0 |= extra_bytes << 20; + if (qp->s_flags & RVT_S_ECN) { + qp->s_flags &= ~RVT_S_ECN; + /* we recently received a FECN, so return a BECN */ + becn = 1; + } + hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); + + if (!ppd->lid) + slid = be32_to_cpu(OPA_LID_PERMISSIVE); + else + slid = ppd->lid | + (rdma_ah_get_path_bits(&qp->remote_ah_attr) & + ((1 << ppd->lmc) - 1)); + + hfi1_make_16b_hdr(&ps->s_txreq->phdr.hdr.opah, + slid, + opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), + 16B), + (qp->s_hdrwords + nwords) >> 1, + pkey, becn, 0, l4, priv->s_sc); +} + +static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, + struct ib_other_headers *ohdr, + u32 bth0, u32 bth2, int middle, + struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibport *ibp = ps->ibp; - u16 lrh0; - u32 nwords; - u32 extra_bytes; - u32 bth1; - - /* Construct the header. 
*/ - extra_bytes = -ps->s_txreq->s_cur_size & 3; - nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2; - lrh0 = HFI1_LRH_BTH; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u32 bth1 = 0; + u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); + u16 lrh0 = HFI1_LRH_BTH; + u16 slid; + u8 extra_bytes = -ps->s_txreq->s_cur_size & 3; + u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size + + extra_bytes) >> 2); + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { - qp->s_hdrwords += - hfi1_make_grh(ibp, - &ps->s_txreq->phdr.hdr.ibh.u.l.grh, - &qp->remote_ah_attr.grh, - qp->s_hdrwords, nwords); + struct ib_grh *grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh; + int hdrwords = qp->s_hdrwords - 2; + lrh0 = HFI1_LRH_GRH; + qp->s_hdrwords += + hfi1_make_grh(ibp, grh, + rdma_ah_read_grh(&qp->remote_ah_attr), + hdrwords, nwords); middle = 0; } lrh0 |= (priv->s_sc & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4; - /* - * reset s_ahg/AHG fields - * - * This insures that the ahgentry/ahgcount - * are at a non-AHG default to protect - * build_verbs_tx_desc() from using - * an include ahgidx. - * - * build_ahg() will modify as appropriate - * to use the AHG feature. - */ - priv->s_ahg->tx_flags = 0; - priv->s_ahg->ahgcount = 0; - priv->s_ahg->ahgidx = 0; + if (qp->s_mig_state == IB_MIG_MIGRATED) bth0 |= IB_BTH_MIG_REQ; else middle = 0; + if (middle) build_ahg(qp, bth2); else qp->s_flags &= ~RVT_S_AHG_VALID; - ps->s_txreq->phdr.hdr.ibh.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.ibh.lrh[1] = - cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); - ps->s_txreq->phdr.hdr.ibh.lrh[2] = - cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - ps->s_txreq->phdr.hdr.ibh.lrh[3] = - cpu_to_be16(ppd_from_ibp(ibp)->lid | - rdma_ah_get_path_bits(&qp->remote_ah_attr)); - bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); + + bth0 |= pkey; bth0 |= extra_bytes << 20; - ohdr->bth[0] = cpu_to_be32(bth0); - bth1 = qp->remote_qpn; if (qp->s_flags & RVT_S_ECN) { qp->s_flags &= ~RVT_S_ECN; /* we recently received a FECN, so return a BECN */ bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); } - ohdr->bth[1] = cpu_to_be32(bth1); - ohdr->bth[2] = cpu_to_be32(bth2); + hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); + + if (!ppd->lid) + slid = be16_to_cpu(IB_LID_PERMISSIVE); + else + slid = ppd->lid | + (rdma_ah_get_path_bits(&qp->remote_ah_attr) & + ((1 << ppd->lmc) - 1)); + hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh, + lrh0, + qp->s_hdrwords + nwords, + opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B), + ppd_from_ibp(ibp)->lid | + rdma_ah_get_path_bits(&qp->remote_ah_attr)); +} + +typedef void (*hfi1_make_ruc_hdr)(struct rvt_qp *qp, + struct ib_other_headers *ohdr, + u32 bth0, u32 bth2, int middle, + struct hfi1_pkt_state *ps); + +/* We support only two types - 9B and 16B for now */ +static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl[2] = { + [HFI1_PKT_TYPE_9B] = &hfi1_make_ruc_header_9B, + [HFI1_PKT_TYPE_16B] = &hfi1_make_ruc_header_16B +}; + +void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, + u32 bth0, u32 bth2, int middle, + struct hfi1_pkt_state *ps) +{ + struct hfi1_qp_priv *priv = qp->priv; + + /* + * reset s_ahg/AHG fields + * + * This insures that the ahgentry/ahgcount + * are at a non-AHG default to protect + * build_verbs_tx_desc() from using + * an include ahgidx. + * + * build_ahg() will modify as appropriate + * to use the AHG feature. 
+ */ + priv->s_ahg->tx_flags = 0; + priv->s_ahg->ahgcount = 0; + priv->s_ahg->ahgidx = 0; + + /* Make the appropriate header */ + hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth2, middle, ps); } /* when sending, force a reschedule every one of these periods */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index e0bb766ae36c..0b646173ca22 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -65,7 +65,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) struct hfi1_qp_priv *priv = qp->priv; struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - u32 hwords = 5; + u32 hwords; u32 bth0 = 0; u32 len; u32 pmtu = qp->pmtu; @@ -93,9 +93,23 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto done_free_tx; } - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; - if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; + ps->s_txreq->phdr.hdr.hdr_type = priv->hdr_type; + if (priv->hdr_type == HFI1_PKT_TYPE_9B) { + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; + else + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; + } else { + /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */ + hwords = 7; + if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) && + (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr)))) + ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth; + else + ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth; + } /* Get the next send request. */ wqe = rvt_get_swqe_ptr(qp, qp->s_cur); @@ -309,6 +323,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) u32 pmtu = qp->pmtu; struct ib_reth *reth; int ret; + u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); if (hfi1_ruc_check_hdr(ibp, packet)) return; @@ -408,7 +423,12 @@ send_first: /* FALLTHROUGH */ case OP(SEND_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ - if (unlikely(tlen != (hdrsize + pmtu + 4))) + /* + * There will be no padding for 9B packet but 16B packets + * will come in with some padding since we always add + * CRC and LT bytes which will need to be flit aligned + */ + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) goto rewind; qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) @@ -428,10 +448,10 @@ no_immediate_data: send_last: /* Check for invalid length. */ /* LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) + if (unlikely(tlen < (hdrsize + extra_bytes))) goto rewind; /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); + tlen -= (hdrsize + extra_bytes); wc.byte_len = tlen + qp->r_rcv_len; if (unlikely(wc.byte_len > qp->r_len)) goto rewind; @@ -524,7 +544,7 @@ rdma_last_imm: if (unlikely(tlen < (hdrsize + pad + 4))) goto drop; /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); + tlen -= (hdrsize + extra_bytes); if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) { @@ -544,14 +564,12 @@ rdma_last_imm: case OP(RDMA_WRITE_LAST): rdma_last: - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) goto drop; /* Don't count the CRC. 
*/ - tlen -= (hdrsize + pad + 4); + tlen -= (hdrsize + extra_bytes); if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; hfi1_copy_sge(&qp->r_sge, data, tlen, true, false); diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index ab1618e32d9c..b164298a51c6 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -373,7 +373,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread); void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); -void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct rvt_qp *qp, int is_fecn); +void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, + bool is_fecn); int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps); -- cgit v1.2.3-59-g8ed1b From 566d53a8264452ee75aa6eb1f2f1970391c1a271 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:47 -0700 Subject: IB/hfi1: Enhance PIO/SDMA send for 16B PIO/SDMA send logic now uses the hdr_type field to determine the type of packet that has been constructed. Based on the hdr_type, certain things such as PBC flags, padding count and the LT extra trailing bytes are determined. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/user_sdma.c | 7 +- drivers/infiniband/hw/hfi1/verbs.c | 197 +++++++++++++++++++++------------ drivers/infiniband/hw/hfi1/verbs.h | 1 + 4 files changed, 135 insertions(+), 72 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1dfbf16c2ca9..dff3d3f02b2d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1558,7 +1558,7 @@ static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { [HFI1_PKT_TYPE_16B] = &return_cnp_16B }; #define PKEY_CHECK_INVALID -1 -int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth, +int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey, u8 sc5, int8_t s_pkey_index); #define PACKET_EGRESS_TIMEOUT 350 diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8f02707e9c95..aae1f40016e4 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -502,6 +502,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, struct sdma_req_info info; struct user_sdma_request *req; u8 opcode, sc, vl; + u16 pkey; + u32 slid; int req_queued = 0; u16 dlid; u32 selector; @@ -635,8 +637,9 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, } /* Checking P_KEY for requests from user-space */ - if (egress_pkey_check(dd->pport, req->hdr.lrh, req->hdr.bth, sc, - PKEY_CHECK_INVALID)) { + pkey = (u16)be32_to_cpu(req->hdr.bth[0]); + slid = be16_to_cpu(req->hdr.lrh[3]); + if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) { ret = -EINVAL; goto free_req; } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 83565e5f46d0..d279f53fc66e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -506,24 +506,6 @@ again: } } -static u8 get_opcode(struct hfi1_opa_header *hdr) -{ - struct ib_other_headers *ohdr; - - if (hdr->hdr_type) { - if (hfi1_16B_get_l4(&hdr->opah) == OPA_16B_L4_IB_LOCAL) - ohdr = &hdr->opah.u.oth; - else - ohdr = 
&hdr->opah.u.l.oth; - } else { - if (ib_get_lnh(&hdr->ibh) == HFI1_LRH_BTH) - ohdr = &hdr->ibh.u.oth; - else - ohdr = &hdr->ibh.u.l.oth; - } - return ib_bth_get_opcode(ohdr); -} - /* * Make sure the QP is ready and able to accept the given opcode. */ @@ -830,12 +812,27 @@ static int build_verbs_tx_desc( int ret = 0; struct hfi1_sdma_header *phdr = &tx->phdr; u16 hdrbytes = tx->hdr_dwords << 2; + u32 *hdr; + u8 extra_bytes = 0; + static char trail_buf[12]; /* CRC = 4, LT = 1, Pad = 0 to 7 bytes */ + if (tx->phdr.hdr.hdr_type) { + /* + * hdrbytes accounts for PBC. Need to subtract 8 bytes + * before calculating padding. + */ + extra_bytes = hfi1_get_16b_padding(hdrbytes - 8, length) + + (SIZE_OF_CRC << 2) + SIZE_OF_LT; + hdr = (u32 *)&phdr->hdr.opah; + } else { + hdr = (u32 *)&phdr->hdr.ibh; + } if (!ahg_info->ahgcount) { ret = sdma_txinit_ahg( &tx->txreq, ahg_info->tx_flags, - hdrbytes + length, + hdrbytes + length + + extra_bytes, ahg_info->ahgidx, 0, NULL, @@ -865,8 +862,17 @@ static int build_verbs_tx_desc( goto bail_txadd; } /* add the ulp payload - if any. tx->ss can be NULL for acks */ - if (tx->ss) + if (tx->ss) { ret = build_verbs_ulp_payload(sde, length, tx); + if (ret) + goto bail_txadd; + } + + /* add icrc, lt byte, and padding to flit */ + if (extra_bytes != 0) + ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, + trail_buf, extra_bytes); + bail_txadd: return ret; } @@ -878,26 +884,42 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_ahg_info *ahg_info = priv->s_ahg; u32 hdrwords = qp->s_hdrwords; u32 len = ps->s_txreq->s_cur_size; - u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */ + u32 plen; struct hfi1_ibdev *dev = ps->dev; struct hfi1_pportdata *ppd = ps->ppd; struct verbs_txreq *tx; u8 sc5 = priv->s_sc; - int ret; + u32 dwords; + bool bypass = false; + + if (ps->s_txreq->phdr.hdr.hdr_type) { + u8 extra_bytes = hfi1_get_16b_padding((hdrwords << 2), len); + + dwords = (len + extra_bytes + (SIZE_OF_CRC << 2) + + SIZE_OF_LT) >> 2; + bypass = true; + } else { + dwords = (len + 3) >> 2; + } + plen = hdrwords + dwords + 2; tx = ps->s_txreq; if (!sdma_txreq_built(&tx->txreq)) { if (likely(pbc == 0)) { u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); - u8 opcode = get_opcode(&tx->phdr.hdr); /* No vl15 here */ - /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); + /* set PBC_DC_INFO bit (aka SC[4]) in pbc */ + if (ps->s_txreq->phdr.hdr.hdr_type) + pbc |= PBC_PACKET_BYPASS | + PBC_INSERT_BYPASS_ICRC; + else + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) - pbc = hfi1_fault_tx(qp, opcode, pbc); + if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, + false))) + pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, @@ -1000,10 +1022,10 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u32 hdrwords = qp->s_hdrwords; struct rvt_sge_state *ss = ps->s_txreq->ss; u32 len = ps->s_txreq->s_cur_size; - u32 dwords = (len + 3) >> 2; - u32 plen = hdrwords + dwords + 2; /* includes pbc */ + u32 dwords; + u32 plen; struct hfi1_pportdata *ppd = ps->ppd; - u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; + u32 *hdr; u8 sc5; unsigned long flags = 0; struct send_context *sc; @@ -1011,6 +1033,23 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, int wc_status = IB_WC_SUCCESS; int ret = 0; pio_release_cb cb = NULL; + u32 lrh0_16b; + bool bypass = false; + u8 extra_bytes = 0; + + if 
(ps->s_txreq->phdr.hdr.hdr_type) { + u8 pad_size = hfi1_get_16b_padding((hdrwords << 2), len); + + extra_bytes = pad_size + (SIZE_OF_CRC << 2) + SIZE_OF_LT; + dwords = (len + extra_bytes) >> 2; + hdr = (u32 *)&ps->s_txreq->phdr.hdr.opah; + lrh0_16b = ps->s_txreq->phdr.hdr.opah.lrh[0]; + bypass = true; + } else { + dwords = (len + 3) >> 2; + hdr = (u32 *)&ps->s_txreq->phdr.hdr.ibh; + } + plen = hdrwords + dwords + 2; /* only RC/UC use complete */ switch (qp->ibqp.qp_type) { @@ -1028,13 +1067,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (likely(pbc == 0)) { u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); - struct verbs_txreq *tx = ps->s_txreq; - u8 opcode = get_opcode(&tx->phdr.hdr); - /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) - pbc = hfi1_fault_tx(qp, opcode, pbc); + /* set PBC_DC_INFO bit (aka SC[4]) in pbc */ + if (ps->s_txreq->phdr.hdr.hdr_type) + pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; + else + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); + if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, false))) + pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); } if (cb) @@ -1071,11 +1111,12 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } } - if (len == 0) { + if (dwords == 0) { pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords); } else { + seg_pio_copy_start(pbuf, pbc, + hdr, hdrwords * 4); if (ss) { - seg_pio_copy_start(pbuf, pbc, hdr, hdrwords * 4); while (len) { void *addr = ss->sge.vaddr; u32 slen = ss->sge.length; @@ -1086,8 +1127,20 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, seg_pio_copy_mid(pbuf, addr, slen); len -= slen; } - seg_pio_copy_end(pbuf); } + /* + * Bypass packet will need to copy additional + * bytes to accommodate for CRC and LT bytes + */ + if (extra_bytes) { + u8 *empty_buf; + + empty_buf = kcalloc(extra_bytes, sizeof(u8), + GFP_KERNEL); + seg_pio_copy_mid(pbuf, empty_buf, extra_bytes); + kfree(empty_buf); + } + seg_pio_copy_end(pbuf); } trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device), @@ -1137,10 +1190,10 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) /** * egress_pkey_check - check P_KEY of a packet - * @ppd: Physical IB port data - * @lrh: Local route header - * @bth: Base transport header - * @sc5: SC for packet + * @ppd: Physical IB port data + * @slid: SLID for packet + * @pkey: PKEY for header + * @sc5: SC for packet * @s_pkey_index: It will be used for look up optimization for kernel contexts * only. If it is negative value, then it means user contexts is calling this * function.
@@ -1149,19 +1202,16 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) * * Return: 0 on success, otherwise, 1 */ -int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth, +int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey, u8 sc5, int8_t s_pkey_index) { struct hfi1_devdata *dd; int i; - u16 pkey; int is_user_ctxt_mechanism = (s_pkey_index < 0); if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT)) return 0; - pkey = (u16)be32_to_cpu(bth[0]); - /* If SC15, pkey[0:14] must be 0x7fff */ if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK)) goto bad; @@ -1194,8 +1244,6 @@ bad: dd = ppd->dd; if (!(dd->err_info_xmit_constraint.status & OPA_EI_STATUS_SMASK)) { - u16 slid = be16_to_cpu(lrh[3]); - dd->err_info_xmit_constraint.status |= OPA_EI_STATUS_SMASK; dd->err_info_xmit_constraint.slid = slid; @@ -1212,11 +1260,11 @@ bad: * and size */ static inline send_routine get_send_routine(struct rvt_qp *qp, - struct verbs_txreq *tx) + struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_opa_header *h = &tx->phdr.hdr; + struct verbs_txreq *tx = ps->s_txreq; if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) return dd->process_pio_send; @@ -1228,11 +1276,9 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, break; case IB_QPT_UC: case IB_QPT_RC: { - u8 op = get_opcode(h); - if (piothreshold && tx->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(op & OPMASK) & pio_opmask[op >> 5]) && + (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; @@ -1257,25 +1303,38 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; struct ib_other_headers *ohdr; - struct ib_header *hdr; send_routine sr; int ret; - u8 lnh; + u16 pkey; + u32 slid; - hdr = &ps->s_txreq->phdr.hdr.ibh; /* locate the pkey within the headers */ - lnh = ib_get_lnh(hdr); - if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else - ohdr = &hdr->u.oth; - - sr = get_send_routine(qp, ps->s_txreq); - ret = egress_pkey_check(dd->pport, - hdr->lrh, - ohdr->bth, - priv->s_sc, - qp->s_pkey_index); + if (ps->s_txreq->phdr.hdr.hdr_type) { + struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah; + u8 l4 = hfi1_16B_get_l4(hdr); + + if (l4 == OPA_16B_L4_IB_GLOBAL) + ohdr = &hdr->u.l.oth; + else + ohdr = &hdr->u.oth; + slid = hfi1_16B_get_slid(hdr); + pkey = hfi1_16B_get_pkey(hdr); + } else { + struct ib_header *hdr = &ps->s_txreq->phdr.hdr.ibh; + u8 lnh = ib_get_lnh(hdr); + + if (lnh == HFI1_LRH_GRH) + ohdr = &hdr->u.l.oth; + else + ohdr = &hdr->u.oth; + slid = ib_get_slid(hdr); + pkey = ib_bth_get_pkey(ohdr); + } + + ps->opcode = ib_bth_get_opcode(ohdr); + sr = get_send_routine(qp, ps); + ret = egress_pkey_check(dd->pport, slid, pkey, + priv->s_sc, qp->s_pkey_index); if (unlikely(ret)) { /* * The value we are returning here does not get propagated to diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index b164298a51c6..56ead87a7f65 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -163,6 +163,7 @@ struct hfi1_pkt_state { unsigned long timeout; unsigned long timeout_int; int cpu; + u8 opcode; bool in_thread; bool pkts_sent; }; -- cgit v1.2.3-59-g8ed1b From 7221403dc958456c77f39b8a7ce6f95d8e65ffa3 Mon Sep 17 
00:00:00 2001 From: Dasaratharaman Chandramouli Date: Fri, 4 Aug 2017 13:54:53 -0700 Subject: IB/hfi1: Enable RDMA_CAP_OPA_AH in hfi driver to support extended LIDs Enabling this bit helps core components query for extended address support using the rdma_cap_opa_ah interface. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index d279f53fc66e..e232f3c608b4 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1911,7 +1911,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK; - dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA; + dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA | + RDMA_CORE_CAP_OPA_AH; dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; -- cgit v1.2.3-59-g8ed1b From 028e0a67ea679ff958176eac97d1319a8d685fad Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 5 Aug 2017 14:11:50 +0100 Subject: IB/hfi1: fix spelling mistake: "Maximim" -> "Maximum" Trivial fix to spelling mistake in pr_warn_ratelimited warning message Signed-off-by: Colin Ian King Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index cdcb4d021480..37b19bfae02a 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -173,7 +173,7 @@ static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp, trap_list->list_len++; list_add_tail(&trap->list, &trap_list->list); } else { - pr_warn_ratelimited("hfi1: Maximim trap limit reached for 0x%0x traps\n", + pr_warn_ratelimited("hfi1: Maximum trap limit reached for 0x%0x traps\n", trap->data.generic_type); kfree(trap); } -- cgit v1.2.3-59-g8ed1b From b6422bc01259aea7e4e11f1cd052e80acdf1275b Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Sun, 13 Aug 2017 08:08:22 -0700 Subject: IB/hfi1: Check xchg returned value for queuing link down entry Check xchg returned value for queuing link down entry to guarantee proper atomic value reads. Fixes: 626c077c025f ("IB/hfi1: Prevent link down request double queuing") Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index cbda37386982..bfb50a4e7e5f 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7810,16 +7810,22 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) if (queue_link_down) { /* * if the link is already going down or disabled, do not - * queue another + * queue another. If there's a link down entry already + * queued, don't queue another one. 
*/ if ((ppd->host_link_state & (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) || - ppd->link_enabled == 0 || ppd->is_link_down_queued) { - dd_dev_info(dd, "%s: not queuing link down\n", - __func__); + ppd->link_enabled == 0) { + dd_dev_info(dd, "%s: not queuing link down. host_link_state %x, link_enabled %x\n", + __func__, ppd->host_link_state, + ppd->link_enabled); } else { - xchg(&ppd->is_link_down_queued, 1); - queue_work(ppd->link_wq, &ppd->link_down_work); + if (xchg(&ppd->is_link_down_queued, 1) == 1) + dd_dev_info(dd, + "%s: link down request already queued\n", + __func__); + else + queue_work(ppd->link_wq, &ppd->link_down_work); } } } -- cgit v1.2.3-59-g8ed1b From 55774d09b7a1f0d2bfec3320ed71d30fe05466a1 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 13 Aug 2017 08:08:28 -0700 Subject: IB/hfi1: Document phys port state bits not used in IB A couple of bits are used by OPA for link physical state that are not present in InfiniBand. Add a short blurb on what those states mean and remove an unused state. Cc: Leon Romanovsky Reviewed-by: Todd Rimmer Reviewed-by: Brent Rothermel Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/opa_compat.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h index 6ef3c1cbdcd7..774215b95df5 100644 --- a/drivers/infiniband/hw/hfi1/opa_compat.h +++ b/drivers/infiniband/hw/hfi1/opa_compat.h @@ -84,7 +84,8 @@ static inline u8 port_states_to_phys_state(struct opa_port_states *ps) /* * OPA port physical states * IB Volume 1, Table 146 PortInfo/IB Volume 2 Section 5.4.2(1) PortPhysState - * values. + * values are the same in OmniPath Architecture. OPA leverages some of the same + * concepts as InfiniBand, but has a few other states as well. * * When writing, only values 0-3 are valid, other values are ignored. * When reading, 0 is reserved. @@ -92,6 +93,8 @@ static inline u8 port_states_to_phys_state(struct opa_port_states *ps) * Returned by the ibphys_portstate() routine. */ enum opa_port_phys_state { + /* Values 0-7 have the same meaning in OPA as in InfiniBand. */ + IB_PORTPHYSSTATE_NOP = 0, /* 1 is reserved */ IB_PORTPHYSSTATE_POLLING = 2, @@ -101,9 +104,23 @@ enum opa_port_phys_state { IB_PORTPHYSSTATE_LINK_ERROR_RECOVERY = 6, IB_PORTPHYSSTATE_PHY_TEST = 7, /* 8 is reserved */ + + /* + * Offline: Port is quiet (transmitters disabled) due to lack of + * physical media, unsupported media, or transition between link up + * and next link up attempt */ OPA_PORTPHYSSTATE_OFFLINE = 9, - OPA_PORTPHYSSTATE_GANGED = 10, + + /* 10 is reserved */ + + /* + * Phy_Test: Specific test patterns are transmitted, and receiver BER + * can be monitored. This facilitates signal integrity testing for the + * physical layer of the port. + */ OPA_PORTPHYSSTATE_TEST = 11, + OPA_PORTPHYSSTATE_MAX = 11, /* values 12-15 are reserved/ignored */ }; -- cgit v1.2.3-59-g8ed1b From 9161860463e38e1046a5fd57130be150cc0cac5d Mon Sep 17 00:00:00 2001 From: Jakub Byczkowski Date: Sun, 13 Aug 2017 08:08:34 -0700 Subject: IB/hfi1: Add flag for platform config scratch register read Add a flag in the pport data structure to determine when platform config was read from scratch registers. Change conditions in parse_platform_config and get_platform_config_field to use the new flag.
Reviewed-by: Easwar Hariharan Reviewed-by: Ira Weiny Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/firmware.c | 6 ++++-- drivers/infiniband/hw/hfi1/hfi.h | 3 +++ drivers/infiniband/hw/hfi1/platform.c | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 885714b93557..fb095e69a554 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1789,6 +1789,7 @@ static int check_meta_version(struct hfi1_devdata *dd, u32 *system_table) int parse_platform_config(struct hfi1_devdata *dd) { struct platform_config_cache *pcfgcache = &dd->pcfg_cache; + struct hfi1_pportdata *ppd = dd->pport; u32 *ptr = NULL; u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0, file_length = 0; u32 record_idx = 0, table_type = 0, table_length_dwords = 0; @@ -1800,7 +1801,7 @@ int parse_platform_config(struct hfi1_devdata *dd) * scratch register bitmap, thus there is no platform config to parse. * Skip parsing in these situations. */ - if (is_integrated(dd) && !platform_config_load) + if (ppd->config_from_scratch) return 0; if (!dd->platform_config.data) { @@ -2089,13 +2090,14 @@ int get_platform_config_field(struct hfi1_devdata *dd, int ret = 0, wlen = 0, seek = 0; u32 field_len_bits = 0, field_start_bits = 0, *src_ptr = NULL; struct platform_config_cache *pcfgcache = &dd->pcfg_cache; + struct hfi1_pportdata *ppd = dd->pport; if (data) memset(data, 0, len); else return -EINVAL; - if (is_integrated(dd) && !platform_config_load) { + if (ppd->config_from_scratch) { /* * Use saved configuration from ppd for integrated platforms */ diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index dff3d3f02b2d..181feca3cb6f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -677,6 +677,9 @@ struct hfi1_pportdata { u8 default_atten; u8 max_power_class; + /* did we read platform config from scratch registers? */ + bool config_from_scratch; + /* GUIDs for this interface, in host order, guids[0] is a port guid */ u64 guids[HFI1_GUIDS_PER_PORT]; diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 5c38a24484ab..ab221b05d553 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -138,6 +138,8 @@ static void save_platform_config_fields(struct hfi1_devdata *dd) ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >> QSFP_MAX_POWER_SHIFT; + + ppd->config_from_scratch = true; } void get_platform_config(struct hfi1_devdata *dd) -- cgit v1.2.3-59-g8ed1b From 76ae6222a4a37098610c0601a814c9caba94ba0b Mon Sep 17 00:00:00 2001 From: Jakub Byczkowski Date: Sun, 13 Aug 2017 08:08:40 -0700 Subject: IB/hfi1: Load fallback platform configuration per HFI device Currently fallback configuration is loaded once per driver instance. With multiple HFI devices in the same system the current code may not load the platform config data for the device. Change fallback platform config data loading to be per device. 
Reviewed-by: Easwar Hariharan Reviewed-by: Ira Weiny Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.h | 1 - drivers/infiniband/hw/hfi1/firmware.c | 51 +++-------------------------------- drivers/infiniband/hw/hfi1/platform.c | 41 +++++++++++++++++++--------- 3 files changed, 31 insertions(+), 62 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index f7083025fb04..fc21097e7a56 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -648,7 +648,6 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, #define NUM_PCIE_SERDES 16 /* number of PCIe serdes on the SBus */ extern const u8 pcie_serdes_broadcast[]; extern const u8 pcie_pcs_addrs[2][NUM_PCIE_SERDES]; -extern uint platform_config_load; /* SBus commands */ #define RESET_SBUS_RECEIVER 0x20 diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index fb095e69a554..5aea8f47e670 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -64,7 +64,6 @@ #define DEFAULT_FW_FABRIC_NAME "hfi1_fabric.fw" #define DEFAULT_FW_SBUS_NAME "hfi1_sbus.fw" #define DEFAULT_FW_PCIE_NAME "hfi1_pcie.fw" -#define DEFAULT_PLATFORM_CONFIG_NAME "hfi1_platform.dat" #define ALT_FW_8051_NAME_ASIC "hfi1_dc8051_d.fw" #define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw" #define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw" @@ -76,19 +75,11 @@ static uint fw_fabric_serdes_load = 1; static uint fw_pcie_serdes_load = 1; static uint fw_sbus_load = 1; -/* - * Access required in platform.c - * Maintains state of whether the platform config was fetched via the - * fallback option - */ -uint platform_config_load; - /* Firmware file names get set in hfi1_firmware_init() based on the above */ static char *fw_8051_name; static char *fw_fabric_serdes_name; static char *fw_sbus_name; static char *fw_pcie_serdes_name; -static char *platform_config_name; #define SBUS_MAX_POLL_COUNT 100 #define SBUS_COUNTER(reg, name) \ @@ -178,7 +169,6 @@ static struct firmware_details fw_8051; static struct firmware_details fw_fabric; static struct firmware_details fw_pcie; static struct firmware_details fw_sbus; -static const struct firmware *platform_config; /* flags for turn_off_spicos() */ #define SPICO_SBUS 0x1 @@ -684,7 +674,6 @@ done: static int obtain_firmware(struct hfi1_devdata *dd) { unsigned long timeout; - int err = 0; mutex_lock(&fw_mutex); @@ -708,38 +697,11 @@ static int obtain_firmware(struct hfi1_devdata *dd) } /* not in FW_TRY state */ - if (fw_state == FW_FINAL) { - if (platform_config) { - dd->platform_config.data = platform_config->data; - dd->platform_config.size = platform_config->size; - } - goto done; /* already acquired */ - } else if (fw_state == FW_ERR) { - goto done; /* already tried and failed */ - } - /* fw_state is FW_EMPTY */ - /* set fw_state to FW_TRY, FW_FINAL, or FW_ERR, and fw_err */ - __obtain_firmware(dd); - - if (platform_config_load) { - platform_config = NULL; - err = request_firmware(&platform_config, platform_config_name, - &dd->pcidev->dev); - if (err) { - platform_config = NULL; - dd_dev_err(dd, - "%s: No default platform config file found\n", - __func__); - goto done; - } - dd->platform_config.data = platform_config->data; - dd->platform_config.size = platform_config->size; - } + if (fw_state == FW_EMPTY) + __obtain_firmware(dd); -done: mutex_unlock(&fw_mutex); - return 
fw_err; } @@ -761,9 +723,6 @@ void dispose_firmware(void) dispose_one_firmware(&fw_pcie); dispose_one_firmware(&fw_sbus); - release_firmware(platform_config); - platform_config = NULL; - /* retain the error state, otherwise revert to empty */ if (fw_state != FW_ERR) fw_state = FW_EMPTY; @@ -1725,10 +1684,8 @@ int hfi1_firmware_init(struct hfi1_devdata *dd) } /* no 8051 or QSFP on simulator */ - if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { + if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) fw_8051_load = 0; - platform_config_load = 0; - } if (!fw_8051_name) { if (dd->icode == ICODE_RTL_SILICON) @@ -1742,8 +1699,6 @@ int hfi1_firmware_init(struct hfi1_devdata *dd) fw_sbus_name = DEFAULT_FW_SBUS_NAME; if (!fw_pcie_serdes_name) fw_pcie_serdes_name = DEFAULT_FW_PCIE_NAME; - if (!platform_config_name) - platform_config_name = DEFAULT_PLATFORM_CONFIG_NAME; return obtain_firmware(dd); } diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index ab221b05d553..8004f1da7548 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -45,10 +45,14 @@ * */ +#include <linux/firmware.h> + #include "hfi.h" #include "efivar.h" #include "eprom.h" +#define DEFAULT_PLATFORM_CONFIG_NAME "hfi1_platform.dat" + static int validate_scratch_checksum(struct hfi1_devdata *dd) { u64 checksum = 0, temp_scratch = 0; @@ -147,6 +151,7 @@ void get_platform_config(struct hfi1_devdata *dd) int ret = 0; u8 *temp_platform_config = NULL; u32 esize; + const struct firmware *platform_config_file = NULL; if (is_integrated(dd)) { if (validate_scratch_checksum(dd)) { @@ -167,23 +172,33 @@ void get_platform_config(struct hfi1_devdata *dd) dd_dev_err(dd, "%s: Failed to get platform config, falling back to sub-optimal default file\n", __func__); - /* fall back to request firmware */ - platform_config_load = 1; -} -void free_platform_config(struct hfi1_devdata *dd) -{ - if (!platform_config_load) { - /* - * was loaded from EFI or the EPROM, release memory - * allocated by read_efi_var/eprom_read_platform_config - */ - kfree(dd->platform_config.data); + ret = request_firmware(&platform_config_file, + DEFAULT_PLATFORM_CONFIG_NAME, + &dd->pcidev->dev); + if (ret) { + dd_dev_err(dd, + "%s: No default platform config file found\n", + __func__); + return; } + /* - * else do nothing, dispose_firmware will release - * struct firmware platform_config on driver exit + * Allocate separate memory block to store data and free firmware + * structure. This allows free_platform_config to treat EPROM and + * fallback configs in the same manner. */ + dd->platform_config.data = kmemdup(platform_config_file->data, + platform_config_file->size, + GFP_KERNEL); + dd->platform_config.size = platform_config_file->size; + release_firmware(platform_config_file); +} + +void free_platform_config(struct hfi1_devdata *dd) +{ + /* Release memory allocated for eprom or fallback file read. */ + kfree(dd->platform_config.data); } void get_port_type(struct hfi1_pportdata *ppd) -- cgit v1.2.3-59-g8ed1b From ec0d8b8a63ee760bca1bccc6769d6210e05ded29 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Sun, 13 Aug 2017 08:08:46 -0700 Subject: IB/hfi1: Stricter bounds checking of MAD trap index The macro size is valid; this change makes it less ambiguous. Bounds check the trap type for better security. Reviewed-by: Michael J.
Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 13 ++++++++++++- include/rdma/rdma_vt.h | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 37b19bfae02a..661ba707fc60 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -151,13 +151,24 @@ static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp, unsigned long flags; unsigned long timeout; int found = 0; + unsigned int queue_id; + static int trap_count; + + queue_id = trap->data.generic_type & 0x0F; + if (queue_id >= RVT_MAX_TRAP_LISTS) { + trap_count++; + pr_err_ratelimited("hfi1: Invalid trap 0x%0x dropped. Total dropped: %d\n", + trap->data.generic_type, trap_count); + kfree(trap); + return NULL; + } /* * Since the retry (handle timeout) does not remove a trap request * from the list, all we have to do is compare the node. */ spin_lock_irqsave(&ibp->rvp.lock, flags); - trap_list = &ibp->rvp.trap_lists[trap->data.generic_type & 0x0F]; + trap_list = &ibp->rvp.trap_lists[queue_id]; list_for_each_entry(node, &trap_list->list, list) { if (node == trap) { diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 1d94f3c264ba..1ba84a78f1c5 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -64,7 +64,7 @@ #define RVT_MAX_PKEY_VALUES 16 #define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */ -#define RVT_MAX_TRAP_LISTS ((IB_NOTICE_TYPE_INFO & 0x0F) + 1) +#define RVT_MAX_TRAP_LISTS 5 /*((IB_NOTICE_TYPE_INFO & 0x0F) + 1)*/ #define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */ struct trap_list { -- cgit v1.2.3-59-g8ed1b From d392a673e718767049824f99c76bb483d893b881 Mon Sep 17 00:00:00 2001 From: Jakub Byczkowski Date: Sun, 13 Aug 2017 08:08:52 -0700 Subject: IB/hfi1: Remove pstate from hfi1_pportdata Do not track physical state separately from host_link_state. Deduce physical state from host_link_state when required. Change cache_physical_state to log_physical_state to make sure host_link_state reflects the hardware's physical state properly.
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 54 ++++++++++++++++++++++----------------- drivers/infiniband/hw/hfi1/chip.h | 3 +-- drivers/infiniband/hw/hfi1/hfi.h | 24 ----------------- drivers/infiniband/hw/hfi1/mad.c | 2 +- 4 files changed, 33 insertions(+), 50 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index bfb50a4e7e5f..eb27c7fbfebd 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1068,6 +1068,8 @@ static int thermal_init(struct hfi1_devdata *dd); static void update_statusp(struct hfi1_pportdata *ppd, u32 state); static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); +static void log_physical_state(struct hfi1_pportdata *ppd, u32 state); static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); @@ -10450,11 +10452,11 @@ static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state) } /* - * driver_physical_state - convert the driver's notion of a port's + * driver_pstate - convert the driver's notion of a port's * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*). * Return -1 (converted to a u32) to indicate error. */ -u32 driver_physical_state(struct hfi1_pportdata *ppd) +u32 driver_pstate(struct hfi1_pportdata *ppd) { switch (ppd->host_link_state) { case HLS_UP_INIT: @@ -10720,7 +10722,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ret) goto_offline(ppd, 0); else - cache_physical_state(ppd); + log_physical_state(ppd, PLS_POLLING); break; case HLS_DN_DISABLE: /* link is disabled */ @@ -10769,7 +10771,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ppd->host_link_state != HLS_DN_POLL) goto unexpected; ppd->host_link_state = HLS_VERIFY_CAP; - cache_physical_state(ppd); + log_physical_state(ppd, PLS_CONFIGPHY_VERIFYCAP); break; case HLS_GOING_UP: if (ppd->host_link_state != HLS_VERIFY_CAP) @@ -12743,25 +12745,30 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, return 0; } +static void log_state_transition(struct hfi1_pportdata *ppd, u32 state) +{ + u32 ib_pstate = chip_to_opa_pstate(ppd->dd, state); + + dd_dev_info(ppd->dd, + "physical state changed to %s (0x%x), phy 0x%x\n", + opa_pstate_name(ib_pstate), ib_pstate, state); +} + /* - * Read the physical hardware link state and set the driver's cached value - * of it. + * Read the physical hardware link state and check if it matches host + * drivers anticipated state. 
*/ -void cache_physical_state(struct hfi1_pportdata *ppd) +static void log_physical_state(struct hfi1_pportdata *ppd, u32 state) { - u32 read_pstate; - u32 ib_pstate; + u32 read_state = read_physical_state(ppd->dd); - read_pstate = read_physical_state(ppd->dd); - ib_pstate = chip_to_opa_pstate(ppd->dd, read_pstate); - /* check if OPA pstate changed */ - if (chip_to_opa_pstate(ppd->dd, ppd->pstate) != ib_pstate) { - dd_dev_info(ppd->dd, - "%s: physical state changed to %s (0x%x), phy 0x%x\n", - __func__, opa_pstate_name(ib_pstate), ib_pstate, - read_pstate); + if (read_state == state) { + log_state_transition(ppd, state); + } else { + dd_dev_err(ppd->dd, + "anticipated phy link state 0x%x, read 0x%x\n", + state, read_state); } - ppd->pstate = read_pstate; } /* @@ -12776,22 +12783,24 @@ void cache_physical_state(struct hfi1_pportdata *ppd) static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs) { + u32 read_state; unsigned long timeout; timeout = jiffies + msecs_to_jiffies(msecs); while (1) { - cache_physical_state(ppd); - if (ppd->pstate == state) + read_state = read_physical_state(ppd->dd); + if (read_state == state) break; if (time_after(jiffies, timeout)) { dd_dev_err(ppd->dd, - "timeout waiting for phy link state 0x%x, current state is 0x%x\n", - state, ppd->pstate); + "timeout waiting for phy link state 0x%x\n", + state); return -ETIMEDOUT; } usleep_range(1950, 2050); /* sleep 2ms-ish */ } + log_state_transition(ppd, state); return 0; } @@ -14896,7 +14905,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* start in offline */ ppd->host_link_state = HLS_DN_OFFLINE; init_vl_arb_caches(ppd); - ppd->pstate = PLS_OFFLINE; } dd->link_default = HLS_DN_POLL; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index fc21097e7a56..b8345a60a0fb 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -747,10 +747,9 @@ int is_ax(struct hfi1_devdata *dd); int is_bx(struct hfi1_devdata *dd); u32 read_physical_state(struct hfi1_devdata *dd); u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate); -void cache_physical_state(struct hfi1_pportdata *ppd); const char *opa_lstate_name(u32 lstate); const char *opa_pstate_name(u32 pstate); -u32 driver_physical_state(struct hfi1_pportdata *ppd); +u32 driver_pstate(struct hfi1_pportdata *ppd); u32 driver_lstate(struct hfi1_pportdata *ppd); int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 181feca3cb6f..b15749eab921 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -758,7 +758,6 @@ struct hfi1_pportdata { u8 link_enabled; /* link enabled? */ u8 linkinit_reason; u8 local_tx_rate; /* rate given to 8051 firmware */ - u8 pstate; /* info only */ u8 qsfp_retry_count; /* placeholders for IB MAD packet settings */ @@ -1428,29 +1427,6 @@ static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) int hfi1_reset_device(int); -/* return the driver's idea of the physical OPA port state */ -static inline u32 driver_pstate(struct hfi1_pportdata *ppd) -{ - /* - * When DC is shut down and state is changed, its CSRs are not - * impacted, therefore host_link_state should be used to get - * current physical state. - */ - if (ppd->dd->dc_shutdown) - return driver_physical_state(ppd); - /* - * The driver does some processing from the time the physical - * link state is at LINKUP to the time the SM can be notified - * as such. 
Return IB_PORTPHYSSTATE_TRAINING until the software - * state is ready. - */ - if (ppd->pstate == PLS_LINKUP && - !(ppd->host_link_state & HLS_UP)) - return IB_PORTPHYSSTATE_TRAINING; - else - return chip_to_opa_pstate(ppd->dd, ppd->pstate); -} - void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 661ba707fc60..4849130d49bc 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1181,7 +1181,7 @@ static int physical_transition_allowed(int old, int new) static int port_states_transition_allowed(struct hfi1_pportdata *ppd, u32 logical_new, u32 physical_new) { - u32 physical_old = driver_physical_state(ppd); + u32 physical_old = driver_pstate(ppd); u32 logical_old = driver_lstate(ppd); int ret, logical_allowed, physical_allowed; -- cgit v1.2.3-59-g8ed1b From 6165467921a891bc79675cdb7f2791d8331f592c Mon Sep 17 00:00:00 2001 From: Grzegorz Morys Date: Sun, 13 Aug 2017 08:08:58 -0700 Subject: IB/hfi1: Remove HFI1_VERBS_31BIT_PSN option Remove HFI1_VERBS_31BIT_PSN Kconfig option leaving only 31-bit PSNs available. The option was implemented in the early days of the driver and is no longer needed. Reviewed-by: Mike Marciniszyn Signed-off-by: Grzegorz Morys Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/Kconfig | 7 ------- drivers/infiniband/hw/hfi1/verbs.h | 5 ----- 2 files changed, 12 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig index f6ea0881765a..7b146b67a80f 100644 --- a/drivers/infiniband/hw/hfi1/Kconfig +++ b/drivers/infiniband/hw/hfi1/Kconfig @@ -13,13 +13,6 @@ config HFI1_DEBUG_SDMA_ORDER ---help--- This is a debug flag to test for out of order sdma completions for unit testing -config HFI1_VERBS_31BIT_PSN - bool "HFI1 enable 31 bit PSN" - depends on INFINIBAND_HFI1 - default y - ---help--- - Setting this enables 31 BIT PSN - For verbs RC/UC config SDMA_VERBOSITY bool "Config SDMA Verbosity" depends on INFINIBAND_HFI1 diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 56ead87a7f65..87d1285a3340 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -280,13 +280,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, * necessarily be at least one bit less than * the container holding the PSN. 
*/ -#ifndef CONFIG_HFI1_VERBS_31BIT_PSN -#define PSN_MASK 0xFFFFFF -#define PSN_SHIFT 8 -#else #define PSN_MASK 0x7FFFFFFF #define PSN_SHIFT 1 -#endif #define PSN_MODIFY_MASK 0xFFFFFF /* -- cgit v1.2.3-59-g8ed1b From bf808b5039c66f9843cdc30f18c0608dbbf11374 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Sun, 13 Aug 2017 08:09:04 -0700 Subject: IB/hfi1: Add kernel receive context info to debugfs Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/debugfs.c | 47 ++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/driver.c | 59 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/hfi.h | 1 + 3 files changed, 107 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 550119c6f1ee..c6a472f01025 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -368,6 +368,52 @@ DEBUGFS_SEQ_FILE_OPS(sdes); DEBUGFS_SEQ_FILE_OPEN(sdes) DEBUGFS_FILE_OPS(sdes); +static void *_rcds_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_ibdev *ibd; + struct hfi1_devdata *dd; + + ibd = (struct hfi1_ibdev *)s->private; + dd = dd_from_dev(ibd); + if (!dd->rcd || *pos >= dd->n_krcv_queues) + return NULL; + return pos; +} + +static void *_rcds_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + ++*pos; + if (!dd->rcd || *pos >= dd->n_krcv_queues) + return NULL; + return pos; +} + +static void _rcds_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _rcds_seq_show(struct seq_file *s, void *v) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + struct hfi1_ctxtdata *rcd; + loff_t *spos = v; + loff_t i = *spos; + + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + seqfile_dump_rcd(s, rcd); + hfi1_rcd_put(rcd); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(rcds); +DEBUGFS_SEQ_FILE_OPEN(rcds) +DEBUGFS_FILE_OPS(rcds); + /* read the per-device counters */ static ssize_t dev_counters_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -1321,6 +1367,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index fc7085d6cf3f..7372cc00cb2d 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1689,3 +1689,62 @@ int process_receive_invalid(struct hfi1_packet *packet) rhf_rcv_type(packet->rhf)); return RHF_RCV_CONTINUE; } + +void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) +{ + struct hfi1_packet packet; + struct ps_mdata mdata; + + seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s head %llu tail %llu\n", + rcd->ctxt, rcd->rcvhdrq_cnt, rcd->rcvhdrqentsize, + HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? 
+ "dma_rtail" : "nodma_rtail", + read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) & + RCV_HDR_HEAD_HEAD_MASK, + read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL)); + + init_packet(rcd, &packet); + init_ps_mdata(&mdata, &packet); + + while (1) { + struct hfi1_devdata *dd = rcd->dd; + __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + + dd->rhf_offset; + struct ib_header *hdr; + u64 rhf = rhf_to_cpu(rhf_addr); + u32 etype = rhf_rcv_type(rhf), qpn; + u8 opcode; + u32 psn; + u8 lnh; + + if (ps_done(&mdata, rhf, rcd)) + break; + + if (ps_skip(&mdata, rhf, rcd)) + goto next; + + if (etype > RHF_RCV_TYPE_IB) + goto next; + + packet.hdr = hfi1_get_msgheader(dd, rhf_addr); + hdr = packet.hdr; + + lnh = be16_to_cpu(hdr->lrh[0]) & 3; + + if (lnh == HFI1_LRH_BTH) + packet.ohdr = &hdr->u.oth; + else if (lnh == HFI1_LRH_GRH) + packet.ohdr = &hdr->u.l.oth; + else + goto next; /* just in case */ + + opcode = (be32_to_cpu(packet.ohdr->bth[0]) >> 24); + qpn = be32_to_cpu(packet.ohdr->bth[1]) & RVT_QPN_MASK; + psn = mask_psn(be32_to_cpu(packet.ohdr->bth[2])); + + seq_printf(s, "\tEnt %u: opcode 0x%x, qpn 0x%x, psn 0x%x\n", + mdata.ps_head, opcode, qpn, psn); +next: + update_ps_mdata(&mdata, rcd); + } +} diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b15749eab921..eadb735d958c 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1987,6 +1987,7 @@ int process_receive_error(struct hfi1_packet *packet); int kdeth_process_expected(struct hfi1_packet *packet); int kdeth_process_eager(struct hfi1_packet *packet); int process_receive_invalid(struct hfi1_packet *packet); +void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd); /* global module parameter variables */ extern unsigned int hfi1_max_mtu; -- cgit v1.2.3-59-g8ed1b From d26875b43d45644e87f4c0b6bb2d7abf3c61d529 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 8 Aug 2017 20:38:45 -0500 Subject: i40iw: Improve CQP timeout logic The current timeout logic for Control Queue-Pair (CQP) OPs does not take into account whether CQP makes progress but rather blindly waits for a large timeout value, 100000 jiffies for the completion event. Improve this by setting the timeout based on whether the CQP is making progress or not. If the CQP is hung, the timeout will happen sooner, in 5000 jiffies. Each time the CQP progress is detetcted, the timeout extends by 5000 jiffies. 
Signed-off-by: Shiraz Saleem Signed-off-by: Christopher N Bednarz Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 11 +++++++++++ drivers/infiniband/hw/i40iw/i40iw_p.h | 14 +++++++++----- drivers/infiniband/hw/i40iw/i40iw_type.h | 5 +++++ drivers/infiniband/hw/i40iw/i40iw_utils.c | 22 ++++++++++++++-------- 4 files changed, 39 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index a49ff2eb6fb3..d1f5345f04f0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -54,6 +54,17 @@ static inline void i40iw_insert_wqe_hdr(u64 *wqe, u64 header) set_64bit_val(wqe, 24, header); } +void i40iw_check_cqp_progress(struct i40iw_cqp_timeout *cqp_timeout, struct i40iw_sc_dev *dev) +{ + if (cqp_timeout->compl_cqp_cmds != dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]) { + cqp_timeout->compl_cqp_cmds = dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]; + cqp_timeout->count = 0; + } else { + if (dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] != cqp_timeout->compl_cqp_cmds) + cqp_timeout->count++; + } +} + /** * i40iw_get_cqp_reg_info - get head and tail for cqp using registers * @cqp: struct for cqp hw diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h index 28a92fee0822..e217a1259f57 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_p.h +++ b/drivers/infiniband/hw/i40iw/i40iw_p.h @@ -35,11 +35,13 @@ #ifndef I40IW_P_H #define I40IW_P_H -#define PAUSE_TIMER_VALUE 0xFFFF -#define REFRESH_THRESHOLD 0x7FFF -#define HIGH_THRESHOLD 0x800 -#define LOW_THRESHOLD 0x200 -#define ALL_TC2PFC 0xFF +#define PAUSE_TIMER_VALUE 0xFFFF +#define REFRESH_THRESHOLD 0x7FFF +#define HIGH_THRESHOLD 0x800 +#define LOW_THRESHOLD 0x200 +#define ALL_TC2PFC 0xFF +#define CQP_COMPL_WAIT_TIME 0x3E8 +#define CQP_TIMEOUT_THRESHOLD 5 void i40iw_debug_buf(struct i40iw_sc_dev *dev, enum i40iw_debug_flag mask, char *desc, u64 *buf, u32 size); @@ -51,6 +53,8 @@ void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp); u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch); +void i40iw_check_cqp_progress(struct i40iw_cqp_timeout *cqp_timeout, struct i40iw_sc_dev *dev); + enum i40iw_status_code i40iw_sc_mr_fast_register(struct i40iw_sc_qp *qp, struct i40iw_fast_reg_stag_info *info, bool post_sq); diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 959ec81fba99..63118f6d5ab4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -1345,4 +1345,9 @@ struct i40iw_virtchnl_work_info { void *worker_vf_dev; }; +struct i40iw_cqp_timeout { + u64 compl_cqp_cmds; + u8 count; +}; + #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index e311ec559f4e..62f1f45b8737 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -445,23 +445,29 @@ static int i40iw_wait_event(struct i40iw_device *iwdev, { struct cqp_commands_info *info = &cqp_request->info; struct i40iw_cqp *iwcqp = &iwdev->cqp; + struct i40iw_cqp_timeout cqp_timeout; bool cqp_error = false; int err_code = 0; - int timeout_ret = 0; + memset(&cqp_timeout, 0, sizeof(cqp_timeout)); + cqp_timeout.compl_cqp_cmds = iwdev->sc_dev.cqp_cmd_stats[OP_COMPLETED_COMMANDS]; + do { + if (wait_event_timeout(cqp_request->waitq, + cqp_request->request_done, 
CQP_COMPL_WAIT_TIME)) + break; - timeout_ret = wait_event_timeout(cqp_request->waitq, - cqp_request->request_done, - I40IW_EVENT_TIMEOUT); - if (!timeout_ret) { - i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n", - info->cqp_cmd, timeout_ret); + i40iw_check_cqp_progress(&cqp_timeout, &iwdev->sc_dev); + + if (cqp_timeout.count < CQP_TIMEOUT_THRESHOLD) + continue; + + i40iw_pr_err("error cqp command 0x%x timed out", info->cqp_cmd); err_code = -ETIME; if (!iwdev->reset) { iwdev->reset = true; i40iw_request_reset(iwdev); } goto done; - } + } while (1); cqp_error = cqp_request->compl_info.error; if (cqp_error) { i40iw_pr_err("error cqp command 0x%x completion maj = 0x%x min=0x%x\n", -- cgit v1.2.3-59-g8ed1b From 0c98568c1f00eed16c4b452557ec40f5ac64784d Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Fri, 11 Aug 2017 00:12:11 +0300 Subject: IB/pvrdma: Remove unused function The function pvrdma_idx_ring_is_valid_idx is not in use, so let's remove it. Signed-off-by: Yuval Shaia Acked-by: Adit Ranadive Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h index ed9022a91a1d..8b558ae234c8 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h @@ -111,21 +111,4 @@ static inline __s32 pvrdma_idx_ring_has_data(const struct pvrdma_ring *r, return PVRDMA_INVALID_IDX; } -static inline bool pvrdma_idx_ring_is_valid_idx(const struct pvrdma_ring *r, - __u32 max_elems, __u32 *idx) -{ - const __u32 tail = atomic_read(&r->prod_tail); - const __u32 head = atomic_read(&r->cons_head); - - if (pvrdma_idx_valid(tail, max_elems) && - pvrdma_idx_valid(head, max_elems) && - pvrdma_idx_valid(*idx, max_elems)) { - if (tail > head && (*idx < tail && *idx >= head)) - return true; - else if (head > tail && (*idx >= head || *idx < tail)) - return true; - } - return false; -} - #endif /* __PVRDMA_RING_H__ */ -- cgit v1.2.3-59-g8ed1b From 7be05753ccc27ce056d45f06a50d150927a88ed7 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 17 Aug 2017 07:58:07 -0700 Subject: RDMA: Fix return value check for ib_get_eth_speed() ib_get_eth_speed() returns 0 on success. Fix the condition check to prevent reporting failure for the query_port verb.
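The broken check inverted the kernel's 0-on-success convention: on success, !ib_get_eth_speed(...) evaluated to true, so query_port wrongly returned -EINVAL. A minimal sketch of the corrected pattern:

	/* Sketch: ib_get_eth_speed() returns 0 on success, nonzero on error. */
	if (ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed,
			     &port_attr->active_width))
		return -EINVAL;	/* only the failure path reports an error */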
Fixes: d41861942fc5 ("Add generic function to extract IB speed from netdev") Signed-off-by: Selvin Xavier Reviewed-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 ++-- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 24e2785774b8..43b3dee4b6ba 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -264,8 +264,8 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, * IB stack to avoid race in the NETDEV_UNREG path */ if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) - if (!ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed, - &port_attr->active_width)) + if (ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed, + &port_attr->active_width)) return -EINVAL; return 0; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index e5f57dd49980..97dd79ebb590 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -309,8 +309,8 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, usnic_dbg("\n"); mutex_lock(&us_ibdev->usdev_lock); - if (!ib_get_eth_speed(ibdev, port, &props->active_speed, - &props->active_width)) { + if (ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width)) { mutex_unlock(&us_ibdev->usdev_lock); return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From b588300801f3502a7de5ca897af68019fbb3bc79 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Wed, 16 Aug 2017 23:31:22 +1000 Subject: IB/mlx5: use kvmalloc_array for mlx5_ib_wq We observed multiple times on our Lustre OSS servers that when the system memory is fragmented, kmalloc() in create_kernel_qp() could fail order 4/5 allocations while we still have many free pages. Switch to kvmalloc_array() to allow the operation to continue.
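For background, a sketch of the allocation pattern the patch adopts (not the literal diff): kvmalloc_array() first attempts a physically contiguous kmalloc() and transparently falls back to vmalloc(), so what used to be a fragile order-4/5 allocation can still succeed on a fragmented system. Memory from either path must be released with kvfree():

	/* Sketch: fragmentation-tolerant array allocation. */
	qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt, sizeof(*qp->sq.wrid),
				     GFP_KERNEL);
	if (!qp->sq.wrid)
		return -ENOMEM;
	/* ... */
	kvfree(qp->sq.wrid);	/* frees kmalloc- or vmalloc-backed memory alike */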
Signed-off-by: Li Dongyang Acked-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 35 ++++++++++++++++++++--------------- drivers/infiniband/hw/mlx5/srq.c | 4 ++-- 2 files changed, 22 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5c7ce9bd466e..e098c97e027a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -965,11 +965,16 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, goto err_free; } - qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL); - qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL); - qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL); - qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL); - qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL); + qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.wrid), GFP_KERNEL); + qp->sq.wr_data = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.wr_data), GFP_KERNEL); + qp->rq.wrid = kvmalloc_array(qp->rq.wqe_cnt, + sizeof(*qp->rq.wrid), GFP_KERNEL); + qp->sq.w_list = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.w_list), GFP_KERNEL); + qp->sq.wqe_head = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.wqe_head), GFP_KERNEL); if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid || !qp->sq.w_list || !qp->sq.wqe_head) { @@ -981,11 +986,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, return 0; err_wrid: - kfree(qp->sq.wqe_head); - kfree(qp->sq.w_list); - kfree(qp->sq.wrid); - kfree(qp->sq.wr_data); - kfree(qp->rq.wrid); + kvfree(qp->sq.wqe_head); + kvfree(qp->sq.w_list); + kvfree(qp->sq.wrid); + kvfree(qp->sq.wr_data); + kvfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); err_free: @@ -998,11 +1003,11 @@ err_buf: static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { - kfree(qp->sq.wqe_head); - kfree(qp->sq.w_list); - kfree(qp->sq.wrid); - kfree(qp->sq.wr_data); - kfree(qp->rq.wrid); + kvfree(qp->sq.wqe_head); + kvfree(qp->sq.w_list); + kvfree(qp->sq.wrid); + kvfree(qp->sq.wr_data); + kvfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); mlx5_buf_free(dev->mdev, &qp->buf); } diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 43707b101f47..30b3ddd8e1ab 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -196,7 +196,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, } mlx5_fill_page_array(&srq->buf, in->pas); - srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); + srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL); if (!srq->wrid) { err = -ENOMEM; goto err_in; @@ -230,7 +230,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) { - kfree(srq->wrid); + kvfree(srq->wrid); mlx5_buf_free(dev->mdev, &srq->buf); mlx5_db_free(dev->mdev, &srq->db); } -- cgit v1.2.3-59-g8ed1b From e9105cdefbf64cd7aea300f934c92051e7cb7cff Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Wed, 16 Aug 2017 23:31:23 +1000 Subject: IB/mlx4: use kvmalloc_array to allocate wrid We could use kvmalloc_array instead of the kmalloc and __vmalloc combination. 
After this we don't need to include linux/vmalloc.h Signed-off-by: Li Dongyang Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 15 ++++----------- drivers/infiniband/hw/mlx4/srq.c | 13 ++++--------- 2 files changed, 8 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index e42acfb20588..b40d50c643e1 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -36,7 +36,6 @@ #include #include #include -#include <linux/vmalloc.h> #include #include @@ -1174,16 +1173,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; - qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64), - GFP_KERNEL | __GFP_NOWARN); - if (!qp->sq.wrid) - qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64), - GFP_KERNEL, PAGE_KERNEL); - qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64), - GFP_KERNEL | __GFP_NOWARN); - if (!qp->rq.wrid) - qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64), - GFP_KERNEL, PAGE_KERNEL); + qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(u64), GFP_KERNEL); + qp->rq.wrid = kvmalloc_array(qp->rq.wqe_cnt, + sizeof(u64), GFP_KERNEL); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 0facaf5f6d23..dd7a2fce9df4 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -34,7 +34,6 @@ #include #include #include -#include <linux/vmalloc.h> #include "mlx4_ib.h" #include @@ -171,15 +170,11 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; - srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64), - GFP_KERNEL | __GFP_NOWARN); + srq->wrid = kvmalloc_array(srq->msrq.max, + sizeof(u64), GFP_KERNEL); if (!srq->wrid) { - srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64), - GFP_KERNEL, PAGE_KERNEL); - if (!srq->wrid) { - err = -ENOMEM; - goto err_mtt; - } + err = -ENOMEM; + goto err_mtt; } } -- cgit v1.2.3-59-g8ed1b From fba02e6cb775078bc0bdf207197e0a44b1f85aca Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:34 +0300 Subject: RDMA/mlx4: Don't use uninitialized variable Avoid usage of an uninitialized variable. Fixes: 3078f5f1bd8b ("IB/mlx4: Add support for RSS QP") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index b40d50c643e1..f7ee1792ba6d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2027,8 +2027,8 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type) */ static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num) { + int err = 0; int i; - int err; for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { struct ib_wq *ibwq = ind_tbl->ind_tbl[i]; -- cgit v1.2.3-59-g8ed1b From 31f97bad06446ca2d929a363a4b5cd494f276ba5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:40 +0300 Subject: RDMA/mlx4: Remove gfp_mask argument from acquire_group call All callers of acquire_group() passed the same gfp_mask to it, so it is safe to remove it.
Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mcg.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index b73f89700ef9..70eb9f917303 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -808,8 +808,7 @@ static ssize_t sysfs_show_group(struct device *dev, struct device_attribute *attr, char *buf); static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx, - union ib_gid *mgid, int create, - gfp_t gfp_mask) + union ib_gid *mgid, int create) { struct mcast_group *group, *cur_group; int is_mgid0; @@ -825,7 +824,7 @@ static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx, if (!create) return ERR_PTR(-ENOENT); - group = kzalloc(sizeof *group, gfp_mask); + group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); @@ -892,7 +891,7 @@ int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave, case IB_MGMT_METHOD_GET_RESP: case IB_SA_METHOD_DELETE_RESP: mutex_lock(&ctx->mcg_table_lock); - group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL); + group = acquire_group(ctx, &rec->mgid, 0); mutex_unlock(&ctx->mcg_table_lock); if (IS_ERR(group)) { if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) { @@ -954,7 +953,7 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, req->sa_mad = *sa_mad; mutex_lock(&ctx->mcg_table_lock); - group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL); + group = acquire_group(ctx, &rec->mgid, may_create); mutex_unlock(&ctx->mcg_table_lock); if (IS_ERR(group)) { kfree(req); -- cgit v1.2.3-59-g8ed1b From 5d50f400e56fbc7a14ef3f8d42ba47710e455881 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:41 +0300 Subject: RDMA/usnic: Fix remove address space warning Sparse tool complains with the following error: drivers/infiniband/hw/usnic/usnic_ib_main.c:445:16: warning: cast removes address space of expression Fix it by casting the correct field, and convert the helper function that sets ifaddr to return void, because no callers use the returned value.
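For context, a sketch of the sparse issue and the fix (illustrative; the extra NULL check is a precaution added here, since in_dev_get() can return NULL): netdev->ip_ptr is declared struct in_device __rcu *, so the open-coded cast silently discarded the __rcu address-space annotation that sparse verifies. in_dev_get()/in_dev_put() perform the RCU-aware dereference and reference counting instead:

	/* Sketch: address-space-clean access to a netdev's IPv4 config. */
	struct in_device *ind = in_dev_get(netdev);	/* takes a reference */

	if (ind) {
		if (ind->ifa_list)
			usnic_fwd_add_ipaddr(ufdev, ind->ifa_list->ifa_address);
		in_dev_put(ind);			/* drops the reference */
	}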
Fixes: c7845bcafe4d ("IB/usnic: Add UDP support in u*verbs.c, u*main.c and u*util.h") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_fwd.c | 12 ++---------- drivers/infiniband/hw/usnic/usnic_fwd.h | 2 +- drivers/infiniband/hw/usnic/usnic_ib_main.c | 10 ++++++---- 3 files changed, 9 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 3c37dd59c04e..995a26b65156 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -110,20 +110,12 @@ void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]) spin_unlock(&ufdev->lock); } -int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr) +void usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr) { - int status; - spin_lock(&ufdev->lock); - if (ufdev->inaddr == 0) { + if (!ufdev->inaddr) ufdev->inaddr = inaddr; - status = 0; - } else { - status = -EFAULT; - } spin_unlock(&ufdev->lock); - - return status; } void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev) diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h index b2ac22be0731..0b2cc4e79707 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.h +++ b/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -75,7 +75,7 @@ struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev); void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev); void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]); -int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr); +void usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr); void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev); void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev); void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index e86700f994cb..f45e99a938e0 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -351,7 +351,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) { struct usnic_ib_dev *us_ibdev; union ib_gid gid; - struct in_ifaddr *in; + struct in_device *ind; struct net_device *netdev; usnic_dbg("\n"); @@ -441,9 +441,11 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (netif_carrier_ok(us_ibdev->netdev)) usnic_fwd_carrier_up(us_ibdev->ufdev); - in = ((struct in_device *)(netdev->ip_ptr))->ifa_list; - if (in != NULL) - usnic_fwd_add_ipaddr(us_ibdev->ufdev, in->ifa_address); + ind = in_dev_get(netdev); + if (ind->ifa_list) + usnic_fwd_add_ipaddr(us_ibdev->ufdev, + ind->ifa_list->ifa_address); + in_dev_put(ind); usnic_mac_ip_to_gid(us_ibdev->netdev->perm_addr, us_ibdev->ufdev->inaddr, &gid.raw[0]); -- cgit v1.2.3-59-g8ed1b From 623248955d985b86cd778c63efe6a7ad03a018c4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:42 +0300 Subject: RDMA/mthca: Make explicit conversion to 64bit value The "lg" variable is declared as int so in all places where this variable is used as a shift operand, the output will be int too. This produces the following smatch warning: drivers/infiniband/hw/mthca/mthca_cmd.c:701 mthca_map_cmd() warn: should '1 << lg' be a 64 bit type? Simple declaration of "1" to be "1ULL" will fix the issue. 
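A standalone illustration of why the literal's type matters here (demonstration only, not driver code):

	int lg = 33;

	/* buggy: the shift is evaluated in 32-bit int and overflows once
	 * lg >= 31, before the widening assignment to u64 happens
	 */
	u64 bad = 1 << lg;

	/* fixed: 1ULL forces the whole shift to be done in 64 bits */
	u64 good = 1ULL << lg;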
Signed-off-by: Leon Romanovsky Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index a1900d2a371b..419a2a20c047 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -698,7 +698,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, for (i = 0; i < mthca_icm_size(&iter) >> lg; ++i) { if (virt != -1) { pages[nent * 2] = cpu_to_be64(virt); - virt += 1 << lg; + virt += 1ULL << lg; } pages[nent * 2 + 1] = -- cgit v1.2.3-59-g8ed1b From faa9141c2268ccbd469bd876ba97e98b38f50fae Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Thu, 17 Aug 2017 15:50:43 +0300 Subject: IB/mlx4: Fix some spelling mistakes Fix spelling mistakes in remarks "retrun"->"return" "cancell"->"cancel" Signed-off-by: Talat Batheesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- drivers/infiniband/hw/mlx4/cq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 0e4f60cfc59d..155b4dfc0ae8 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -781,7 +781,7 @@ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port) spin_lock_irqsave(&dev->sriov.going_down_lock, flags); spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); if (!dev->sriov.is_going_down) { - /* If there is pending one should cancell then run, otherwise + /* If there is pending one should cancel then run, otherwise * won't run till previous one is ended as same work * struct is used. 
*/ diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 95382faa7ad1..cab796341697 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -637,7 +637,7 @@ static void mlx4_ib_poll_sw_comp(struct mlx4_ib_cq *cq, int num_entries, struct mlx4_ib_qp *qp; *npolled = 0; - /* Find uncompleted WQEs belonging to that cq and retrun + /* Find uncompleted WQEs belonging to that cq and return * simulated FLUSH_ERR completions */ list_for_each_entry(qp, &cq->send_qp_list, cq_send_list) { -- cgit v1.2.3-59-g8ed1b From 4edf8d5caf52968f3ebb58140835cfa1cfd72a2d Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Thu, 17 Aug 2017 15:50:44 +0300 Subject: IB/mlx5: Fix some spelling mistakes Fix spelling mistakes in remarks "retrun"->"return" "Decalring"->"Declaring" Signed-off-by: Talat Batheesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 2 +- drivers/infiniband/hw/mlx5/mad.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index c155df465c44..2aa53f427685 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -499,7 +499,7 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, struct mlx5_ib_qp *qp; *npolled = 0; - /* Find uncompleted WQEs belonging to that cq and retrun mmics ones */ + /* Find uncompleted WQEs belonging to that cq and return mmics ones */ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { sw_send_comp(qp, num_entries, wc + *npolled, npolled); if (*npolled >= num_entries) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 18cfe5bf0fa3..1003b0133a49 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -204,7 +204,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num, int err; void *out_cnt; - /* Decalring support of extended counters */ + /* Declaring support of extended counters */ if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { struct ib_class_port_info cpi = {}; -- cgit v1.2.3-59-g8ed1b From 4a5fd5d2965cb889f30ad94a3bfd83da70aa2c9c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 17 Aug 2017 15:50:45 +0300 Subject: IB/mlx5: Add necessary delay drop assignment Assign the statistics and configuration structure pointer on success. Fixes: fe248c3a5837 ('IB/mlx5: Add delay drop configuration and statistics') Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 55045e204518..1a67668f092f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3793,6 +3793,8 @@ static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev) if (!dbg->timeout_debugfs) goto out_debugfs; + dev->delay_drop.dbg = dbg; + return 0; out_debugfs: -- cgit v1.2.3-59-g8ed1b From c3f1ee292f23cf49b43c0e9cf8c52bec821b6649 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 17 Aug 2017 15:50:46 +0300 Subject: IB/mlx4: Fix RSS QP type in creation verb The mlx4 was designed to support QP type of MLX4_IB_QPT_RAW_PACKET. 
Fixes: 3078f5f1bd8b ("IB/mlx4: Add support for RSS QP") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index f7ee1792ba6d..0d2923c2225f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -748,7 +748,7 @@ static int create_qp_rss(struct mlx4_ib_dev *dev, struct ib_pd *ibpd, INIT_LIST_HEAD(&qp->gid_list); INIT_LIST_HEAD(&qp->steering_rules); - qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_ETHERTYPE; + qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET; qp->state = IB_QPS_RESET; /* Set dummy send resources to be compatible with HV and PRM */ -- cgit v1.2.3-59-g8ed1b From 078b3573030346df0cdc46d798c0f434dc53c2cc Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 17 Aug 2017 15:50:47 +0300 Subject: IB/mlx4: Fix struct mlx4_ib_create_wq alignment The mlx4 ABI defines to have structures with alignment of 64B. Fixes: 400b1ebcfe31 ("IB/mlx4: Add support for WQ related verbs") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 9 ++++----- include/uapi/rdma/mlx4-abi.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 0d2923c2225f..c3958fcfed75 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1046,9 +1046,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } if (src == MLX4_IB_RWQ_SRC) { - if (ucmd.wq.comp_mask || ucmd.wq.reserved1 || - ucmd.wq.reserved[0] || ucmd.wq.reserved[1] || - ucmd.wq.reserved[2]) { + if (ucmd.wq.comp_mask || ucmd.wq.reserved[0] || + ucmd.wq.reserved[1] || ucmd.wq.reserved[2]) { pr_debug("user command isn't supported\n"); err = -EOPNOTSUPP; goto err; @@ -4146,8 +4145,8 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, if (!(udata && pd->uobject)) return ERR_PTR(-EINVAL); - required_cmd_sz = offsetof(typeof(ucmd), reserved) + - sizeof(ucmd.reserved); + required_cmd_sz = offsetof(typeof(ucmd), comp_mask) + + sizeof(ucmd.comp_mask); if (udata->inlen < required_cmd_sz) { pr_debug("invalid inlen\n"); return ERR_PTR(-EINVAL); diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 21cce1a4c4dd..0e10102861b5 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -121,7 +121,6 @@ struct mlx4_ib_create_wq { __u8 log_range_size; __u8 reserved[3]; __u32 comp_mask; - __u32 reserved1; }; struct mlx4_ib_modify_wq { -- cgit v1.2.3-59-g8ed1b From f9bfea992e7d7557c40a58e8536ad8c6f177de25 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 17 Aug 2017 15:50:49 +0300 Subject: IB/mlx4: Check that reserved fields in mlx4_ib_create_qp_rss are zero According to mlx4 convention, need to fail the command due to a non-zero value in the user data which is expected to be zero. 
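The check uses memchr_inv(), which returns a pointer to the first byte that differs from the given value, or NULL when the whole buffer matches, so a zeroed reserved field can be validated in one call. The pattern, as it appears in the diff below:

	if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)))
		return ERR_PTR(-EOPNOTSUPP);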
Fixes: 3078f5f1bd8b ("IB/mlx4: Add support for RSS QP") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index c3958fcfed75..1ac148ddb250 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -812,6 +812,9 @@ static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd, return ERR_PTR(-EFAULT); } + if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved))) + return ERR_PTR(-EOPNOTSUPP); + if (ucmd.comp_mask || ucmd.reserved1) return ERR_PTR(-EOPNOTSUPP); -- cgit v1.2.3-59-g8ed1b From 84305d71d6ca9a2e6a33d9b403639f1cdeeee846 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:53 +0300 Subject: RDMA/mlx5: Limit scope of get vector affinity local function The mlx5_ib_get_vector_affinity() call is local to main.c file and there is no need to be declared globally visible. Fixes: 40b24403f33e ("mlx5: support ->get_vector_affinity") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1a67668f092f..762ef6bf219e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3819,8 +3819,8 @@ static void init_delay_drop(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); } -const struct cpumask *mlx5_ib_get_vector_affinity(struct ib_device *ibdev, - int comp_vector) +static const struct cpumask * +mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) { struct mlx5_ib_dev *dev = to_mdev(ibdev); -- cgit v1.2.3-59-g8ed1b From 17bf1ad2e80db7c9bdafec9c2da72389247194e9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:54 +0300 Subject: RDMA/mlx4: Properly annotate link layer variable The rdma_port_get_link_layer() returns enum rdma_link_layer as a return value, hence it is better to store the return value in specially annotated variable and not in int. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 1ac148ddb250..2747abde2ea8 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2725,19 +2725,17 @@ enum { static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; int err = -EINVAL; - int ll; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if (cur_state == new_state && cur_state == IB_QPS_RESET) { - ll = IB_LINK_LAYER_UNSPECIFIED; - } else { + if (cur_state != new_state || cur_state != IB_QPS_RESET) { int port = attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; ll = rdma_port_get_link_layer(&dev->ib_dev, port); } -- cgit v1.2.3-59-g8ed1b From 4b7ee6781f6aab3d236021af36aa4c95a089f402 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:55 +0300 Subject: RDMA/nes: Remove zeroed parameter from port query callback There is no need to explicitly zero parameters, because the structure requested to be filled already initialized to zeros. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_verbs.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index c2943e39d2f9..f0dc5f4aa177 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -481,21 +481,16 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); props->lid = 1; - props->lmc = 0; - props->sm_lid = 0; - props->sm_sl = 0; if (netif_queue_stopped(netdev)) props->state = IB_PORT_DOWN; else if (nesvnic->linkup) props->state = IB_PORT_ACTIVE; else props->state = IB_PORT_DOWN; - props->phys_state = 0; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; - props->qkey_viol_cntr = 0; props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_SDR; props->max_msg_sz = 0x80000000; -- cgit v1.2.3-59-g8ed1b From 05297b66ad874f6b650498a39af5a4e353e5ba19 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Tue, 22 Aug 2017 23:19:00 -0700 Subject: RDMA/vmw_pvrdma: Add RoCEv2 support The driver version is bumped for compatibility purposes. Also, send correct GID type during register to device. Added compatibility check macros for the device. Reviewed-by: Jorgen Hansen Reviewed-by: Aditya Sarwade Signed-off-by: Bryan Tan Signed-off-by: Adit Ranadive Reviewed-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 2 ++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 28 +++++++++++++++++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 36 ++++++++++------------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c | 7 +++++ 4 files changed, 51 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 8e2f0a11690f..663a0c301c43 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -194,6 +194,7 @@ struct pvrdma_dev { void *resp_slot; unsigned long flags; struct list_head device_link; + unsigned int dsr_version; /* Locking and interrupt information. */ spinlock_t cmd_lock; /* Command lock. 
*/ @@ -444,6 +445,7 @@ void pvrdma_ah_attr_to_rdma(struct rdma_ah_attr *dst, const struct pvrdma_ah_attr *src); void rdma_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst, const struct rdma_ah_attr *src); +u8 ib_gid_type_to_pvrdma(enum ib_gid_type gid_type); int pvrdma_uar_table_init(struct pvrdma_dev *dev); void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index 09078ccfaec7..3a308ffe00bd 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -50,7 +50,15 @@ #include "pvrdma_verbs.h" -#define PVRDMA_VERSION 17 +/* + * PVRDMA version macros. Some new features require updates to PVRDMA_VERSION. + * These macros allow us to check for different features if necessary. + */ + +#define PVRDMA_ROCEV1_VERSION 17 +#define PVRDMA_ROCEV2_VERSION 18 +#define PVRDMA_VERSION PVRDMA_ROCEV2_VERSION + #define PVRDMA_BOARD_ID 1 #define PVRDMA_REV_ID 1 @@ -123,6 +131,24 @@ #define PVRDMA_GID_TYPE_FLAG_ROCE_V1 BIT(0) #define PVRDMA_GID_TYPE_FLAG_ROCE_V2 BIT(1) +/* + * Version checks. This checks whether each version supports specific + * capabilities from the device. + */ + +#define PVRDMA_IS_VERSION17(_dev) \ + (_dev->dsr_version == PVRDMA_ROCEV1_VERSION && \ + _dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1) + +#define PVRDMA_IS_VERSION18(_dev) \ + (_dev->dsr_version >= PVRDMA_ROCEV2_VERSION && \ + (_dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1 || \ + _dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2)) \ + +#define PVRDMA_SUPPORTED(_dev) \ + ((_dev->dsr->caps.mode == PVRDMA_DEVICE_MODE_ROCE) && \ + (PVRDMA_IS_VERSION17(_dev) || PVRDMA_IS_VERSION18(_dev))) + enum pvrdma_pci_resource { PVRDMA_PCI_RESOURCE_MSIX, /* BAR0: MSI-X, MMIO. */ PVRDMA_PCI_RESOURCE_REG, /* BAR1: Registers, MMIO. 
*/ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 5b00156dff30..6ce709a67959 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -128,10 +128,14 @@ static int pvrdma_init_device(struct pvrdma_dev *dev) static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { + struct pvrdma_dev *dev = to_vdev(ibdev); struct ib_port_attr attr; int err; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1) + immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE; + else if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2) + immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; err = ib_query_port(ibdev, port_num, &attr); if (err) @@ -569,6 +573,7 @@ static void pvrdma_free_slots(struct pvrdma_dev *dev) static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, const union ib_gid *gid, + u8 gid_type, int index) { int ret; @@ -586,7 +591,7 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024); cmd_bind->vlan = 0xfff; cmd_bind->index = index; - cmd_bind->gid_type = PVRDMA_GID_TYPE_FLAG_ROCE_V1; + cmd_bind->gid_type = gid_type; ret = pvrdma_cmd_post(dev, &req, NULL, 0); if (ret < 0) { @@ -607,7 +612,9 @@ static int pvrdma_add_gid(struct ib_device *ibdev, { struct pvrdma_dev *dev = to_vdev(ibdev); - return pvrdma_add_gid_at_index(dev, gid, index); + return pvrdma_add_gid_at_index(dev, gid, + ib_gid_type_to_pvrdma(attr->gid_type), + index); } static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index) @@ -722,7 +729,6 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, int ret; unsigned long start; unsigned long len; - unsigned int version; dma_addr_t slot_dma = 0; dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev)); @@ -819,13 +825,9 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, goto err_unmap_regs; } - version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION); + dev->dsr_version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION); dev_info(&pdev->dev, "device version %d, driver version %d\n", - version, PVRDMA_VERSION); - if (version < PVRDMA_VERSION) { - dev_err(&pdev->dev, "incompatible device version\n"); - goto err_uar_unmap; - } + dev->dsr_version, PVRDMA_VERSION); dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr), &dev->dsrbase, GFP_KERNEL); @@ -896,17 +898,9 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, /* Make sure the write is complete before reading status. */ mb(); - /* Currently, the driver only supports RoCE mode. */ - if (dev->dsr->caps.mode != PVRDMA_DEVICE_MODE_ROCE) { - dev_err(&pdev->dev, "unsupported transport %d\n", - dev->dsr->caps.mode); - ret = -EFAULT; - goto err_free_cq_ring; - } - - /* Currently, the driver only supports RoCE V1. */ - if (!(dev->dsr->caps.gid_types & PVRDMA_GID_TYPE_FLAG_ROCE_V1)) { - dev_err(&pdev->dev, "driver needs RoCE v1 support\n"); + /* The driver supports RoCE V1 and V2. 
*/ + if (!PVRDMA_SUPPORTED(dev)) { + dev_err(&pdev->dev, "driver needs RoCE v1 or v2 support\n"); ret = -EFAULT; goto err_free_cq_ring; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c index ec6a4ca1eeb7..fb0c5c0976b3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c @@ -303,3 +303,10 @@ void rdma_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst, dst->port_num = rdma_ah_get_port_num(src); memcpy(&dst->dmac, src->roce.dmac, sizeof(dst->dmac)); } + +u8 ib_gid_type_to_pvrdma(enum ib_gid_type gid_type) +{ + return (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? + PVRDMA_GID_TYPE_FLAG_ROCE_V2 : + PVRDMA_GID_TYPE_FLAG_ROCE_V1; +} -- cgit v1.2.3-59-g8ed1b From a31a2a3b27f18bce0830581057a3fff75b830924 Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Tue, 22 Aug 2017 23:19:01 -0700 Subject: RDMA/vmw_pvrdma: Update device query parameters and port caps Added support for two device caps - max_sge_rd, max_fast_reg_page_list_len and the IP_BASED_GIDS port cap flag. Reviewed-by: Jorgen Hansen Reviewed-by: Bryan Tan Reviewed-by: Aditya Sarwade Signed-off-by: Adit Ranadive Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 9 ++++++++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 9 +++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index 3a308ffe00bd..df0a6b525021 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -149,6 +149,13 @@ ((_dev->dsr->caps.mode == PVRDMA_DEVICE_MODE_ROCE) && \ (PVRDMA_IS_VERSION17(_dev) || PVRDMA_IS_VERSION18(_dev))) +/* + * Get capability values based on device version. + */ + +#define PVRDMA_GET_CAP(_dev, _old_val, _val) \ + ((PVRDMA_IS_VERSION18(_dev)) ? _val : _old_val) + enum pvrdma_pci_resource { PVRDMA_PCI_RESOURCE_MSIX, /* BAR0: MSI-X, MMIO. */ PVRDMA_PCI_RESOURCE_REG, /* BAR1: Registers, MMIO. 
*/ @@ -251,7 +258,7 @@ struct pvrdma_device_caps { u8 atomic_ops; /* PVRDMA_ATOMIC_OP_* bits */ u8 bmme_flags; /* FRWR Mem Mgmt Extensions */ u8 gid_types; /* PVRDMA_GID_TYPE_FLAG_ */ - u8 reserved[4]; + u32 max_fast_reg_page_list_len; }; struct pvrdma_ring_page_info { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 28517042011d..48776f5ffb0e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -83,6 +83,8 @@ int pvrdma_query_device(struct ib_device *ibdev, props->max_qp_wr = dev->dsr->caps.max_qp_wr; props->device_cap_flags = dev->dsr->caps.device_cap_flags; props->max_sge = dev->dsr->caps.max_sge; + props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge, + dev->dsr->caps.max_sge_rd); props->max_cq = dev->dsr->caps.max_cq; props->max_cqe = dev->dsr->caps.max_cqe; props->max_mr = dev->dsr->caps.max_mr; @@ -101,8 +103,14 @@ int pvrdma_query_device(struct ib_device *ibdev, (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) && (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) { props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + props->max_fast_reg_page_list_len = PVRDMA_GET_CAP(dev, + PVRDMA_MAX_FAST_REG_PAGES, + dev->dsr->caps.max_fast_reg_page_list_len); } + props->device_cap_flags |= IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_RC_RNR_NAK_GEN; + return 0; } @@ -143,6 +151,7 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port, props->gid_tbl_len = resp->attrs.gid_tbl_len; props->port_cap_flags = pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags); + props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; props->max_msg_sz = resp->attrs.max_msg_sz; props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr; props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr; -- cgit v1.2.3-59-g8ed1b From 847cb1a3566ca5b47b94a8e90f881b533f35cf07 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 24 Aug 2017 09:25:53 +0100 Subject: RDMA/qedr: fix spelling mistake: "invlaid" -> "invalid" Trivial fix to spelling mistake in DP_ERR error message Signed-off-by: Colin Ian King Reviewed-by: Leon Romanovsky Reviewed-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 9ee2dce3e5bb..769ac07c3c8e 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -491,7 +491,7 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, (udata && context) ? "User Lib" : "Kernel"); if (!dev->rdma_ctx) { - DP_ERR(dev, "invlaid RDMA context\n"); + DP_ERR(dev, "invalid RDMA context\n"); return ERR_PTR(-EINVAL); } -- cgit v1.2.3-59-g8ed1b From accbef5cc624be745c1de903dd3a05681aaa0ac1 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Thu, 24 Aug 2017 20:11:42 +0300 Subject: RDMA/i40iw: Remove unused argument None of the calls to i40iw_netdev_vlan_ipv6 are using mac so let's remove it from func's args-list. 
Signed-off-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index a2b135039e5a..14f36ba4e5be 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1582,15 +1582,14 @@ static enum i40iw_status_code i40iw_del_multiple_qhash( } /** - * i40iw_netdev_vlan_ipv6 - Gets the netdev and mac + * i40iw_netdev_vlan_ipv6 - Gets the netdev and vlan * @addr: local IPv6 address * @vlan_id: vlan id for the given IPv6 address - * @mac: mac address for the given IPv6 address * * Returns the net_device of the IPv6 address and also sets the - * vlan id and mac for that address. + * vlan id for that address. */ -static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) +static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id) { struct net_device *ip_dev = NULL; struct in6_addr laddr6; @@ -1600,15 +1599,11 @@ static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *ma i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr); if (vlan_id) *vlan_id = I40IW_NO_VLAN; - if (mac) - eth_zero_addr(mac); rcu_read_lock(); for_each_netdev_rcu(&init_net, ip_dev) { if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) { if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(ip_dev); - if (ip_dev->dev_addr && mac) - ether_addr_copy(mac, ip_dev->dev_addr); break; } } @@ -3588,7 +3583,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->vlan_id = i40iw_get_vlan_ipv4(cm_node->loc_addr); } else { cm_node->ipv4 = false; - i40iw_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id, NULL); + i40iw_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id); } i40iw_debug(cm_node->dev, I40IW_DEBUG_CM, @@ -3787,7 +3782,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) raddr6->sin6_addr.in6_u.u6_addr32); cm_info.loc_port = ntohs(laddr6->sin6_port); cm_info.rem_port = ntohs(raddr6->sin6_port); - i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL); + i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id); } cm_info.cm_id = cm_id; cm_info.tos = cm_id->tos; @@ -3929,8 +3924,7 @@ int i40iw_create_listen(struct iw_cm_id *cm_id, int backlog) cm_info.loc_port = ntohs(laddr6->sin6_port); if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY) i40iw_netdev_vlan_ipv6(cm_info.loc_addr, - &cm_info.vlan_id, - NULL); + &cm_info.vlan_id); else wildcard = true; } -- cgit v1.2.3-59-g8ed1b From 96dc3fc5f1d66b20cdf839d571c7b907e08d5d00 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 17 Aug 2017 15:52:28 +0300 Subject: IB/mlx5: Expose software parsing for Raw Ethernet QP Software parsing (SWP) is a feature that can be used to instruct the device to stop using its internal parser and to parse packets on the transmit path according to offsets set for each packets. Through this feature, the device allows the handling of checksum and LSO by the hardware according to the location of IP and TCP/UDP headers. Enable SW parsing on Raw Ethernet send queue by default if firmware supports it and report these capabilities to user space. 
Signed-off-by: Noa Osherovich Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 21 +++++++++++++++++++++ drivers/infiniband/hw/mlx5/qp.c | 3 +++ include/uapi/rdma/mlx5-abi.h | 17 +++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 762ef6bf219e..07789450ec42 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -811,6 +811,27 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), reserved, uhw->outlen)) resp.response_length += sizeof(resp.reserved); + if (field_avail(typeof(resp), sw_parsing_caps, + uhw->outlen)) { + resp.response_length += sizeof(resp.sw_parsing_caps); + if (MLX5_CAP_ETH(mdev, swp)) { + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING; + + if (MLX5_CAP_ETH(mdev, swp_csum)) + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING_CSUM; + + if (MLX5_CAP_ETH(mdev, swp_lso)) + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING_LSO; + + if (resp.sw_parsing_caps.sw_parsing_offloads) + resp.sw_parsing_caps.supported_qpts = + BIT(IB_QPT_RAW_PACKET); + } + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index bc49d14e0a00..656773196f27 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1088,6 +1088,9 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); MLX5_SET(sqc, sqc, tis_lst_sz, 1); MLX5_SET(sqc, sqc, tis_num_0, sq->tisn); + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && + MLX5_CAP_ETH(dev->mdev, swp)) + MLX5_SET(sqc, sqc, allow_swp, 1); wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 0b3d30837a9f..64d398e662cd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -168,6 +168,22 @@ struct mlx5_packet_pacing_caps { __u32 reserved; }; +enum mlx5_ib_sw_parsing_offloads { + MLX5_IB_SW_PARSING = 1 << 0, + MLX5_IB_SW_PARSING_CSUM = 1 << 1, + MLX5_IB_SW_PARSING_LSO = 1 << 2, +}; + +struct mlx5_ib_sw_parsing_caps { + __u32 sw_parsing_offloads; /* enum mlx5_ib_sw_parsing_offloads */ + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_RAW_PACKET + */ + __u32 supported_qpts; +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -177,6 +193,7 @@ struct mlx5_ib_query_device_resp { struct mlx5_packet_pacing_caps packet_pacing_caps; __u32 mlx5_ib_support_multi_pkt_send_wqes; __u32 reserved; + struct mlx5_ib_sw_parsing_caps sw_parsing_caps; }; struct mlx5_ib_create_cq { -- cgit v1.2.3-59-g8ed1b From 8b7ff7f3b301de52924cb2cf3fed47b181893116 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Thu, 17 Aug 2017 15:52:29 +0300 Subject: IB/mlx5: Enable UMR for MRs created with reg_create This patch is the first step in decoupling UMR usage and allocation from the MR cache. The only functional change in this patch is to enables UMR for MRs created with reg_create. This change fixes a bug where ODP memory regions that were not allocated from the MR cache did not have UMR enabled. 
Signed-off-by: Ilya Lesokhin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/mr.c | 33 ++++++++++++++------------------- include/linux/mlx5/driver.h | 2 +- 3 files changed, 16 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7ac991070020..b3380e8beacf 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -503,7 +503,7 @@ struct mlx5_ib_mr { struct mlx5_shared_mr_info *smr_info; struct list_head list; int order; - int umred; + bool allocated_from_cache; int npages; struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index a0eb2f96179a..bc87016021e3 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -48,8 +48,7 @@ enum { #define MLX5_UMR_ALIGN 2048 static int clean_mr(struct mlx5_ib_mr *mr); -static int max_umr_order(struct mlx5_ib_dev *dev); -static int use_umr(struct mlx5_ib_dev *dev, int order); +static int mr_cache_max_order(struct mlx5_ib_dev *dev); static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) @@ -184,7 +183,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr->order = ent->order; - mr->umred = 1; + mr->allocated_from_cache = 1; mr->dev = dev; MLX5_SET(mkc, mkc, free, 1); @@ -497,7 +496,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) int i; c = order2idx(dev, order); - last_umr_cache_entry = order2idx(dev, max_umr_order(dev)); + last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev)); if (c < 0 || c > last_umr_cache_entry) { mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); return NULL; @@ -677,12 +676,12 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); queue_work(cache->wq, &ent->work); - if (i > MAX_UMR_CACHE_ENTRY) { + if (i > MR_CACHE_LAST_STD_ENTRY) { mlx5_odp_init_mr_cache_entry(ent); continue; } - if (!use_umr(dev, ent->order)) + if (ent->order > mr_cache_max_order(dev)) continue; ent->page = PAGE_SHIFT; @@ -819,18 +818,13 @@ static int get_octo_len(u64 addr, u64 len, int page_size) return (npages + 1) / 2; } -static int max_umr_order(struct mlx5_ib_dev *dev) +static int mr_cache_max_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return MAX_UMR_CACHE_ENTRY + 2; + return MR_CACHE_LAST_STD_ENTRY + 2; return MLX5_MAX_UMR_SHIFT; } -static int use_umr(struct mlx5_ib_dev *dev, int order) -{ - return order <= max_umr_order(dev); -} - static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, int access_flags, struct ib_umem **umem, int *npages, int *page_shift, int *ncont, @@ -1149,6 +1143,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET64(mkc, mkc, start_addr, virt_addr); MLX5_SET64(mkc, mkc, len, length); @@ -1231,7 +1226,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - if (use_umr(dev, order)) { + if (order <= mr_cache_max_order(dev)) { mr = 
reg_umr(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { @@ -1355,7 +1350,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, /* * UMR can't be used - MKey needs to be replaced. */ - if (mr->umred) { + if (mr->allocated_from_cache) { err = unreg_umr(dev, mr); if (err) mlx5_ib_warn(dev, "Failed to unregister MR\n"); @@ -1373,7 +1368,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (IS_ERR(mr)) return PTR_ERR(mr); - mr->umred = 0; + mr->allocated_from_cache = 0; } else { /* * Send a UMR WQE @@ -1461,7 +1456,7 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) static int clean_mr(struct mlx5_ib_mr *mr) { struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - int umred = mr->umred; + int allocated_from_cache = mr->allocated_from_cache; int err; if (mr->sig) { @@ -1479,7 +1474,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) mlx5_free_priv_descs(mr); - if (!umred) { + if (!allocated_from_cache) { err = destroy_mkey(dev, mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", @@ -1490,7 +1485,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) mlx5_mr_cache_free(dev, mr); } - if (!umred) + if (!allocated_from_cache) kfree(mr); return 0; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index db40bc4055c7..99d88624ad07 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1093,7 +1093,7 @@ enum { }; enum { - MAX_UMR_CACHE_ENTRY = 20, + MR_CACHE_LAST_STD_ENTRY = 20, MLX5_IMR_MTT_CACHE_ENTRY, MLX5_IMR_KSM_CACHE_ENTRY, MAX_MR_CACHE_ENTRIES -- cgit v1.2.3-59-g8ed1b From ff740aefecb98da4605df1cada7904c44eaee161 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Thu, 17 Aug 2017 15:52:30 +0300 Subject: IB/mlx5: Decouple MR allocation and population flows mlx5 compatible devices have two ways of populating the MTT table of an MKEY: using a FW command and using a UMR WQE. A UMR is much faster, so it should be used whenever possible. Unfortunately the code today uses UMR only if the MKEY was allocated from the MR cache. Fix the code to use UMR even for MKEYs that were allocated using a FW command. 
Signed-off-by: Ilya Lesokhin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 75 ++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 30 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bc87016021e3..aa6f71570b77 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -898,7 +898,8 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, return err; } -static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, +static struct mlx5_ib_mr *alloc_mr_from_cache( + struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, u64 len, int npages, int page_shift, int order, int access_flags) { @@ -930,16 +931,6 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, mr->mmkey.size = len; mr->mmkey.pd = to_mpd(pd)->pdn; - err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, - MLX5_IB_UPD_XLT_ENABLE); - - if (err) { - mlx5_mr_cache_free(dev, mr); - return ERR_PTR(err); - } - - mr->live = 1; - return mr; } @@ -1105,7 +1096,8 @@ free_xlt: static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, u64 virt_addr, u64 length, struct ib_umem *umem, int npages, - int page_shift, int access_flags) + int page_shift, int access_flags, + bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr; @@ -1120,15 +1112,19 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, if (!mr) return ERR_PTR(-ENOMEM); - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + - sizeof(*pas) * ((npages + 1) / 2) * 2; + mr->ibmr.pd = pd; + mr->access_flags = access_flags; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + if (populate) + inlen += sizeof(*pas) * roundup(npages, 2); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_1; } pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - if (!(access_flags & IB_ACCESS_ON_DEMAND)) + if (populate && !(access_flags & IB_ACCESS_ON_DEMAND)) mlx5_ib_populate_pas(dev, umem, page_shift, pas, pg_cap ? 
MLX5_IB_MTT_PRESENT : 0); @@ -1137,6 +1133,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); @@ -1153,8 +1150,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, get_octo_len(virt_addr, length, 1 << page_shift)); MLX5_SET(mkc, mkc, log_page_size, page_shift); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(virt_addr, length, 1 << page_shift)); + if (populate) { + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + get_octo_len(virt_addr, length, 1 << page_shift)); + } err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) { @@ -1163,9 +1162,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, } mr->mmkey.type = MLX5_MKEY_MR; mr->desc_size = sizeof(struct mlx5_mtt); - mr->umem = umem; mr->dev = dev; - mr->live = 1; kvfree(in); mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); @@ -1205,6 +1202,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int ncont; int order; int err; + bool use_umr = true; mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); @@ -1223,27 +1221,29 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, &page_shift, &ncont, &order); - if (err < 0) + if (err < 0) return ERR_PTR(err); if (order <= mr_cache_max_order(dev)) { - mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, - order, access_flags); + mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, + page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { mlx5_ib_dbg(dev, "cache empty for order %d", order); mr = NULL; } - } else if (access_flags & IB_ACCESS_ON_DEMAND && - !MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { - err = -EINVAL; - pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB"); - goto error; + } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { + if (access_flags & IB_ACCESS_ON_DEMAND) { + err = -EINVAL; + pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB"); + goto error; + } + use_umr = false; } if (!mr) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags); + page_shift, access_flags, !use_umr); mutex_unlock(&dev->slow_path_mutex); } @@ -1261,8 +1261,22 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, update_odp_mr(mr); #endif - return &mr->ibmr; + if (use_umr) { + int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; + + if (access_flags & IB_ACCESS_ON_DEMAND) + update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP; + err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, + update_xlt_flags); + if (err) { + mlx5_ib_dereg_mr(&mr->ibmr); + return ERR_PTR(err); + } + } + + mr->live = 1; + return &mr->ibmr; error: ib_umem_release(umem); return ERR_PTR(err); @@ -1363,12 +1377,13 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, return err; mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, 
- page_shift, access_flags); + page_shift, access_flags, true); if (IS_ERR(mr)) return PTR_ERR(mr); mr->allocated_from_cache = 0; + mr->live = 1; } else { /* * Send a UMR WQE -- cgit v1.2.3-59-g8ed1b From 5942d8ae411775b76e5e1ab0cce57b0666516f2d Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 17 Aug 2017 15:52:31 +0300 Subject: IB/mlx5: Fix memory leak in clean_mr error path In clean_mr error path the 'mr' should be freed. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Kamal Heib Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index aa6f71570b77..8cf53eb13148 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1490,19 +1490,19 @@ static int clean_mr(struct mlx5_ib_mr *mr) mlx5_free_priv_descs(mr); if (!allocated_from_cache) { + u32 key = mr->mmkey.key; + err = destroy_mkey(dev, mr); + kfree(mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", - mr->mmkey.key, err); + key, err); return err; } } else { mlx5_mr_cache_free(dev, mr); } - if (!allocated_from_cache) - kfree(mr); - return 0; } -- cgit v1.2.3-59-g8ed1b From 7b4cdaae73ee833975a767cf54a3354d355b3f8d Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Thu, 17 Aug 2017 15:52:32 +0300 Subject: IB/mlx5: Fix integer overflow when page_shift == 31 Fix a bug where MR registration fails when mlx5_ib_cont_pages indicates that the MR can be mapped using 2GB pages (page_shift == 31). Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Ilya Lesokhin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8cf53eb13148..0e2789d9bb4d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -808,13 +808,14 @@ err_free: return ERR_PTR(err); } -static int get_octo_len(u64 addr, u64 len, int page_size) +static int get_octo_len(u64 addr, u64 len, int page_shift) { + u64 page_size = 1ULL << page_shift; u64 offset; int npages; offset = addr & (page_size - 1); - npages = ALIGN(len + offset, page_size) >> ilog2(page_size); + npages = ALIGN(len + offset, page_size) >> page_shift; return (npages + 1) / 2; } @@ -1147,12 +1148,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, - get_octo_len(virt_addr, length, 1 << page_shift)); + get_octo_len(virt_addr, length, page_shift)); MLX5_SET(mkc, mkc, log_page_size, page_shift); MLX5_SET(mkc, mkc, qpn, 0xffffff); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(virt_addr, length, 1 << page_shift)); + get_octo_len(virt_addr, length, page_shift)); } err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); -- cgit v1.2.3-59-g8ed1b From a550ddfc543e250798048cf4eabe721cd85ac724 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 17 Aug 2017 15:52:33 +0300 Subject: IB/mlx5: Add support for multi underlay QP Set underlay QPN as part of flow rule when it's applicable. 
There is one root flow table in the NIC RX namespace and all the underlay QPs steer the traffic to this flow table. In order to prevent QP to get traffic which is not target to its underlay QP, we need to set the underlay QP number as part of the steering matching. Note: When multicast traffic is sent the QPN filtering is done by the firmware as some early step. Adding the QPN match on the flow table entry is wrong as by that time the target QPN holds the multicast address (e.g. FF(s)) and it won't match. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 49 +++++++++++++++++++++++++++++++++------ include/linux/mlx5/mlx5_ifc.h | 8 +++++-- 2 files changed, 48 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 07789450ec42..8f7e46090f9a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2125,7 +2125,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, * it won't fall into the multicast flow steering table and this rule * could steal other multicast packets. */ -static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) +static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) { union ib_flow_spec *flow_spec; @@ -2337,10 +2337,31 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, return err ? ERR_PTR(err) : prio; } -static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) +static void set_underlay_qp(struct mlx5_ib_dev *dev, + struct mlx5_flow_spec *spec, + u32 underlay_qpn) +{ + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (underlay_qpn && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + ft_field_support.bth_dst_qp)) { + MLX5_SET(fte_match_set_misc, + misc_params_v, bth_dst_qp, underlay_qpn); + MLX5_SET(fte_match_set_misc, + misc_params_c, bth_dst_qp, 0xffffff); + } +} + +static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst, + u32 underlay_qpn) { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; @@ -2376,6 +2397,9 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, ib_flow += ((union ib_flow_spec *)ib_flow)->size; } + if (!flow_is_multicast_only(flow_attr)) + set_underlay_qp(dev, spec, underlay_qpn); + spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); if (is_drop) { flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; @@ -2415,6 +2439,14 @@ free: return err ? 
ERR_PTR(err) : handler; } +static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0); +} + static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, struct ib_flow_attr *flow_attr, @@ -2551,6 +2583,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; int err; + int underlay_qpn; if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); @@ -2591,8 +2624,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, handler = create_dont_trap_rule(dev, ft_prio, flow_attr, dst); } else { - handler = create_flow_rule(dev, ft_prio, flow_attr, - dst); + underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ? + mqp->underlay_qpn : 0; + handler = _create_flow_rule(dev, ft_prio, flow_attr, + dst, underlay_qpn); } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5bae70eb25af..6563500c85de 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -295,8 +295,10 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 inner_tcp_dport[0x1]; u8 inner_tcp_flags[0x1]; u8 reserved_at_37[0x9]; + u8 reserved_at_40[0x1a]; + u8 bth_dst_qp[0x1]; - u8 reserved_at_40[0x40]; + u8 reserved_at_5b[0x25]; }; struct mlx5_ifc_flow_table_prop_layout_bits { @@ -432,7 +434,9 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_100[0xc]; u8 inner_ipv6_flow_label[0x14]; - u8 reserved_at_120[0xe0]; + u8 reserved_at_120[0x28]; + u8 bth_dst_qp[0x18]; + u8 reserved_at_160[0xa0]; }; struct mlx5_ifc_cmd_pas_bits { -- cgit v1.2.3-59-g8ed1b From 795b609c8b59f8f20fa9d72bf8b4ae3b8aa5582c Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 17 Aug 2017 15:52:34 +0300 Subject: IB/mlx5: Allow posting multi packet send WQEs if hardware supports Set the field to allow posting multi packet send WQEs if hardware supports this feature. This doesn't mean the send WQEs will be for multi packet unless the send WQE was prepared according to multi packet send WQE format. User space shall use flag MLX5_IB_ALLOW_MPW to check if hardware supports MPW and allows MPW in SQ context. 
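A sketch of the corresponding user-space test (flag and field names as introduced in the uapi diff below; how the response is obtained is outside the scope of this patch):

	static bool mpw_allowed(const struct mlx5_ib_query_device_resp *resp)
	{
		return resp->mlx5_ib_support_multi_pkt_send_wqes &
		       MLX5_IB_ALLOW_MPW;
	}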
Signed-off-by: Bodong Wang Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 5 +++-- drivers/infiniband/hw/mlx5/qp.c | 2 ++ include/linux/mlx5/mlx5_ifc.h | 2 +- include/uapi/rdma/mlx5-abi.h | 5 +++++ 4 files changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8f7e46090f9a..ba0a97d6f677 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -802,8 +802,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, uhw->outlen)) { - resp.mlx5_ib_support_multi_pkt_send_wqes = - MLX5_CAP_ETH(mdev, multi_pkt_send_wqe); + if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) + resp.mlx5_ib_support_multi_pkt_send_wqes = + MLX5_IB_ALLOW_MPW; resp.response_length += sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 656773196f27..d6df88a78d5e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1083,6 +1083,8 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); MLX5_SET(sqc, sqc, flush_in_error_en, 1); + if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) + MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6563500c85de..6865e60ba473 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2445,7 +2445,7 @@ struct mlx5_ifc_sqc_bits { u8 cd_master[0x1]; u8 fre[0x1]; u8 flush_in_error_en[0x1]; - u8 reserved_at_4[0x1]; + u8 allow_multi_pkt_send_wqe[0x1]; u8 min_wqe_inline_mode[0x3]; u8 state[0x4]; u8 reg_umr[0x1]; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 64d398e662cd..a61a512e5747 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -168,6 +168,11 @@ struct mlx5_packet_pacing_caps { __u32 reserved; }; +enum mlx5_ib_mpw_caps { + MPW_RESERVED = 1 << 0, + MLX5_IB_ALLOW_MPW = 1 << 1, +}; + enum mlx5_ib_sw_parsing_offloads { MLX5_IB_SW_PARSING = 1 << 0, MLX5_IB_SW_PARSING_CSUM = 1 << 1, -- cgit v1.2.3-59-g8ed1b From 050da902adde8faf6b1bef15ac4876ae145358f4 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 17 Aug 2017 15:52:35 +0300 Subject: IB/mlx5: Report mlx5 enhanced multi packet WQE capability Expose enhanced multi packet WQE capability to user space through query_device by uhw. 
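Extending the sketch from the previous patch (same resp pointer), the one response field now carries both bits, so a consumer can tell plain MPW apart from the enhanced WQE format (flag names from the uapi diff below):

	__u32 mpw = resp->mlx5_ib_support_multi_pkt_send_wqes;

	if (mpw & MLX5_IB_SUPPORT_EMPW) {
		/* enhanced multi packet send WQE format is available */
	} else if (mpw & MLX5_IB_ALLOW_MPW) {
		/* only the original MPW format is allowed */
	}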
Signed-off-by: Bodong Wang Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 5 +++++ include/linux/mlx5/mlx5_ifc.h | 2 +- include/uapi/rdma/mlx5-abi.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ba0a97d6f677..62e6298810e7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -805,6 +805,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) resp.mlx5_ib_support_multi_pkt_send_wqes = MLX5_IB_ALLOW_MPW; + + if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)) + resp.mlx5_ib_support_multi_pkt_send_wqes |= + MLX5_IB_SUPPORT_EMPW; + resp.response_length += sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6865e60ba473..4eff0b8a1482 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -604,7 +604,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 rss_ind_tbl_cap[0x4]; u8 reg_umr_sq[0x1]; u8 scatter_fcs[0x1]; - u8 reserved_at_1a[0x1]; + u8 enhanced_multi_pkt_send_wqe[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; u8 tunnel_statless_gre[0x1]; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index a61a512e5747..1791bf123ba9 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -171,6 +171,7 @@ struct mlx5_packet_pacing_caps { enum mlx5_ib_mpw_caps { MPW_RESERVED = 1 << 0, MLX5_IB_ALLOW_MPW = 1 << 1, + MLX5_IB_SUPPORT_EMPW = 1 << 2, }; enum mlx5_ib_sw_parsing_offloads { -- cgit v1.2.3-59-g8ed1b From 5b0ef650bd0f820e922fcc42f1985d4621ae19cf Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 21 Aug 2017 18:26:20 -0700 Subject: IB/{qib, hfi1}: Avoid flow control testing for RDMA write operation Section 9.7.7.2.5 of the 1.3 IBTA spec clearly says that receive credits should never apply to RDMA write, yet qib and hfi1 were applying them. The following situation will result in a QP hang: - A prior SEND or RDMA_WRITE with immediate consumed the last credit for a QP using RC receive buffer credits - The prior op is acked so there are no more acks - The peer ULP fails to post receive for some reason - An RDMA write sees that the credits are exhausted and waits - The peer ULP posts receive buffers - The ULP posts a send or RDMA write that will hang The fix is to avoid the credit test for the RDMA write operation. Cc: Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 3 ++- drivers/infiniband/hw/qib/qib_rc.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 99defcc0ce45..e1cf0c08ca6f 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -442,7 +442,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) case IB_WR_RDMA_WRITE: if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* FALLTHROUGH */ + goto no_flow_control; case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return.
*/ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && @@ -450,6 +450,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } +no_flow_control: put_ib_reth_vaddr( wqe->rdma_wr.remote_addr, &ohdr->u.rc.reth); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 4ddbcac5eabe..e9a91736b12d 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -348,7 +348,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) case IB_WR_RDMA_WRITE: if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* FALLTHROUGH */ + goto no_flow_control; case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && @@ -356,7 +356,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - +no_flow_control: ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = -- cgit v1.2.3-59-g8ed1b From 3b7169338c1caad4d17ed5d9ed412f813c919cd2 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Mon, 21 Aug 2017 18:26:26 -0700 Subject: IB/qib: Remove unnecessary memory allocation for boardname Remove all the memory allocation implemented for boardname and directly assign the defined string literal. Reviewed-by: Michael J. Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_iba6120.c | 20 ++++++----------- drivers/infiniband/hw/qib/qib_iba7220.c | 22 +++++++----------- drivers/infiniband/hw/qib/qib_iba7322.c | 40 ++++++++++++++------------------- drivers/infiniband/hw/qib/qib_init.c | 1 - 4 files changed, 32 insertions(+), 51 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 46045fc28fa0..3259a60e4f4f 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1742,38 +1742,32 @@ static void qib_setup_6120_interrupt(struct qib_devdata *dd) */ static void pe_boardname(struct qib_devdata *dd) { - char *n; - u32 boardid, namelen; + u32 boardid; boardid = SYM_FIELD(dd->revision, Revision, BoardID); switch (boardid) { case 2: - n = "InfiniPath_QLE7140"; + dd->boardname = "InfiniPath_QLE7140"; break; default: qib_dev_err(dd, "Unknown 6120 board with ID %u\n", boardid); - n = "Unknown_InfiniPath_6120"; + dd->boardname = "Unknown_InfiniPath_6120"; break; } - namelen = strlen(n) + 1; - dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (dd->boardname) - snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2) qib_dev_err(dd, - "Unsupported InfiniPath hardware revision %u.%u!\n", - dd->majrev, dd->minrev); + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, - (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + (unsigned int)SYM_FIELD(dd->revision, Revision_R, Arch), dd->majrev, dd->minrev, - (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); - + (unsigned int)SYM_FIELD(dd->revision, Revision_R, SW)); } /* diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 49cd6e3beb72..04bdd3d487b1 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ 
b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -2050,41 +2050,35 @@ static void qib_setup_7220_interrupt(struct qib_devdata *dd) */ static void qib_7220_boardname(struct qib_devdata *dd) { - char *n; - u32 boardid, namelen; + u32 boardid; boardid = SYM_FIELD(dd->revision, Revision, BoardID); switch (boardid) { case 1: - n = "InfiniPath_QLE7240"; + dd->boardname = "InfiniPath_QLE7240"; break; case 2: - n = "InfiniPath_QLE7280"; + dd->boardname = "InfiniPath_QLE7280"; break; default: qib_dev_err(dd, "Unknown 7220 board with ID %u\n", boardid); - n = "Unknown_InfiniPath_7220"; + dd->boardname = "Unknown_InfiniPath_7220"; break; } - namelen = strlen(n) + 1; - dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (dd->boardname) - snprintf(dd->boardname, namelen, "%s", n); - if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2) qib_dev_err(dd, - "Unsupported InfiniPath hardware revision %u.%u!\n", - dd->majrev, dd->minrev); + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, - (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + (unsigned int)SYM_FIELD(dd->revision, Revision_R, Arch), dd->majrev, dd->minrev, - (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); + (unsigned int)SYM_FIELD(dd->revision, Revision_R, SW)); } /* diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 2653064ce9e9..92ae68c8e76f 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -3582,75 +3582,69 @@ bail:; static unsigned qib_7322_boardname(struct qib_devdata *dd) { /* Will need enumeration of board-types here */ - char *n; - u32 boardid, namelen; - unsigned features = DUAL_PORT_CAP; + u32 boardid; + unsigned int features = DUAL_PORT_CAP; boardid = SYM_FIELD(dd->revision, Revision, BoardID); switch (boardid) { case 0: - n = "InfiniPath_QLE7342_Emulation"; + dd->boardname = "InfiniPath_QLE7342_Emulation"; break; case 1: - n = "InfiniPath_QLE7340"; + dd->boardname = "InfiniPath_QLE7340"; dd->flags |= QIB_HAS_QSFP; features = PORT_SPD_CAP; break; case 2: - n = "InfiniPath_QLE7342"; + dd->boardname = "InfiniPath_QLE7342"; dd->flags |= QIB_HAS_QSFP; break; case 3: - n = "InfiniPath_QMI7342"; + dd->boardname = "InfiniPath_QMI7342"; break; case 4: - n = "InfiniPath_Unsupported7342"; + dd->boardname = "InfiniPath_Unsupported7342"; qib_dev_err(dd, "Unsupported version of QMH7342\n"); features = 0; break; case BOARD_QMH7342: - n = "InfiniPath_QMH7342"; + dd->boardname = "InfiniPath_QMH7342"; features = 0x24; break; case BOARD_QME7342: - n = "InfiniPath_QME7342"; + dd->boardname = "InfiniPath_QME7342"; break; case 8: - n = "InfiniPath_QME7362"; + dd->boardname = "InfiniPath_QME7362"; dd->flags |= QIB_HAS_QSFP; break; case BOARD_QMH7360: - n = "Intel IB QDR 1P FLR-QSFP Adptr"; + dd->boardname = "Intel IB QDR 1P FLR-QSFP Adptr"; dd->flags |= QIB_HAS_QSFP; break; case 15: - n = "InfiniPath_QLE7342_TEST"; + dd->boardname = "InfiniPath_QLE7342_TEST"; dd->flags |= QIB_HAS_QSFP; break; default: - n = "InfiniPath_QLE73xy_UNKNOWN"; + dd->boardname = "InfiniPath_QLE73xy_UNKNOWN"; qib_dev_err(dd, "Unknown 7322 board type %u\n", boardid); break; } dd->board_atten = 1; /* index into txdds_Xdr */ - namelen = strlen(n) + 1; - dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (dd->boardname) - snprintf(dd->boardname, namelen, "%s", n); - snprintf(dd->boardversion, 
sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, - (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + (unsigned int)SYM_FIELD(dd->revision, Revision_R, Arch), dd->majrev, dd->minrev, - (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); + (unsigned int)SYM_FIELD(dd->revision, Revision_R, SW)); if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) { qib_devinfo(dd->pcidev, - "IB%u: Forced to single port mode by module parameter\n", - dd->unit); + "IB%u: Forced to single port mode by module parameter\n", + dd->unit); features &= PORT_SPD_CAP; } diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index cce4ab468b29..c5a4c65636d6 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1398,7 +1398,6 @@ static void cleanup_device_data(struct qib_devdata *dd) qib_free_ctxtdata(dd, rcd); } kfree(tmp); - kfree(dd->boardname); } /* -- cgit v1.2.3-59-g8ed1b From 27147273a68ecf5738ef87ccc6cd4bb8883fca84 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Mon, 21 Aug 2017 18:26:32 -0700 Subject: IB/qib: Stricter bounds checking for copy and array access Add bounds checking on the index into the 'guids' array in qib_ruc.c. Pass the correct array sizes to the memset() operations in qib_mad.c. Reviewed-by: Michael J. Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_mad.c | 4 ++-- drivers/infiniband/hw/qib/qib_ruc.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 549c71971a1f..82d9da9b6997 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1573,8 +1573,8 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, cntrs.port_xmit_packets -= ibp->z_port_xmit_packets; cntrs.port_rcv_packets -= ibp->z_port_rcv_packets; - memset(pmp->reserved, 0, sizeof(pmp->reserved) + - sizeof(pmp->data)); + memset(pmp->reserved, 0, sizeof(pmp->reserved)); + memset(pmp->data, 0, sizeof(pmp->data)); /* * Set top 3 bits to indicate interval in picoseconds in diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index e6a42a8972f5..53efbb0b40c4 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -645,8 +645,10 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, hdr->hop_limit = grh->hop_limit; /* The SGID is 32-bit aligned. */ hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix; - hdr->sgid.global.interface_id = grh->sgid_index ? - ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid; + if (!grh->sgid_index) + hdr->sgid.global.interface_id = ppd_from_ibp(ibp)->guid; + else if (grh->sgid_index < QIB_GUIDS_PER_PORT) + hdr->sgid.global.interface_id = ibp->guids[grh->sgid_index - 1]; hdr->dgid = grh->dgid; /* GRH header size in 32-bit words. */ -- cgit v1.2.3-59-g8ed1b From de42de80d7bb4f82e30d16f5c64da90e64b6c644 Mon Sep 17 00:00:00 2001 From: Grzegorz Morys Date: Mon, 21 Aug 2017 18:26:38 -0700 Subject: IB/hfi1: Ratelimit prints from sdma_interrupt Ratelimit error prints from the sdma_interrupt() function, which could otherwise flood dmesg.
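[Illustrative aside, not part of this patch: a minimal sketch of the ratelimiting pattern being applied. dev_err_ratelimited() is the stock kernel helper that the new dd_dev_err_ratelimited() macro below wraps; the demo handler itself is hypothetical.]

	#include <linux/device.h>
	#include <linux/interrupt.h>

	static irqreturn_t demo_isr(int irq, void *data)
	{
		struct device *dev = data;

		/* Only a bounded burst of these reaches the log per
		 * interval, so a misbehaving interrupt source cannot
		 * flood dmesg. */
		dev_err_ratelimited(dev, "spurious interrupt %d\n", irq);
		return IRQ_HANDLED;
	}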
Reviewed-by: Dennis Dalessandro Signed-off-by: Grzegorz Morys Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 ++-- drivers/infiniband/hw/hfi1/hfi.h | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index eb27c7fbfebd..b2ed4b9cda6e 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8290,8 +8290,8 @@ static irqreturn_t sdma_interrupt(int irq, void *data) /* handle the interrupt(s) */ sdma_engine_interrupt(sde, status); } else { - dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n", - sde->this_idx); + dd_dev_err_ratelimited(dd, "SDMA engine %u interrupt, but no status bits set\n", + sde->this_idx); } return IRQ_HANDLED; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index eadb735d958c..3ac9c307a285 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -2119,9 +2119,15 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) #define dd_dev_emerg(dd, fmt, ...) \ dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \ get_unit_name((dd)->unit), ##__VA_ARGS__) + #define dd_dev_err(dd, fmt, ...) \ dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ get_unit_name((dd)->unit), ##__VA_ARGS__) + +#define dd_dev_err_ratelimited(dd, fmt, ...) \ + dev_err_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \ + get_unit_name((dd)->unit), ##__VA_ARGS__) + #define dd_dev_warn(dd, fmt, ...) \ dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ get_unit_name((dd)->unit), ##__VA_ARGS__) -- cgit v1.2.3-59-g8ed1b From 7956371ea48bd00956219a82fd3af655dd216073 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 21 Aug 2017 18:26:45 -0700 Subject: IB/hfi1: Improve local kmem_cache_alloc performance Performance analysis shows that the cache constructor callback sdma_kmem_cache_ctor() accounts for half of the kmem_cache_alloc() time. Since all fields of the allocated data structure are initialized later in the code path anyway, remove the _ctor function. Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J.
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 9 +-------- drivers/infiniband/hw/hfi1/verbs_txreq.c | 11 ++--------- drivers/infiniband/hw/hfi1/vnic_sdma.c | 13 +++---------- 3 files changed, 6 insertions(+), 27 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index aae1f40016e4..9b89df584943 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -331,13 +331,6 @@ static void activate_packet_queue(struct iowait *wait, int reason) wake_up(&wait->wait_dma); }; -static void sdma_kmem_cache_ctor(void *obj) -{ - struct user_sdma_txreq *tx = obj; - - memset(tx, 0, sizeof(*tx)); -} - int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_filedata *fd) { @@ -391,7 +384,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, sizeof(struct user_sdma_txreq), L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, - sdma_kmem_cache_ctor); + NULL); if (!pq->txreq_cache) { dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n", uctxt->ctxt); diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index 5d23172c470f..873e48ea923f 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -119,13 +119,6 @@ out: return tx; } -static void verbs_txreq_kmem_cache_ctor(void *obj) -{ - struct verbs_txreq *tx = (struct verbs_txreq *)obj; - - memset(tx, 0, sizeof(*tx)); -} - int verbs_txreq_init(struct hfi1_ibdev *dev) { char buf[TXREQ_LEN]; @@ -135,7 +128,7 @@ int verbs_txreq_init(struct hfi1_ibdev *dev) dev->verbs_txreq_cache = kmem_cache_create(buf, sizeof(struct verbs_txreq), 0, SLAB_HWCACHE_ALIGN, - verbs_txreq_kmem_cache_ctor); + NULL); if (!dev->verbs_txreq_cache) return -ENOMEM; return 0; diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index 7815d7405462..c3c96c5869ed 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -303,22 +303,15 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo) } } -static void hfi1_vnic_txreq_kmem_cache_ctor(void *obj) -{ - struct vnic_txreq *tx = (struct vnic_txreq *)obj; - - memset(tx, 0, sizeof(*tx)); -} - int hfi1_vnic_txreq_init(struct hfi1_devdata *dd) { char buf[HFI1_VNIC_TXREQ_NAME_LEN]; snprintf(buf, sizeof(buf), "hfi1_%u_vnic_txreq_cache", dd->unit); dd->vnic.txreq_cache = kmem_cache_create(buf, - sizeof(struct vnic_txreq), - 0, SLAB_HWCACHE_ALIGN, - hfi1_vnic_txreq_kmem_cache_ctor); + sizeof(struct vnic_txreq), + 0, SLAB_HWCACHE_ALIGN, + NULL); if (!dd->vnic.txreq_cache) return -ENOMEM; return 0; -- cgit v1.2.3-59-g8ed1b From 9dc117095570e7a230f8f9229a70713c335d9e2a Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:26:51 -0700 Subject: IB/hfi1: Clean up hfi1_user_exp_rcv_setup function Clean up hfi1_user_exp_rcv_setup function by moving page pinning and unpinning related code to separate functions. In order to reduce the number of parameters passed between functions, a new data structure struct tid_user_buf is defined and used. 
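[Illustrative aside: for orientation, this is the parameter object the patch introduces, as defined in user_exp_rcv.h further down in the diff; the field annotations are added here and are not in the source.]

	struct tid_user_buf {
		unsigned long vaddr;       /* user virtual address of the buffer */
		unsigned long length;      /* buffer length in bytes */
		unsigned int npages;       /* number of pages the buffer spans */
		struct page **pages;       /* array of pinned pages */
		struct tid_pageset *psets; /* physically contiguous chunks */
		unsigned int n_psets;      /* entries used in psets */
	};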
Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 232 ++++++++++++++++++------------ drivers/infiniband/hw/hfi1/user_exp_rcv.h | 9 ++ 2 files changed, 153 insertions(+), 88 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index d9036ba3f4eb..04be178b72f8 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -75,20 +75,21 @@ struct tid_pageset { static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, struct exp_tid_set *set, struct hfi1_filedata *fd); -static u32 find_phys_blocks(struct page **pages, unsigned npages, - struct tid_pageset *list); -static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, +static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages); +static int set_rcvarray_entry(struct hfi1_filedata *fd, + struct tid_user_buf *tbuf, u32 rcventry, struct tid_group *grp, - struct page **pages, unsigned npages); + u16 pageidx, unsigned int npages); static int tid_rb_insert(void *arg, struct mmu_rb_node *node); static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, struct tid_rb_node *tnode); static void tid_rb_remove(void *arg, struct mmu_rb_node *node); static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode); -static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, - struct tid_group *grp, struct tid_pageset *sets, - unsigned start, u16 count, struct page **pages, - u32 *tidlist, unsigned *tididx, unsigned *pmapped); +static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, + struct tid_group *grp, + unsigned int start, u16 count, + u32 *tidlist, unsigned int *tididx, + unsigned int *pmapped); static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, struct tid_group **grp); static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); @@ -198,6 +199,92 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) fd->entry_to_rb = NULL; } +/** + * Release pinned receive buffer pages. + * + * @mapped - true if the pages have been DMA mapped. false otherwise. + * @idx - Index of the first page to unpin. + * @npages - No of pages to unpin. + * + * If the pages have been DMA mapped (indicated by mapped parameter), their + * info will be passed via a struct tid_rb_node. If they haven't been mapped, + * their info will be passed via a struct tid_user_buf. + */ +static void unpin_rcv_pages(struct hfi1_filedata *fd, + struct tid_user_buf *tidbuf, + struct tid_rb_node *node, + unsigned int idx, + unsigned int npages, + bool mapped) +{ + struct page **pages; + struct hfi1_devdata *dd = fd->uctxt->dd; + + if (mapped) { + pci_unmap_single(dd->pcidev, node->dma_addr, + node->mmu.len, PCI_DMA_FROMDEVICE); + pages = &node->pages[idx]; + } else { + pages = &tidbuf->pages[idx]; + } + hfi1_release_user_pages(fd->mm, pages, npages, mapped); + fd->tid_n_pinned -= npages; +} + +/** + * Pin receive buffer pages. 
+ */ +static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) +{ + int pinned; + unsigned int npages; + unsigned long vaddr = tidbuf->vaddr; + struct page **pages = NULL; + struct hfi1_devdata *dd = fd->uctxt->dd; + + /* Get the number of pages the user buffer spans */ + npages = num_user_pages(vaddr, tidbuf->length); + if (!npages) + return -EINVAL; + + if (npages > fd->uctxt->expected_count) { + dd_dev_err(dd, "Expected buffer too big\n"); + return -EINVAL; + } + + /* Verify that access is OK for the user buffer */ + if (!access_ok(VERIFY_WRITE, (void __user *)vaddr, + npages * PAGE_SIZE)) { + dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n", + (void *)vaddr, npages); + return -EFAULT; + } + /* Allocate the array of struct page pointers needed for pinning */ + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + /* + * Pin all the pages of the user buffer. If we can't pin all the + * pages, accept the amount pinned so far and program only that. + * User space knows how to deal with partially programmed buffers. + */ + if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) { + kfree(pages); + return -ENOMEM; + } + + pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages); + if (pinned <= 0) { + kfree(pages); + return pinned; + } + tidbuf->pages = pages; + tidbuf->npages = npages; + fd->tid_n_pinned += pinned; + return pinned; +} + /* * RcvArray entry allocation for Expected Receives is done by the * following algorithm: @@ -253,62 +340,33 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, int ret = 0, need_group = 0, pinned; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; - unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets, + unsigned int ngroups, pageidx = 0, pageset_count, tididx = 0, mapped, mapped_pages = 0; - unsigned long vaddr = tinfo->vaddr; - struct page **pages = NULL; u32 *tidlist = NULL; - struct tid_pageset *pagesets = NULL; - - /* Get the number of pages the user buffer spans */ - npages = num_user_pages(vaddr, tinfo->length); - if (!npages) - return -EINVAL; - - if (npages > uctxt->expected_count) { - dd_dev_err(dd, "Expected buffer too big\n"); - return -EINVAL; - } - - /* Verify that access is OK for the user buffer */ - if (!access_ok(VERIFY_WRITE, (void __user *)vaddr, - npages * PAGE_SIZE)) { - dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n", - (void *)vaddr, npages); - return -EFAULT; - } + struct tid_user_buf *tidbuf; - pagesets = kcalloc(uctxt->expected_count, sizeof(*pagesets), - GFP_KERNEL); - if (!pagesets) + tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); + if (!tidbuf) return -ENOMEM; - /* Allocate the array of struct page pointers needed for pinning */ - pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) { - ret = -ENOMEM; - goto bail; - } - - /* - * Pin all the pages of the user buffer. If we can't pin all the - * pages, accept the amount pinned so far and program only that. - * User space knows how to deal with partially programmed buffers. 
- */ - if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) { - ret = -ENOMEM; - goto bail; + tidbuf->vaddr = tinfo->vaddr; + tidbuf->length = tinfo->length; + tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), + GFP_KERNEL); + if (!tidbuf->psets) { + kfree(tidbuf); + return -ENOMEM; } - pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages); + pinned = pin_rcv_pages(fd, tidbuf); if (pinned <= 0) { - ret = pinned; - goto bail; + kfree(tidbuf->psets); + kfree(tidbuf); + return pinned; } - fd->tid_n_pinned += npages; /* Find sets of physically contiguous pages */ - npagesets = find_phys_blocks(pages, pinned, pagesets); + tidbuf->n_psets = find_phys_blocks(tidbuf, pinned); /* * We don't need to access this under a lock since tid_used is per @@ -316,10 +374,10 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, * and hfi1_user_exp_rcv_setup() at the same time. */ spin_lock(&fd->tid_lock); - if (fd->tid_used + npagesets > fd->tid_limit) + if (fd->tid_used + tidbuf->n_psets > fd->tid_limit) pageset_count = fd->tid_limit - fd->tid_used; else - pageset_count = npagesets; + pageset_count = tidbuf->n_psets; spin_unlock(&fd->tid_lock); if (!pageset_count) @@ -347,9 +405,9 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, struct tid_group *grp = tid_group_pop(&uctxt->tid_group_list); - ret = program_rcvarray(fd, vaddr, grp, pagesets, + ret = program_rcvarray(fd, tidbuf, grp, pageidx, dd->rcv_entries.group_size, - pages, tidlist, &tididx, &mapped); + tidlist, &tididx, &mapped); /* * If there was a failure to program the RcvArray * entries for the entire group, reset the grp fields @@ -393,8 +451,8 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned use = min_t(unsigned, pageset_count - pageidx, grp->size - grp->used); - ret = program_rcvarray(fd, vaddr, grp, pagesets, - pageidx, use, pages, tidlist, + ret = program_rcvarray(fd, tidbuf, grp, + pageidx, use, tidlist, &tididx, &mapped); if (ret < 0) { hfi1_cdbg(TID, @@ -454,16 +512,14 @@ nomem: * If not everything was mapped (due to insufficient RcvArray entries, * for example), unpin all unmapped pages so we can pin them nex time. */ - if (mapped_pages != pinned) { - hfi1_release_user_pages(fd->mm, &pages[mapped_pages], - pinned - mapped_pages, - false); - fd->tid_n_pinned -= pinned - mapped_pages; - } + if (mapped_pages != pinned) + unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, + (pinned - mapped_pages), false); bail: - kfree(pagesets); - kfree(pages); + kfree(tidbuf->psets); kfree(tidlist); + kfree(tidbuf->pages); + kfree(tidbuf); return ret > 0 ? 
0 : ret; } @@ -553,11 +609,12 @@ int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, return ret; } -static u32 find_phys_blocks(struct page **pages, unsigned npages, - struct tid_pageset *list) +static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages) { unsigned pagecount, pageidx, setcount = 0, i; unsigned long pfn, this_pfn; + struct page **pages = tidbuf->pages; + struct tid_pageset *list = tidbuf->psets; if (!npages) return 0; @@ -620,13 +677,13 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages, /** * program_rcvarray() - program an RcvArray group with receive buffers * @fd: filedata pointer - * @vaddr: starting user virtual address + * @tbuf: pointer to struct tid_user_buf that has the user buffer starting + * virtual address, buffer length, page pointers, pagesets (array of + * struct tid_pageset holding information on physically contiguous + * chunks from the user buffer), and other fields. * @grp: RcvArray group - * @sets: array of struct tid_pageset holding information on physically - * contiguous chunks from the user buffer * @start: starting index into sets array * @count: number of struct tid_pageset's to program - * @pages: an array of struct page * for the user buffer * @tidlist: the array of u32 elements when the information about the * programmed RcvArray entries is to be encoded. * @tididx: starting offset into tidlist @@ -644,11 +701,11 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages, * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or * number of RcvArray entries programmed. */ -static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, +static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *tbuf, struct tid_group *grp, - struct tid_pageset *sets, - unsigned start, u16 count, struct page **pages, - u32 *tidlist, unsigned *tididx, unsigned *pmapped) + unsigned int start, u16 count, + u32 *tidlist, unsigned int *tididx, + unsigned int *pmapped) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; @@ -687,11 +744,11 @@ static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, } rcventry = grp->base + useidx; - npages = sets[setidx].count; - pageidx = sets[setidx].idx; + npages = tbuf->psets[setidx].count; + pageidx = tbuf->psets[setidx].idx; - ret = set_rcvarray_entry(fd, vaddr + (pageidx * PAGE_SIZE), - rcventry, grp, pages + pageidx, + ret = set_rcvarray_entry(fd, tbuf, + rcventry, grp, pageidx, npages); if (ret) return ret; @@ -712,15 +769,17 @@ static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, return idx; } -static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, +static int set_rcvarray_entry(struct hfi1_filedata *fd, + struct tid_user_buf *tbuf, u32 rcventry, struct tid_group *grp, - struct page **pages, unsigned npages) + u16 pageidx, unsigned int npages) { int ret; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct tid_rb_node *node; struct hfi1_devdata *dd = uctxt->dd; dma_addr_t phys; + struct page **pages = tbuf->pages + pageidx; /* * Allocate the node first so we can handle a potential @@ -741,7 +800,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, return -EFAULT; } - node->mmu.addr = vaddr; + node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE); node->mmu.len = npages * PAGE_SIZE; node->phys = page_to_phys(pages[0]); node->npages = npages; @@ -820,10 +879,7 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) 
*/ hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); - pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, - PCI_DMA_FROMDEVICE); - hfi1_release_user_pages(fd->mm, node->pages, node->npages, true); - fd->tid_n_pinned -= node->npages; + unpin_rcv_pages(fd, NULL, node, 0, node->npages, true); node->grp->used--; node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 6cbaa4cf4970..8c4eb5d8547e 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -51,6 +51,15 @@ #include "exp_rcv.h" +struct tid_user_buf { + unsigned long vaddr; + unsigned long length; + unsigned int npages; + struct page **pages; + struct tid_pageset *psets; + unsigned int n_psets; +}; + int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); -- cgit v1.2.3-59-g8ed1b From 624b9ac15c3cf0896721e76f4e23ab0d25148119 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:26:57 -0700 Subject: IB/hfi1: Clean up user_sdma_send_pkts() function user_sdma_send_pkts() function is unnecessarily long. Clean it up by moving some of its code into separate functions. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 141 +++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 59 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 9b89df584943..d5a2572cc5a1 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -847,6 +847,84 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len) return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len); } +static int user_sdma_txadd_ahg(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + u32 datalen) +{ + int ret; + u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); + u32 lrhlen = get_lrh_len(req->hdr, pad_len(datalen)); + struct hfi1_user_sdma_pkt_q *pq = req->pq; + + /* + * Copy the request header into the tx header + * because the HW needs a cacheline-aligned + * address. + * This copy can be optimized out if the hdr + * member of user_sdma_request were also + * cacheline aligned. 
+ */ + memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr)); + if (PBC2LRH(pbclen) != lrhlen) { + pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen); + tx->hdr.pbc[0] = cpu_to_le16(pbclen); + } + ret = check_header_template(req, &tx->hdr, lrhlen, datalen); + if (ret) + return ret; + ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY, + sizeof(tx->hdr) + datalen, req->ahg_idx, + 0, NULL, 0, user_sdma_txreq_cb); + if (ret) + return ret; + ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr)); + if (ret) + sdma_txclean(pq->dd, &tx->txreq); + return ret; +} + +static int user_sdma_txadd(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + struct user_sdma_iovec *iovec, u32 datalen, + u32 *queued_ptr, u32 *data_sent_ptr, + u64 *iov_offset_ptr) +{ + int ret; + unsigned int pageidx, len; + unsigned long base, offset; + u64 iov_offset = *iov_offset_ptr; + u32 queued = *queued_ptr, data_sent = *data_sent_ptr; + struct hfi1_user_sdma_pkt_q *pq = req->pq; + + base = (unsigned long)iovec->iov.iov_base; + offset = offset_in_page(base + iovec->offset + iov_offset); + pageidx = (((iovec->offset + iov_offset + base) - (base & PAGE_MASK)) >> + PAGE_SHIFT); + len = offset + req->info.fragsize > PAGE_SIZE ? + PAGE_SIZE - offset : req->info.fragsize; + len = min((datalen - queued), len); + ret = sdma_txadd_page(pq->dd, &tx->txreq, iovec->pages[pageidx], + offset, len); + if (ret) { + SDMA_DBG(req, "SDMA txreq add page failed %d\n", ret); + return ret; + } + iov_offset += len; + queued += len; + data_sent += len; + if (unlikely(queued < datalen && pageidx == iovec->npages && + req->iov_idx < req->data_iovs - 1)) { + iovec->offset += iov_offset; + iovec = &req->iovs[++req->iov_idx]; + iov_offset = 0; + } + + *queued_ptr = queued; + *data_sent_ptr = data_sent; + *iov_offset_ptr = iov_offset; + return ret; +} + static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) { int ret = 0, count; @@ -943,39 +1021,9 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) if (req->ahg_idx >= 0) { if (!req->seqnum) { - u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); - u32 lrhlen = get_lrh_len(req->hdr, - pad_len(datalen)); - /* - * Copy the request header into the tx header - * because the HW needs a cacheline-aligned - * address. - * This copy can be optimized out if the hdr - * member of user_sdma_request were also - * cacheline aligned. - */ - memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr)); - if (PBC2LRH(pbclen) != lrhlen) { - pbclen = (pbclen & 0xf000) | - LRH2PBC(lrhlen); - tx->hdr.pbc[0] = cpu_to_le16(pbclen); - } - ret = check_header_template(req, &tx->hdr, - lrhlen, datalen); - if (ret) - goto free_tx; - ret = sdma_txinit_ahg(&tx->txreq, - SDMA_TXREQ_F_AHG_COPY, - sizeof(tx->hdr) + datalen, - req->ahg_idx, 0, NULL, 0, - user_sdma_txreq_cb); + ret = user_sdma_txadd_ahg(req, tx, datalen); if (ret) goto free_tx; - ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, - &tx->hdr, - sizeof(tx->hdr)); - if (ret) - goto free_txreq; } else { int changes; @@ -1006,35 +1054,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) */ while (queued < datalen && (req->sent + data_sent) < req->data_len) { - unsigned long base, offset; - unsigned pageidx, len; - - base = (unsigned long)iovec->iov.iov_base; - offset = offset_in_page(base + iovec->offset + - iov_offset); - pageidx = (((iovec->offset + iov_offset + - base) - (base & PAGE_MASK)) >> PAGE_SHIFT); - len = offset + req->info.fragsize > PAGE_SIZE ? 
- PAGE_SIZE - offset : req->info.fragsize; - len = min((datalen - queued), len); - ret = sdma_txadd_page(pq->dd, &tx->txreq, - iovec->pages[pageidx], - offset, len); - if (ret) { - SDMA_DBG(req, "SDMA txreq add page failed %d\n", - ret); + ret = user_sdma_txadd(req, tx, iovec, datalen, + &queued, &data_sent, &iov_offset); + if (ret) goto free_txreq; - } - iov_offset += len; - queued += len; - data_sent += len; - if (unlikely(queued < datalen && - pageidx == iovec->npages && - req->iov_idx < req->data_iovs - 1)) { - iovec->offset += iov_offset; - iovec = &req->iovs[++req->iov_idx]; - iov_offset = 0; - } } /* * The txreq was submitted successfully so we can update -- cgit v1.2.3-59-g8ed1b From 4c6c9aa6cbb0574c96635f3e4e1c5791e8fafca7 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:27:03 -0700 Subject: IB/hfi1: Clean up pin_vector_pages() function Clean up pin_vector_pages() function by moving page pinning related code to a separate function since it really stands on its own. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 79 +++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 34 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index d5a2572cc5a1..6f26253d2b7a 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1124,11 +1124,53 @@ static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) return evict_data.cleared; } +static int pin_sdma_pages(struct user_sdma_request *req, + struct user_sdma_iovec *iovec, + struct sdma_mmu_node *node, + int npages) +{ + int pinned, cleared; + struct page **pages; + struct hfi1_user_sdma_pkt_q *pq = req->pq; + + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) { + SDMA_DBG(req, "Failed page array alloc"); + return -ENOMEM; + } + memcpy(pages, node->pages, node->npages * sizeof(*pages)); + + npages -= node->npages; +retry: + if (!hfi1_can_pin_pages(pq->dd, pq->mm, + atomic_read(&pq->n_locked), npages)) { + cleared = sdma_cache_evict(pq, npages); + if (cleared >= npages) + goto retry; + } + pinned = hfi1_acquire_user_pages(pq->mm, + ((unsigned long)iovec->iov.iov_base + + (node->npages * PAGE_SIZE)), npages, 0, + pages + node->npages); + if (pinned < 0) { + kfree(pages); + return pinned; + } + if (pinned != npages) { + unpin_vector_pages(pq->mm, pages, node->npages, pinned); + return -EFAULT; + } + kfree(node->pages); + node->rb.len = iovec->iov.iov_len; + node->pages = pages; + atomic_add(pinned, &pq->n_locked); + return pinned; +} + static int pin_vector_pages(struct user_sdma_request *req, struct user_sdma_iovec *iovec) { - int ret = 0, pinned, npages, cleared; - struct page **pages; + int ret = 0, pinned, npages; struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; @@ -1162,44 +1204,13 @@ static int pin_vector_pages(struct user_sdma_request *req, npages = num_user_pages(&iovec->iov); if (node->npages < npages) { - pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) { - SDMA_DBG(req, "Failed page array alloc"); - ret = -ENOMEM; - goto bail; - } - memcpy(pages, node->pages, node->npages * sizeof(*pages)); - - npages -= node->npages; - -retry: - if (!hfi1_can_pin_pages(pq->dd, pq->mm, - atomic_read(&pq->n_locked), npages)) { - cleared = sdma_cache_evict(pq, 
npages); - if (cleared >= npages) - goto retry; - } - pinned = hfi1_acquire_user_pages(pq->mm, - ((unsigned long)iovec->iov.iov_base + - (node->npages * PAGE_SIZE)), npages, 0, - pages + node->npages); + pinned = pin_sdma_pages(req, iovec, node, npages); if (pinned < 0) { - kfree(pages); ret = pinned; goto bail; } - if (pinned != npages) { - unpin_vector_pages(pq->mm, pages, node->npages, - pinned); - ret = -EFAULT; - goto bail; - } - kfree(node->pages); - node->rb.len = iovec->iov.iov_len; - node->pages = pages; node->npages += pinned; npages = node->npages; - atomic_add(pinned, &pq->n_locked); } iovec->pages = node->pages; iovec->npages = npages; -- cgit v1.2.3-59-g8ed1b From 04a646df12766e4e11ecf09cd0336dab69d75034 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:27:09 -0700 Subject: IB/hfi1: Fix the bail out code in pin_vector_pages() function In pin_vector_pages() function, if there is any error while pinning the pages or while adding a pinned buffer to the cache, the bail out code needs to unpin any pinned pages that are not in the cache and adjust the n_locked counter that counts the total pages pinned. The current bail out code doesn't seem to be doing it right in two cases: 1. Before pinning required pages for a buffer, the SDMA pinned buffer cache is searched to see if the virtual address range that needs to be pinned is already pinned. If there isn't a hit in the cache, a new node is created for the buffer and is added to the cache after the buffer is pinned. If adding the new node to the cache fails, the n_locked count is decremented properly but the pinned pages are not freed. This commit fixes this issue. 2. If there is a hit in the SDMA cache, but the cached buffer doesn't have enough pages to cover the entire address range that needs to be pinned, the node for the cached buffer is extracted from the cache, remaining pages needed are pinned and added to the node. The node is finally added back into the cache. If there is an error pinning the extra pages, the bail out code frees all the pages in the node but the n_locked count is not being decremented by the no of pages in the node that are freed. This commit fixes this issue. This commit fixes the above two issues by creating a new function that frees the pages in a node and decrements the n_locked count by the number of pages freed. 
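[Illustrative aside: the new helper at the center of this fix, reproduced from the hunk below with annotations added. Both the pin_vector_pages() bail path and sdma_rb_remove() now funnel through it, so the released pages and the n_locked counter can no longer go out of sync.]

	static void unpin_sdma_pages(struct sdma_mmu_node *node)
	{
		if (node->npages) {
			/* Release every page the node still holds... */
			unpin_vector_pages(node->pq->mm, node->pages, 0,
					   node->npages);
			/* ...and debit the pinned-page accounting by
			 * exactly that amount. */
			atomic_sub(node->npages, &node->pq->n_locked);
		}
	}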
Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 6f26253d2b7a..a3a9925f408a 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1167,6 +1167,14 @@ retry: return pinned; } +static void unpin_sdma_pages(struct sdma_mmu_node *node) +{ + if (node->npages) { + unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages); + atomic_sub(node->npages, &node->pq->n_locked); + } +} + static int pin_vector_pages(struct user_sdma_request *req, struct user_sdma_iovec *iovec) { @@ -1218,14 +1226,12 @@ static int pin_vector_pages(struct user_sdma_request *req, ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb); if (ret) { - atomic_sub(node->npages, &pq->n_locked); iovec->node = NULL; goto bail; } return 0; bail: - if (rb_node) - unpin_vector_pages(pq->mm, node->pages, 0, node->npages); + unpin_sdma_pages(node); kfree(node); return ret; } @@ -1671,10 +1677,7 @@ static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode) struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); - atomic_sub(node->npages, &node->pq->n_locked); - - unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages); - + unpin_sdma_pages(node); kfree(node); } -- cgit v1.2.3-59-g8ed1b From ddd3affb50015c3c7ee218b83e73e646aa48e212 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:27:16 -0700 Subject: IB/hfi1: Remove duplicate definitions of num_user_pages() function num_user_pages() function has been defined in both user_exp_rcv.c file and user_sdma.c file. Move the function definition to a header file so there is only one definition in the source repo. 
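[Illustrative aside: the consolidated inline, reproduced from the header hunk below with comments added, plus a worked example: with 4 KiB pages, addr = 0x1800 and len = 0x1000 give spage = 0x1000 and epage = 0x2000, so the buffer spans 1 + (0x1000 >> 12) = 2 pages even though len is exactly one page size.]

	static inline int num_user_pages(unsigned long addr,
					 unsigned long len)
	{
		/* First and last page frames touched by [addr, addr + len) */
		const unsigned long spage = addr & PAGE_MASK;
		const unsigned long epage = (addr + len - 1) & PAGE_MASK;

		return 1 + ((epage - spage) >> PAGE_SHIFT);
	}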
Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 5 ----- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 9 +++++++++ drivers/infiniband/hw/hfi1/user_sdma.c | 18 +++--------------- 3 files changed, 12 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 04be178b72f8..026790ee0e84 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -67,11 +67,6 @@ struct tid_pageset { u16 count; }; -#define num_user_pages(vaddr, len) \ - (1 + (((((unsigned long)(vaddr) + \ - (unsigned long)(len) - 1) & PAGE_MASK) - \ - ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) - static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, struct exp_tid_set *set, struct hfi1_filedata *fd); diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 8c4eb5d8547e..7461d11b01c4 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -60,6 +60,15 @@ struct tid_user_buf { unsigned int n_psets; }; +static inline int num_user_pages(unsigned long addr, + unsigned long len) +{ + const unsigned long spage = addr & PAGE_MASK; + const unsigned long epage = (addr + len - 1) & PAGE_MASK; + + return 1 + ((epage - spage) >> PAGE_SHIFT); +} + int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index a3a9925f408a..2837407d4daa 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -246,7 +246,6 @@ struct user_sdma_txreq { static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts); -static int num_user_pages(const struct iovec *iov); static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); static void user_sdma_free_request(struct user_sdma_request *req, bool unpin); @@ -1101,19 +1100,6 @@ free_tx: return ret; } -/* - * How many pages in this iovec element? 
- */ -static inline int num_user_pages(const struct iovec *iov) -{ - const unsigned long addr = (unsigned long)iov->iov_base; - const unsigned long len = iov->iov_len; - const unsigned long spage = addr & PAGE_MASK; - const unsigned long epage = (addr + len - 1) & PAGE_MASK; - - return 1 + ((epage - spage) >> PAGE_SHIFT); -} - static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) { struct evict_data evict_data; @@ -1182,6 +1168,7 @@ static int pin_vector_pages(struct user_sdma_request *req, struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; + struct iovec *iov; bool extracted; extracted = @@ -1210,7 +1197,8 @@ static int pin_vector_pages(struct user_sdma_request *req, atomic_set(&node->refcount, 0); } - npages = num_user_pages(&iovec->iov); + iov = &iovec->iov; + npages = num_user_pages((unsigned long)iov->iov_base, iov->iov_len); if (node->npages < npages) { pinned = pin_sdma_pages(req, iovec, node, npages); if (pinned < 0) { -- cgit v1.2.3-59-g8ed1b From 637f4600a8d3be44146ad7fbb5188484c3b0a1d4 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:27:23 -0700 Subject: IB/hfi1: Move structure definitions from user_exp_rcv.c to user_exp_rcv.h Clean up user_exp_rcv.c by moving structure definitions into the header file user_exp_rcv.h. Since these structure definitions depend on the structure definitions in mmu_rb.h, move the #include "mmu_rb.h" directive above #include "user_exp_rcv.h", and above any header file that itself includes user_exp_rcv.h Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 18 +----------------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 16 ++++++++++++++++ drivers/infiniband/hw/hfi1/user_sdma.c | 2 +- 4 files changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index ab8eb2bf48d8..e0fd8fc0a7ab 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -58,10 +58,10 @@ #include "device.h" #include "common.h" #include "trace.h" +#include "mmu_rb.h" #include "user_sdma.h" #include "user_exp_rcv.h" #include "aspm.h" -#include "mmu_rb.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 026790ee0e84..6f6c14df383e 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -47,25 +47,9 @@ #include #include +#include "mmu_rb.h" #include "user_exp_rcv.h" #include "trace.h" -#include "mmu_rb.h" -struct tid_rb_node { - struct mmu_rb_node mmu; - unsigned long phys; - struct tid_group *grp; - u32 rcventry; - dma_addr_t dma_addr; - bool freed; - unsigned npages; - struct page *pages[0]; -}; - -struct tid_pageset { - u16 idx; - u16 count; -}; static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, struct exp_tid_set *set, diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 7461d11b01c4..e383cc01a2bf 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -51,6 +51,11 @@ #include "exp_rcv.h" +struct tid_pageset { + u16 idx; + u16 count; +}; + struct tid_user_buf { unsigned long vaddr; unsigned long length; @@ -60,6 +65,17 @@
struct tid_user_buf { unsigned int n_psets; }; +struct tid_rb_node { + struct mmu_rb_node mmu; + unsigned long phys; + struct tid_group *grp; + u32 rcventry; + dma_addr_t dma_addr; + bool freed; + unsigned int npages; + struct page *pages[0]; +}; + static inline int num_user_pages(unsigned long addr, unsigned long len) { diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 2837407d4daa..8a1653ab2f65 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -64,11 +64,11 @@ #include "hfi.h" #include "sdma.h" +#include "mmu_rb.h" #include "user_sdma.h" #include "verbs.h" /* for the headers */ #include "common.h" /* for struct hfi1_tid_info */ #include "trace.h" -#include "mmu_rb.h" static uint hfi1_sdma_comp_ring_size = 128; module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO); -- cgit v1.2.3-59-g8ed1b From 32500f2763906602725006f1dc833c8ea28dfd07 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:27:29 -0700 Subject: IB/hfi1: Move structure and MACRO definitions in user_sdma.c to user_sdma.h Clean up user_sdma.c by moving the structure and MACRO definitions into the header file user_sdma.h Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 168 --------------------------------- drivers/infiniband/hw/hfi1/user_sdma.h | 166 ++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 168 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8a1653ab2f65..dacb0fce49c6 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -74,176 +74,8 @@ static uint hfi1_sdma_comp_ring_size = 128; module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO); MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128"); -/* The maximum number of Data io vectors per message/request */ -#define MAX_VECTORS_PER_REQ 8 -/* - * Maximum number of packet to send from each message/request - * before moving to the next one. 
- */ -#define MAX_PKTS_PER_QUEUE 16 - -#define num_pages(x) (1 + ((((x) - 1) & PAGE_MASK) >> PAGE_SHIFT)) - -#define req_opcode(x) \ - (((x) >> HFI1_SDMA_REQ_OPCODE_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK) -#define req_version(x) \ - (((x) >> HFI1_SDMA_REQ_VERSION_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK) -#define req_iovcnt(x) \ - (((x) >> HFI1_SDMA_REQ_IOVCNT_SHIFT) & HFI1_SDMA_REQ_IOVCNT_MASK) - -/* Number of BTH.PSN bits used for sequence number in expected rcvs */ -#define BTH_SEQ_MASK 0x7ffull - -#define AHG_KDETH_INTR_SHIFT 12 -#define AHG_KDETH_SH_SHIFT 13 -#define AHG_KDETH_ARRAY_SIZE 9 - -#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) -#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) - -#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \ - do { \ - if ((idx) < ARRAY_SIZE((arr))) \ - (arr)[(idx++)] = sdma_build_ahg_descriptor( \ - (__force u16)(value), (dw), (bit), \ - (width)); \ - else \ - return -ERANGE; \ - } while (0) - -/* Tx request flag bits */ -#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ -#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ - -#define SDMA_PKT_Q_INACTIVE BIT(0) -#define SDMA_PKT_Q_ACTIVE BIT(1) -#define SDMA_PKT_Q_DEFERRED BIT(2) - -/* - * Maximum retry attempts to submit a TX request - * before putting the process to sleep. - */ -#define MAX_DEFER_RETRY_COUNT 1 - static unsigned initial_pkt_count = 8; -#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ - -struct sdma_mmu_node; - -struct user_sdma_iovec { - struct list_head list; - struct iovec iov; - /* number of pages in this vector */ - unsigned npages; - /* array of pinned pages for this vector */ - struct page **pages; - /* - * offset into the virtual address space of the vector at - * which we last left off. - */ - u64 offset; - struct sdma_mmu_node *node; -}; - -struct sdma_mmu_node { - struct mmu_rb_node rb; - struct hfi1_user_sdma_pkt_q *pq; - atomic_t refcount; - struct page **pages; - unsigned npages; -}; - -/* evict operation argument */ -struct evict_data { - u32 cleared; /* count evicted so far */ - u32 target; /* target count to evict */ -}; - -struct user_sdma_request { - /* This is the original header from user space */ - struct hfi1_pkt_header hdr; - - /* Read mostly fields */ - struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp; - struct hfi1_user_sdma_comp_q *cq; - /* - * Pointer to the SDMA engine for this request. - * Since different request could be on different VLs, - * each request will need it's own engine pointer. - */ - struct sdma_engine *sde; - struct sdma_req_info info; - /* TID array values copied from the tid_iov vector */ - u32 *tids; - /* total length of the data in the request */ - u32 data_len; - /* number of elements copied to the tids array */ - u16 n_tids; - /* - * We copy the iovs for this request (based on - * info.iovcnt). These are only the data vectors - */ - u8 data_iovs; - s8 ahg_idx; - - /* Writeable fields shared with interrupt */ - u64 seqcomp ____cacheline_aligned_in_smp; - u64 seqsubmitted; - /* status of the last txreq completed */ - int status; - - /* Send side fields */ - struct list_head txps ____cacheline_aligned_in_smp; - u64 seqnum; - /* - * KDETH.OFFSET (TID) field - * The offset can cover multiple packets, depending on the - * size of the TID entry. - */ - u32 tidoffset; - /* - * KDETH.Offset (Eager) field - * We need to remember the initial value so the headers - * can be updated properly. 
- */ - u32 koffset; - u32 sent; - /* TID index copied from the tid_iov vector */ - u16 tididx; - /* progress index moving along the iovs array */ - u8 iov_idx; - u8 done; - u8 has_error; - - struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; -} ____cacheline_aligned_in_smp; - -/* - * A single txreq could span up to 3 physical pages when the MTU - * is sufficiently large (> 4K). Each of the IOV pointers also - * needs it's own set of flags so the vector has been handled - * independently of each other. - */ -struct user_sdma_txreq { - /* Packet header for the txreq */ - struct hfi1_pkt_header hdr; - struct sdma_txreq txreq; - struct list_head list; - struct user_sdma_request *req; - u16 flags; - unsigned busycount; - u64 seqnum; -}; - -#define SDMA_DBG(req, fmt, ...) \ - hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \ - (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \ - ##__VA_ARGS__) -#define SDMA_Q_DBG(pq, fmt, ...) \ - hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \ - (pq)->subctxt, ##__VA_ARGS__) - static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts); static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 84c199d3e003..6c10484e972f 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -53,6 +53,67 @@ #include "iowait.h" #include "user_exp_rcv.h" +/* The maximum number of Data io vectors per message/request */ +#define MAX_VECTORS_PER_REQ 8 +/* + * Maximum number of packet to send from each message/request + * before moving to the next one. + */ +#define MAX_PKTS_PER_QUEUE 16 + +#define num_pages(x) (1 + ((((x) - 1) & PAGE_MASK) >> PAGE_SHIFT)) + +#define req_opcode(x) \ + (((x) >> HFI1_SDMA_REQ_OPCODE_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK) +#define req_version(x) \ + (((x) >> HFI1_SDMA_REQ_VERSION_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK) +#define req_iovcnt(x) \ + (((x) >> HFI1_SDMA_REQ_IOVCNT_SHIFT) & HFI1_SDMA_REQ_IOVCNT_MASK) + +/* Number of BTH.PSN bits used for sequence number in expected rcvs */ +#define BTH_SEQ_MASK 0x7ffull + +#define AHG_KDETH_INTR_SHIFT 12 +#define AHG_KDETH_SH_SHIFT 13 +#define AHG_KDETH_ARRAY_SIZE 9 + +#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) +#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) + +#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \ + do { \ + if ((idx) < ARRAY_SIZE((arr))) \ + (arr)[(idx++)] = sdma_build_ahg_descriptor( \ + (__force u16)(value), (dw), (bit), \ + (width)); \ + else \ + return -ERANGE; \ + } while (0) + +/* Tx request flag bits */ +#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ +#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ + +#define SDMA_PKT_Q_INACTIVE BIT(0) +#define SDMA_PKT_Q_ACTIVE BIT(1) +#define SDMA_PKT_Q_DEFERRED BIT(2) + +/* + * Maximum retry attempts to submit a TX request + * before putting the process to sleep. + */ +#define MAX_DEFER_RETRY_COUNT 1 + +#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ + +#define SDMA_DBG(req, fmt, ...) \ + hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \ + (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \ + ##__VA_ARGS__) +#define SDMA_Q_DBG(pq, fmt, ...) 
\ + hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \ + (pq)->subctxt, ##__VA_ARGS__) + extern uint extended_psn; struct hfi1_user_sdma_pkt_q { @@ -79,6 +140,111 @@ struct hfi1_user_sdma_comp_q { struct hfi1_sdma_comp_entry *comps; }; +struct sdma_mmu_node { + struct mmu_rb_node rb; + struct hfi1_user_sdma_pkt_q *pq; + atomic_t refcount; + struct page **pages; + unsigned int npages; +}; + +struct user_sdma_iovec { + struct list_head list; + struct iovec iov; + /* number of pages in this vector */ + unsigned int npages; + /* array of pinned pages for this vector */ + struct page **pages; + /* + * offset into the virtual address space of the vector at + * which we last left off. + */ + u64 offset; + struct sdma_mmu_node *node; +}; + +/* evict operation argument */ +struct evict_data { + u32 cleared; /* count evicted so far */ + u32 target; /* target count to evict */ +}; + +struct user_sdma_request { + /* This is the original header from user space */ + struct hfi1_pkt_header hdr; + + /* Read mostly fields */ + struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp; + struct hfi1_user_sdma_comp_q *cq; + /* + * Pointer to the SDMA engine for this request. + * Since different request could be on different VLs, + * each request will need it's own engine pointer. + */ + struct sdma_engine *sde; + struct sdma_req_info info; + /* TID array values copied from the tid_iov vector */ + u32 *tids; + /* total length of the data in the request */ + u32 data_len; + /* number of elements copied to the tids array */ + u16 n_tids; + /* + * We copy the iovs for this request (based on + * info.iovcnt). These are only the data vectors + */ + u8 data_iovs; + s8 ahg_idx; + + /* Writeable fields shared with interrupt */ + u64 seqcomp ____cacheline_aligned_in_smp; + u64 seqsubmitted; + /* status of the last txreq completed */ + int status; + + /* Send side fields */ + struct list_head txps ____cacheline_aligned_in_smp; + u64 seqnum; + /* + * KDETH.OFFSET (TID) field + * The offset can cover multiple packets, depending on the + * size of the TID entry. + */ + u32 tidoffset; + /* + * KDETH.Offset (Eager) field + * We need to remember the initial value so the headers + * can be updated properly. + */ + u32 koffset; + u32 sent; + /* TID index copied from the tid_iov vector */ + u16 tididx; + /* progress index moving along the iovs array */ + u8 iov_idx; + u8 done; + u8 has_error; + + struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; +} ____cacheline_aligned_in_smp; + +/* + * A single txreq could span up to 3 physical pages when the MTU + * is sufficiently large (> 4K). Each of the IOV pointers also + * needs it's own set of flags so the vector has been handled + * independently of each other. + */ +struct user_sdma_txreq { + /* Packet header for the txreq */ + struct hfi1_pkt_header hdr; + struct sdma_txreq txreq; + struct list_head list; + struct user_sdma_request *req; + u16 flags; + unsigned int busycount; + u64 seqnum; +}; + int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_filedata *fd); int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, -- cgit v1.2.3-59-g8ed1b From d68e68e5fb7134427c6d79c58beaaf869543f3f7 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Mon, 21 Aug 2017 18:27:35 -0700 Subject: IB/hfi1: Fix whitespace alignment issue for MAD Fix a tab alignment issue present in pr_err_ratelimited error message. Reviewed-by: Michael J. 
Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 4849130d49bc..f4c0ffc040cc 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -158,7 +158,7 @@ static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp, if (queue_id >= RVT_MAX_TRAP_LISTS) { trap_count++; pr_err_ratelimited("hfi1: Invalid trap 0x%0x dropped. Total dropped: %d\n", - trap->data.generic_type, trap_count); + trap->data.generic_type, trap_count); kfree(trap); return NULL; } -- cgit v1.2.3-59-g8ed1b From 642aaab5a63c043c593ba9e9a86aadd388313e56 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 21 Aug 2017 18:27:41 -0700 Subject: IB/hfi1: Add received request info to qp_stats The rvt_ack_entry pointed to by s_tail_ack_queue provides important info about the request that has just been processed or is being processed on the responder side of an RC connection. This patch adds that info to the qp_stats output to assist debugging. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 0fca6dfe8d9f..ee836c0978cc 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -626,12 +626,15 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) struct hfi1_qp_priv *priv = qp->priv; struct sdma_engine *sde; struct send_context *send_context; + struct rvt_ack_entry *e = NULL; sde = qp_to_sdma_engine(qp, priv->s_sc); wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); + if (qp->s_ack_queue) + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; seq_printf(s, - "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n", + "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d E %x %x %x\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -672,7 +675,11 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) send_context ? send_context->sw_index : 0, ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head, ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail, - qp->pid); + qp->pid, + /* ack queue information */ + e ? e->opcode : 0, + e ? e->psn : 0, + e ? e->lpsn : 0); } void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) -- cgit v1.2.3-59-g8ed1b From 280ad49a97ee7adf5c86f70f7ea1ff60a755b294 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 21 Aug 2017 18:27:48 -0700 Subject: IB/hfi1: Add opcode states to qp_stats These fields allow debugging of send engine processing.
Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index ee836c0978cc..b87889d4e9e4 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -634,7 +634,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) if (qp->s_ack_queue) e = &qp->s_ack_queue[qp->s_tail_ack_queue]; seq_printf(s, - "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d E %x %x %x\n", + "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -676,6 +676,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head, ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail, qp->pid, + qp->s_state, + qp->s_ack_state, /* ack queue information */ e ? e->opcode : 0, e ? e->psn : 0, -- cgit v1.2.3-59-g8ed1b From d518a44d317d92f4c297ea26a308b1ac1a980d33 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 25 Aug 2017 11:43:59 +0300 Subject: IB/usnic: check for allocation failure usnic_uiom_get_dev_list() can return ERR_PTR(-ENOMEM), so we should check for that. Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Signed-off-by: Dan Carpenter Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 97dd79ebb590..e4113ef09315 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -164,6 +164,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, if (usnic_ib_share_vf) { /* Try to find resouces on a used vf which is in pd */ dev_list = usnic_uiom_get_dev_list(pd->umem_pd); + if (IS_ERR(dev_list)) + return ERR_CAST(dev_list); for (i = 0; dev_list[i]; i++) { dev = dev_list[i]; vf = pci_get_drvdata(to_pci_dev(dev)); -- cgit v1.2.3-59-g8ed1b From 2d72d6c4ec1e3761e8cfc69d58d498b2dbe46341 Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Fri, 25 Aug 2017 21:41:02 +0530 Subject: RDMA/bnxt_re: remove unnecessary call to memset A call to memset() that zeroes memory immediately after allocating it with kzalloc() is unnecessary, because kzalloc() already returns zero-filled memory.
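For illustration only, a minimal sketch of the pattern being removed; the structure and function names here are hypothetical, not bnxt_re symbols:

#include <linux/slab.h>

struct example_ctx {			/* hypothetical structure */
	int state;
	void *buf;
};

static struct example_ctx *example_alloc(void)
{
	struct example_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);	/* returns zeroed memory */
	if (!ctx)
		return NULL;
	/* memset(ctx, 0, sizeof(*ctx)); -- redundant after kzalloc() */
	return ctx;
}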
Signed-off-by: Himanshu Jha Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 43b3dee4b6ba..01eee15bbd65 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -911,7 +911,6 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah if (!ah) return NULL; - memset(ah, 0, sizeof(*ah)); ah->rdev = rdev; ah->qplib_ah.pd = &pd->qplib_pd; @@ -958,7 +957,6 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp if (!qp) return NULL; - memset(qp, 0, sizeof(*qp)); qp->rdev = rdev; /* Initialize the shadow QP structure from the QP1 values */ -- cgit v1.2.3-59-g8ed1b From 733da3bcb3a1293fab1e7703982540dde847d716 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 28 Aug 2017 09:59:28 +0530 Subject: IB/hfi1: constify vm_operations_struct A vm_operations_struct is not supposed to change at runtime, and vm_area_struct already works with a const vm_operations_struct, so mark the non-const vm_operations_struct instances as const. Signed-off-by: Arvind Yadav Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index e0fd8fc0a7ab..d893582e4450 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -120,7 +120,7 @@ static const struct file_operations hfi1_file_ops = { .llseek = noop_llseek, }; -static struct vm_operations_struct vm_ops = { +static const struct vm_operations_struct vm_ops = { .fault = vma_fault, }; -- cgit v1.2.3-59-g8ed1b From cfeca08faf452acaf807576859275968cdb7e7a2 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Mon, 28 Aug 2017 21:51:23 +0530 Subject: i40iw: make some structures const Make some structures const as they are only used during a copy operation.
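For illustration only, a sketch of the constification pattern with hypothetical names; an ops table that is only read (or copied from) after initialization can live in read-only memory:

struct example_ops {				/* hypothetical ops table */
	int (*post)(void *qp);
	int (*poll)(void *cq);
};

static int example_post(void *qp) { return 0; }	/* stub handlers */
static int example_poll(void *cq) { return 0; }

/* Never written after init, so the compiler may place it in .rodata. */
static const struct example_ops example_uk_ops = {
	.post = example_post,
	.poll = example_poll,
};

static void example_init(struct example_ops *dst)
{
	*dst = example_uk_ops;	/* the copy is the only use */
}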
Signed-off-by: Bhumika Goyal Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_uk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 1060725d18bc..0aadb7a0d1aa 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -912,7 +912,7 @@ enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data return 0; } -static struct i40iw_qp_uk_ops iw_qp_uk_ops = { +static const struct i40iw_qp_uk_ops iw_qp_uk_ops = { .iw_qp_post_wr = i40iw_qp_post_wr, .iw_qp_ring_push_db = i40iw_qp_ring_push_db, .iw_rdma_write = i40iw_rdma_write, @@ -926,14 +926,14 @@ static struct i40iw_qp_uk_ops iw_qp_uk_ops = { .iw_post_nop = i40iw_nop }; -static struct i40iw_cq_ops iw_cq_ops = { +static const struct i40iw_cq_ops iw_cq_ops = { .iw_cq_request_notification = i40iw_cq_request_notification, .iw_cq_poll_completion = i40iw_cq_poll_completion, .iw_cq_post_entries = i40iw_cq_post_entries, .iw_cq_clean = i40iw_clean_cq }; -static struct i40iw_device_uk_ops iw_device_uk_ops = { +static const struct i40iw_device_uk_ops iw_device_uk_ops = { .iwarp_cq_uk_init = i40iw_cq_uk_init, .iwarp_qp_uk_init = i40iw_qp_uk_init, }; -- cgit v1.2.3-59-g8ed1b From ba81a427c35c827bb220f1a87231b3460e0bd879 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Mon, 28 Aug 2017 11:23:21 -0700 Subject: IB/hfi1: Acquire QSFP cable information on loopback Currently, QSFP information is not queried when loopback is set up and a QSFP module is present. Acquire the QSFP information in the loopback case as well. Reviewed-by: Dennis Dalessandro Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 8004f1da7548..a8af96d2b1b0 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -944,6 +944,21 @@ void tune_serdes(struct hfi1_pportdata *ppd) if (loopback != LOOPBACK_NONE || ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { ppd->driver_link_ready = 1; + + if (qsfp_mod_present(ppd)) { + ret = acquire_chip_resource(ppd->dd, + qsfp_resource(ppd->dd), + QSFP_WAIT); + if (ret) { + dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n", + __func__, (int)ppd->dd->hfi1_id); + goto bail; + } + + refresh_qsfp_cache(ppd, &ppd->qsfp_info); + release_chip_resource(ppd->dd, qsfp_resource(ppd->dd)); + } + return; } -- cgit v1.2.3-59-g8ed1b From 34ab4de77fe512ba3aeabdcdc97504bd19791511 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 28 Aug 2017 11:23:27 -0700 Subject: IB/hfi1: Remove static tracing from SDMA hot path The hfi1_cdbg() macro can be instantiated in the hot path even when it is not in use, and this shows up in perf profiles. Rework the macros (for SDMA and MMU) to use the trace interface directly, eliminating this performance hit. Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J.
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 3 +- drivers/infiniband/hw/hfi1/mmu_rb.c | 13 ++-- drivers/infiniband/hw/hfi1/trace.h | 3 +- drivers/infiniband/hw/hfi1/trace_mmu.h | 95 +++++++++++++++++++++++ drivers/infiniband/hw/hfi1/trace_tx.h | 136 ++++++++++++++++++++++++++++++++- drivers/infiniband/hw/hfi1/user_sdma.c | 31 ++++---- drivers/infiniband/hw/hfi1/user_sdma.h | 3 - 7 files changed, 255 insertions(+), 29 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/trace_mmu.h (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index d893582e4450..2bc89260235a 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -430,8 +430,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) if (!iter_is_iovec(from) || !dim) return -EINVAL; - hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)", - fd->uctxt->ctxt, fd->subctxt, dim); + trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim); if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) return -ENOSPC; diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index d41fd87a39f2..13dcef08cac1 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -172,9 +172,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, unsigned long flags; int ret = 0; + trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len); spin_lock_irqsave(&handler->lock, flags); - hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr, - mnode->len); node = __mmu_rb_search(handler, mnode->addr, mnode->len); if (node) { ret = -EINVAL; @@ -200,7 +199,7 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, { struct mmu_rb_node *node = NULL; - hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len); + trace_hfi1_mmu_rb_search(addr, len); if (!handler->ops->filter) { node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1); @@ -281,8 +280,7 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, unsigned long flags; /* Validity of handler and node pointers has been checked by caller. */ - hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr, - node->len); + trace_hfi1_mmu_rb_remove(node->addr, node->len); spin_lock_irqsave(&handler->lock, flags); __mmu_int_rb_remove(node, &handler->root); list_del(&node->list); /* remove from LRU list */ @@ -321,8 +319,7 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, node; node = ptr) { /* Guard against node removal. */ ptr = __mmu_int_rb_iter_next(node, start, end - 1); - hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", - node->addr, node->len); + trace_hfi1_mmu_mem_invalidate(node->addr, node->len); if (handler->ops->invalidate(handler->ops_arg, node)) { __mmu_int_rb_remove(node, root); /* move from LRU list to delete list */ diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h index 92dc88f013c9..af50c0793450 100644 --- a/drivers/infiniband/hw/hfi1/trace.h +++ b/drivers/infiniband/hw/hfi1/trace.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. 
+ * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -51,3 +51,4 @@ #include "trace_rc.h" #include "trace_rx.h" #include "trace_tx.h" +#include "trace_mmu.h" diff --git a/drivers/infiniband/hw/hfi1/trace_mmu.h b/drivers/infiniband/hw/hfi1/trace_mmu.h new file mode 100644 index 000000000000..3b7abbc382c2 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_mmu.h @@ -0,0 +1,95 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#if !defined(__HFI1_TRACE_MMU_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_MMU_H + +#include +#include + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_mmu + +DECLARE_EVENT_CLASS(hfi1_mmu_rb_template, + TP_PROTO(unsigned long addr, unsigned long len), + TP_ARGS(addr, len), + TP_STRUCT__entry(__field(unsigned long, addr) + __field(unsigned long, len) + ), + TP_fast_assign(__entry->addr = addr; + __entry->len = len; + ), + TP_printk("MMU node addr 0x%lx, len %lu", + __entry->addr, + __entry->len + ) +); + +DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_insert, + TP_PROTO(unsigned long addr, unsigned long len), + TP_ARGS(addr, len)); + +DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_search, + TP_PROTO(unsigned long addr, unsigned long len), + TP_ARGS(addr, len)); + +DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_remove, + TP_PROTO(unsigned long addr, unsigned long len), + TP_ARGS(addr, len)); + +DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_mem_invalidate, + TP_PROTO(unsigned long addr, unsigned long len), + TP_ARGS(addr, len)); + +#endif /* __HFI1_TRACE_RC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_mmu +#include diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index c59809a7f121..c57af3b31fe1 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -198,6 +198,140 @@ TRACE_EVENT(hfi1_sdma_engine_select, ) ); +TRACE_EVENT(hfi1_sdma_user_free_queues, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt), + TP_ARGS(dd, ctxt, subctxt), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u16, subctxt) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + ), + TP_printk("[%s] SDMA [%u:%u] Freeing user SDMA queues", + __get_str(dev), + __entry->ctxt, + __entry->subctxt + ) +); + +TRACE_EVENT(hfi1_sdma_user_process_request, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + u16 comp_idx), + TP_ARGS(dd, ctxt, subctxt, comp_idx), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u16, subctxt) + __field(u16, comp_idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->comp_idx = comp_idx; + ), + TP_printk("[%s] SDMA [%u:%u] Using req/comp entry: %u", + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->comp_idx + ) +); + +DECLARE_EVENT_CLASS( + hfi1_sdma_value_template, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, u16 comp_idx, + u32 value), + TP_ARGS(dd, ctxt, subctxt, comp_idx, value), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u16, subctxt) + __field(u16, comp_idx) + __field(u32, value) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->comp_idx = comp_idx; + __entry->value = value; + ), + TP_printk("[%s] SDMA [%u:%u:%u] value: %u", + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->comp_idx, + __entry->value + ) +); + +DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_initial_tidoffset, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + u16 comp_idx, u32 tidoffset), + 
TP_ARGS(dd, ctxt, subctxt, comp_idx, tidoffset)); + +DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_data_length, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + u16 comp_idx, u32 data_len), + TP_ARGS(dd, ctxt, subctxt, comp_idx, data_len)); + +DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_compute_length, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + u16 comp_idx, u32 data_len), + TP_ARGS(dd, ctxt, subctxt, comp_idx, data_len)); + +TRACE_EVENT(hfi1_sdma_user_tid_info, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + u16 comp_idx, u32 tidoffset, u32 units, u8 shift), + TP_ARGS(dd, ctxt, subctxt, comp_idx, tidoffset, units, shift), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u16, subctxt) + __field(u16, comp_idx) + __field(u32, tidoffset) + __field(u32, units) + __field(u8, shift) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->comp_idx = comp_idx; + __entry->tidoffset = tidoffset; + __entry->units = units; + __entry->shift = shift; + ), + TP_printk("[%s] SDMA [%u:%u:%u] TID offset %ubytes %uunits om %u", + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->comp_idx, + __entry->tidoffset, + __entry->units, + __entry->shift + ) +); + +TRACE_EVENT(hfi1_sdma_request, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + unsigned long dim), + TP_ARGS(dd, ctxt, subctxt, dim), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u16, subctxt) + __field(unsigned long, dim) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->dim = dim; + ), + TP_printk("[%s] SDMA from %u:%u (%lu)", + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->dim + ) +); + DECLARE_EVENT_CLASS(hfi1_sdma_engine_class, TP_PROTO(struct sdma_engine *sde, u64 status), TP_ARGS(sde, status), diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index dacb0fce49c6..c0c0e0445cbf 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -266,8 +266,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, { struct hfi1_user_sdma_pkt_q *pq; - hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, - uctxt->ctxt, fd->subctxt); + trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt); + pq = fd->pq; if (pq) { if (pq->handler) @@ -349,7 +349,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt, (u16 *)&info); - if (info.comp_idx >= hfi1_sdma_comp_ring_size) { hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Invalid comp index", @@ -386,8 +385,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, /* * All safety checks have been done and this request has been claimed. */ - hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit, - uctxt->ctxt, fd->subctxt, info.comp_idx); + trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt, + info.comp_idx); req = pq->reqs + info.comp_idx; req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ req->data_len = 0; @@ -487,7 +486,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) * (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ? 
KDETH_OM_LARGE : KDETH_OM_SMALL); - SDMA_DBG(req, "Initial TID offset %u", req->tidoffset); + trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt, + info.comp_idx, req->tidoffset); idx++; /* Save all the IO vector structures */ @@ -505,8 +505,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, } req->data_len += req->iovs[i].iov.iov_len; } - SDMA_DBG(req, "total data length %u", req->data_len); - + trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt, + info.comp_idx, req->data_len); if (pcount > req->info.npkts) pcount = req->info.npkts; /* @@ -661,7 +661,11 @@ static inline u32 compute_data_length(struct user_sdma_request *req, } else { len = min(req->data_len - req->sent, (u32)req->info.fragsize); } - SDMA_DBG(req, "Data Length = %u", len); + trace_hfi1_sdma_user_compute_length(req->pq->dd, + req->pq->ctxt, + req->pq->subctxt, + req->info.comp_idx, + len); return len; } @@ -1231,9 +1235,10 @@ static int set_txreq_header(struct user_sdma_request *req, * Set the KDETH.OFFSET and KDETH.OM based on size of * transfer. */ - SDMA_DBG(req, "TID offset %ubytes %uunits om%u", - req->tidoffset, req->tidoffset >> omfactor, - omfactor != KDETH_OM_SMALL_SHIFT); + trace_hfi1_sdma_user_tid_info( + pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx, + req->tidoffset, req->tidoffset >> omfactor, + omfactor != KDETH_OM_SMALL_SHIFT); KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET, req->tidoffset >> omfactor); KDETH_SET(hdr->kdeth.ver_tid_offset, OM, @@ -1441,8 +1446,6 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, u16 idx, enum hfi1_sdma_comp_state state, int ret) { - hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d", - pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret); if (state == ERROR) cq->comps[idx].errcode = -ret; smp_wmb(); /* make sure errcode is visible first */ diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 6c10484e972f..9b8bb5634c0d 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -110,9 +110,6 @@ hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \ (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \ ##__VA_ARGS__) -#define SDMA_Q_DBG(pq, fmt, ...) \ - hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \ - (pq)->subctxt, ##__VA_ARGS__) extern uint extended_psn; -- cgit v1.2.3-59-g8ed1b From 6167a5b4b5f97d9fc09a99842bd3aa149654ffcf Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Mon, 28 Aug 2017 11:23:33 -0700 Subject: IB/qib: Stricter bounds checking for copy to buffer Replace 'strcpy' with 'strncpy' to restrict the number of bytes copied to the buffer. Reviewed-by: Michael J. 
Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_iba7322.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 92ae68c8e76f..14cadf6d6214 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6175,7 +6175,7 @@ static int setup_txselect(const char *str, struct kernel_param *kp) unsigned long val; char *n; - if (strlen(str) >= MAX_ATTEN_LEN) { + if (strlen(str) >= ARRAY_SIZE(txselect_list)) { pr_info("txselect_values string too long\n"); return -ENOSPC; } @@ -6186,7 +6186,7 @@ static int setup_txselect(const char *str, struct kernel_param *kp) TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); return -EINVAL; } - strcpy(txselect_list, str); + strncpy(txselect_list, str, ARRAY_SIZE(txselect_list) - 1); list_for_each_entry(dd, &qib_dev_list, list) if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) -- cgit v1.2.3-59-g8ed1b From 4b9796b0a6fbb6e4678002b3f09392192502fd14 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 28 Aug 2017 11:23:39 -0700 Subject: IB/hfi1: Use accessor to determine ring size The qp_stats print will soon be moving to rdmavt, so use the proper accessor to get the ring size rather than a driver-supplied constant. Fixes: ff8d836efe06 ("IB/hfi1: Add receiving queue info to qp_stats") Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index b87889d4e9e4..6daa39630dd5 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -659,7 +659,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->s_avail, /* ack_queue ring pointers, size */ qp->s_tail_ack_queue, qp->r_head_ack_queue, - HFI1_MAX_RDMA_ATOMIC, + rvt_max_atomic(&to_idev(qp->ibqp.device)->rdi), /* remote QP info */ qp->remote_qpn, rdma_ah_get_dlid(&qp->remote_ah_attr), -- cgit v1.2.3-59-g8ed1b From dff2fe7e8cd92435a93555b29d9d95fed955c558 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 28 Aug 2017 11:23:51 -0700 Subject: IB/hfi1: Convert hfi1_error_port_qps() to use new QP iterator Change hfi1_error_port_qps() to use the new rvt_qp_iter() in its QP scanning. Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 79 +++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 38 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 6daa39630dd5..af711318f84b 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -869,6 +869,45 @@ void notify_error_qp(struct rvt_qp *qp) } } +/** + * hfi1_qp_iter_cb - callback for iterator + * @qp - the qp + * @v - the sl in low bits of v + * + * This is called from the iterator callback to work + * on an individual qp.
+ */ +static void hfi1_qp_iter_cb(struct rvt_qp *qp, u64 v) +{ + int lastwqe; + struct ib_event ev; + struct hfi1_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u8 sl = (u8)v; + + if (qp->port_num != ppd->port || + (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) || + rdma_ah_get_sl(&qp->remote_ah_attr) != sl || + !(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK)) + return; + + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); + spin_lock(&qp->s_lock); + lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); + spin_unlock_irq(&qp->r_lock); + if (lastwqe) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } +} + /** * hfi1_error_port_qps - put a port's RC/UC qps into error state * @ibp: the ibport. @@ -880,44 +919,8 @@ void notify_error_qp(struct rvt_qp *qp) */ void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl) { - struct rvt_qp *qp = NULL; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_ibdev *dev = &ppd->dd->verbs_dev; - int n; - int lastwqe; - struct ib_event ev; - - rcu_read_lock(); - - /* Deal only with RC/UC qps that use the given SL. */ - for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) { - for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp; - qp = rcu_dereference(qp->next)) { - if (qp->port_num == ppd->port && - (qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_RC) && - rdma_ah_get_sl(&qp->remote_ah_attr) == sl && - (ib_rvt_state_ops[qp->state] & - RVT_POST_SEND_OK)) { - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_hlock); - spin_lock(&qp->s_lock); - lastwqe = rvt_error_qp(qp, - IB_WC_WR_FLUSH_ERR); - spin_unlock(&qp->s_lock); - spin_unlock(&qp->s_hlock); - spin_unlock_irq(&qp->r_lock); - if (lastwqe) { - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - ev.event = - IB_EVENT_QP_LAST_WQE_REACHED; - qp->ibqp.event_handler(&ev, - qp->ibqp.qp_context); - } - } - } - } - rcu_read_unlock(); + rvt_qp_iter(&dev->rdi, sl, hfi1_qp_iter_cb); } -- cgit v1.2.3-59-g8ed1b From e5c197ac35950659a44b2032d6b31defbe631bf9 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 28 Aug 2017 11:23:58 -0700 Subject: IB/hfi1: Convert qp_stats debugfs interface to use new iterator API Continue moving copy/paste code into rdmavt. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/debugfs.c | 15 +++---- drivers/infiniband/hw/hfi1/qp.c | 83 +++--------------------------------- drivers/infiniband/hw/hfi1/qp.h | 23 +--------- 3 files changed, 14 insertions(+), 107 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index c6a472f01025..36ae1fd86502 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1,4 +1,3 @@ -#ifdef CONFIG_DEBUG_FS /* * Copyright(c) 2015-2017 Intel Corporation. 
* @@ -267,10 +266,10 @@ DEBUGFS_FILE_OPS(ctx_stats); static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) __acquires(RCU) { - struct qp_iter *iter; + struct rvt_qp_iter *iter; loff_t n = *pos; - iter = qp_iter_init(s->private); + iter = rvt_qp_iter_init(s->private, 0, NULL); /* stop calls rcu_read_unlock */ rcu_read_lock(); @@ -279,7 +278,7 @@ static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) return NULL; do { - if (qp_iter_next(iter)) { + if (rvt_qp_iter_next(iter)) { kfree(iter); return NULL; } @@ -292,11 +291,11 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) __must_hold(RCU) { - struct qp_iter *iter = iter_ptr; + struct rvt_qp_iter *iter = iter_ptr; (*pos)++; - if (qp_iter_next(iter)) { + if (rvt_qp_iter_next(iter)) { kfree(iter); return NULL; } @@ -312,7 +311,7 @@ static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) { - struct qp_iter *iter = iter_ptr; + struct rvt_qp_iter *iter = iter_ptr; if (!iter) return 0; @@ -1535,5 +1534,3 @@ void hfi1_dbg_exit(void) debugfs_remove_recursive(hfi1_dbg_root); hfi1_dbg_root = NULL; } - -#endif diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index af711318f84b..4b01ccd895b4 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -530,82 +530,6 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5) sc5); } -struct qp_iter { - struct hfi1_ibdev *dev; - struct rvt_qp *qp; - int specials; - int n; -}; - -struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev) -{ - struct qp_iter *iter; - - iter = kzalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) - return NULL; - - iter->dev = dev; - iter->specials = dev->rdi.ibdev.phys_port_cnt * 2; - - return iter; -} - -int qp_iter_next(struct qp_iter *iter) -{ - struct hfi1_ibdev *dev = iter->dev; - int n = iter->n; - int ret = 1; - struct rvt_qp *pqp = iter->qp; - struct rvt_qp *qp; - - /* - * The approach is to consider the special qps - * as an additional table entries before the - * real hash table. Since the qp code sets - * the qp->next hash link to NULL, this works just fine. 
- * - * iter->specials is 2 * # ports - * - * n = 0..iter->specials is the special qp indices - * - * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are - * the potential hash bucket entries - * - */ - for (; n < dev->rdi.qp_dev->qp_table_size + iter->specials; n++) { - if (pqp) { - qp = rcu_dereference(pqp->next); - } else { - if (n < iter->specials) { - struct hfi1_pportdata *ppd; - struct hfi1_ibport *ibp; - int pidx; - - pidx = n % dev->rdi.ibdev.phys_port_cnt; - ppd = &dd_from_dev(dev)->pport[pidx]; - ibp = &ppd->ibport_data; - - if (!(n & 1)) - qp = rcu_dereference(ibp->rvp.qp[0]); - else - qp = rcu_dereference(ibp->rvp.qp[1]); - } else { - qp = rcu_dereference( - dev->rdi.qp_dev->qp_table[ - (n - iter->specials)]); - } - } - pqp = qp; - if (qp) { - iter->qp = qp; - iter->n = n; - return 0; - } - } - return ret; -} - static const char * const qp_type_str[] = { "SMI", "GSI", "RC", "UC", "UD", }; @@ -619,7 +543,12 @@ static int qp_idle(struct rvt_qp *qp) qp->s_tail == qp->s_head; } -void qp_iter_print(struct seq_file *s, struct qp_iter *iter) +/** + * qp_iter_print - print the qp information to seq_file + * @s: the seq_file to emit the qp information on + * @iter: the iterator for the qp hash list + */ +void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) { struct rvt_swqe *wqe; struct rvt_qp *qp = iter->qp; diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index 6fe542b6a927..c06d2f8348e0 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -1,7 +1,7 @@ #ifndef _QP_H #define _QP_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -94,26 +94,7 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag); struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5); struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5); -struct qp_iter; - -/** - * qp_iter_init - initialize the iterator for the qp hash list - * @dev: the hfi1_ibdev - */ -struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev); - -/** - * qp_iter_next - Find the next qp in the hash list - * @iter: the iterator for the qp hash list - */ -int qp_iter_next(struct qp_iter *iter); - -/** - * qp_iter_print - print the qp information to seq_file - * @s: the seq_file to emit the qp information on - * @iter: the iterator for the qp hash list - */ -void qp_iter_print(struct seq_file *s, struct qp_iter *iter); +void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter); void _hfi1_schedule_send(struct rvt_qp *qp); void hfi1_schedule_send(struct rvt_qp *qp); -- cgit v1.2.3-59-g8ed1b From 557fafe1bfecb50c3da0bc4948ebcbc4d19f1619 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 28 Aug 2017 11:24:04 -0700 Subject: IB/qib: Convert qp_stats debugfs interface to use new iterator API Continue porting copy/paste code into rdmavt from qib. 
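For illustration only, a minimal sketch of a walk over all QPs using the new rdmavt iterator API these conversions adopt; the function itself is hypothetical and error handling is simplified:

#include <linux/printk.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>

static void dump_all_qps(struct rvt_dev_info *rdi)
{
	struct rvt_qp_iter *iter;

	/* 0/NULL: no filter value or callback, walk every QP */
	iter = rvt_qp_iter_init(rdi, 0, NULL);
	if (!iter)
		return;

	rcu_read_lock();		/* the QP hash table is RCU protected */
	while (!rvt_qp_iter_next(iter))	/* returns 0 while QPs remain */
		pr_info("qp 0x%x\n", iter->qp->ibqp.qp_num);
	rcu_read_unlock();

	kfree(iter);
}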
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_debugfs.c | 18 +++++------- drivers/infiniband/hw/qib/qib_qp.c | 51 +++++---------------------------- drivers/infiniband/hw/qib/qib_verbs.h | 10 ++----- 3 files changed, 16 insertions(+), 63 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c index 5bad8e3b40bb..5ed1ed93380f 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -1,6 +1,5 @@ -#ifdef CONFIG_DEBUG_FS /* - * Copyright (c) 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2013 - 2017 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -191,10 +190,10 @@ DEBUGFS_FILE(ctx_stats) static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) __acquires(RCU) { - struct qib_qp_iter *iter; + struct rvt_qp_iter *iter; loff_t n = *pos; - iter = qib_qp_iter_init(s->private); + iter = rvt_qp_iter_init(s->private, 0, NULL); /* stop calls rcu_read_unlock */ rcu_read_lock(); @@ -203,7 +202,7 @@ static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) return NULL; do { - if (qib_qp_iter_next(iter)) { + if (rvt_qp_iter_next(iter)) { kfree(iter); return NULL; } @@ -216,11 +215,11 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) __must_hold(RCU) { - struct qib_qp_iter *iter = iter_ptr; + struct rvt_qp_iter *iter = iter_ptr; (*pos)++; - if (qib_qp_iter_next(iter)) { + if (rvt_qp_iter_next(iter)) { kfree(iter); return NULL; } @@ -236,7 +235,7 @@ static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) { - struct qib_qp_iter *iter = iter_ptr; + struct rvt_qp_iter *iter = iter_ptr; if (!iter) return 0; @@ -284,6 +283,3 @@ void qib_dbg_exit(void) debugfs_remove_recursive(qib_dbg_root); qib_dbg_root = NULL; } - -#endif - diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index a343e3b5d4cb..344e401915f7 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 
* @@ -415,53 +415,16 @@ int qib_check_send_wqe(struct rvt_qp *qp, #ifdef CONFIG_DEBUG_FS -struct qib_qp_iter { - struct qib_ibdev *dev; - struct rvt_qp *qp; - int n; -}; - -struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) -{ - struct qib_qp_iter *iter; - - iter = kzalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) - return NULL; - - iter->dev = dev; - - return iter; -} - -int qib_qp_iter_next(struct qib_qp_iter *iter) -{ - struct qib_ibdev *dev = iter->dev; - int n = iter->n; - int ret = 1; - struct rvt_qp *pqp = iter->qp; - struct rvt_qp *qp; - - for (; n < dev->rdi.qp_dev->qp_table_size; n++) { - if (pqp) - qp = rcu_dereference(pqp->next); - else - qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); - pqp = qp; - if (qp) { - iter->qp = qp; - iter->n = n; - return 0; - } - } - return ret; -} - static const char * const qp_type_str[] = { "SMI", "GSI", "RC", "UC", "UD", }; -void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) +/** + * qib_qp_iter_print - print information to seq_file + * @s - the seq_file + * @iter - the iterator + */ +void qib_qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) { struct rvt_swqe *wqe; struct rvt_qp *qp = iter->qp; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 95e370192948..f887737ac142 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -282,13 +282,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait); #ifdef CONFIG_DEBUG_FS -struct qib_qp_iter; - -struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev); - -int qib_qp_iter_next(struct qib_qp_iter *iter); - -void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter); +void qib_qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter); #endif -- cgit v1.2.3-59-g8ed1b From 1a56ff6daab1e062aadec582eb10e7090f0b370a Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:04 +0300 Subject: IB/core: Separate CQ handle in SRQ context Before this change, the CQ attached to an SRQ was part of the XRC-specific extension. Moving the CQ handle out makes it available to other SRQ types that extend SRQ functionality.
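For illustration only, a sketch of how an XRC SRQ is set up after this change, assuming the post-patch ib_verbs.h layout shown in these hunks; the ext.cq field is now common to every SRQ type that uses a CQ, while the XRC-only xrcd stays in the union (sizes are placeholders):

#include <rdma/ib_verbs.h>

static struct ib_srq *create_xrc_srq(struct ib_pd *pd, struct ib_cq *cq,
				     struct ib_xrcd *xrcd)
{
	struct ib_srq_init_attr attr = {
		.srq_type = IB_SRQT_XRC,
		.attr = { .max_wr = 128, .max_sge = 1 },	/* example sizes */
	};

	attr.ext.cq = cq;		/* shared CQ handle location */
	attr.ext.xrc.xrcd = xrcd;	/* XRC-specific field */

	return ib_create_srq(pd, &attr);
}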
Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 27 +++++++++++++++++---------- drivers/infiniband/core/verbs.c | 16 +++++++++------- drivers/infiniband/hw/mlx4/srq.c | 4 ++-- drivers/infiniband/hw/mlx5/main.c | 10 +++++----- drivers/infiniband/hw/mlx5/srq.c | 11 +++++++---- include/rdma/ib_verbs.h | 31 ++++++++++++++++++++----------- 6 files changed, 60 insertions(+), 39 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 8e9fea03dec4..9f690af46a7e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3497,10 +3497,12 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); + } - attr.ext.xrc.cq = uobj_get_obj_read(cq, cmd->cq_handle, - file->ucontext); - if (!attr.ext.xrc.cq) { + if (ib_srq_has_cq(cmd->srq_type)) { + attr.ext.cq = uobj_get_obj_read(cq, cmd->cq_handle, + file->ucontext); + if (!attr.ext.cq) { ret = -EINVAL; goto err_put_xrcd; } @@ -3535,10 +3537,13 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; + if (ib_srq_has_cq(cmd->srq_type)) { + srq->ext.cq = attr.ext.cq; + atomic_inc(&attr.ext.cq->usecnt); + } + if (cmd->srq_type == IB_SRQT_XRC) { - srq->ext.xrc.cq = attr.ext.xrc.cq; srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; - atomic_inc(&attr.ext.xrc.cq->usecnt); atomic_inc(&attr.ext.xrc.xrcd->usecnt); } @@ -3561,10 +3566,12 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, goto err_copy; } - if (cmd->srq_type == IB_SRQT_XRC) { + if (cmd->srq_type == IB_SRQT_XRC) uobj_put_read(xrcd_uobj); - uobj_put_obj_read(attr.ext.xrc.cq); - } + + if (ib_srq_has_cq(cmd->srq_type)) + uobj_put_obj_read(attr.ext.cq); + uobj_put_obj_read(pd); uobj_alloc_commit(&obj->uevent.uobject); @@ -3577,8 +3584,8 @@ err_put: uobj_put_obj_read(pd); err_put_cq: - if (cmd->srq_type == IB_SRQT_XRC) - uobj_put_obj_read(attr.ext.xrc.cq); + if (ib_srq_has_cq(cmd->srq_type)) + uobj_put_obj_read(attr.ext.cq); err_put_xrcd: if (cmd->srq_type == IB_SRQT_XRC) { diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b29d0ff94463..ecb6c395f19b 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -622,11 +622,13 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, srq->event_handler = srq_init_attr->event_handler; srq->srq_context = srq_init_attr->srq_context; srq->srq_type = srq_init_attr->srq_type; + if (ib_srq_has_cq(srq->srq_type)) { + srq->ext.cq = srq_init_attr->ext.cq; + atomic_inc(&srq->ext.cq->usecnt); + } if (srq->srq_type == IB_SRQT_XRC) { srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; - srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq; atomic_inc(&srq->ext.xrc.xrcd->usecnt); - atomic_inc(&srq->ext.xrc.cq->usecnt); } atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); @@ -667,18 +669,18 @@ int ib_destroy_srq(struct ib_srq *srq) pd = srq->pd; srq_type = srq->srq_type; - if (srq_type == IB_SRQT_XRC) { + if (ib_srq_has_cq(srq_type)) + cq = srq->ext.cq; + if (srq_type == IB_SRQT_XRC) xrcd = srq->ext.xrc.xrcd; - cq = srq->ext.xrc.cq; - } ret = srq->device->destroy_srq(srq); if (!ret) { atomic_dec(&pd->usecnt); - if (srq_type == IB_SRQT_XRC) { + if (srq_type == IB_SRQT_XRC) atomic_dec(&xrcd->usecnt); + if 
(ib_srq_has_cq(srq_type)) atomic_dec(&cq->usecnt); - } } return ret; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index dd7a2fce9df4..ebee56cbc0e2 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -178,8 +178,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, } } - cqn = (init_attr->srq_type == IB_SRQT_XRC) ? - to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0; + cqn = ib_srq_has_cq(init_attr->srq_type) ? + to_mcq(init_attr->ext.cq)->mcq.cqn : 0; xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ? to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn : (u16) dev->dev->caps.reserved_xrcds; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 62e6298810e7..7ad585257fd3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3217,7 +3217,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_XRC; - attr.ext.xrc.cq = devr->c0; + attr.ext.cq = devr->c0; attr.ext.xrc.xrcd = devr->x0; devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); @@ -3232,9 +3232,9 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->s0->srq_context = NULL; devr->s0->srq_type = IB_SRQT_XRC; devr->s0->ext.xrc.xrcd = devr->x0; - devr->s0->ext.xrc.cq = devr->c0; + devr->s0->ext.cq = devr->c0; atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); - atomic_inc(&devr->s0->ext.xrc.cq->usecnt); + atomic_inc(&devr->s0->ext.cq->usecnt); atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); @@ -3253,9 +3253,9 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->s1->event_handler = NULL; devr->s1->srq_context = NULL; devr->s1->srq_type = IB_SRQT_BASIC; - devr->s1->ext.xrc.cq = devr->c0; + devr->s1->ext.cq = devr->c0; atomic_inc(&devr->p0->usecnt); - atomic_set(&devr->s0->usecnt, 0); + atomic_set(&devr->s1->usecnt, 0); for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { INIT_WORK(&devr->ports[port].pkey_change_work, diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 30b3ddd8e1ab..e6be4f2927a7 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -292,13 +292,16 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in.wqe_shift = srq->msrq.wqe_shift - 4; if (srq->wq_sig) in.flags |= MLX5_SRQ_FLAG_WQ_SIG; - if (init_attr->srq_type == IB_SRQT_XRC) { + + if (init_attr->srq_type == IB_SRQT_XRC) in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; - in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn; - } else if (init_attr->srq_type == IB_SRQT_BASIC) { + else in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn; + + if (ib_srq_has_cq(init_attr->srq_type)) + in.cqn = to_mcq(init_attr->ext.cq)->mcq.cqn; + else in.cqn = to_mcq(dev->devr.c0)->mcq.cqn; - } in.pd = to_mpd(pd)->pdn; in.db_record = srq->db.dma; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cab0bdcfad51..f0e46757185b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -994,6 +994,11 @@ enum ib_srq_type { IB_SRQT_XRC }; +static inline bool ib_srq_has_cq(enum ib_srq_type srq_type) +{ + return srq_type == IB_SRQT_XRC; +} + enum ib_srq_attr_mask { IB_SRQ_MAX_WR = 1 << 0, IB_SRQ_LIMIT = 1 << 1, @@ -1011,11 +1016,13 @@ struct ib_srq_init_attr { struct ib_srq_attr attr; enum ib_srq_type srq_type; - union { - struct { - struct ib_xrcd *xrcd; - struct ib_cq *cq; - } xrc; + struct { + struct ib_cq *cq; + union { + struct { + struct ib_xrcd *xrcd; + 
From eb761894351d0372248f2636c213d7b822e8775f Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov
Date: Thu, 17 Aug 2017 15:52:09 +0300
Subject: IB/mlx5: Fill XRQ capabilities

Provide driver-specific values for XRQ capabilities.

Signed-off-by: Artemy Kovalyov
Reviewed-by: Yossi Itigin
Signed-off-by: Leon Romanovsky
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/mlx5/main.c    | 10 ++++++++++
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  5 +++++
 2 files changed, 15 insertions(+)

(limited to 'drivers/infiniband/hw')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 7ad585257fd3..ab3c562d5ba7 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -777,6 +777,16 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 			1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
 	}

+	if (MLX5_CAP_GEN(mdev, tag_matching)) {
+		props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+		props->xrq_caps.max_num_tags =
+			(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
+		props->xrq_caps.flags = IB_TM_CAP_RC;
+		props->xrq_caps.max_ops =
+			1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
+		props->xrq_caps.max_sge = MLX5_TM_MAX_SGE;
+	}
+
 	if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
 		resp.cqe_comp_caps.max_num =
 			MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b3380e8beacf..189e80cd6b2f 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -107,6 +107,11 @@ enum {
 	MLX5_CQE_VERSION_V1,
 };

+enum {
+	MLX5_TM_MAX_RNDV_MSG_SIZE	= 64,
+	MLX5_TM_MAX_SGE			= 1,
+};
+
 struct mlx5_ib_vma_private_data {
 	struct list_head list;
 	struct vm_area_struct *vma;
-- cgit v1.2.3-59-g8ed1b

From 3fd3307ef34fc9f7198af9249c763cf7a4ac653f Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov
Date: Thu, 17 Aug 2017 15:52:11 +0300
Subject: IB/mlx5: Support IB_SRQT_TM

Pass the rendezvous-offload enable flag, list_size and CQ to mlx5_core
when an SRQ is created with IB_SRQT_TM.
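As a rough sketch of the consumer side this enables (handles and sizes below are illustrative placeholders, not taken from this patch), a tag-matching SRQ is requested like an XRC one, with the tag list sized via ext.tag_matching:

	struct ib_srq_init_attr attr = {};
	struct ib_srq *srq;

	attr.attr.max_wr = 128;
	attr.attr.max_sge = 1;
	attr.srq_type = IB_SRQT_TM;
	attr.ext.cq = cq;			/* TM SRQs carry a CQ, like XRC */
	attr.ext.tag_matching.max_num_tags = 64;

	srq = ib_create_srq(pd, &attr);

mlx5_ib_create_srq() below converts max_num_tags into tm_log_list_size, rejects requests beyond the log_tag_matching_list_sz device cap and sets MLX5_SRQ_FLAG_RNDV.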
Signed-off-by: Artemy Kovalyov
Reviewed-by: Yossi Itigin
Signed-off-by: Leon Romanovsky
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/mlx5/qp.c  |  9 +++++++--
 drivers/infiniband/hw/mlx5/srq.c | 18 +++++++++++++++---
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'drivers/infiniband/hw')

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index d6df88a78d5e..acb79d3a4f1d 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1729,10 +1729,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,

 	MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));

-	if (qp->sq.wqe_cnt)
+	if (qp->sq.wqe_cnt) {
 		MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
-	else
+	} else {
 		MLX5_SET(qpc, qpc, no_sq, 1);
+		if (init_attr->srq &&
+		    init_attr->srq->srq_type == IB_SRQT_TM)
+			MLX5_SET(qpc, qpc, offload_type,
+				 MLX5_QPC_OFFLOAD_TYPE_RNDV);
+	}

 	/* Set default resources */
 	switch (init_attr->qp_type) {
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index e6be4f2927a7..6d5fadad9090 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -101,7 +101,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 			   udata->inlen - sizeof(ucmd)))
 		return -EINVAL;

-	if (in->type == IB_SRQT_XRC) {
+	if (in->type != IB_SRQT_BASIC) {
 		err = get_srq_user_index(to_mucontext(pd->uobject->context),
 					 &ucmd, udata->inlen, &uidx);
 		if (err)
@@ -145,7 +145,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 	in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	in->page_offset = offset;
 	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
-	    in->type == IB_SRQT_XRC)
+	    in->type != IB_SRQT_BASIC)
 		in->user_index = uidx;

 	return 0;
@@ -205,7 +205,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 	in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
-	    in->type == IB_SRQT_XRC)
+	    in->type != IB_SRQT_BASIC)
 		in->user_index = MLX5_IB_DEFAULT_UIDX;

 	return 0;
@@ -298,6 +298,18 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
 	else
 		in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;

+	if (init_attr->srq_type == IB_SRQT_TM) {
+		in.tm_log_list_size =
+			ilog2(init_attr->ext.tag_matching.max_num_tags) + 1;
+		if (in.tm_log_list_size >
+		    MLX5_CAP_GEN(dev->mdev, log_tag_matching_list_sz)) {
+			mlx5_ib_dbg(dev, "TM SRQ max_num_tags exceeding limit\n");
+			err = -EINVAL;
+			goto err_usr_kern_srq;
+		}
+		in.flags |= MLX5_SRQ_FLAG_RNDV;
+	}
+
 	if (ib_srq_has_cq(init_attr->srq_type))
 		in.cqn = to_mcq(init_attr->ext.cq)->mcq.cqn;
 	else
-- cgit v1.2.3-59-g8ed1b

From 72f9b089ecd2cc2194d27cbb14fd80a0b1472e89 Mon Sep 17 00:00:00 2001
From: Aditya Sarwade
Date: Tue, 29 Aug 2017 15:51:29 -0700
Subject: RDMA/vmw_pvrdma: Report network header type in WC

We should report the network header type in the work completion so that
the kernel can infer the right RoCE header type.
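A rough sketch of how a CQ consumer can act on the reported type (assuming the provider also raises IB_WC_WITH_NETWORK_HDR_TYPE in wc_flags, which this change enables it to do):

	struct ib_wc wc;

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		if (!(wc.wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE))
			continue;
		switch (wc.network_hdr_type) {
		case RDMA_NETWORK_ROCE_V1:
			/* GRH carries a RoCE v1 header */
			break;
		case RDMA_NETWORK_IPV4:
		case RDMA_NETWORK_IPV6:
			/* RoCE v2: GRH was rebuilt from the IP header */
			break;
		default:
			break;
		}
	}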
Reviewed-by: Bryan Tan
Signed-off-by: Aditya Sarwade
Signed-off-by: Adit Ranadive
Reviewed-by: Yuval Shaia
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 1 +
 include/uapi/rdma/vmw_pvrdma-abi.h           | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'drivers/infiniband/hw')

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 90aa326fd7c0..8a12dc73b68e 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -389,6 +389,7 @@ retry:
 	wc->dlid_path_bits = cqe->dlid_path_bits;
 	wc->port_num = cqe->port_num;
 	wc->vendor_err = cqe->vendor_err;
+	wc->network_hdr_type = cqe->network_hdr_type;

 	/* Update shared ring state */
 	pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);
diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h
index c8c1d2d6df4d..c6569b0032ec 100644
--- a/include/uapi/rdma/vmw_pvrdma-abi.h
+++ b/include/uapi/rdma/vmw_pvrdma-abi.h
@@ -125,7 +125,8 @@ enum pvrdma_wc_flags {
 	PVRDMA_WC_IP_CSUM_OK		= 1 << 3,
 	PVRDMA_WC_WITH_SMAC		= 1 << 4,
 	PVRDMA_WC_WITH_VLAN		= 1 << 5,
-	PVRDMA_WC_FLAGS_MAX		= PVRDMA_WC_WITH_VLAN,
+	PVRDMA_WC_WITH_NETWORK_HDR_TYPE	= 1 << 6,
+	PVRDMA_WC_FLAGS_MAX		= PVRDMA_WC_WITH_NETWORK_HDR_TYPE,
 };

 struct pvrdma_alloc_ucontext_resp {
@@ -283,7 +284,8 @@ struct pvrdma_cqe {
 	__u8 dlid_path_bits;
 	__u8 port_num;
 	__u8 smac[6];
-	__u8 reserved2[7]; /* Pad to next power of 2 (64). */
+	__u8 network_hdr_type;
+	__u8 reserved2[6]; /* Pad to next power of 2 (64). */
 };

 #endif /* __VMW_PVRDMA_ABI_H__ */
-- cgit v1.2.3-59-g8ed1b

From 14d6c3a83fbcd9c3d19e24d8d5820a912f2615c9 Mon Sep 17 00:00:00 2001
From: Adit Ranadive
Date: Tue, 29 Aug 2017 15:51:30 -0700
Subject: RDMA/vmw_pvrdma: Fix a signedness

Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver")
Signed-off-by: Adit Ranadive
Reviewed-by: Yuval Shaia
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/infiniband/hw')

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 8a12dc73b68e..3562c0c30492 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -299,7 +299,7 @@ static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)

 void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq)
 {
-	int head;
+	unsigned int head;
 	int has_data;

 	if (!cq->is_kernel)
-- cgit v1.2.3-59-g8ed1b
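A standalone illustration of the warning class this fix addresses (the helper below is hypothetical, not the pvrdma ring API): passing a pointer to a signed int to a callee that writes through an unsigned pointer trips -Wpointer-sign, so the index variable is switched to the unsigned type the ring helpers expect.

	/* hypothetical out-parameter helper mirroring a ring-index query */
	static int ring_head(unsigned int *head_out)
	{
		*head_out = 0;
		return 1;	/* ring has data */
	}

	static void flush_example(void)
	{
		unsigned int head;	/* with 'int head', &head would warn:
					 * pointer targets differ in signedness */

		if (ring_head(&head))
			/* consume entries starting at 'head' */;
	}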