diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2019-07-20 07:07:56 +0300 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2019-07-20 07:07:56 +0300 |
commit | c39f2d9db0fd81ea20bb5cce9b3f082ca63753e2 (patch) | |
tree | 8e80ed5601b4fb8880a2ca8e08802bc8b1f850bd /drivers/infiniband/hw/hfi1 | |
parent | Merge branch 'next' into for-linus (diff) | |
parent | Input: alps - fix a mismatch between a condition check and its comment (diff) | |
download | linux-dev-c39f2d9db0fd81ea20bb5cce9b3f082ca63753e2.tar.xz linux-dev-c39f2d9db0fd81ea20bb5cce9b3f082ca63753e2.zip |
Merge branch 'next' into for-linus
Prepare second round of input updates for 5.3 merge window.
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
32 files changed, 529 insertions, 281 deletions
diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig index 7b146b67a80f..0653f4f7b26c 100644 --- a/drivers/infiniband/hw/hfi1/Kconfig +++ b/drivers/infiniband/hw/hfi1/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_HFI1 tristate "Intel OPA Gen1 support" depends on X86_64 && INFINIBAND_RDMAVT && I2C diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9784c6c0d2ec..d5b643a1d9fd 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -4104,6 +4104,9 @@ def_access_ibp_counter(seq_naks); static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH), +[C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH), +[C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH), +[C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH), [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT, CNTR_NORMAL), [C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT, @@ -8365,7 +8368,6 @@ static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd) struct hfi1_devdata *dd = rcd->dd; u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg); - mmiowb(); /* make sure everything before is written */ write_csr(dd, addr, rcd->imask); /* force the above write on the chip and get a value back */ (void)read_csr(dd, addr); @@ -9848,6 +9850,7 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) /* disable the port */ clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); + cancel_work_sync(&ppd->freeze_work); } static inline int init_cpu_counters(struct hfi1_devdata *dd) @@ -11803,12 +11806,10 @@ void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd, << RCV_EGR_INDEX_HEAD_HEAD_SHIFT; write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg); } - mmiowb(); reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) | (((u64)hd & RCV_HDR_HEAD_HEAD_MASK) << RCV_HDR_HEAD_HEAD_SHIFT); write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg); - mmiowb(); } u32 hdrqempty(struct hfi1_ctxtdata *rcd) @@ -13297,15 +13298,18 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * The RMT entries are currently allocated as shown below: * 1. QOS (0 to 128 entries); - * 2. FECN for PSM (num_user_contexts + num_vnic_contexts); + * 2. FECN (num_kernel_context - 1 + num_user_contexts + + * num_vnic_contexts); * 3. VNIC (num_vnic_contexts). - * It should be noted that PSM FECN oversubscribe num_vnic_contexts + * It should be noted that FECN oversubscribe num_vnic_contexts * entries of RMT because both VNIC and PSM could allocate any receive * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, * and PSM FECN must reserve an RMT entry for each possible PSM receive * context. */ rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2); + if (HFI1_CAP_IS_KSET(TID_RDMA)) + rmt_count += num_kernel_contexts - 1; if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count; dd_dev_err(dd, @@ -14028,6 +14032,19 @@ static void init_kdeth_qp(struct hfi1_devdata *dd) } /** + * hfi1_get_qp_map + * @dd: device data + * @idx: index to read + */ +u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx) +{ + u64 reg = read_csr(dd, RCV_QP_MAP_TABLE + (idx / 8) * 8); + + reg >>= (idx % 8) * 8; + return reg; +} + +/** * init_qpmap_table * @dd - device data * @first_ctxt - first context @@ -14288,37 +14305,43 @@ bail: init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1); } -static void init_user_fecn_handling(struct hfi1_devdata *dd, - struct rsm_map_table *rmt) +static void init_fecn_handling(struct hfi1_devdata *dd, + struct rsm_map_table *rmt) { struct rsm_rule_data rrd; u64 reg; - int i, idx, regoff, regidx; + int i, idx, regoff, regidx, start; u8 offset; u32 total_cnt; + if (HFI1_CAP_IS_KSET(TID_RDMA)) + /* Exclude context 0 */ + start = 1; + else + start = dd->first_dyn_alloc_ctxt; + + total_cnt = dd->num_rcv_contexts - start; + /* there needs to be enough room in the map table */ - total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) { - dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n"); + dd_dev_err(dd, "FECN handling disabled - too many contexts allocated\n"); return; } /* * RSM will extract the destination context as an index into the * map table. The destination contexts are a sequential block - * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive). + * in the range start...num_rcv_contexts-1 (inclusive). * Map entries are accessed as offset + extracted value. Adjust * the added offset so this sequence can be placed anywhere in * the table - as long as the entries themselves do not wrap. * There are only enough bits in offset for the table size, so * start with that to allow for a "negative" offset. */ - offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used - - (int)dd->first_dyn_alloc_ctxt); + offset = (u8)(NUM_MAP_ENTRIES + rmt->used - start); - for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used; - i < dd->num_rcv_contexts; i++, idx++) { + for (i = start, idx = rmt->used; i < dd->num_rcv_contexts; + i++, idx++) { /* replace with identity mapping */ regoff = (idx % 8) * 8; regidx = idx / 8; @@ -14440,7 +14463,7 @@ static void init_rxe(struct hfi1_devdata *dd) rmt = alloc_rsm_map_table(dd); /* set up QOS, including the QPN map table */ init_qos(dd, rmt); - init_user_fecn_handling(dd, rmt); + init_fecn_handling(dd, rmt); complete_rsm_map_table(dd, rmt); /* record number of used rsm map entries for vnic */ dd->vnic.rmt_start = rmt->used; @@ -14666,8 +14689,8 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd) */ static int init_asic_data(struct hfi1_devdata *dd) { - unsigned long flags; - struct hfi1_devdata *tmp, *peer = NULL; + unsigned long index; + struct hfi1_devdata *peer; struct hfi1_asic_data *asic_data; int ret = 0; @@ -14676,14 +14699,12 @@ static int init_asic_data(struct hfi1_devdata *dd) if (!asic_data) return -ENOMEM; - spin_lock_irqsave(&hfi1_devs_lock, flags); + xa_lock_irq(&hfi1_dev_table); /* Find our peer device */ - list_for_each_entry(tmp, &hfi1_dev_list, list) { - if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) && - dd->unit != tmp->unit) { - peer = tmp; + xa_for_each(&hfi1_dev_table, index, peer) { + if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(peer)) && + dd->unit != peer->unit) break; - } } if (peer) { @@ -14695,7 +14716,7 @@ static int init_asic_data(struct hfi1_devdata *dd) mutex_init(&dd->asic_data->asic_resource_mutex); } dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */ - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irq(&hfi1_dev_table); /* first one through - set up i2c devices */ if (!peer) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 6c27c1c6a868..b76cf81f927f 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -858,6 +858,9 @@ static inline int idx_from_vl(int vl) /* Per device counter indexes */ enum { C_RCV_OVF = 0, + C_RX_LEN_ERR, + C_RX_ICRC_ERR, + C_RX_EBP, C_RX_TID_FULL, C_RX_TID_INVALID, C_RX_TID_FLGMS, @@ -1442,6 +1445,7 @@ void clear_all_interrupts(struct hfi1_devdata *dd); void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr); void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr); void reset_interrupts(struct hfi1_devdata *dd); +u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx); /* * Interrupt source table. diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index c0800ea5a3f8..ab3589d17aee 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -380,6 +380,9 @@ #define DC_LCB_PRF_TX_FLIT_CNT (DC_LCB_CSRS + 0x000000000418) #define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468) #define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0) +#define RCV_LENGTH_ERR_CNT 0 +#define RCV_ICRC_ERR_CNT 6 +#define RCV_EBP_CNT 9 #define RCV_BUF_OVFL_CNT 10 #define RCV_CONTEXT_EGR_STALL 22 #define RCV_DATA_PKT_CNT 0 diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 7310a5dba420..d47da7b0438f 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -286,7 +286,7 @@ struct diag_pkt { #define RHF_TID_ERR (0x1ull << 59) #define RHF_LEN_ERR (0x1ull << 60) #define RHF_ECC_ERR (0x1ull << 61) -#define RHF_VCRC_ERR (0x1ull << 62) +#define RHF_RESERVED (0x1ull << 62) #define RHF_ICRC_ERR (0x1ull << 63) #define RHF_ERROR_SMASK 0xffe0000000000000ull /* bits 63:53 */ diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 427ba0ce74a5..15efb4a380b2 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1080,6 +1080,77 @@ static int qsfp2_debugfs_release(struct inode *in, struct file *fp) return __qsfp_debugfs_release(in, fp, 1); } +#define EXPROM_WRITE_ENABLE BIT_ULL(14) + +static bool exprom_wp_disabled; + +static int exprom_wp_set(struct hfi1_devdata *dd, bool disable) +{ + u64 gpio_val = 0; + + if (disable) { + gpio_val = EXPROM_WRITE_ENABLE; + exprom_wp_disabled = true; + dd_dev_info(dd, "Disable Expansion ROM Write Protection\n"); + } else { + exprom_wp_disabled = false; + dd_dev_info(dd, "Enable Expansion ROM Write Protection\n"); + } + + write_csr(dd, ASIC_GPIO_OUT, gpio_val); + write_csr(dd, ASIC_GPIO_OE, gpio_val); + + return 0; +} + +static ssize_t exprom_wp_debugfs_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return 0; +} + +static ssize_t exprom_wp_debugfs_write(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + char cdata; + + if (count != 1) + return -EINVAL; + if (get_user(cdata, buf)) + return -EFAULT; + if (cdata == '0') + exprom_wp_set(ppd->dd, false); + else if (cdata == '1') + exprom_wp_set(ppd->dd, true); + else + return -EINVAL; + + return 1; +} + +static unsigned long exprom_in_use; + +static int exprom_wp_debugfs_open(struct inode *in, struct file *fp) +{ + if (test_and_set_bit(0, &exprom_in_use)) + return -EBUSY; + + return 0; +} + +static int exprom_wp_debugfs_release(struct inode *in, struct file *fp) +{ + struct hfi1_pportdata *ppd = private2ppd(fp); + + if (exprom_wp_disabled) + exprom_wp_set(ppd->dd, false); + clear_bit(0, &exprom_in_use); + + return 0; +} + #define DEBUGFS_OPS(nm, readroutine, writeroutine) \ { \ .name = nm, \ @@ -1119,6 +1190,9 @@ static const struct counter_info port_cntr_ops[] = { qsfp1_debugfs_open, qsfp1_debugfs_release), DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write, qsfp2_debugfs_open, qsfp2_debugfs_release), + DEBUGFS_XOPS("exprom_wp", exprom_wp_debugfs_read, + exprom_wp_debugfs_write, exprom_wp_debugfs_open, + exprom_wp_debugfs_release), DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL), DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write), @@ -1302,15 +1376,15 @@ static void _driver_stats_seq_stop(struct seq_file *s, void *v) static u64 hfi1_sps_ints(void) { - unsigned long flags; + unsigned long index, flags; struct hfi1_devdata *dd; u64 sps_ints = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { sps_ints += get_all_cpu_total(dd->int_counter); } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return sps_ints; } diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 2a9d2912f5db..01aa1f132f55 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -72,8 +72,6 @@ */ const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n"; -DEFINE_SPINLOCK(hfi1_devs_lock); -LIST_HEAD(hfi1_dev_list); DEFINE_MUTEX(hfi1_mutex); /* general driver use */ unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU; @@ -175,11 +173,11 @@ int hfi1_count_active_units(void) { struct hfi1_devdata *dd; struct hfi1_pportdata *ppd; - unsigned long flags; + unsigned long index, flags; int pidx, nunits_active = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1) continue; for (pidx = 0; pidx < dd->num_pports; ++pidx) { @@ -190,7 +188,7 @@ int hfi1_count_active_units(void) } } } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return nunits_active; } @@ -264,7 +262,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, hfi1_dbg_fault_suppress_err(verbs_dev)) return; - if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) + if (packet->rhf & RHF_ICRC_ERR) return; if (packet->etype == RHF_RCV_TYPE_BYPASS) { @@ -516,7 +514,9 @@ bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, */ do_cnp = prescan || (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST && - opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE); + opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE) || + opcode == TID_OP(READ_RESP) || + opcode == TID_OP(ACK); /* Call appropriate CNP handler */ if (!ignore_fecn && do_cnp && fecn) @@ -1581,7 +1581,7 @@ static void show_eflags_errs(struct hfi1_packet *packet) u32 rte = rhf_rcv_type_err(packet->rhf); dd_dev_err(rcd->dd, - "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n", + "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s] rte 0x%x\n", rcd->ctxt, packet->rhf, packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "", packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "", @@ -1589,7 +1589,6 @@ static void show_eflags_errs(struct hfi1_packet *packet) packet->rhf & RHF_TID_ERR ? "tid " : "", packet->rhf & RHF_LEN_ERR ? "len " : "", packet->rhf & RHF_ECC_ERR ? "ecc " : "", - packet->rhf & RHF_VCRC_ERR ? "vcrc " : "", packet->rhf & RHF_ICRC_ERR ? "icrc " : "", rte); } diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index 1be49a0d9c11..e9d5cc8b771a 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -112,9 +112,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) */ void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) { - WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list)); - WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list)); - kfree(rcd->groups); rcd->groups = NULL; hfi1_exp_tid_group_init(rcd); diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index 3fd3315d0fb0..93613e5def9b 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -153,6 +153,7 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, char *dash; unsigned long range_start, range_end, i; bool remove = false; + unsigned long bound = 1U << BITS_PER_BYTE; end = strchr(ptr, ','); if (end) @@ -178,6 +179,10 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, BITS_PER_BYTE); break; } + /* Check the inputs */ + if (range_start >= bound || range_end >= bound) + break; + for (i = range_start; i <= range_end; i++) { if (remove) clear_bit(i, fault->opcodes); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 048b5d73ba39..fa45350a9a1d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -54,7 +54,6 @@ #include <linux/list.h> #include <linux/scatterlist.h> #include <linux/slab.h> -#include <linux/idr.h> #include <linux/io.h> #include <linux/fs.h> #include <linux/completion.h> @@ -65,6 +64,7 @@ #include <linux/kthread.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> +#include <linux/xarray.h> #include <rdma/ib_hdrs.h> #include <rdma/opa_addr.h> #include <linux/rhashtable.h> @@ -539,6 +539,37 @@ static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt, mgmt->src_qpn = cpu_to_be32(src_qp & OPA_16B_MGMT_QPN_MASK); } +/** + * hfi1_get_rc_ohdr - get extended header + * @opah - the opaheader + */ +static inline struct ib_other_headers * +hfi1_get_rc_ohdr(struct hfi1_opa_header *opah) +{ + struct ib_other_headers *ohdr; + struct ib_header *hdr = NULL; + struct hfi1_16b_header *hdr_16b = NULL; + + /* Find out where the BTH is */ + if (opah->hdr_type == HFI1_PKT_TYPE_9B) { + hdr = &opah->ibh; + if (ib_get_lnh(hdr) == HFI1_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + } else { + u8 l4; + + hdr_16b = &opah->opah; + l4 = hfi1_16B_get_l4(hdr_16b); + if (l4 == OPA_16B_L4_IB_LOCAL) + ohdr = &hdr_16b->u.oth; + else + ohdr = &hdr_16b->u.l.oth; + } + return ohdr; +} + struct rvt_sge_state; /* @@ -1021,8 +1052,8 @@ struct hfi1_asic_data { struct hfi1_vnic_data { struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT]; struct kmem_cache *txreq_cache; + struct xarray vesws; u8 num_vports; - struct idr vesw_idr; u8 rmt_start; u8 num_ctxt; }; @@ -1041,7 +1072,6 @@ struct sdma_vl_map; typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64); struct hfi1_devdata { struct hfi1_ibdev verbs_dev; /* must be first */ - struct list_head list; /* pointers to related structs for this device */ /* pci access data structure */ struct pci_dev *pcidev; @@ -1426,8 +1456,7 @@ struct hfi1_filedata { struct mm_struct *mm; }; -extern struct list_head hfi1_dev_list; -extern spinlock_t hfi1_devs_lock; +extern struct xarray hfi1_dev_table; struct hfi1_devdata *hfi1_lookup(int unit); static inline unsigned long uctxt_offset(struct hfi1_ctxtdata *uctxt) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index faaaac8fbc55..71cb9525c074 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -49,7 +49,7 @@ #include <linux/netdevice.h> #include <linux/vmalloc.h> #include <linux/delay.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <linux/module.h> #include <linux/printk.h> #include <linux/hrtimer.h> @@ -124,7 +124,7 @@ MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user static inline u64 encode_rcv_header_entry_size(u16 size); -static struct idr hfi1_unit_table; +DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); static int hfi1_create_kctxt(struct hfi1_devdata *dd, struct hfi1_pportdata *ppd) @@ -469,7 +469,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, if (rcd->egrbufs.size < hfi1_max_mtu) { rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu); hfi1_cdbg(PROC, - "ctxt%u: eager bufs size too small. Adjusting to %zu\n", + "ctxt%u: eager bufs size too small. Adjusting to %u\n", rcd->ctxt, rcd->egrbufs.size); } rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE; @@ -805,7 +805,8 @@ static int create_workqueues(struct hfi1_devdata *dd) ppd->hfi1_wq = alloc_workqueue( "hfi%d_%d", - WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE, + WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | + WQ_MEM_RECLAIM, HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES, dd->unit, pidx); if (!ppd->hfi1_wq) @@ -1018,21 +1019,9 @@ done: return ret; } -static inline struct hfi1_devdata *__hfi1_lookup(int unit) -{ - return idr_find(&hfi1_unit_table, unit); -} - struct hfi1_devdata *hfi1_lookup(int unit) { - struct hfi1_devdata *dd; - unsigned long flags; - - spin_lock_irqsave(&hfi1_devs_lock, flags); - dd = __hfi1_lookup(unit); - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - - return dd; + return xa_load(&hfi1_dev_table, unit); } /* @@ -1200,7 +1189,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) /* * Release our hold on the shared asic data. If we are the last one, * return the structure to be finalized outside the lock. Must be - * holding hfi1_devs_lock. + * holding hfi1_dev_table lock. */ static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd) { @@ -1236,13 +1225,10 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd) struct hfi1_asic_data *ad; unsigned long flags; - spin_lock_irqsave(&hfi1_devs_lock, flags); - if (!list_empty(&dd->list)) { - idr_remove(&hfi1_unit_table, dd->unit); - list_del_init(&dd->list); - } + xa_lock_irqsave(&hfi1_dev_table, flags); + __xa_erase(&hfi1_dev_table, dd->unit); ad = release_asic_data(dd); - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); finalize_asic_data(dd, ad); free_platform_config(dd); @@ -1286,13 +1272,10 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) * Must be done via verbs allocator, because the verbs cleanup process * both does cleanup and free of the data structure. * "extra" is for chip-specific data. - * - * Use the idr mechanism to get a unit number for this unit. */ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) { - unsigned long flags; struct hfi1_devdata *dd; int ret, nports; @@ -1307,21 +1290,10 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, dd->pport = (struct hfi1_pportdata *)(dd + 1); dd->pcidev = pdev; pci_set_drvdata(pdev, dd); - - INIT_LIST_HEAD(&dd->list); - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&hfi1_devs_lock, flags); - - ret = idr_alloc(&hfi1_unit_table, dd, 0, 0, GFP_NOWAIT); - if (ret >= 0) { - dd->unit = ret; - list_add(&dd->list, &hfi1_dev_list); - } dd->node = NUMA_NO_NODE; - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - idr_preload_end(); - + ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b, + GFP_KERNEL); if (ret < 0) { dev_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); @@ -1522,8 +1494,6 @@ static int __init hfi1_mod_init(void) * These must be called before the driver is registered with * the PCI subsystem. */ - idr_init(&hfi1_unit_table); - hfi1_dbg_init(); ret = pci_register_driver(&hfi1_pci_driver); if (ret < 0) { @@ -1534,7 +1504,6 @@ static int __init hfi1_mod_init(void) bail_dev: hfi1_dbg_exit(); - idr_destroy(&hfi1_unit_table); dev_cleanup(); bail: return ret; @@ -1552,7 +1521,7 @@ static void __exit hfi1_mod_cleanup(void) node_affinity_destroy_all(); hfi1_dbg_exit(); - idr_destroy(&hfi1_unit_table); + WARN_ON(!xa_empty(&hfi1_dev_table)); dispose_firmware(); /* asymmetric with obtain_firmware() */ dev_cleanup(); } @@ -2071,7 +2040,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.size = alloced_bytes; hfi1_cdbg(PROC, - "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n", + "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB\n", rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024); diff --git a/drivers/infiniband/hw/hfi1/opfn.h b/drivers/infiniband/hw/hfi1/opfn.h index 5f2011cabc25..62f93c1dc082 100644 --- a/drivers/infiniband/hw/hfi1/opfn.h +++ b/drivers/infiniband/hw/hfi1/opfn.h @@ -47,12 +47,14 @@ * for future transactions */ +#include <linux/workqueue.h> +#include <rdma/ib_verbs.h> +#include <rdma/rdmavt_qp.h> + /* STL Verbs Extended */ #define IB_BTHE_E_SHIFT 24 #define HFI1_VERBS_E_ATOMIC_VADDR U64_MAX -struct ib_atomic_eth; - enum hfi1_opfn_codes { STL_VERBS_EXTD_NONE = 0, STL_VERBS_EXTD_TID_RDMA, diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index a1de566fe95e..4e5c2d1b8cfa 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -952,6 +952,22 @@ void sc_disable(struct send_context *sc) } } spin_unlock(&sc->release_lock); + + write_seqlock(&sc->waitlock); + while (!list_empty(&sc->piowait)) { + struct iowait *wait; + struct rvt_qp *qp; + struct hfi1_qp_priv *priv; + + wait = list_first_entry(&sc->piowait, struct iowait, list); + qp = iowait_to_qp(wait); + priv = qp->priv; + list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; + hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); + } + write_sequnlock(&sc->waitlock); + spin_unlock_irq(&sc->alloc_lock); } @@ -1427,7 +1443,8 @@ void sc_stop(struct send_context *sc, int flag) * @cb: optional callback to call when the buffer is finished sending * @arg: argument for cb * - * Return a pointer to a PIO buffer if successful, NULL if not enough room. + * Return a pointer to a PIO buffer, NULL if not enough room, -ECOMM + * when link is down. */ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, pio_release_cb cb, void *arg) @@ -1443,7 +1460,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, spin_lock_irqsave(&sc->alloc_lock, flags); if (!(sc->flags & SCF_ENABLED)) { spin_unlock_irqrestore(&sc->alloc_lock, flags); - goto done; + return ERR_PTR(-ECOMM); } retry: @@ -1578,7 +1595,6 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) sc_del_credit_return_intr(sc); trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl); if (needint) { - mmiowb(); sc_return_credits(sc); } } diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index eba300330a02..4e0e9fc0a777 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -742,6 +742,8 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) iowait_wakeup, iowait_sdma_drained, hfi1_init_priority); + /* Init to a value to start the running average correctly */ + priv->s_running_pkt_size = piothreshold / 2; return priv; } diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 5991211d72bd..7c8cfb149da0 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -140,10 +140,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, case OP(RDMA_READ_RESPONSE_LAST): case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); /* FALLTHROUGH */ case OP(ATOMIC_ACKNOWLEDGE): /* @@ -343,7 +340,8 @@ write_resp: break; e->sent = 1; - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); + /* Do not free e->rdma_sge until all data are received */ + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); break; case TID_OP(READ_RESP): @@ -1434,7 +1432,7 @@ void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn) pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, sc_to_vlt(ppd->dd, sc5), plen); pbuf = sc_buffer_alloc(rcd->sc, plen, NULL, NULL); - if (!pbuf) { + if (IS_ERR_OR_NULL(pbuf)) { /* * We have no room to send at the moment. Pass * responsibility for sending the ACK to the send engine @@ -1703,6 +1701,36 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) } } +/** + * hfi1_rc_verbs_aborted - handle abort status + * @qp: the QP + * @opah: the opa header + * + * This code modifies both ACK bit in BTH[2] + * and the s_flags to go into send one mode. + * + * This serves to throttle the send engine to only + * send a single packet in the likely case the + * a link has gone down. + */ +void hfi1_rc_verbs_aborted(struct rvt_qp *qp, struct hfi1_opa_header *opah) +{ + struct ib_other_headers *ohdr = hfi1_get_rc_ohdr(opah); + u8 opcode = ib_bth_get_opcode(ohdr); + u32 psn; + + /* ignore responses */ + if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) && + opcode <= OP(ATOMIC_ACKNOWLEDGE)) || + opcode == TID_OP(READ_RESP) || + opcode == TID_OP(WRITE_RESP)) + return; + + psn = ib_bth_get_psn(ohdr) | IB_BTH_REQ_ACK; + ohdr->bth[2] = cpu_to_be32(psn); + qp->s_flags |= RVT_S_SEND_ONE; +} + /* * This should be called with the QP s_lock held and interrupts disabled. */ @@ -1711,8 +1739,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) struct ib_other_headers *ohdr; struct hfi1_qp_priv *priv = qp->priv; struct rvt_swqe *wqe; - struct ib_header *hdr = NULL; - struct hfi1_16b_header *hdr_16b = NULL; u32 opcode, head, tail; u32 psn; struct tid_rdma_request *req; @@ -1721,24 +1747,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK)) return; - /* Find out where the BTH is */ - if (priv->hdr_type == HFI1_PKT_TYPE_9B) { - hdr = &opah->ibh; - if (ib_get_lnh(hdr) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - } else { - u8 l4; - - hdr_16b = &opah->opah; - l4 = hfi1_16B_get_l4(hdr_16b); - if (l4 == OPA_16B_L4_IB_LOCAL) - ohdr = &hdr_16b->u.oth; - else - ohdr = &hdr_16b->u.l.oth; - } - + ohdr = hfi1_get_rc_ohdr(opah); opcode = ib_bth_get_opcode(ohdr); if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) || @@ -1836,7 +1845,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) qp->s_last = s_last; /* see post_send() */ barrier(); - rvt_put_swqe(wqe); + rvt_put_qp_swqe(qp, wqe); rvt_qp_swqe_complete(qp, wqe, ib_hfi1_wc_opcode[wqe->wr.opcode], @@ -1884,7 +1893,7 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, u32 s_last; trdma_clean_swqe(qp, wqe); - rvt_put_swqe(wqe); + rvt_put_qp_swqe(qp, wqe); rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); @@ -2643,10 +2652,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, len = be32_to_cpu(reth->length); if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = get_ib_reth_vaddr(reth); @@ -3088,10 +3094,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); reth = &ohdr->u.rc.reth; len = be32_to_cpu(reth->length); if (len) { @@ -3166,10 +3169,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); /* Process OPFN special virtual address */ if (opfn) { opfn_conn_response(qp, e, ateth); diff --git a/drivers/infiniband/hw/hfi1/rc.h b/drivers/infiniband/hw/hfi1/rc.h index 8e0935b9bf2a..5ed5e85d5841 100644 --- a/drivers/infiniband/hw/hfi1/rc.h +++ b/drivers/infiniband/hw/hfi1/rc.h @@ -41,6 +41,14 @@ static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, return rvt_restart_sge(ss, wqe, len); } +static inline void release_rdma_sge_mr(struct rvt_ack_entry *e) +{ + if (e->rdma_sge.mr) { + rvt_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } +} + struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev, u8 *prev_ack, bool *scheduled); int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val, diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 124a3ec1e15c..23ac6057b211 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -524,7 +524,7 @@ void _hfi1_do_send(struct work_struct *work) /** * hfi1_do_send - perform a send on a QP - * @work: contains a pointer to the QP + * @qp: a pointer to the QP * @in_thread: true if in a workqueue thread * * Process entries in the send work queue until credit or queue is diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index b0110728f541..28b66bd70b74 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -405,19 +405,33 @@ static void sdma_flush(struct sdma_engine *sde) struct sdma_txreq *txp, *txp_next; LIST_HEAD(flushlist); unsigned long flags; + uint seq; /* flush from head to tail */ sdma_flush_descq(sde); spin_lock_irqsave(&sde->flushlist_lock, flags); /* copy flush list */ - list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) { - list_del_init(&txp->list); - list_add_tail(&txp->list, &flushlist); - } + list_splice_init(&sde->flushlist, &flushlist); spin_unlock_irqrestore(&sde->flushlist_lock, flags); /* flush from flush list */ list_for_each_entry_safe(txp, txp_next, &flushlist, list) complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED); + /* wakeup QPs orphaned on the dmawait list */ + do { + struct iowait *w, *nw; + + seq = read_seqbegin(&sde->waitlock); + if (!list_empty(&sde->dmawait)) { + write_seqlock(&sde->waitlock); + list_for_each_entry_safe(w, nw, &sde->dmawait, list) { + if (w->wakeup) { + w->wakeup(w, SDMA_AVAIL_REASON); + list_del_init(&w->list); + } + } + write_sequnlock(&sde->waitlock); + } + } while (read_seqretry(&sde->waitlock, seq)); } /* @@ -2413,7 +2427,7 @@ unlock_noconn: list_add_tail(&tx->list, &sde->flushlist); spin_unlock(&sde->flushlist_lock); iowait_inc_wait_count(wait, tx->num_desc); - schedule_work(&sde->flush_worker); + queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker); ret = -ECOMM; goto unlock; nodesc: @@ -2511,7 +2525,7 @@ unlock_noconn: iowait_inc_wait_count(wait, tx->num_desc); } spin_unlock(&sde->flushlist_lock); - schedule_work(&sde->flush_worker); + queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker); ret = -ECOMM; goto update_tail; nodesc: diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 43cbce7a19ea..aa9c8d3ef87b 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -67,8 +67,6 @@ static u32 mask_generation(u32 a) #define TID_RDMA_DESTQP_FLOW_SHIFT 11 #define TID_RDMA_DESTQP_FLOW_MASK 0x1f -#define TID_FLOW_SW_PSN BIT(0) - #define TID_OPFN_QP_CTXT_MASK 0xff #define TID_OPFN_QP_CTXT_SHIFT 56 #define TID_OPFN_QP_KDETH_MASK 0xff @@ -128,6 +126,15 @@ static int make_tid_rdma_ack(struct rvt_qp *qp, struct ib_other_headers *ohdr, struct hfi1_pkt_state *ps); static void hfi1_do_tid_send(struct rvt_qp *qp); +static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx); +static void tid_rdma_rcv_err(struct hfi1_packet *packet, + struct ib_other_headers *ohdr, + struct rvt_qp *qp, u32 psn, int diff, bool fecn); +static void update_r_next_psn_fecn(struct hfi1_packet *packet, + struct hfi1_qp_priv *priv, + struct hfi1_ctxtdata *rcd, + struct tid_rdma_flow *flow, + bool fecn); static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) { @@ -305,9 +312,7 @@ static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi, if (qp->ibqp.qp_num == 0) ctxt = 0; else - ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) % - (dd->n_krcv_queues - 1)) + 1; - + ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift); return dd->rcd[ctxt]; } @@ -776,7 +781,6 @@ int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp) rcd->flows[fs->index].generation = fs->generation; fs->generation = kern_setup_hw_flow(rcd, fs->index); fs->psn = 0; - fs->flags = 0; dequeue_tid_waiter(rcd, &rcd->flow_queue, qp); /* get head before dropping lock */ fqp = first_qp(rcd, &rcd->flow_queue); @@ -1807,6 +1811,7 @@ sync_check: goto done; hfi1_kern_clear_hw_flow(req->rcd, qp); + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; req->state = TID_REQUEST_ACTIVE; } @@ -2036,10 +2041,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet, if (psn != e->psn || len != req->total_len) goto unlock; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); rkey = be32_to_cpu(reth->rkey); vaddr = get_ib_reth_vaddr(reth); @@ -2238,7 +2240,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) struct ib_reth *reth; struct hfi1_qp_priv *qpriv = qp->priv; u32 bth0, psn, len, rkey; - bool is_fecn; + bool fecn; u8 next; u64 vaddr; int diff; @@ -2248,7 +2250,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); trace_hfi1_rsp_rcv_tid_read_req(qp, psn); @@ -2267,9 +2269,8 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) diff = delta_psn(psn, qp->r_psn); if (unlikely(diff)) { - if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff)) - return; - goto send_ack; + tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn); + return; } /* We've verified the request, insert it into the ack queue. */ @@ -2285,10 +2286,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); rkey = be32_to_cpu(reth->rkey); qp->r_len = len; @@ -2324,11 +2322,11 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) /* Schedule the send tasklet. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; nack_inv_unlock: @@ -2345,8 +2343,6 @@ nack_acc: rvt_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; -send_ack: - hfi1_send_rc_ack(packet, is_fecn); } u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, @@ -2463,12 +2459,12 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) struct tid_rdma_request *req; struct tid_rdma_flow *flow; u32 opcode, aeth; - bool is_fecn; + bool fecn; unsigned long flags; u32 kpsn, ipsn; trace_hfi1_sender_rcv_tid_read_resp(qp); - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); kpsn = mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -2481,8 +2477,43 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) flow = &req->flows[req->clear_tail]; /* When header suppression is disabled */ - if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) + if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) { + update_r_next_psn_fecn(packet, priv, rcd, flow, fecn); + + if (cmp_psn(kpsn, flow->flow_state.r_next_psn)) + goto ack_done; + flow->flow_state.r_next_psn = mask_psn(kpsn + 1); + /* + * Copy the payload to destination buffer if this packet is + * delivered as an eager packet due to RSM rule and FECN. + * The RSM rule selects FECN bit in BTH and SH bit in + * KDETH header and therefore will not match the last + * packet of each segment that has SH bit cleared. + */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) { + struct rvt_sge_state ss; + u32 len; + u32 tlen = packet->tlen; + u16 hdrsize = packet->hlen; + u8 pad = packet->pad; + u8 extra_bytes = pad + packet->extra_byte + + (SIZE_OF_CRC << 2); + u32 pmtu = qp->pmtu; + + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) + goto ack_op_err; + len = restart_sge(&ss, req->e.swqe, ipsn, pmtu); + if (unlikely(len < pmtu)) + goto ack_op_err; + rvt_copy_sge(qp, &ss, packet->payload, pmtu, false, + false); + /* Raise the sw sequence check flag for next packet */ + priv->s_flags |= HFI1_R_TID_SW_PSN; + } + goto ack_done; + } + flow->flow_state.r_next_psn = mask_psn(kpsn + 1); req->ack_pending--; priv->pending_tid_r_segs--; qp->s_num_rd_atomic--; @@ -2524,6 +2555,7 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) req->comp_seg == req->cur_seg) || priv->tid_r_comp == priv->tid_r_reqs) { hfi1_kern_clear_hw_flow(priv->rcd, qp); + priv->s_flags &= ~HFI1_R_TID_SW_PSN; if (req->state == TID_REQUEST_SYNC) req->state = TID_REQUEST_ACTIVE; } @@ -2545,8 +2577,6 @@ ack_op_err: ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - hfi1_send_rc_ack(packet, is_fecn); } void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp) @@ -2773,9 +2803,9 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return ret; } - if (priv->flow_state.flags & TID_FLOW_SW_PSN) { + if (priv->s_flags & HFI1_R_TID_SW_PSN) { diff = cmp_psn(psn, - priv->flow_state.r_next_psn); + flow->flow_state.r_next_psn); if (diff > 0) { if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) restart_tid_rdma_read_req(rcd, @@ -2811,22 +2841,15 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, qp->r_flags &= ~RVT_R_RDMAR_SEQ; } - priv->flow_state.r_next_psn++; + flow->flow_state.r_next_psn = + mask_psn(psn + 1); } else { - u64 reg; u32 last_psn; - /* - * The only sane way to get the amount of - * progress is to read the HW flow state. - */ - reg = read_uctxt_csr(dd, rcd->ctxt, - RCV_TID_FLOW_TABLE + - (8 * flow->idx)); - last_psn = mask_psn(reg); - - priv->flow_state.r_next_psn = last_psn; - priv->flow_state.flags |= TID_FLOW_SW_PSN; + last_psn = read_r_next_psn(dd, rcd->ctxt, + flow->idx); + flow->flow_state.r_next_psn = last_psn; + priv->s_flags |= HFI1_R_TID_SW_PSN; /* * If no request has been restarted yet, * restart the current one. @@ -2891,10 +2914,11 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, struct rvt_ack_entry *e; struct tid_rdma_request *req; struct tid_rdma_flow *flow; + int diff = 0; trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ", packet->rhf); - if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) + if (packet->rhf & RHF_ICRC_ERR) return ret; packet->ohdr = &hdr->u.oth; @@ -2974,17 +2998,10 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, switch (rte) { case RHF_RTE_EXPECTED_FLOW_SEQ_ERR: if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) { - u64 reg; - qpriv->s_flags |= HFI1_R_TID_SW_PSN; - /* - * The only sane way to get the amount of - * progress is to read the HW flow state. - */ - reg = read_uctxt_csr(dd, rcd->ctxt, - RCV_TID_FLOW_TABLE + - (8 * flow->idx)); - flow->flow_state.r_next_psn = mask_psn(reg); + flow->flow_state.r_next_psn = + read_r_next_psn(dd, rcd->ctxt, + flow->idx); qpriv->r_next_psn_kdeth = flow->flow_state.r_next_psn; goto nak_psn; @@ -2997,10 +3014,12 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, * mismatch could be due to packets that were * already in flight. */ - if (psn != flow->flow_state.r_next_psn) { - psn = flow->flow_state.r_next_psn; + diff = cmp_psn(psn, + flow->flow_state.r_next_psn); + if (diff > 0) goto nak_psn; - } + else if (diff < 0) + break; qpriv->s_nak_state = 0; /* @@ -3011,8 +3030,10 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, if (psn == full_flow_psn(flow, flow->flow_state.lpsn)) ret = false; + flow->flow_state.r_next_psn = + mask_psn(psn + 1); qpriv->r_next_psn_kdeth = - ++flow->flow_state.r_next_psn; + flow->flow_state.r_next_psn; } break; @@ -3517,8 +3538,10 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->r_tid_alloc == qpriv->r_tid_head) { /* If all data has been received, clear the flow */ if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS && - !qpriv->alloc_w_segs) + !qpriv->alloc_w_segs) { hfi1_kern_clear_hw_flow(rcd, qp); + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; + } break; } @@ -3544,8 +3567,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->sync_pt && !qpriv->alloc_w_segs) { hfi1_kern_clear_hw_flow(rcd, qp); qpriv->sync_pt = false; - if (qpriv->s_flags & HFI1_R_TID_SW_PSN) - qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; } /* Allocate flow if we don't have one */ @@ -3687,7 +3709,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) struct hfi1_qp_priv *qpriv = qp->priv; struct tid_rdma_request *req; u32 bth0, psn, len, rkey, num_segs; - bool is_fecn; + bool fecn; u8 next; u64 vaddr; int diff; @@ -3696,7 +3718,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); trace_hfi1_rsp_rcv_tid_write_req(qp, psn); @@ -3713,9 +3735,8 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len); diff = delta_psn(psn, qp->r_psn); if (unlikely(diff)) { - if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff)) - return; - goto send_ack; + tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn); + return; } /* @@ -3751,10 +3772,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) goto update_head; } - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); /* The length needs to be in multiples of PAGE_SIZE */ if (!len || len & ~PAGE_MASK) @@ -3834,11 +3852,11 @@ update_head: /* Schedule the send tasklet. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; nack_inv_unlock: @@ -3855,8 +3873,6 @@ nack_acc: rvt_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; -send_ack: - hfi1_send_rc_ack(packet, is_fecn); } u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, @@ -4073,10 +4089,10 @@ void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet) struct tid_rdma_flow *flow; enum ib_wc_status status; u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen; - bool is_fecn; + bool fecn; unsigned long flags; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -4216,7 +4232,6 @@ void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet) qpriv->s_tid_cur = i; } qp->s_flags &= ~HFI1_S_WAIT_TID_RESP; - hfi1_schedule_tid_send(qp); goto ack_done; @@ -4225,9 +4240,9 @@ ack_op_err: ack_err: rvt_error_qp(qp, status); ack_done: + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - hfi1_send_rc_ack(packet, is_fecn); } bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe, @@ -4307,7 +4322,9 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) unsigned long flags; u32 psn, next; u8 opcode; + bool fecn; + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -4320,9 +4337,53 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) req = ack_to_tid_req(e); flow = &req->flows[req->clear_tail]; if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) { + update_r_next_psn_fecn(packet, priv, rcd, flow, fecn); + if (cmp_psn(psn, flow->flow_state.r_next_psn)) goto send_nak; - flow->flow_state.r_next_psn++; + + flow->flow_state.r_next_psn = mask_psn(psn + 1); + /* + * Copy the payload to destination buffer if this packet is + * delivered as an eager packet due to RSM rule and FECN. + * The RSM rule selects FECN bit in BTH and SH bit in + * KDETH header and therefore will not match the last + * packet of each segment that has SH bit cleared. + */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) { + struct rvt_sge_state ss; + u32 len; + u32 tlen = packet->tlen; + u16 hdrsize = packet->hlen; + u8 pad = packet->pad; + u8 extra_bytes = pad + packet->extra_byte + + (SIZE_OF_CRC << 2); + u32 pmtu = qp->pmtu; + + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) + goto send_nak; + len = req->comp_seg * req->seg_len; + len += delta_psn(psn, + full_flow_psn(flow, flow->flow_state.spsn)) * + pmtu; + if (unlikely(req->total_len - len < pmtu)) + goto send_nak; + + /* + * The e->rdma_sge field is set when TID RDMA WRITE REQ + * is first received and is never modified thereafter. + */ + ss.sge = e->rdma_sge; + ss.sg_list = NULL; + ss.num_sge = 1; + ss.total_len = req->total_len; + rvt_skip_sge(&ss, len, false); + rvt_copy_sge(qp, &ss, packet->payload, pmtu, false, + false); + /* Raise the sw sequence check flag for next packet */ + priv->r_next_psn_kdeth = mask_psn(psn + 1); + priv->s_flags |= HFI1_R_TID_SW_PSN; + } goto exit; } flow->flow_state.r_next_psn = mask_psn(psn + 1); @@ -4347,6 +4408,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) priv->r_tid_ack = priv->r_tid_tail; if (opcode == TID_OP(WRITE_DATA_LAST)) { + release_rdma_sge_mr(e); for (next = priv->r_tid_tail + 1; ; next++) { if (next > rvt_size_atomic(&dev->rdi)) next = 0; @@ -4386,6 +4448,8 @@ done: hfi1_schedule_tid_send(qp); exit: priv->r_next_psn_kdeth = flow->flow_state.r_next_psn; + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); return; @@ -4487,12 +4551,11 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) struct tid_rdma_request *req; struct tid_rdma_flow *flow; u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn; - bool is_fecn; unsigned long flags; u16 fidx; trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0); - is_fecn = process_ecn(qp, packet); + process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth); req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn)); @@ -4846,10 +4909,10 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) struct tid_rdma_flow *flow; struct tid_flow_state *fs = &qpriv->flow_state; u32 psn, generation, idx, gen_next; - bool is_fecn; + bool fecn; unsigned long flags; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT; @@ -4940,6 +5003,8 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) qpriv->s_flags |= RVT_S_ACK_PENDING; hfi1_schedule_tid_send(qp); bail: + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); } @@ -5449,3 +5514,48 @@ bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e) } return false; } + +static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx) +{ + u64 reg; + + /* + * The only sane way to get the amount of + * progress is to read the HW flow state. + */ + reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx)); + return mask_psn(reg); +} + +static void tid_rdma_rcv_err(struct hfi1_packet *packet, + struct ib_other_headers *ohdr, + struct rvt_qp *qp, u32 psn, int diff, bool fecn) +{ + unsigned long flags; + + tid_rdma_rcv_error(packet, ohdr, qp, psn, diff); + if (fecn) { + spin_lock_irqsave(&qp->s_lock, flags); + qp->s_flags |= RVT_S_ECN; + spin_unlock_irqrestore(&qp->s_lock, flags); + } +} + +static void update_r_next_psn_fecn(struct hfi1_packet *packet, + struct hfi1_qp_priv *priv, + struct hfi1_ctxtdata *rcd, + struct tid_rdma_flow *flow, + bool fecn) +{ + /* + * If a start/middle packet is delivered here due to + * RSM rule and FECN, we need to update the r_next_psn. + */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER && + !(priv->s_flags & HFI1_R_TID_SW_PSN)) { + struct hfi1_devdata *dd = rcd->dd; + + flow->flow_state.r_next_psn = + read_r_next_psn(dd, rcd->ctxt, flow->idx); + } +} diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h index 53ab24ef4f02..1c536185261e 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.h +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -76,10 +76,8 @@ struct tid_rdma_qp_params { struct tid_flow_state { u32 generation; u32 psn; - u32 r_next_psn; /* next PSN to be received (in TID space) */ u8 index; u8 last_index; - u8 flags; }; enum tid_rdma_req_state { diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index e62171fb7379..de7a87392b8d 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -86,14 +86,14 @@ DECLARE_EVENT_CLASS(hfi1_trace_template, * actual function to work and can not be in a macro. */ #define __hfi1_trace_def(lvl) \ -void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ +void __printf(2, 3) __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ \ DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \ TP_PROTO(const char *function, struct va_format *vaf), \ TP_ARGS(function, vaf)) #define __hfi1_trace_fn(lvl) \ -void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ +void __printf(2, 3) __hfi1_trace_##lvl(const char *func, char *fmt, ...)\ { \ struct va_format vaf = { \ .fmt = fmt, \ diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h index 548dfc45a407..4388b594ed1b 100644 --- a/drivers/infiniband/hw/hfi1/trace_tid.h +++ b/drivers/infiniband/hw/hfi1/trace_tid.h @@ -53,7 +53,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent); "tid_r_comp %u pending_tid_r_segs %u " \ "s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \ "s_state 0x%x hw_flow_index %u generation 0x%x " \ - "fpsn 0x%x flow_flags 0x%x" + "fpsn 0x%x" #define TID_REQ_PRN "[%s] qpn 0x%x newreq %u opcode 0x%x psn 0x%x lpsn 0x%x " \ "cur_seg %u comp_seg %u ack_seg %u alloc_seg %u " \ @@ -71,7 +71,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent); "pending_tid_w_segs %u sync_pt %s " \ "ps_nak_psn 0x%x ps_nak_state 0x%x " \ "prnr_nak_state 0x%x hw_flow_index %u generation "\ - "0x%x fpsn 0x%x flow_flags 0x%x resync %s" \ + "0x%x fpsn 0x%x resync %s" \ "r_next_psn_kdeth 0x%x" #define TID_WRITE_SENDER_PRN "[%s] qpn 0x%x newreq %u s_tid_cur %u " \ @@ -973,7 +973,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __field(u32, hw_flow_index) __field(u32, generation) __field(u32, fpsn) - __field(u32, flow_flags) ), TP_fast_assign(/* assign */ struct hfi1_qp_priv *priv = qp->priv; @@ -991,7 +990,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __entry->hw_flow_index = priv->flow_state.index; __entry->generation = priv->flow_state.generation; __entry->fpsn = priv->flow_state.psn; - __entry->flow_flags = priv->flow_state.flags; ), TP_printk(/* print */ TID_READ_SENDER_PRN, @@ -1007,8 +1005,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __entry->s_state, __entry->hw_flow_index, __entry->generation, - __entry->fpsn, - __entry->flow_flags + __entry->fpsn ) ); @@ -1338,7 +1335,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */ __field(u32, hw_flow_index) __field(u32, generation) __field(u32, fpsn) - __field(u32, flow_flags) __field(bool, resync) __field(u32, r_next_psn_kdeth) ), @@ -1360,7 +1356,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */ __entry->hw_flow_index = priv->flow_state.index; __entry->generation = priv->flow_state.generation; __entry->fpsn = priv->flow_state.psn; - __entry->flow_flags = priv->flow_state.flags; __entry->resync = priv->resync; __entry->r_next_psn_kdeth = priv->r_next_psn_kdeth; ), @@ -1381,7 +1376,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */ __entry->hw_flow_index, __entry->generation, __entry->fpsn, - __entry->flow_flags, __entry->resync ? "yes" : "no", __entry->r_next_psn_kdeth ) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index f88ad425664a..4cb0fce5c096 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -683,7 +683,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); - if (pbuf) { + if (!IS_ERR_OR_NULL(pbuf)) { trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); @@ -738,7 +738,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); - if (pbuf) { + if (!IS_ERR_OR_NULL(pbuf)) { trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 0cd71ce7cc71..3592a9ec155e 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -324,6 +324,9 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, u32 *tidlist = NULL; struct tid_user_buf *tidbuf; + if (!PAGE_ALIGNED(tinfo->vaddr)) + return -EINVAL; + tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); if (!tidbuf) return -ENOMEM; diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 24b592c6522e..02eee8eff1db 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -104,8 +104,9 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np bool writable, struct page **pages) { int ret; + unsigned int gup_flags = FOLL_LONGTERM | (writable ? FOLL_WRITE : 0); - ret = get_user_pages_fast(vaddr, npages, writable, pages); + ret = get_user_pages_fast(vaddr, npages, gup_flags, pages); if (ret < 0) return ret; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8bfbc6d7ea34..fd754a16475a 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -130,20 +130,16 @@ static int defer_packet_queue( { struct hfi1_user_sdma_pkt_q *pq = container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy); - struct user_sdma_txreq *tx = - container_of(txreq, struct user_sdma_txreq, txreq); - if (sdma_progress(sde, seq, txreq)) { - if (tx->busycount++ < MAX_DEFER_RETRY_COUNT) - goto eagain; - } + write_seqlock(&sde->waitlock); + if (sdma_progress(sde, seq, txreq)) + goto eagain; /* * We are assuming that if the list is enqueued somewhere, it * is to the dmawait list since that is the only place where * it is supposed to be enqueued. */ xchg(&pq->state, SDMA_PKT_Q_DEFERRED); - write_seqlock(&sde->waitlock); if (list_empty(&pq->busy.list)) { iowait_get_priority(&pq->busy); iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); @@ -151,6 +147,7 @@ static int defer_packet_queue( write_sequnlock(&sde->waitlock); return -EBUSY; eagain: + write_sequnlock(&sde->waitlock); return -EAGAIN; } @@ -804,7 +801,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts) tx->flags = 0; tx->req = req; - tx->busycount = 0; INIT_LIST_HEAD(&tx->list); /* diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 14dfd757dafd..4d8510b0fc38 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -245,7 +245,6 @@ struct user_sdma_txreq { struct list_head list; struct user_sdma_request *req; u16 flags; - unsigned int busycount; u16 seqnum; }; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 55a56b3d7f83..bad3229bad37 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -638,6 +638,8 @@ static void verbs_sdma_complete( struct hfi1_opa_header *hdr; hdr = &tx->phdr.hdr; + if (unlikely(status == SDMA_TXREQ_S_ABORTED)) + hfi1_rc_verbs_aborted(qp, hdr); hfi1_rc_send_complete(qp, hdr); } spin_unlock(&qp->s_lock); @@ -1037,10 +1039,10 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (cb) iowait_pio_inc(&priv->s_iowait); pbuf = sc_buffer_alloc(sc, plen, cb, qp); - if (unlikely(!pbuf)) { + if (unlikely(IS_ERR_OR_NULL(pbuf))) { if (cb) verbs_pio_complete(qp, 0); - if (ppd->host_link_state != HLS_UP_ACTIVE) { + if (IS_ERR(pbuf)) { /* * If we have filled the PIO buffers to capacity and are * not in an active state this request is not going to @@ -1095,15 +1097,15 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); pio_bail: + spin_lock_irqsave(&qp->s_lock, flags); if (qp->s_wqe) { - spin_lock_irqsave(&qp->s_lock, flags); rvt_send_complete(qp, qp->s_wqe, wc_status); - spin_unlock_irqrestore(&qp->s_lock, flags); } else if (qp->ibqp.qp_type == IB_QPT_RC) { - spin_lock_irqsave(&qp->s_lock, flags); + if (unlikely(wc_status == IB_WC_GENERAL_ERR)) + hfi1_rc_verbs_aborted(qp, &ps->s_txreq->phdr.hdr); hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr); - spin_unlock_irqrestore(&qp->s_lock, flags); } + spin_unlock_irqrestore(&qp->s_lock, flags); ret = 0; @@ -1223,15 +1225,16 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, case IB_QPT_UD: break; case IB_QPT_UC: - case IB_QPT_RC: { + case IB_QPT_RC: + priv->s_running_pkt_size = + (tx->s_cur_size + priv->s_running_pkt_size) / 2; if (piothreshold && - tx->s_cur_size <= min(piothreshold, qp->pmtu) && + priv->s_running_pkt_size <= min(piothreshold, qp->pmtu) && (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; - } default: break; } @@ -1355,8 +1358,6 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) rdi->dparms.props.max_cq = hfi1_max_cqs; rdi->dparms.props.max_ah = hfi1_max_ahs; rdi->dparms.props.max_cqe = hfi1_max_cqes; - rdi->dparms.props.max_mr = rdi->lkey_table.max; - rdi->dparms.props.max_fmr = rdi->lkey_table.max; rdi->dparms.props.max_map_per_fmr = 32767; rdi->dparms.props.max_pd = hfi1_max_pds; rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; @@ -1739,15 +1740,15 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, static u64 hfi1_sps_ints(void) { - unsigned long flags; + unsigned long index, flags; struct hfi1_devdata *dd; u64 sps_ints = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { sps_ints += get_all_cpu_total(dd->int_counter); } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return sps_ints; } diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 62ace0b2d17a..ae9582ddbc8f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -170,6 +170,7 @@ struct hfi1_qp_priv { struct tid_flow_state flow_state; struct tid_rdma_qp_params tid_rdma; struct rvt_qp *owner; + u16 s_running_pkt_size; u8 hdr_type; /* 9B or 16B */ struct rvt_sge_state tid_ss; /* SGE state pointer for 2nd leg */ atomic_t n_requests; /* # of TID RDMA requests in the */ @@ -415,6 +416,7 @@ void hfi1_rc_hdrerr( u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); +void hfi1_rc_verbs_aborted(struct rvt_qp *qp, struct hfi1_opa_header *opah); void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah); void hfi1_ud_rcv(struct hfi1_packet *packet); diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index c4ab2d5b4502..8f766dd3f61c 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -100,7 +100,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { struct hfi1_qp_priv *priv; - tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); + tx = kmem_cache_alloc(dev->verbs_txreq_cache, VERBS_TXREQ_GFP); if (tx) goto out; priv = qp->priv; diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index b002e96eb335..bfa6e081cb56 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -72,6 +72,7 @@ struct hfi1_ibdev; struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp); +#define VERBS_TXREQ_GFP (GFP_ATOMIC | __GFP_NOWARN) static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) __must_hold(&qp->slock) @@ -79,7 +80,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, struct verbs_txreq *tx; struct hfi1_qp_priv *priv = qp->priv; - tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); + tx = kmem_cache_alloc(dev->verbs_txreq_cache, VERBS_TXREQ_GFP); if (unlikely(!tx)) { /* call slow path to get the lock */ tx = __get_txreq(dev, qp); diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index a922db58be14..b49e60e8397d 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -162,12 +162,12 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, void hfi1_vnic_setup(struct hfi1_devdata *dd) { - idr_init(&dd->vnic.vesw_idr); + xa_init(&dd->vnic.vesws); } void hfi1_vnic_cleanup(struct hfi1_devdata *dd) { - idr_destroy(&dd->vnic.vesw_idr); + WARN_ON(!xa_empty(&dd->vnic.vesws)); } #define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ @@ -423,8 +423,7 @@ tx_finish: static u16 hfi1_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); struct opa_vnic_skb_mdata *mdata; @@ -534,7 +533,7 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) l4_type = hfi1_16B_get_l4(packet->ebuf); if (likely(l4_type == OPA_16B_L4_ETHR)) { vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); - vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); + vinfo = xa_load(&dd->vnic.vesws, vesw_id); /* * In case of invalid vesw id, count the error on @@ -542,9 +541,10 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) */ if (unlikely(!vinfo)) { struct hfi1_vnic_vport_info *vinfo_tmp; - int id_tmp = 0; + unsigned long index = 0; - vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp); + vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX, + XA_PRESENT); if (vinfo_tmp) { spin_lock(&vport_cntr_lock); vinfo_tmp->stats[0].netstats.rx_nohandler++; @@ -598,8 +598,7 @@ static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) if (!vinfo->vesw_id) return -EINVAL; - rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id, - vinfo->vesw_id + 1, GFP_NOWAIT); + rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL); if (rc < 0) return rc; @@ -625,7 +624,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) clear_bit(HFI1_VNIC_UP, &vinfo->flags); netif_carrier_off(vinfo->netdev); netif_tx_disable(vinfo->netdev); - idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); + xa_erase(&dd->vnic.vesws, vinfo->vesw_id); /* ensure irqs see the change */ msix_vnic_synchronize_irq(dd); |