From 270a9833b2697d455f6ae12669b8220d3ea48f18 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 26 Feb 2019 08:46:16 -0800 Subject: IB/hfi1: Add running average for adaptive pio The adaptive PIO implementation only considers the current packet size when deciding between SDMA and PIO for a packet. This causes credit return forces if small and large packets are interleaved. Add a running average to avoid costly credit forces: a long sequence of small packets is now required to pull the average below the threshold that selects PIO. Reviewed-by: Michael J. Ruhl Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.c | 2 ++ drivers/infiniband/hw/hfi1/verbs.c | 7 ++++--- drivers/infiniband/hw/hfi1/verbs.h | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 9b643c2409cf..1390172b488e 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -742,6 +742,8 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) iowait_wakeup, iowait_sdma_drained, hfi1_init_priority); + /* Init to a value to start the running average correctly */ + priv->s_running_pkt_size = piothreshold / 2; return priv; } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 55a56b3d7f83..b73ab7c64d91 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1223,15 +1223,16 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, case IB_QPT_UD: break; case IB_QPT_UC: - case IB_QPT_RC: { + case IB_QPT_RC: + priv->s_running_pkt_size = + (tx->s_cur_size + priv->s_running_pkt_size) / 2; if (piothreshold && - tx->s_cur_size <= min(piothreshold, qp->pmtu) && + priv->s_running_pkt_size <= min(piothreshold, qp->pmtu) && (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; - } default: break; } diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 62ace0b2d17a..7ecb8ed4a1d9 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -170,6 +170,7 @@ struct hfi1_qp_priv { struct tid_flow_state flow_state; struct tid_rdma_qp_params tid_rdma; struct rvt_qp *owner; + u16 s_running_pkt_size; u8 hdr_type; /* 9B or 16B */ struct rvt_sge_state tid_ss; /* SGE state pointer for 2nd leg */ atomic_t n_requests; /* # of TID RDMA requests in the */ -- cgit v1.2.3-59-g8ed1b From 920d10e45844d1448d4d279d07fa91e5a7cee4f1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 27 Mar 2019 16:50:50 -0700 Subject: IB/hfi1: Fix two format strings Enable format string checking for hfi1_cdbg() and fix the resulting compiler warnings.
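The checking relies on the compiler's printf format attribute, which the kernel wraps as __printf(fmt_idx, first_vararg_idx). A minimal standalone sketch of the mechanism (illustrative only; my_cdbg is a made-up function, not the hfi1 macro):

#define __printf(a, b) __attribute__((format(printf, a, b)))

/* Argument 2 is the format string; variadic arguments start at 3. */
void __printf(2, 3) my_cdbg(const char *which, const char *fmt, ...);

void example(unsigned int size)
{
	my_cdbg("PROC", "size %zu\n", size);	/* -Wformat warning: %zu wants size_t */
	my_cdbg("PROC", "size %u\n", size);	/* clean */
}

This is exactly the class of mismatch fixed below: rcd->egrbufs.size is a u32, so the %zu specifiers become %u once the attribute makes the compiler check them.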
Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/init.c | 4 ++-- drivers/infiniband/hw/hfi1/trace_dbg.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index faaaac8fbc55..3c339617aa95 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -469,7 +469,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, if (rcd->egrbufs.size < hfi1_max_mtu) { rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu); hfi1_cdbg(PROC, - "ctxt%u: eager bufs size too small. Adjusting to %zu\n", + "ctxt%u: eager bufs size too small. Adjusting to %u\n", rcd->ctxt, rcd->egrbufs.size); } rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE; @@ -2071,7 +2071,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.size = alloced_bytes; hfi1_cdbg(PROC, - "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n", + "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB\n", rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024); diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index e62171fb7379..de7a87392b8d 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -86,14 +86,14 @@ DECLARE_EVENT_CLASS(hfi1_trace_template, * actual function to work and can not be in a macro. */ #define __hfi1_trace_def(lvl) \ -void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ +void __printf(2, 3) __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ \ DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \ TP_PROTO(const char *function, struct va_format *vaf), \ TP_ARGS(function, vaf)) #define __hfi1_trace_fn(lvl) \ -void __hfi1_trace_##lvl(const char *func, char *fmt, ...) 
\ +void __printf(2, 3) __hfi1_trace_##lvl(const char *func, char *fmt, ...)\ { \ struct va_format vaf = { \ .fmt = fmt, \ -- cgit v1.2.3-59-g8ed1b From 0ee3b915b1b03c4deeea9d47af3e6a8e5e66c262 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 20 Feb 2019 16:20:57 -0800 Subject: hfi1: Convert vesw_idr to XArray Signed-off-by: Matthew Wilcox Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/hfi.h | 3 +-- drivers/infiniband/hw/hfi1/vnic_main.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 048b5d73ba39..aafd8c377dd3 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -54,7 +54,6 @@ #include #include #include -#include #include #include #include @@ -1021,8 +1020,8 @@ struct hfi1_asic_data { struct hfi1_vnic_data { struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT]; struct kmem_cache *txreq_cache; + struct xarray vesws; u8 num_vports; - struct idr vesw_idr; u8 rmt_start; u8 num_ctxt; }; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index a922db58be14..4d5683919b1f 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -162,12 +162,12 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, void hfi1_vnic_setup(struct hfi1_devdata *dd) { - idr_init(&dd->vnic.vesw_idr); + xa_init(&dd->vnic.vesws); } void hfi1_vnic_cleanup(struct hfi1_devdata *dd) { - idr_destroy(&dd->vnic.vesw_idr); + WARN_ON(!xa_empty(&dd->vnic.vesws)); } #define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ @@ -534,7 +534,7 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) l4_type = hfi1_16B_get_l4(packet->ebuf); if (likely(l4_type == OPA_16B_L4_ETHR)) { vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); - vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); + vinfo = xa_load(&dd->vnic.vesws, vesw_id); /* * In case of invalid vesw id, count the error on @@ -542,9 +542,10 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) */ if (unlikely(!vinfo)) { struct hfi1_vnic_vport_info *vinfo_tmp; - int id_tmp = 0; + unsigned long index = 0; - vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp); + vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX, + XA_PRESENT); if (vinfo_tmp) { spin_lock(&vport_cntr_lock); vinfo_tmp->stats[0].netstats.rx_nohandler++; @@ -598,8 +599,7 @@ static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) if (!vinfo->vesw_id) return -EINVAL; - rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id, - vinfo->vesw_id + 1, GFP_NOWAIT); + rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL); if (rc < 0) return rc; @@ -625,7 +625,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) clear_bit(HFI1_VNIC_UP, &vinfo->flags); netif_carrier_off(vinfo->netdev); netif_tx_disable(vinfo->netdev); - idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); + xa_erase(&dd->vnic.vesws, vinfo->vesw_id); /* ensure irqs see the change */ msix_vnic_synchronize_irq(dd); -- cgit v1.2.3-59-g8ed1b From 03b92789e5cfdac66805c1a98f1ec67336199d56 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 8 Feb 2019 15:41:29 -0500 Subject: hfi1: Convert hfi1_unit_table to XArray Also remove hfi1_devs_list. 
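Both this conversion and the hfi1_unit_table conversion that follows map the old idr calls onto the XArray API in the same way. Roughly, as a simplified sketch with placeholder names (table/entry are not the driver's fields):

#include <linux/xarray.h>

static DEFINE_XARRAY(table);	/* replaces idr_init()/idr_destroy() */

static int xarray_sketch(unsigned long id, void *entry)
{
	unsigned long index;
	void *p;
	int err;

	/* idr_alloc(&idr, entry, id, id + 1, ...) becomes a fixed-index insert */
	err = xa_insert(&table, id, entry, GFP_KERNEL);
	if (err)
		return err;
	p = xa_load(&table, id);	/* idr_find(): lockless lookup under RCU */
	xa_for_each(&table, index, p)	/* idr_get_next()-style iteration */
		;
	xa_erase(&table, id);		/* idr_remove() */
	return 0;
}

Where the kernel must pick the index itself, as for dd->unit below, xa_alloc_irq() replaces the idr_preload()/idr_alloc()/idr_preload_end() sequence and takes the xa_lock internally, which is why the separate hfi1_devs_lock can be dropped.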
Signed-off-by: Matthew Wilcox Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 16 +++++------ drivers/infiniband/hw/hfi1/debugfs.c | 8 +++--- drivers/infiniband/hw/hfi1/driver.c | 10 +++---- drivers/infiniband/hw/hfi1/hfi.h | 5 ++-- drivers/infiniband/hw/hfi1/init.c | 52 +++++++----------------------------- drivers/infiniband/hw/hfi1/verbs.c | 8 +++--- 6 files changed, 31 insertions(+), 68 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 612f04190ed8..6150567c0b51 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14654,8 +14654,8 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd) */ static int init_asic_data(struct hfi1_devdata *dd) { - unsigned long flags; - struct hfi1_devdata *tmp, *peer = NULL; + unsigned long index; + struct hfi1_devdata *peer; struct hfi1_asic_data *asic_data; int ret = 0; @@ -14664,14 +14664,12 @@ static int init_asic_data(struct hfi1_devdata *dd) if (!asic_data) return -ENOMEM; - spin_lock_irqsave(&hfi1_devs_lock, flags); + xa_lock_irq(&hfi1_dev_table); /* Find our peer device */ - list_for_each_entry(tmp, &hfi1_dev_list, list) { - if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) && - dd->unit != tmp->unit) { - peer = tmp; + xa_for_each(&hfi1_dev_table, index, peer) { + if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(peer)) && + dd->unit != peer->unit) break; - } } if (peer) { @@ -14683,7 +14681,7 @@ static int init_asic_data(struct hfi1_devdata *dd) mutex_init(&dd->asic_data->asic_resource_mutex); } dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */ - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irq(&hfi1_dev_table); /* first one through - set up i2c devices */ if (!peer) diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 427ba0ce74a5..057bb82c664f 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1302,15 +1302,15 @@ static void _driver_stats_seq_stop(struct seq_file *s, void *v) static u64 hfi1_sps_ints(void) { - unsigned long flags; + unsigned long index, flags; struct hfi1_devdata *dd; u64 sps_ints = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { sps_ints += get_all_cpu_total(dd->int_counter); } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return sps_ints; } diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 2a9d2912f5db..867b4e10018f 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -72,8 +72,6 @@ */ const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n"; -DEFINE_SPINLOCK(hfi1_devs_lock); -LIST_HEAD(hfi1_dev_list); DEFINE_MUTEX(hfi1_mutex); /* general driver use */ unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU; @@ -175,11 +173,11 @@ int hfi1_count_active_units(void) { struct hfi1_devdata *dd; struct hfi1_pportdata *ppd; - unsigned long flags; + unsigned long index, flags; int pidx, nunits_active = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1) continue; for (pidx = 0; pidx < dd->num_pports; ++pidx) { @@ 
-190,7 +188,7 @@ int hfi1_count_active_units(void) } } } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return nunits_active; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index aafd8c377dd3..b458c218842b 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -64,6 +64,7 @@ #include #include #include +#include #include #include #include @@ -1040,7 +1041,6 @@ struct sdma_vl_map; typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64); struct hfi1_devdata { struct hfi1_ibdev verbs_dev; /* must be first */ - struct list_head list; /* pointers to related structs for this device */ /* pci access data structure */ struct pci_dev *pcidev; @@ -1425,8 +1425,7 @@ struct hfi1_filedata { struct mm_struct *mm; }; -extern struct list_head hfi1_dev_list; -extern spinlock_t hfi1_devs_lock; +extern struct xarray hfi1_dev_table; struct hfi1_devdata *hfi1_lookup(int unit); static inline unsigned long uctxt_offset(struct hfi1_ctxtdata *uctxt) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 3c339617aa95..e4c2ae4f1cb3 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include @@ -124,7 +124,7 @@ MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user static inline u64 encode_rcv_header_entry_size(u16 size); -static struct idr hfi1_unit_table; +DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); static int hfi1_create_kctxt(struct hfi1_devdata *dd, struct hfi1_pportdata *ppd) @@ -1018,21 +1018,9 @@ done: return ret; } -static inline struct hfi1_devdata *__hfi1_lookup(int unit) -{ - return idr_find(&hfi1_unit_table, unit); -} - struct hfi1_devdata *hfi1_lookup(int unit) { - struct hfi1_devdata *dd; - unsigned long flags; - - spin_lock_irqsave(&hfi1_devs_lock, flags); - dd = __hfi1_lookup(unit); - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - - return dd; + return xa_load(&hfi1_dev_table, unit); } /* @@ -1200,7 +1188,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) /* * Release our hold on the shared asic data. If we are the last one, * return the structure to be finalized outside the lock. Must be - * holding hfi1_devs_lock. + * holding hfi1_dev_table lock. */ static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd) { @@ -1236,13 +1224,10 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd) struct hfi1_asic_data *ad; unsigned long flags; - spin_lock_irqsave(&hfi1_devs_lock, flags); - if (!list_empty(&dd->list)) { - idr_remove(&hfi1_unit_table, dd->unit); - list_del_init(&dd->list); - } + xa_lock_irqsave(&hfi1_dev_table, flags); + __xa_erase(&hfi1_dev_table, dd->unit); ad = release_asic_data(dd); - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); finalize_asic_data(dd, ad); free_platform_config(dd); @@ -1286,13 +1271,10 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) * Must be done via verbs allocator, because the verbs cleanup process * both does cleanup and free of the data structure. * "extra" is for chip-specific data. - * - * Use the idr mechanism to get a unit number for this unit. 
*/ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) { - unsigned long flags; struct hfi1_devdata *dd; int ret, nports; @@ -1307,21 +1289,10 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, dd->pport = (struct hfi1_pportdata *)(dd + 1); dd->pcidev = pdev; pci_set_drvdata(pdev, dd); - - INIT_LIST_HEAD(&dd->list); - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&hfi1_devs_lock, flags); - - ret = idr_alloc(&hfi1_unit_table, dd, 0, 0, GFP_NOWAIT); - if (ret >= 0) { - dd->unit = ret; - list_add(&dd->list, &hfi1_dev_list); - } dd->node = NUMA_NO_NODE; - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - idr_preload_end(); - + ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b, + GFP_KERNEL); if (ret < 0) { dev_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); @@ -1522,8 +1493,6 @@ static int __init hfi1_mod_init(void) * These must be called before the driver is registered with * the PCI subsystem. */ - idr_init(&hfi1_unit_table); - hfi1_dbg_init(); ret = pci_register_driver(&hfi1_pci_driver); if (ret < 0) { @@ -1534,7 +1503,6 @@ static int __init hfi1_mod_init(void) bail_dev: hfi1_dbg_exit(); - idr_destroy(&hfi1_unit_table); dev_cleanup(); bail: return ret; @@ -1552,7 +1520,7 @@ static void __exit hfi1_mod_cleanup(void) node_affinity_destroy_all(); hfi1_dbg_exit(); - idr_destroy(&hfi1_unit_table); + WARN_ON(!xa_empty(&hfi1_dev_table)); dispose_firmware(); /* asymmetric with obtain_firmware() */ dev_cleanup(); } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index b73ab7c64d91..1eb4105b2d22 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1740,15 +1740,15 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, static u64 hfi1_sps_ints(void) { - unsigned long flags; + unsigned long index, flags; struct hfi1_devdata *dd; u64 sps_ints = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { sps_ints += get_all_cpu_total(dd->int_counter); } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return sps_ints; } -- cgit v1.2.3-59-g8ed1b From f6f3f532556e4fcaa2d259fd04a800bfb4f9670d Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:58:30 -0700 Subject: IB/hfi1: Delay the release of destination mr for TID RDMA WRITE DATA A reference to the destination memory region is first obtained when the TID RDMA WRITE request is received on the responder side. This reference is released once all TID RDMA WRITE RESP packets are sent to the requester side, even though not all TID RDMA WRITE DATA packets may have been received. This early release is especially undesirable if the software needs to access the destination memory before the last data packet is received. This patch delays the release of the MR until all TID RDMA WRITE DATA packets have been received. A helper function to release the reference is also created to simplify the code. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J.
Ruhl Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 23 ++++++----------------- drivers/infiniband/hw/hfi1/rc.h | 8 ++++++++ drivers/infiniband/hw/hfi1/tid_rdma.c | 16 ++++------------ 3 files changed, 18 insertions(+), 29 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 5991211d72bd..5ba39a9f65ad 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -140,10 +140,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, case OP(RDMA_READ_RESPONSE_LAST): case OP(RDMA_READ_RESPONSE_ONLY): e = &qp->s_ack_queue[qp->s_tail_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); /* FALLTHROUGH */ case OP(ATOMIC_ACKNOWLEDGE): /* @@ -343,7 +340,8 @@ write_resp: break; e->sent = 1; - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); + /* Do not free e->rdma_sge until all data are received */ + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); break; case TID_OP(READ_RESP): @@ -2643,10 +2641,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, len = be32_to_cpu(reth->length); if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = get_ib_reth_vaddr(reth); @@ -3088,10 +3083,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); reth = &ohdr->u.rc.reth; len = be32_to_cpu(reth->length); if (len) { @@ -3166,10 +3158,7 @@ send_last: update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); /* Process OPFN special virtual address */ if (opfn) { opfn_conn_response(qp, e, ateth); diff --git a/drivers/infiniband/hw/hfi1/rc.h b/drivers/infiniband/hw/hfi1/rc.h index 8e0935b9bf2a..5ed5e85d5841 100644 --- a/drivers/infiniband/hw/hfi1/rc.h +++ b/drivers/infiniband/hw/hfi1/rc.h @@ -41,6 +41,14 @@ static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, return rvt_restart_sge(ss, wqe, len); } +static inline void release_rdma_sge_mr(struct rvt_ack_entry *e) +{ + if (e->rdma_sge.mr) { + rvt_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } +} + struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev, u8 *prev_ack, bool *scheduled); int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val, diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index fdda33aca77f..145dd3348fdd 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2036,10 +2036,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet, if (psn != e->psn || len != req->total_len) goto unlock; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); rkey = be32_to_cpu(reth->rkey); vaddr = get_ib_reth_vaddr(reth); @@ -2285,10 +2282,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); 
- e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); rkey = be32_to_cpu(reth->rkey); qp->r_len = len; @@ -3751,10 +3745,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) goto update_head; } - if (e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } + release_rdma_sge_mr(e); /* The length needs to be in multiples of PAGE_SIZE */ if (!len || len & ~PAGE_MASK) @@ -4347,6 +4338,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) priv->r_tid_ack = priv->r_tid_tail; if (opcode == TID_OP(WRITE_DATA_LAST)) { + release_rdma_sge_mr(e); for (next = priv->r_tid_tail + 1; ; next++) { if (next > rvt_size_atomic(&dev->rdi)) next = 0; -- cgit v1.2.3-59-g8ed1b From 6a40693a884dacae68c1771d369ad3be0594ba1c Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:58:40 -0700 Subject: IB/hfi1: Add a function to read next expected psn from hardware flow This patch adds a function to read next expected KDETH PSN from hardware flow to simplify the code. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J. Ruhl Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/tid_rdma.c | 38 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 145dd3348fdd..d540e9cffa37 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -128,6 +128,7 @@ static int make_tid_rdma_ack(struct rvt_qp *qp, struct ib_other_headers *ohdr, struct hfi1_pkt_state *ps); static void hfi1_do_tid_send(struct rvt_qp *qp); +static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx); static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) { @@ -2807,18 +2808,10 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, } priv->flow_state.r_next_psn++; } else { - u64 reg; u32 last_psn; - /* - * The only sane way to get the amount of - * progress is to read the HW flow state. - */ - reg = read_uctxt_csr(dd, rcd->ctxt, - RCV_TID_FLOW_TABLE + - (8 * flow->idx)); - last_psn = mask_psn(reg); - + last_psn = read_r_next_psn(dd, rcd->ctxt, + flow->idx); priv->flow_state.r_next_psn = last_psn; priv->flow_state.flags |= TID_FLOW_SW_PSN; /* @@ -2968,17 +2961,10 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, switch (rte) { case RHF_RTE_EXPECTED_FLOW_SEQ_ERR: if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) { - u64 reg; - qpriv->s_flags |= HFI1_R_TID_SW_PSN; - /* - * The only sane way to get the amount of - * progress is to read the HW flow state. - */ - reg = read_uctxt_csr(dd, rcd->ctxt, - RCV_TID_FLOW_TABLE + - (8 * flow->idx)); - flow->flow_state.r_next_psn = mask_psn(reg); + flow->flow_state.r_next_psn = + read_r_next_psn(dd, rcd->ctxt, + flow->idx); qpriv->r_next_psn_kdeth = flow->flow_state.r_next_psn; goto nak_psn; @@ -5456,3 +5442,15 @@ bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e) } return false; } + +static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx) +{ + u64 reg; + + /* + * The only sane way to get the amount of + * progress is to read the HW flow state. 
+ */ + reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx)); + return mask_psn(reg); +} -- cgit v1.2.3-59-g8ed1b From b885d5be9ca10dff6110a8738c45eb4b3fb5a40a Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:58:50 -0700 Subject: IB/hfi1: Unify the software PSN check for TID RDMA READ/WRITE For expected packet receiving, the hfi1 hardware checks the KDETH PSN automatically. However, when a sequence error occurs, the hfi1 driver can check the sequence instead until the hardware flow generation is reloaded. The TID RDMA READ and WRITE protocols implement similar software checking mechanisms, but with different flags and different local variables to store the next expected PSN. Unify the handling by using a single flag and local variable for both TID RDMA READ and WRITE protocols. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J. Ruhl Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/tid_rdma.c | 45 +++++++++++++++++++++------------- drivers/infiniband/hw/hfi1/tid_rdma.h | 2 -- drivers/infiniband/hw/hfi1/trace_tid.h | 12 +++------ 3 files changed, 31 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index d540e9cffa37..9ade7d3954b0 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -67,8 +67,6 @@ static u32 mask_generation(u32 a) #define TID_RDMA_DESTQP_FLOW_SHIFT 11 #define TID_RDMA_DESTQP_FLOW_MASK 0x1f -#define TID_FLOW_SW_PSN BIT(0) - #define TID_OPFN_QP_CTXT_MASK 0xff #define TID_OPFN_QP_CTXT_SHIFT 56 #define TID_OPFN_QP_KDETH_MASK 0xff @@ -777,7 +775,6 @@ int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp) rcd->flows[fs->index].generation = fs->generation; fs->generation = kern_setup_hw_flow(rcd, fs->index); fs->psn = 0; - fs->flags = 0; dequeue_tid_waiter(rcd, &rcd->flow_queue, qp); /* get head before dropping lock */ fqp = first_qp(rcd, &rcd->flow_queue); @@ -1808,6 +1805,7 @@ sync_check: goto done; hfi1_kern_clear_hw_flow(req->rcd, qp); + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; req->state = TID_REQUEST_ACTIVE; } @@ -2476,8 +2474,13 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) flow = &req->flows[req->clear_tail]; /* When header suppression is disabled */ - if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) + if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) { + if (cmp_psn(kpsn, flow->flow_state.r_next_psn)) + goto ack_done; + flow->flow_state.r_next_psn = mask_psn(kpsn + 1); goto ack_done; + } + flow->flow_state.r_next_psn = mask_psn(kpsn + 1); req->ack_pending--; priv->pending_tid_r_segs--; qp->s_num_rd_atomic--; @@ -2519,6 +2522,7 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) req->comp_seg == req->cur_seg) || priv->tid_r_comp == priv->tid_r_reqs) { hfi1_kern_clear_hw_flow(priv->rcd, qp); + priv->s_flags &= ~HFI1_R_TID_SW_PSN; if (req->state == TID_REQUEST_SYNC) req->state = TID_REQUEST_ACTIVE; } @@ -2768,9 +2772,9 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return ret; } - if (priv->flow_state.flags & TID_FLOW_SW_PSN) { + if (priv->s_flags & HFI1_R_TID_SW_PSN) { diff = cmp_psn(psn, - priv->flow_state.r_next_psn); + flow->flow_state.r_next_psn); if (diff > 0) { if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) restart_tid_rdma_read_req(rcd, @@ -2806,14 +2810,15 @@ static bool
handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, qp->r_flags &= ~RVT_R_RDMAR_SEQ; } - priv->flow_state.r_next_psn++; + flow->flow_state.r_next_psn = + mask_psn(psn + 1); } else { u32 last_psn; last_psn = read_r_next_psn(dd, rcd->ctxt, flow->idx); - priv->flow_state.r_next_psn = last_psn; - priv->flow_state.flags |= TID_FLOW_SW_PSN; + flow->flow_state.r_next_psn = last_psn; + priv->s_flags |= HFI1_R_TID_SW_PSN; /* * If no request has been restarted yet, * restart the current one. @@ -2878,6 +2883,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, struct rvt_ack_entry *e; struct tid_rdma_request *req; struct tid_rdma_flow *flow; + int diff = 0; trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ", packet->rhf); @@ -2977,10 +2983,12 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, * mismatch could be due to packets that were * already in flight. */ - if (psn != flow->flow_state.r_next_psn) { - psn = flow->flow_state.r_next_psn; + diff = cmp_psn(psn, + flow->flow_state.r_next_psn); + if (diff > 0) goto nak_psn; - } + else if (diff < 0) + break; qpriv->s_nak_state = 0; /* @@ -2991,8 +2999,10 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, if (psn == full_flow_psn(flow, flow->flow_state.lpsn)) ret = false; + flow->flow_state.r_next_psn = + mask_psn(psn + 1); qpriv->r_next_psn_kdeth = - ++flow->flow_state.r_next_psn; + flow->flow_state.r_next_psn; } break; @@ -3497,8 +3507,10 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->r_tid_alloc == qpriv->r_tid_head) { /* If all data has been received, clear the flow */ if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS && - !qpriv->alloc_w_segs) + !qpriv->alloc_w_segs) { hfi1_kern_clear_hw_flow(rcd, qp); + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; + } break; } @@ -3524,8 +3536,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->sync_pt && !qpriv->alloc_w_segs) { hfi1_kern_clear_hw_flow(rcd, qp); qpriv->sync_pt = false; - if (qpriv->s_flags & HFI1_R_TID_SW_PSN) - qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; + qpriv->s_flags &= ~HFI1_R_TID_SW_PSN; } /* Allocate flow if we don't have one */ @@ -4299,7 +4310,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) { if (cmp_psn(psn, flow->flow_state.r_next_psn)) goto send_nak; - flow->flow_state.r_next_psn++; + flow->flow_state.r_next_psn = mask_psn(psn + 1); goto exit; } flow->flow_state.r_next_psn = mask_psn(psn + 1); diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h index 53ab24ef4f02..1c536185261e 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.h +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -76,10 +76,8 @@ struct tid_rdma_qp_params { struct tid_flow_state { u32 generation; u32 psn; - u32 r_next_psn; /* next PSN to be received (in TID space) */ u8 index; u8 last_index; - u8 flags; }; enum tid_rdma_req_state { diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h index 548dfc45a407..4388b594ed1b 100644 --- a/drivers/infiniband/hw/hfi1/trace_tid.h +++ b/drivers/infiniband/hw/hfi1/trace_tid.h @@ -53,7 +53,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent); "tid_r_comp %u pending_tid_r_segs %u " \ "s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \ "s_state 0x%x hw_flow_index %u generation 0x%x " \ - "fpsn 0x%x flow_flags 0x%x" + "fpsn 0x%x" #define TID_REQ_PRN "[%s] qpn 0x%x newreq %u opcode 0x%x psn 0x%x lpsn 0x%x " \ "cur_seg %u comp_seg %u ack_seg %u 
alloc_seg %u " \ @@ -71,7 +71,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent); "pending_tid_w_segs %u sync_pt %s " \ "ps_nak_psn 0x%x ps_nak_state 0x%x " \ "prnr_nak_state 0x%x hw_flow_index %u generation "\ - "0x%x fpsn 0x%x flow_flags 0x%x resync %s" \ + "0x%x fpsn 0x%x resync %s" \ "r_next_psn_kdeth 0x%x" #define TID_WRITE_SENDER_PRN "[%s] qpn 0x%x newreq %u s_tid_cur %u " \ @@ -973,7 +973,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __field(u32, hw_flow_index) __field(u32, generation) __field(u32, fpsn) - __field(u32, flow_flags) ), TP_fast_assign(/* assign */ struct hfi1_qp_priv *priv = qp->priv; @@ -991,7 +990,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __entry->hw_flow_index = priv->flow_state.index; __entry->generation = priv->flow_state.generation; __entry->fpsn = priv->flow_state.psn; - __entry->flow_flags = priv->flow_state.flags; ), TP_printk(/* print */ TID_READ_SENDER_PRN, @@ -1007,8 +1005,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */ __entry->s_state, __entry->hw_flow_index, __entry->generation, - __entry->fpsn, - __entry->flow_flags + __entry->fpsn ) ); @@ -1338,7 +1335,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */ __field(u32, hw_flow_index) __field(u32, generation) __field(u32, fpsn) - __field(u32, flow_flags) __field(bool, resync) __field(u32, r_next_psn_kdeth) ), @@ -1360,7 +1356,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */ __entry->hw_flow_index = priv->flow_state.index; __entry->generation = priv->flow_state.generation; __entry->fpsn = priv->flow_state.psn; - __entry->flow_flags = priv->flow_state.flags; __entry->resync = priv->resync; __entry->r_next_psn_kdeth = priv->r_next_psn_kdeth; ), @@ -1381,7 +1376,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */ __entry->hw_flow_index, __entry->generation, __entry->fpsn, - __entry->flow_flags, __entry->resync ? "yes" : "no", __entry->r_next_psn_kdeth ) -- cgit v1.2.3-59-g8ed1b From 8da0f0f26f80612efadc23beb72d5b66a498a386 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 09:59:00 -0700 Subject: IB/hfi1: Remove WARN_ON when freeing expected receive groups When a PSM user receive context is freed, the expected receive groups allocated by the receive context will also be freed. However, if there are still TID entries in use, the receive groups rcd->tid_full_list or rcd->tid_used_list will not be empty, thus triggering the WARN_ONs in the function hfi1_free_ctxt_rcv_groups(). Even when the two lists are not empty, the hfi1 driver will free all TID entries and receive groups associated with the receive context to prevent any resource leakage. Since a clean user application exit is not controlled by the hfi1 driver, this patch removes the WARN_ONs in hfi1_free_ctxt_rcv_groups(). Reviewed-by: Mike Marciniszyn Reviewed-by: Michael J.
Ruhl Reviewed-by: Dennis Dalessandro Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/exp_rcv.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index 1be49a0d9c11..e9d5cc8b771a 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -112,9 +112,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) */ void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) { - WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list)); - WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list)); - kfree(rcd->groups); rcd->groups = NULL; hfi1_exp_tid_group_init(rcd); -- cgit v1.2.3-59-g8ed1b From 747b931fbe2362366dee30617f816501f3126882 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 18 Mar 2019 12:20:59 -0700 Subject: IB/hfi1: Implement CCA for TID RDMA protocol Currently, FECN handling is not implemented on TID RDMA expected receive packets and therefore CCA can't be turned on when TID RDMA is enabled. This patch adds the CCA support to TID RDMA protocol by: - modifying FECN RSM rule to include kernel receive contexts - For TID_RDMA READ RESP or TID RDMA ACK packet, a CNP will be sent out if the FECN bit is set. For other TID RDMA packets that generate at least one response packet, the BECN bit will be set in the first response packet - Copying expected packet data to destination buffer when FECN bit is set in the TID RDMA READ RESP or TID RDMA WRITE DATA packet. In this case, the expected packet is received as an eager packet - Handling the TID sequence error for subsequent normal expected packets. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Michael J. Ruhl Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 35 ++++--- drivers/infiniband/hw/hfi1/driver.c | 4 +- drivers/infiniband/hw/hfi1/tid_rdma.c | 173 ++++++++++++++++++++++++++++------ 3 files changed, 167 insertions(+), 45 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9784c6c0d2ec..16861d9ba1b7 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13297,15 +13297,18 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * The RMT entries are currently allocated as shown below: * 1. QOS (0 to 128 entries); - * 2. FECN for PSM (num_user_contexts + num_vnic_contexts); + * 2. FECN (num_kernel_context - 1 + num_user_contexts + + * num_vnic_contexts); * 3. VNIC (num_vnic_contexts). - * It should be noted that PSM FECN oversubscribe num_vnic_contexts + * It should be noted that FECN oversubscribe num_vnic_contexts * entries of RMT because both VNIC and PSM could allocate any receive * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, * and PSM FECN must reserve an RMT entry for each possible PSM receive * context. 
*/ rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2); + if (HFI1_CAP_IS_KSET(TID_RDMA)) + rmt_count += num_kernel_contexts - 1; if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count; dd_dev_err(dd, @@ -14288,37 +14291,43 @@ bail: init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1); } -static void init_user_fecn_handling(struct hfi1_devdata *dd, - struct rsm_map_table *rmt) +static void init_fecn_handling(struct hfi1_devdata *dd, + struct rsm_map_table *rmt) { struct rsm_rule_data rrd; u64 reg; - int i, idx, regoff, regidx; + int i, idx, regoff, regidx, start; u8 offset; u32 total_cnt; + if (HFI1_CAP_IS_KSET(TID_RDMA)) + /* Exclude context 0 */ + start = 1; + else + start = dd->first_dyn_alloc_ctxt; + + total_cnt = dd->num_rcv_contexts - start; + /* there needs to be enough room in the map table */ - total_cnt = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) { - dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n"); + dd_dev_err(dd, "FECN handling disabled - too many contexts allocated\n"); return; } /* * RSM will extract the destination context as an index into the * map table. The destination contexts are a sequential block - * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive). + * in the range start...num_rcv_contexts-1 (inclusive). * Map entries are accessed as offset + extracted value. Adjust * the added offset so this sequence can be placed anywhere in * the table - as long as the entries themselves do not wrap. * There are only enough bits in offset for the table size, so * start with that to allow for a "negative" offset. */ - offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used - - (int)dd->first_dyn_alloc_ctxt); + offset = (u8)(NUM_MAP_ENTRIES + rmt->used - start); - for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used; - i < dd->num_rcv_contexts; i++, idx++) { + for (i = start, idx = rmt->used; i < dd->num_rcv_contexts; + i++, idx++) { /* replace with identity mapping */ regoff = (idx % 8) * 8; regidx = idx / 8; @@ -14440,7 +14449,7 @@ static void init_rxe(struct hfi1_devdata *dd) rmt = alloc_rsm_map_table(dd); /* set up QOS, including the QPN map table */ init_qos(dd, rmt); - init_user_fecn_handling(dd, rmt); + init_fecn_handling(dd, rmt); complete_rsm_map_table(dd, rmt); /* record number of used rsm map entries for vnic */ dd->vnic.rmt_start = rmt->used; diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 2a9d2912f5db..1f4fe0269b51 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -516,7 +516,9 @@ bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, */ do_cnp = prescan || (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST && - opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE); + opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE) || + opcode == TID_OP(READ_RESP) || + opcode == TID_OP(ACK); /* Call appropriate CNP handler */ if (!ignore_fecn && do_cnp && fecn) diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 9ade7d3954b0..eae6f05ca2fa 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -127,6 +127,14 @@ static int make_tid_rdma_ack(struct rvt_qp *qp, struct hfi1_pkt_state *ps); static void hfi1_do_tid_send(struct rvt_qp *qp); static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx); +static void tid_rdma_rcv_err(struct 
hfi1_packet *packet, + struct ib_other_headers *ohdr, + struct rvt_qp *qp, u32 psn, int diff, bool fecn); +static void update_r_next_psn_fecn(struct hfi1_packet *packet, + struct hfi1_qp_priv *priv, + struct hfi1_ctxtdata *rcd, + struct tid_rdma_flow *flow, + bool fecn); static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) { @@ -2234,7 +2242,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) struct ib_reth *reth; struct hfi1_qp_priv *qpriv = qp->priv; u32 bth0, psn, len, rkey; - bool is_fecn; + bool fecn; u8 next; u64 vaddr; int diff; @@ -2244,7 +2252,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); trace_hfi1_rsp_rcv_tid_read_req(qp, psn); @@ -2263,9 +2271,8 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) diff = delta_psn(psn, qp->r_psn); if (unlikely(diff)) { - if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff)) - return; - goto send_ack; + tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn); + return; } /* We've verified the request, insert it into the ack queue. */ @@ -2317,11 +2324,11 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet) /* Schedule the send tasklet. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; nack_inv_unlock: @@ -2338,8 +2345,6 @@ nack_acc: rvt_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; -send_ack: - hfi1_send_rc_ack(packet, is_fecn); } u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, @@ -2456,12 +2461,12 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) struct tid_rdma_request *req; struct tid_rdma_flow *flow; u32 opcode, aeth; - bool is_fecn; + bool fecn; unsigned long flags; u32 kpsn, ipsn; trace_hfi1_sender_rcv_tid_read_resp(qp); - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); kpsn = mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -2475,9 +2480,39 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet) flow = &req->flows[req->clear_tail]; /* When header suppression is disabled */ if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) { + update_r_next_psn_fecn(packet, priv, rcd, flow, fecn); + if (cmp_psn(kpsn, flow->flow_state.r_next_psn)) goto ack_done; flow->flow_state.r_next_psn = mask_psn(kpsn + 1); + /* + * Copy the payload to destination buffer if this packet is + * delivered as an eager packet due to RSM rule and FECN. + * The RSM rule selects FECN bit in BTH and SH bit in + * KDETH header and therefore will not match the last + * packet of each segment that has SH bit cleared. 
+ */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) { + struct rvt_sge_state ss; + u32 len; + u32 tlen = packet->tlen; + u16 hdrsize = packet->hlen; + u8 pad = packet->pad; + u8 extra_bytes = pad + packet->extra_byte + + (SIZE_OF_CRC << 2); + u32 pmtu = qp->pmtu; + + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) + goto ack_op_err; + len = restart_sge(&ss, req->e.swqe, ipsn, pmtu); + if (unlikely(len < pmtu)) + goto ack_op_err; + rvt_copy_sge(qp, &ss, packet->payload, pmtu, false, + false); + /* Raise the sw sequence check flag for next packet */ + priv->s_flags |= HFI1_R_TID_SW_PSN; + } + goto ack_done; } flow->flow_state.r_next_psn = mask_psn(kpsn + 1); @@ -2544,8 +2579,6 @@ ack_op_err: ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - hfi1_send_rc_ack(packet, is_fecn); } void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp) @@ -3678,7 +3711,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) struct hfi1_qp_priv *qpriv = qp->priv; struct tid_rdma_request *req; u32 bth0, psn, len, rkey, num_segs; - bool is_fecn; + bool fecn; u8 next; u64 vaddr; int diff; @@ -3687,7 +3720,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); trace_hfi1_rsp_rcv_tid_write_req(qp, psn); @@ -3704,9 +3737,8 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet) num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len); diff = delta_psn(psn, qp->r_psn); if (unlikely(diff)) { - if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff)) - return; - goto send_ack; + tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn); + return; } /* @@ -3822,11 +3854,11 @@ update_head: /* Schedule the send tasklet. 
*/ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; nack_inv_unlock: @@ -3843,8 +3875,6 @@ nack_acc: rvt_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; -send_ack: - hfi1_send_rc_ack(packet, is_fecn); } u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, @@ -4061,10 +4091,10 @@ void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet) struct tid_rdma_flow *flow; enum ib_wc_status status; u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen; - bool is_fecn; + bool fecn; unsigned long flags; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -4204,7 +4234,6 @@ void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet) qpriv->s_tid_cur = i; } qp->s_flags &= ~HFI1_S_WAIT_TID_RESP; - hfi1_schedule_tid_send(qp); goto ack_done; @@ -4213,9 +4242,9 @@ ack_op_err: ack_err: rvt_error_qp(qp, status); ack_done: + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - hfi1_send_rc_ack(packet, is_fecn); } bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe, @@ -4295,7 +4324,9 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) unsigned long flags; u32 psn, next; u8 opcode; + bool fecn; + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; @@ -4308,9 +4339,53 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) req = ack_to_tid_req(e); flow = &req->flows[req->clear_tail]; if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) { + update_r_next_psn_fecn(packet, priv, rcd, flow, fecn); + if (cmp_psn(psn, flow->flow_state.r_next_psn)) goto send_nak; + flow->flow_state.r_next_psn = mask_psn(psn + 1); + /* + * Copy the payload to destination buffer if this packet is + * delivered as an eager packet due to RSM rule and FECN. + * The RSM rule selects FECN bit in BTH and SH bit in + * KDETH header and therefore will not match the last + * packet of each segment that has SH bit cleared. + */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) { + struct rvt_sge_state ss; + u32 len; + u32 tlen = packet->tlen; + u16 hdrsize = packet->hlen; + u8 pad = packet->pad; + u8 extra_bytes = pad + packet->extra_byte + + (SIZE_OF_CRC << 2); + u32 pmtu = qp->pmtu; + + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) + goto send_nak; + len = req->comp_seg * req->seg_len; + len += delta_psn(psn, + full_flow_psn(flow, flow->flow_state.spsn)) * + pmtu; + if (unlikely(req->total_len - len < pmtu)) + goto send_nak; + + /* + * The e->rdma_sge field is set when TID RDMA WRITE REQ + * is first received and is never modified thereafter. 
+ */ + ss.sge = e->rdma_sge; + ss.sg_list = NULL; + ss.num_sge = 1; + ss.total_len = req->total_len; + rvt_skip_sge(&ss, len, false); + rvt_copy_sge(qp, &ss, packet->payload, pmtu, false, + false); + /* Raise the sw sequence check flag for next packet */ + priv->r_next_psn_kdeth = mask_psn(psn + 1); + priv->s_flags |= HFI1_R_TID_SW_PSN; + } goto exit; } flow->flow_state.r_next_psn = mask_psn(psn + 1); @@ -4375,6 +4450,8 @@ done: hfi1_schedule_tid_send(qp); exit: priv->r_next_psn_kdeth = flow->flow_state.r_next_psn; + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); return; @@ -4476,12 +4553,11 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) struct tid_rdma_request *req; struct tid_rdma_flow *flow; u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn; - bool is_fecn; unsigned long flags; u16 fidx; trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0); - is_fecn = process_ecn(qp, packet); + process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth); req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn)); @@ -4835,10 +4911,10 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) struct tid_rdma_flow *flow; struct tid_flow_state *fs = &qpriv->flow_state; u32 psn, generation, idx, gen_next; - bool is_fecn; + bool fecn; unsigned long flags; - is_fecn = process_ecn(qp, packet); + fecn = process_ecn(qp, packet); psn = mask_psn(be32_to_cpu(ohdr->bth[2])); generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT; @@ -4929,6 +5005,8 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) qpriv->s_flags |= RVT_S_ACK_PENDING; hfi1_schedule_tid_send(qp); bail: + if (fecn) + qp->s_flags |= RVT_S_ECN; spin_unlock_irqrestore(&qp->s_lock, flags); } @@ -5465,3 +5543,36 @@ static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx) reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx)); return mask_psn(reg); } + +static void tid_rdma_rcv_err(struct hfi1_packet *packet, + struct ib_other_headers *ohdr, + struct rvt_qp *qp, u32 psn, int diff, bool fecn) +{ + unsigned long flags; + + tid_rdma_rcv_error(packet, ohdr, qp, psn, diff); + if (fecn) { + spin_lock_irqsave(&qp->s_lock, flags); + qp->s_flags |= RVT_S_ECN; + spin_unlock_irqrestore(&qp->s_lock, flags); + } +} + +static void update_r_next_psn_fecn(struct hfi1_packet *packet, + struct hfi1_qp_priv *priv, + struct hfi1_ctxtdata *rcd, + struct tid_rdma_flow *flow, + bool fecn) +{ + /* + * If a start/middle packet is delivered here due to + * RSM rule and FECN, we need to update the r_next_psn. + */ + if (fecn && packet->etype == RHF_RCV_TYPE_EAGER && + !(priv->s_flags & HFI1_R_TID_SW_PSN)) { + struct hfi1_devdata *dd = rcd->dd; + + flow->flow_state.r_next_psn = + read_r_next_psn(dd, rcd->ctxt, flow->idx); + } +} -- cgit v1.2.3-59-g8ed1b From 07c5ba912401b2ae3f13e3ce214158aec723c3fd Mon Sep 17 00:00:00 2001 From: Josh Collier Date: Thu, 11 Apr 2019 07:07:42 -0700 Subject: IB/hfi1: Add debugfs to control expansion ROM write protect Some kernels now enable CONFIG_IO_STRICT_DEVMEM which prevents multiple handles to PCI resource0. In order to continue to support expansion ROM updates while the driver is loaded, the driver must now provide an interface to control the expansion ROM write protection. This patch adds an exprom_wp debugfs interface that allows the hfi1_eprom user tool to disable the expansion ROM write protection by opening the file and writing a '1'. 
The write protection is released when writing a '0' or automatically re-enabled when the file handle is closed. The current implementation will only allow one handle to be opened at a time across all hfi1 devices. Reviewed-by: Dennis Dalessandro Signed-off-by: Josh Collier Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/debugfs.c | 74 ++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 057bb82c664f..15efb4a380b2 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1080,6 +1080,77 @@ static int qsfp2_debugfs_release(struct inode *in, struct file *fp) return __qsfp_debugfs_release(in, fp, 1); } +#define EXPROM_WRITE_ENABLE BIT_ULL(14) + +static bool exprom_wp_disabled; + +static int exprom_wp_set(struct hfi1_devdata *dd, bool disable) +{ + u64 gpio_val = 0; + + if (disable) { + gpio_val = EXPROM_WRITE_ENABLE; + exprom_wp_disabled = true; + dd_dev_info(dd, "Disable Expansion ROM Write Protection\n"); + } else { + exprom_wp_disabled = false; + dd_dev_info(dd, "Enable Expansion ROM Write Protection\n"); + } + + write_csr(dd, ASIC_GPIO_OUT, gpio_val); + write_csr(dd, ASIC_GPIO_OE, gpio_val); + + return 0; +} + +static ssize_t exprom_wp_debugfs_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return 0; +} + +static ssize_t exprom_wp_debugfs_write(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + char cdata; + + if (count != 1) + return -EINVAL; + if (get_user(cdata, buf)) + return -EFAULT; + if (cdata == '0') + exprom_wp_set(ppd->dd, false); + else if (cdata == '1') + exprom_wp_set(ppd->dd, true); + else + return -EINVAL; + + return 1; +} + +static unsigned long exprom_in_use; + +static int exprom_wp_debugfs_open(struct inode *in, struct file *fp) +{ + if (test_and_set_bit(0, &exprom_in_use)) + return -EBUSY; + + return 0; +} + +static int exprom_wp_debugfs_release(struct inode *in, struct file *fp) +{ + struct hfi1_pportdata *ppd = private2ppd(fp); + + if (exprom_wp_disabled) + exprom_wp_set(ppd->dd, false); + clear_bit(0, &exprom_in_use); + + return 0; +} + #define DEBUGFS_OPS(nm, readroutine, writeroutine) \ { \ .name = nm, \ @@ -1119,6 +1190,9 @@ static const struct counter_info port_cntr_ops[] = { qsfp1_debugfs_open, qsfp1_debugfs_release), DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write, qsfp2_debugfs_open, qsfp2_debugfs_release), + DEBUGFS_XOPS("exprom_wp", exprom_wp_debugfs_read, + exprom_wp_debugfs_write, exprom_wp_debugfs_open, + exprom_wp_debugfs_release), DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL), DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write), -- cgit v1.2.3-59-g8ed1b From ea752bc5e50a03e337dfa5c8940d357c62300f8a Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Thu, 11 Apr 2019 07:15:49 -0700 Subject: IB/{rdmavt, hfi1}: Miscellaneous comment fixes This patch fixes miscellaneous comment errors.
Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ruc.c | 2 +- include/rdma/rdmavt_qp.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 124a3ec1e15c..23ac6057b211 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -524,7 +524,7 @@ void _hfi1_do_send(struct work_struct *work) /** * hfi1_do_send - perform a send on a QP - * @work: contains a pointer to the QP + * @qp: a pointer to the QP * @in_thread: true if in a workqueue thread * * Process entries in the send work queue until credit or queue is diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f0fbd4063fef..0ad89867b258 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -83,7 +83,6 @@ * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating * next send completion entry not via send DMA * RVT_S_WAIT_PIO - waiting for a send buffer to be available - * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available * RVT_S_WAIT_KMEM - waiting for kernel memory to be available -- cgit v1.2.3-59-g8ed1b From 62644c1d2bb7c8f7ec259e45a123dca6bbaa0f7b Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 11 Apr 2019 07:16:00 -0700 Subject: IB/hfi1: Make opfn.h self sufficient Whether the opfn.h include file builds depends on the including file having the correct includes. Fix by making opfn.h self-sufficient. Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/opfn.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/opfn.h b/drivers/infiniband/hw/hfi1/opfn.h index 5f2011cabc25..62f93c1dc082 100644 --- a/drivers/infiniband/hw/hfi1/opfn.h +++ b/drivers/infiniband/hw/hfi1/opfn.h @@ -47,12 +47,14 @@ * for future transactions */ +#include +#include +#include + /* STL Verbs Extended */ #define IB_BTHE_E_SHIFT 24 #define HFI1_VERBS_E_ATOMIC_VADDR U64_MAX -struct ib_atomic_eth; - enum hfi1_opfn_codes { STL_VERBS_EXTD_NONE = 0, STL_VERBS_EXTD_TID_RDMA, -- cgit v1.2.3-59-g8ed1b From d40f69c9b9dff3e47d9647943db267b5792ae215 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 12 Apr 2019 06:41:42 -0700 Subject: IB/{rdmavt, qib, hfi1}: Use new routine to release reference counts The reference count adjustments performed at completion time are open coded throughout. Add a routine to do all of the reference count adjustments and use it. Reviewed-by: Michael J.
From d40f69c9b9dff3e47d9647943db267b5792ae215 Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn
Date: Fri, 12 Apr 2019 06:41:42 -0700
Subject: IB/{rdmavt, qib, hfi1}: Use new routine to release reference counts

The reference count adjustments on send completion are open coded
throughout. Add a routine to do all reference count adjustments and use
it.

Reviewed-by: Michael J. Ruhl
Signed-off-by: Mike Marciniszyn
Signed-off-by: Dennis Dalessandro
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/hw/hfi1/rc.c    |  4 ++--
 drivers/infiniband/hw/qib/qib_rc.c |  4 ++--
 drivers/infiniband/sw/rdmavt/qp.c  |  9 ++-------
 include/rdma/rdmavt_qp.h           | 14 ++++++++++++++
 4 files changed, 20 insertions(+), 11 deletions(-)

(limited to 'drivers/infiniband/hw/hfi1')

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 5ba39a9f65ad..a922edcf23d6 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1834,7 +1834,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
 			qp->s_last = s_last;
 			/* see post_send() */
 			barrier();
-			rvt_put_swqe(wqe);
+			rvt_put_qp_swqe(qp, wqe);
 			rvt_qp_swqe_complete(qp,
 					     wqe,
 					     ib_hfi1_wc_opcode[wqe->wr.opcode],
@@ -1882,7 +1882,7 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 		u32 s_last;
 
 		trdma_clean_swqe(qp, wqe);
-		rvt_put_swqe(wqe);
+		rvt_put_qp_swqe(qp, wqe);
 		rvt_qp_wqe_unreserve(qp, wqe);
 		s_last = qp->s_last;
 		trace_hfi1_qp_send_completion(qp, wqe, s_last);
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 50dd9811b088..2ac4c67f5ba1 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -933,7 +933,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
 			qp->s_last = s_last;
 			/* see post_send() */
 			barrier();
-			rvt_put_swqe(wqe);
+			rvt_put_qp_swqe(qp, wqe);
 			rvt_qp_swqe_complete(qp,
 					     wqe,
 					     ib_qib_wc_opcode[wqe->wr.opcode],
@@ -975,7 +975,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
 		u32 s_last;
 
-		rvt_put_swqe(wqe);
+		rvt_put_qp_swqe(qp, wqe);
 		s_last = qp->s_last;
 		if (++s_last >= qp->s_size)
 			s_last = 0;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 2460303053eb..31a2e65e4906 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -623,10 +623,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
 		while (qp->s_last != qp->s_head) {
 			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 
-			rvt_put_swqe(wqe);
-			if (qp->allowed_ops == IB_OPCODE_UD)
-				atomic_dec(&ibah_to_rvtah(
-						wqe->ud_wr.ah)->refcount);
+			rvt_put_qp_swqe(qp, wqe);
 			if (++qp->s_last >= qp->s_size)
 				qp->s_last = 0;
 			smp_wmb(); /* see qp_set_savail */
@@ -2683,9 +2680,7 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 		qp->s_last = last;
 		/* See post_send() */
 		barrier();
-		rvt_put_swqe(wqe);
-		if (qp->allowed_ops == IB_OPCODE_UD)
-			atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
+		rvt_put_qp_swqe(qp, wqe);
 		rvt_qp_swqe_complete(qp,
 				     wqe,
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index a00c46a4e779..68e38c20afc0 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -723,6 +723,20 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
 	return rvt_mod_retry_timer_ext(qp, 0);
 }
 
+/**
+ * rvt_put_qp_swqe - drop refs held by swqe
+ * @qp: the send qp
+ * @wqe: the send wqe
+ *
+ * This drops any references held by the swqe
+ */
+static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+	rvt_put_swqe(wqe);
+	if (qp->allowed_ops == IB_OPCODE_UD)
+		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
+}
+
 extern const int ib_rvt_state_ops[];
 
 struct rvt_dev_info;
-- 
cgit v1.2.3-59-g8ed1b
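The refactor pattern is worth a stand-alone illustration: each
completion path previously repeated the conditional secondary reference
drop. A user-space sketch of the same consolidation, using C11 atomics
in place of kernel refcounts (all names hypothetical):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct ah { atomic_int refcount; };

    struct swqe {
            atomic_int refcount;    /* primary reference held by the wqe */
            struct ah *ah;          /* secondary ref, UD-style wqes only */
            bool is_ud;
    };

    /* One place to drop every reference the wqe holds. */
    static void put_swqe(struct swqe *wqe)
    {
            atomic_fetch_sub(&wqe->refcount, 1);
            if (wqe->is_ud)
                    atomic_fetch_sub(&wqe->ah->refcount, 1);
    }

    int main(void)
    {
            struct ah ah = { .refcount = 1 };
            struct swqe wqe = { .refcount = 1, .ah = &ah, .is_ud = true };

            put_swqe(&wqe); /* replaces the open-coded pair of decrements */
            printf("wqe=%d ah=%d\n", atomic_load(&wqe.refcount),
                   atomic_load(&ah.refcount));
            return 0;
    }

The design payoff is the same as in the patch: a later change to what a
wqe references is made in one helper rather than at every call site.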
From a9c62e007878ba88b703369c1cd9e26682453665 Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn
Date: Thu, 11 Apr 2019 07:17:10 -0700
Subject: IB/hfi1: Add selected Rcv counters

These counters are required for error analysis and debug.

Reviewed-by: Dennis Dalessandro
Signed-off-by: Mike Marciniszyn
Signed-off-by: Dennis Dalessandro
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/hw/hfi1/chip.c           | 3 +++
 drivers/infiniband/hw/hfi1/chip.h           | 3 +++
 drivers/infiniband/hw/hfi1/chip_registers.h | 3 +++
 3 files changed, 9 insertions(+)

(limited to 'drivers/infiniband/hw/hfi1')

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 229d5d4cafe8..ec2df39c2f60 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -4104,6 +4104,9 @@ def_access_ibp_counter(seq_naks);
 
 static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
 [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
+[C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH),
+[C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH),
+[C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH),
 [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
 			CNTR_NORMAL),
 [C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 6c27c1c6a868..4e6c3556ec48 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -858,6 +858,9 @@ static inline int idx_from_vl(int vl)
 /* Per device counter indexes */
 enum {
 	C_RCV_OVF = 0,
+	C_RX_LEN_ERR,
+	C_RX_ICRC_ERR,
+	C_RX_EBP,
 	C_RX_TID_FULL,
 	C_RX_TID_INVALID,
 	C_RX_TID_FLGMS,
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index c0800ea5a3f8..ab3589d17aee 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -380,6 +380,9 @@
 #define DC_LCB_PRF_TX_FLIT_CNT (DC_LCB_CSRS + 0x000000000418)
 #define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468)
 #define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0)
+#define RCV_LENGTH_ERR_CNT 0
+#define RCV_ICRC_ERR_CNT 6
+#define RCV_EBP_CNT 9
 #define RCV_BUF_OVFL_CNT 10
 #define RCV_CONTEXT_EGR_STALL 22
 #define RCV_DATA_PKT_CNT 0
-- 
cgit v1.2.3-59-g8ed1b
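These RXE32 entries expose 32-bit hardware counts as 64-bit values
(CNTR_SYNTH). As a rough stand-alone sketch of the general widening
technique — an illustration only, not the hfi1 implementation — a
synthesized counter accumulates the modular delta on each read so that
32-bit wraps are not lost:

    #include <stdint.h>

    struct synth_cntr {
            uint64_t total;    /* synthesized 64-bit value */
            uint32_t last_hw;  /* hardware count at the previous read */
    };

    /* hw is the current 32-bit CSR value, e.g. from RCV_ICRC_ERR_CNT. */
    static uint64_t synth_read(struct synth_cntr *c, uint32_t hw)
    {
            c->total += (uint32_t)(hw - c->last_hw); /* mod-2^32 delta
                                                        survives wrap */
            c->last_hw = hw;
            return c->total;
    }

This only stays correct if the counter is read more often than it can
wrap, which is why such counters are typically polled on a timer.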
From 3c176c9d72446217f6451543452692141eb665dc Mon Sep 17 00:00:00 2001
From: John Fleck
Date: Thu, 11 Apr 2019 07:17:21 -0700
Subject: IB/hfi1: Remove reference to RHF.VCRCErr

The bit VCRCErr in the receive header flag is actually a reserved field.
Remove bit operations on this field.

Reviewed-by: Michael J. Ruhl
Reviewed-by: Dennis Dalessandro
Signed-off-by: John Fleck
Signed-off-by: Dennis Dalessandro
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/hw/hfi1/common.h   | 2 +-
 drivers/infiniband/hw/hfi1/driver.c   | 5 ++---
 drivers/infiniband/hw/hfi1/tid_rdma.c | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'drivers/infiniband/hw/hfi1')

diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 7310a5dba420..d47da7b0438f 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -286,7 +286,7 @@ struct diag_pkt {
 #define RHF_TID_ERR	(0x1ull << 59)
 #define RHF_LEN_ERR	(0x1ull << 60)
 #define RHF_ECC_ERR	(0x1ull << 61)
-#define RHF_VCRC_ERR	(0x1ull << 62)
+#define RHF_RESERVED	(0x1ull << 62)
 #define RHF_ICRC_ERR	(0x1ull << 63)
 
 #define RHF_ERROR_SMASK 0xffe0000000000000ull	/* bits 63:53 */
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 129e48ec9ee0..01aa1f132f55 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -262,7 +262,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 	    hfi1_dbg_fault_suppress_err(verbs_dev))
 		return;
 
-	if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
+	if (packet->rhf & RHF_ICRC_ERR)
 		return;
 
 	if (packet->etype == RHF_RCV_TYPE_BYPASS) {
@@ -1581,7 +1581,7 @@ static void show_eflags_errs(struct hfi1_packet *packet)
 	u32 rte = rhf_rcv_type_err(packet->rhf);
 
 	dd_dev_err(rcd->dd,
-		   "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
+		   "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s] rte 0x%x\n",
 		   rcd->ctxt, packet->rhf,
 		   packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
 		   packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
@@ -1589,7 +1589,6 @@ static void show_eflags_errs(struct hfi1_packet *packet)
 		   packet->rhf & RHF_TID_ERR ? "tid " : "",
 		   packet->rhf & RHF_LEN_ERR ? "len " : "",
 		   packet->rhf & RHF_ECC_ERR ? "ecc " : "",
-		   packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
 		   packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
 		   rte);
 }
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index eae6f05ca2fa..cf72bf39ff1e 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -2920,7 +2920,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
 	trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ",
 					   packet->rhf);
 
-	if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
+	if (packet->rhf & RHF_ICRC_ERR)
 		return ret;
 
 	packet->ohdr = &hdr->u.oth;
-- 
cgit v1.2.3-59-g8ed1b
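A compact stand-alone sketch of the post-change decoding, reusing the
bit positions from common.h above (the helper function is invented for
illustration):

    #include <stdint.h>
    #include <stdio.h>

    #define RHF_ECC_ERR  (0x1ull << 61)
    #define RHF_RESERVED (0x1ull << 62) /* formerly misnamed RHF_VCRC_ERR */
    #define RHF_ICRC_ERR (0x1ull << 63)

    static void decode_rhf_errs(uint64_t rhf)
    {
            /* Only architected bits are tested; bit 62 is reserved
             * and deliberately ignored. */
            printf("errs [ %s%s]\n",
                   rhf & RHF_ECC_ERR ? "ecc " : "",
                   rhf & RHF_ICRC_ERR ? "icrc " : "");
    }

    int main(void)
    {
            /* Prints "errs [ icrc ]"; the reserved bit has no effect. */
            decode_rhf_errs(RHF_ICRC_ERR | RHF_RESERVED);
            return 0;
    }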
From 4c4b1996b5db688e2dcb8242b0a3bf7b1e845e42 Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn
Date: Mon, 18 Mar 2019 09:55:09 -0700
Subject: IB/hfi1: Fix WQ_MEM_RECLAIM warning

The work_item cancels that occur when a QP is destroyed can elicit the
following trace:

 workqueue: WQ_MEM_RECLAIM ipoib_wq:ipoib_cm_tx_reap [ib_ipoib] is flushing !WQ_MEM_RECLAIM hfi0_0:_hfi1_do_send [hfi1]
 WARNING: CPU: 7 PID: 1403 at kernel/workqueue.c:2486 check_flush_dependency+0xb1/0x100
 Call Trace:
  __flush_work.isra.29+0x8c/0x1a0
  ? __switch_to_asm+0x40/0x70
  __cancel_work_timer+0x103/0x190
  ? schedule+0x32/0x80
  iowait_cancel_work+0x15/0x30 [hfi1]
  rvt_reset_qp+0x1f8/0x3e0 [rdmavt]
  rvt_destroy_qp+0x65/0x1f0 [rdmavt]
  ? _cond_resched+0x15/0x30
  ib_destroy_qp+0xe9/0x230 [ib_core]
  ipoib_cm_tx_reap+0x21c/0x560 [ib_ipoib]
  process_one_work+0x171/0x370
  worker_thread+0x49/0x3f0
  kthread+0xf8/0x130
  ? max_active_store+0x80/0x80
  ? kthread_bind+0x10/0x10
  ret_from_fork+0x35/0x40

Since QP destruction frees memory, hfi1_wq should have the
WQ_MEM_RECLAIM flag. The hfi1_wq does not allocate memory with
GFP_KERNEL or otherwise become entangled with memory reclaim, so this
flag is appropriate.

Fixes: 0a226edd203f ("staging/rdma/hfi1: Use parallel workqueue for SDMA engines")
Reviewed-by: Michael J. Ruhl
Signed-off-by: Mike Marciniszyn
Signed-off-by: Dennis Dalessandro
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/hw/hfi1/init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/infiniband/hw/hfi1')

diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index e4c2ae4f1cb3..71cb9525c074 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -805,7 +805,8 @@ static int create_workqueues(struct hfi1_devdata *dd)
 			ppd->hfi1_wq =
 				alloc_workqueue(
 				    "hfi%d_%d",
-				    WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
+				    WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
+				    WQ_MEM_RECLAIM,
 				    HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
 				    dd->unit, pidx);
 			if (!ppd->hfi1_wq)
-- 
cgit v1.2.3-59-g8ed1b
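The rule check_flush_dependency() enforces is that a WQ_MEM_RECLAIM
workqueue may only flush or cancel work on queues that also have a
rescuer thread. A hypothetical module-style sketch of the corrected
allocation pattern (not the hfi1 code itself; names are invented):

    #include <linux/module.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *send_wq;
    static struct work_struct send_work;

    static void send_fn(struct work_struct *w) { }

    static int __init demo_init(void)
    {
            /* WQ_MEM_RECLAIM guarantees a rescuer thread, so work on
             * this queue may be flushed from reclaim-tainted contexts
             * without triggering the warning above. */
            send_wq = alloc_workqueue("demo_send",
                                      WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
            if (!send_wq)
                    return -ENOMEM;
            INIT_WORK(&send_work, send_fn);
            queue_work(send_wq, &send_work);
            return 0;
    }

    static void __exit demo_exit(void)
    {
            cancel_work_sync(&send_work); /* safe from a reclaim wq now */
            destroy_workqueue(send_wq);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");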