diff options
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
26 files changed, 470 insertions, 388 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 4044a8c8dbf4..0405d26d0833 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := \ affinity.o \ + aspm.o \ chip.o \ device.o \ driver.o \ diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 4fe662c3bbc1..c142b23bb401 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1038,7 +1038,7 @@ int hfi1_get_proc_affinity(int node) struct hfi1_affinity_node *entry; cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask; const struct cpumask *node_mask, - *proc_mask = &current->cpus_allowed; + *proc_mask = current->cpus_ptr; struct hfi1_affinity_node_list *affinity = &node_affinity; struct cpu_mask_set *set = &affinity->proc; @@ -1046,7 +1046,7 @@ int hfi1_get_proc_affinity(int node) * check whether process/context affinity has already * been set */ - if (cpumask_weight(proc_mask) == 1) { + if (current->nr_cpus_allowed == 1) { hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl", current->pid, current->comm, cpumask_pr_args(proc_mask)); @@ -1057,7 +1057,7 @@ int hfi1_get_proc_affinity(int node) cpu = cpumask_first(proc_mask); cpumask_set_cpu(cpu, &set->used); goto done; - } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) { + } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) { hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl", current->pid, current->comm, cpumask_pr_args(proc_mask)); diff --git a/drivers/infiniband/hw/hfi1/aspm.c b/drivers/infiniband/hw/hfi1/aspm.c new file mode 100644 index 000000000000..a3c53be4072c --- /dev/null +++ b/drivers/infiniband/hw/hfi1/aspm.c @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2019 Intel Corporation. 
+ * + */ + +#include "aspm.h" + +/* Time after which the timer interrupt will re-enable ASPM */ +#define ASPM_TIMER_MS 1000 +/* Time for which interrupts are ignored after a timer has been scheduled */ +#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2) +/* Two interrupts within this time trigger ASPM disable */ +#define ASPM_TRIGGER_MS 1 +#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull) +#define ASPM_L1_SUPPORTED(reg) \ + ((((reg) & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2) + +uint aspm_mode = ASPM_MODE_DISABLED; +module_param_named(aspm, aspm_mode, uint, 0444); +MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); + +static bool aspm_hw_l1_supported(struct hfi1_devdata *dd) +{ + struct pci_dev *parent = dd->pcidev->bus->self; + u32 up, dn; + + /* + * If the driver does not have access to the upstream component, + * it cannot support ASPM L1 at all. + */ + if (!parent) + return false; + + pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn); + dn = ASPM_L1_SUPPORTED(dn); + + pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up); + up = ASPM_L1_SUPPORTED(up); + + /* ASPM works on A-step but is reported as not supported */ + return (!!dn || is_ax(dd)) && !!up; +} + +/* Set L1 entrance latency for slower entry to L1 */ +static void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd) +{ + u32 l1_ent_lat = 0x4u; + u32 reg32; + + pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, ®32); + reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK; + reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT; + pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32); +} + +static void aspm_hw_enable_l1(struct hfi1_devdata *dd) +{ + struct pci_dev *parent = dd->pcidev->bus->self; + + /* + * If the driver does not have access to the upstream component, + * it cannot support ASPM L1 at all. 
+ */ + if (!parent) + return; + + /* Enable ASPM L1 first in upstream component and then downstream */ + pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, + PCI_EXP_LNKCTL_ASPM_L1); + pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, + PCI_EXP_LNKCTL_ASPM_L1); +} + +void aspm_hw_disable_l1(struct hfi1_devdata *dd) +{ + struct pci_dev *parent = dd->pcidev->bus->self; + + /* Disable ASPM L1 first in downstream component and then upstream */ + pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, 0x0); + if (parent) + pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, 0x0); +} + +static void aspm_enable(struct hfi1_devdata *dd) +{ + if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED || + !dd->aspm_supported) + return; + + aspm_hw_enable_l1(dd); + dd->aspm_enabled = true; +} + +static void aspm_disable(struct hfi1_devdata *dd) +{ + if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED) + return; + + aspm_hw_disable_l1(dd); + dd->aspm_enabled = false; +} + +static void aspm_disable_inc(struct hfi1_devdata *dd) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->aspm_lock, flags); + aspm_disable(dd); + atomic_inc(&dd->aspm_disabled_cnt); + spin_unlock_irqrestore(&dd->aspm_lock, flags); +} + +static void aspm_enable_dec(struct hfi1_devdata *dd) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->aspm_lock, flags); + if (atomic_dec_and_test(&dd->aspm_disabled_cnt)) + aspm_enable(dd); + spin_unlock_irqrestore(&dd->aspm_lock, flags); +} + +/* ASPM processing for each receive context interrupt */ +void __aspm_ctx_disable(struct hfi1_ctxtdata *rcd) +{ + bool restart_timer; + bool close_interrupts; + unsigned long flags; + ktime_t now, prev; + + spin_lock_irqsave(&rcd->aspm_lock, flags); + /* PSM contexts are open */ + if (!rcd->aspm_intr_enable) + goto unlock; + + prev = rcd->aspm_ts_last_intr; + now = ktime_get(); + 
rcd->aspm_ts_last_intr = now; + + /* An interrupt pair close together in time */ + close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS; + + /* Don't push out our timer till this much time has elapsed */ + restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) > + ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC; + restart_timer = restart_timer && close_interrupts; + + /* Disable ASPM and schedule timer */ + if (rcd->aspm_enabled && close_interrupts) { + aspm_disable_inc(rcd->dd); + rcd->aspm_enabled = false; + restart_timer = true; + } + + if (restart_timer) { + mod_timer(&rcd->aspm_timer, + jiffies + msecs_to_jiffies(ASPM_TIMER_MS)); + rcd->aspm_ts_timer_sched = now; + } +unlock: + spin_unlock_irqrestore(&rcd->aspm_lock, flags); +} + +/* Timer function for re-enabling ASPM in the absence of interrupt activity */ +static void aspm_ctx_timer_function(struct timer_list *t) +{ + struct hfi1_ctxtdata *rcd = from_timer(rcd, t, aspm_timer); + unsigned long flags; + + spin_lock_irqsave(&rcd->aspm_lock, flags); + aspm_enable_dec(rcd->dd); + rcd->aspm_enabled = true; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); +} + +/* + * Disable interrupt processing for verbs contexts when PSM or VNIC contexts + * are open. 
+ */ +void aspm_disable_all(struct hfi1_devdata *dd) +{ + struct hfi1_ctxtdata *rcd; + unsigned long flags; + u16 i; + + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) { + del_timer_sync(&rcd->aspm_timer); + spin_lock_irqsave(&rcd->aspm_lock, flags); + rcd->aspm_intr_enable = false; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); + hfi1_rcd_put(rcd); + } + } + + aspm_disable(dd); + atomic_set(&dd->aspm_disabled_cnt, 0); +} + +/* Re-enable interrupt processing for verbs contexts */ +void aspm_enable_all(struct hfi1_devdata *dd) +{ + struct hfi1_ctxtdata *rcd; + unsigned long flags; + u16 i; + + aspm_enable(dd); + + if (aspm_mode != ASPM_MODE_DYNAMIC) + return; + + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) { + spin_lock_irqsave(&rcd->aspm_lock, flags); + rcd->aspm_intr_enable = true; + rcd->aspm_enabled = true; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); + hfi1_rcd_put(rcd); + } + } +} + +static void aspm_ctx_init(struct hfi1_ctxtdata *rcd) +{ + spin_lock_init(&rcd->aspm_lock); + timer_setup(&rcd->aspm_timer, aspm_ctx_timer_function, 0); + rcd->aspm_intr_supported = rcd->dd->aspm_supported && + aspm_mode == ASPM_MODE_DYNAMIC && + rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt; +} + +void aspm_init(struct hfi1_devdata *dd) +{ + struct hfi1_ctxtdata *rcd; + u16 i; + + spin_lock_init(&dd->aspm_lock); + dd->aspm_supported = aspm_hw_l1_supported(dd); + + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + aspm_ctx_init(rcd); + hfi1_rcd_put(rcd); + } + + /* Start with ASPM disabled */ + aspm_hw_set_l1_ent_latency(dd); + dd->aspm_enabled = false; + aspm_hw_disable_l1(dd); + + /* Now turn on ASPM if configured */ + aspm_enable_all(dd); +} + +void aspm_exit(struct hfi1_devdata *dd) +{ + aspm_disable_all(dd); + + /* Turn on ASPM on exit to conserve power */ + aspm_enable(dd); +} + diff --git 
a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index e8133870ee87..75d5d18da3da 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -57,266 +57,20 @@ enum aspm_mode { ASPM_MODE_DYNAMIC = 2, /* ASPM enabled/disabled dynamically */ }; -/* Time after which the timer interrupt will re-enable ASPM */ -#define ASPM_TIMER_MS 1000 -/* Time for which interrupts are ignored after a timer has been scheduled */ -#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2) -/* Two interrupts within this time trigger ASPM disable */ -#define ASPM_TRIGGER_MS 1 -#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull) -#define ASPM_L1_SUPPORTED(reg) \ - (((reg & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2) +void aspm_init(struct hfi1_devdata *dd); +void aspm_exit(struct hfi1_devdata *dd); +void aspm_hw_disable_l1(struct hfi1_devdata *dd); +void __aspm_ctx_disable(struct hfi1_ctxtdata *rcd); +void aspm_disable_all(struct hfi1_devdata *dd); +void aspm_enable_all(struct hfi1_devdata *dd); -static inline bool aspm_hw_l1_supported(struct hfi1_devdata *dd) -{ - struct pci_dev *parent = dd->pcidev->bus->self; - u32 up, dn; - - /* - * If the driver does not have access to the upstream component, - * it cannot support ASPM L1 at all. 
- */ - if (!parent) - return false; - - pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn); - dn = ASPM_L1_SUPPORTED(dn); - - pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up); - up = ASPM_L1_SUPPORTED(up); - - /* ASPM works on A-step but is reported as not supported */ - return (!!dn || is_ax(dd)) && !!up; -} - -/* Set L1 entrance latency for slower entry to L1 */ -static inline void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd) -{ - u32 l1_ent_lat = 0x4u; - u32 reg32; - - pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32); - reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK; - reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT; - pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32); -} - -static inline void aspm_hw_enable_l1(struct hfi1_devdata *dd) -{ - struct pci_dev *parent = dd->pcidev->bus->self; - - /* - * If the driver does not have access to the upstream component, - * it cannot support ASPM L1 at all. - */ - if (!parent) - return; - - /* Enable ASPM L1 first in upstream component and then downstream */ - pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPMC, - PCI_EXP_LNKCTL_ASPM_L1); - pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPMC, - PCI_EXP_LNKCTL_ASPM_L1); -} - -static inline void aspm_hw_disable_l1(struct hfi1_devdata *dd) -{ - struct pci_dev *parent = dd->pcidev->bus->self; - - /* Disable ASPM L1 first in downstream component and then upstream */ - pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPMC, 0x0); - if (parent) - pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPMC, 0x0); -} - -static inline void aspm_enable(struct hfi1_devdata *dd) -{ - if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED || - !dd->aspm_supported) - return; - - aspm_hw_enable_l1(dd); - dd->aspm_enabled = true; -} - -static inline void aspm_disable(struct hfi1_devdata *dd) -{ - if (!dd->aspm_enabled 
|| aspm_mode == ASPM_MODE_ENABLED) - return; - - aspm_hw_disable_l1(dd); - dd->aspm_enabled = false; -} - -static inline void aspm_disable_inc(struct hfi1_devdata *dd) -{ - unsigned long flags; - - spin_lock_irqsave(&dd->aspm_lock, flags); - aspm_disable(dd); - atomic_inc(&dd->aspm_disabled_cnt); - spin_unlock_irqrestore(&dd->aspm_lock, flags); -} - -static inline void aspm_enable_dec(struct hfi1_devdata *dd) -{ - unsigned long flags; - - spin_lock_irqsave(&dd->aspm_lock, flags); - if (atomic_dec_and_test(&dd->aspm_disabled_cnt)) - aspm_enable(dd); - spin_unlock_irqrestore(&dd->aspm_lock, flags); -} - -/* ASPM processing for each receive context interrupt */ static inline void aspm_ctx_disable(struct hfi1_ctxtdata *rcd) { - bool restart_timer; - bool close_interrupts; - unsigned long flags; - ktime_t now, prev; - /* Quickest exit for minimum impact */ - if (!rcd->aspm_intr_supported) - return; - - spin_lock_irqsave(&rcd->aspm_lock, flags); - /* PSM contexts are open */ - if (!rcd->aspm_intr_enable) - goto unlock; - - prev = rcd->aspm_ts_last_intr; - now = ktime_get(); - rcd->aspm_ts_last_intr = now; - - /* An interrupt pair close together in time */ - close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS; - - /* Don't push out our timer till this much time has elapsed */ - restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) > - ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC; - restart_timer = restart_timer && close_interrupts; - - /* Disable ASPM and schedule timer */ - if (rcd->aspm_enabled && close_interrupts) { - aspm_disable_inc(rcd->dd); - rcd->aspm_enabled = false; - restart_timer = true; - } - - if (restart_timer) { - mod_timer(&rcd->aspm_timer, - jiffies + msecs_to_jiffies(ASPM_TIMER_MS)); - rcd->aspm_ts_timer_sched = now; - } -unlock: - spin_unlock_irqrestore(&rcd->aspm_lock, flags); -} - -/* Timer function for re-enabling ASPM in the absence of interrupt activity */ -static inline void aspm_ctx_timer_function(struct timer_list 
*t) -{ - struct hfi1_ctxtdata *rcd = from_timer(rcd, t, aspm_timer); - unsigned long flags; - - spin_lock_irqsave(&rcd->aspm_lock, flags); - aspm_enable_dec(rcd->dd); - rcd->aspm_enabled = true; - spin_unlock_irqrestore(&rcd->aspm_lock, flags); -} - -/* - * Disable interrupt processing for verbs contexts when PSM or VNIC contexts - * are open. - */ -static inline void aspm_disable_all(struct hfi1_devdata *dd) -{ - struct hfi1_ctxtdata *rcd; - unsigned long flags; - u16 i; - - for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = hfi1_rcd_get_by_index(dd, i); - if (rcd) { - del_timer_sync(&rcd->aspm_timer); - spin_lock_irqsave(&rcd->aspm_lock, flags); - rcd->aspm_intr_enable = false; - spin_unlock_irqrestore(&rcd->aspm_lock, flags); - hfi1_rcd_put(rcd); - } - } - - aspm_disable(dd); - atomic_set(&dd->aspm_disabled_cnt, 0); -} - -/* Re-enable interrupt processing for verbs contexts */ -static inline void aspm_enable_all(struct hfi1_devdata *dd) -{ - struct hfi1_ctxtdata *rcd; - unsigned long flags; - u16 i; - - aspm_enable(dd); - - if (aspm_mode != ASPM_MODE_DYNAMIC) + if (likely(!rcd->aspm_intr_supported)) return; - for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = hfi1_rcd_get_by_index(dd, i); - if (rcd) { - spin_lock_irqsave(&rcd->aspm_lock, flags); - rcd->aspm_intr_enable = true; - rcd->aspm_enabled = true; - spin_unlock_irqrestore(&rcd->aspm_lock, flags); - hfi1_rcd_put(rcd); - } - } -} - -static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) -{ - spin_lock_init(&rcd->aspm_lock); - timer_setup(&rcd->aspm_timer, aspm_ctx_timer_function, 0); - rcd->aspm_intr_supported = rcd->dd->aspm_supported && - aspm_mode == ASPM_MODE_DYNAMIC && - rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt; -} - -static inline void aspm_init(struct hfi1_devdata *dd) -{ - struct hfi1_ctxtdata *rcd; - u16 i; - - spin_lock_init(&dd->aspm_lock); - dd->aspm_supported = aspm_hw_l1_supported(dd); - - for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = hfi1_rcd_get_by_index(dd, i); 
- if (rcd) - aspm_ctx_init(rcd); - hfi1_rcd_put(rcd); - } - - /* Start with ASPM disabled */ - aspm_hw_set_l1_ent_latency(dd); - dd->aspm_enabled = false; - aspm_hw_disable_l1(dd); - - /* Now turn on ASPM if configured */ - aspm_enable_all(dd); -} - -static inline void aspm_exit(struct hfi1_devdata *dd) -{ - aspm_disable_all(dd); - - /* Turn on ASPM on exit to conserve power */ - aspm_enable(dd); + __aspm_ctx_disable(rcd); } #endif /* _ASPM_H */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 4221a99ee7f4..d5b643a1d9fd 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14032,6 +14032,19 @@ static void init_kdeth_qp(struct hfi1_devdata *dd) } /** + * hfi1_get_qp_map + * @dd: device data + * @idx: index to read + */ +u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx) +{ + u64 reg = read_csr(dd, RCV_QP_MAP_TABLE + (idx / 8) * 8); + + reg >>= (idx % 8) * 8; + return reg; +} + +/** * init_qpmap_table * @dd - device data * @first_ctxt - first context diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 4e6c3556ec48..b76cf81f927f 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1445,6 +1445,7 @@ void clear_all_interrupts(struct hfi1_devdata *dd); void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr); void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr); void reset_interrupts(struct hfi1_devdata *dd); +u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx); /* * Interrupt source table. 
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 15efb4a380b2..d268bf9c42ee 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -987,9 +987,6 @@ static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target) struct hfi1_pportdata *ppd; int ret; - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - ppd = private2ppd(fp); ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0); @@ -1155,6 +1152,7 @@ static int exprom_wp_debugfs_release(struct inode *in, struct file *fp) { \ .name = nm, \ .ops = { \ + .owner = THIS_MODULE, \ .read = readroutine, \ .write = writeroutine, \ .llseek = generic_file_llseek, \ @@ -1165,6 +1163,7 @@ static int exprom_wp_debugfs_release(struct inode *in, struct file *fp) { \ .name = nm, \ .ops = { \ + .owner = THIS_MODULE, \ .read = readf, \ .write = writef, \ .llseek = generic_file_llseek, \ diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index 3fd3315d0fb0..93613e5def9b 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -153,6 +153,7 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, char *dash; unsigned long range_start, range_end, i; bool remove = false; + unsigned long bound = 1U << BITS_PER_BYTE; end = strchr(ptr, ','); if (end) @@ -178,6 +179,10 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, BITS_PER_BYTE); break; } + /* Check the inputs */ + if (range_start >= bound || range_end >= bound) + break; + for (i = range_start; i <= range_end; i++) { if (remove) clear_bit(i, fault->opcodes); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b458c218842b..fa45350a9a1d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -539,6 +539,37 @@ static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt, mgmt->src_qpn = 
cpu_to_be32(src_qp & OPA_16B_MGMT_QPN_MASK); } +/** + * hfi1_get_rc_ohdr - get extended header + * @opah - the opaheader + */ +static inline struct ib_other_headers * +hfi1_get_rc_ohdr(struct hfi1_opa_header *opah) +{ + struct ib_other_headers *ohdr; + struct ib_header *hdr = NULL; + struct hfi1_16b_header *hdr_16b = NULL; + + /* Find out where the BTH is */ + if (opah->hdr_type == HFI1_PKT_TYPE_9B) { + hdr = &opah->ibh; + if (ib_get_lnh(hdr) == HFI1_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + } else { + u8 l4; + + hdr_16b = &opah->opah; + l4 = hfi1_16B_get_l4(hdr_16b); + if (l4 == OPA_16B_L4_IB_LOCAL) + ohdr = &hdr_16b->u.oth; + else + ohdr = &hdr_16b->u.l.oth; + } + return ohdr; +} + struct rvt_sge_state; /* diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 4228393e6c4c..184dba3c2828 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -2744,8 +2744,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, u16 link_width; u16 link_speed; - response_data_size = sizeof(struct opa_port_status_rsp) + - num_vls * sizeof(struct _vls_pctrs); + response_data_size = struct_size(rsp, vls, num_vls); if (response_data_size > sizeof(pmp->data)) { pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE; return reply((struct ib_mad_hdr *)pmp); @@ -3014,8 +3013,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, } /* Sanity check */ - response_data_size = sizeof(struct opa_port_data_counters_msg) + - num_vls * sizeof(struct _vls_dctrs); + response_data_size = struct_size(req, port[0].vls, num_vls); if (response_data_size > sizeof(pmp->data)) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3232,8 +3230,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } - response_data_size = sizeof(struct opa_port_error_counters64_msg) + - num_vls * sizeof(struct _vls_ectrs); + response_data_size = struct_size(req, port[0].vls, 
num_vls); if (response_data_size > sizeof(pmp->data)) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index c96d193bb236..61aa5504d7c3 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -450,10 +450,6 @@ static int hfi1_pcie_caps; module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444); MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); -uint aspm_mode = ASPM_MODE_DISABLED; -module_param_named(aspm, aspm_mode, uint, 0444); -MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); - /** * tune_pcie_caps() - Code to adjust PCIe capabilities. * @dd: Valid device data structure diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 16ba9d52e1b9..79126b2b14ab 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -952,6 +952,22 @@ void sc_disable(struct send_context *sc) } } spin_unlock(&sc->release_lock); + + write_seqlock(&sc->waitlock); + while (!list_empty(&sc->piowait)) { + struct iowait *wait; + struct rvt_qp *qp; + struct hfi1_qp_priv *priv; + + wait = list_first_entry(&sc->piowait, struct iowait, list); + qp = iowait_to_qp(wait); + priv = qp->priv; + list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; + hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); + } + write_sequnlock(&sc->waitlock); + spin_unlock_irq(&sc->alloc_lock); } @@ -1427,7 +1443,8 @@ void sc_stop(struct send_context *sc, int flag) * @cb: optional callback to call when the buffer is finished sending * @arg: argument for cb * - * Return a pointer to a PIO buffer if successful, NULL if not enough room. 
+ * Return a pointer to a PIO buffer, NULL if not enough room, -ECOMM + * when link is down. */ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, pio_release_cb cb, void *arg) @@ -1443,7 +1460,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, spin_lock_irqsave(&sc->alloc_lock, flags); if (!(sc->flags & SCF_ENABLED)) { spin_unlock_irqrestore(&sc->alloc_lock, flags); - goto done; + return ERR_PTR(-ECOMM); } retry: @@ -1577,9 +1594,8 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) else sc_del_credit_return_intr(sc); trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl); - if (needint) { + if (needint) sc_return_credits(sc); - } } /** diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 4e0e9fc0a777..f8e733aa3bb8 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -348,7 +348,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send) break; case IB_QPT_GSI: case IB_QPT_UD: - ah = ibah_to_rvtah(wqe->ud_wr.ah); + ah = rvt_get_swqe_ah(wqe); if (wqe->length > (1 << ah->log_pmtu)) return -EINVAL; if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) @@ -702,8 +702,8 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) sde ? sde->this_idx : 0, send_context, send_context ? 
send_context->sw_index : 0, - ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head, - ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail, + ib_cq_head(qp->ibqp.send_cq), + ib_cq_tail(qp->ibqp.send_cq), qp->pid, qp->s_state, qp->s_ack_state, diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index a922edcf23d6..0477c14633ab 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1432,7 +1432,7 @@ void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn) pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, sc_to_vlt(ppd->dd, sc5), plen); pbuf = sc_buffer_alloc(rcd->sc, plen, NULL, NULL); - if (!pbuf) { + if (IS_ERR_OR_NULL(pbuf)) { /* * We have no room to send at the moment. Pass * responsibility for sending the ACK to the send engine @@ -1701,6 +1701,36 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) } } +/** + * hfi1_rc_verbs_aborted - handle abort status + * @qp: the QP + * @opah: the opa header + * + * This code modifies both ACK bit in BTH[2] + * and the s_flags to go into send one mode. + * + * This serves to throttle the send engine to only + * send a single packet in the likely case the + * a link has gone down. + */ +void hfi1_rc_verbs_aborted(struct rvt_qp *qp, struct hfi1_opa_header *opah) +{ + struct ib_other_headers *ohdr = hfi1_get_rc_ohdr(opah); + u8 opcode = ib_bth_get_opcode(ohdr); + u32 psn; + + /* ignore responses */ + if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) && + opcode <= OP(ATOMIC_ACKNOWLEDGE)) || + opcode == TID_OP(READ_RESP) || + opcode == TID_OP(WRITE_RESP)) + return; + + psn = ib_bth_get_psn(ohdr) | IB_BTH_REQ_ACK; + ohdr->bth[2] = cpu_to_be32(psn); + qp->s_flags |= RVT_S_SEND_ONE; +} + /* * This should be called with the QP s_lock held and interrupts disabled. 
*/ @@ -1709,8 +1739,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) struct ib_other_headers *ohdr; struct hfi1_qp_priv *priv = qp->priv; struct rvt_swqe *wqe; - struct ib_header *hdr = NULL; - struct hfi1_16b_header *hdr_16b = NULL; u32 opcode, head, tail; u32 psn; struct tid_rdma_request *req; @@ -1719,24 +1747,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK)) return; - /* Find out where the BTH is */ - if (priv->hdr_type == HFI1_PKT_TYPE_9B) { - hdr = &opah->ibh; - if (ib_get_lnh(hdr) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - } else { - u8 l4; - - hdr_16b = &opah->opah; - l4 = hfi1_16B_get_l4(hdr_16b); - if (l4 == OPA_16B_L4_IB_LOCAL) - ohdr = &hdr_16b->u.oth; - else - ohdr = &hdr_16b->u.l.oth; - } - + ohdr = hfi1_get_rc_ohdr(opah); opcode = ib_bth_get_opcode(ohdr); if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) || @@ -1819,23 +1830,14 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) } while (qp->s_last != qp->s_acked) { - u32 s_last; - wqe = rvt_get_swqe_ptr(qp, qp->s_last); if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; trdma_clean_swqe(qp, wqe); rvt_qp_wqe_unreserve(qp, wqe); - s_last = qp->s_last; - trace_hfi1_qp_send_completion(qp, wqe, s_last); - if (++s_last >= qp->s_size) - s_last = 0; - qp->s_last = s_last; - /* see post_send() */ - barrier(); - rvt_put_qp_swqe(qp, wqe); - rvt_qp_swqe_complete(qp, + trace_hfi1_qp_send_completion(qp, wqe, qp->s_last); + rvt_qp_complete_swqe(qp, wqe, ib_hfi1_wc_opcode[wqe->wr.opcode], IB_WC_SUCCESS); @@ -1879,19 +1881,10 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, trace_hfi1_rc_completion(qp, wqe->lpsn); if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 || cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { - u32 s_last; - 
trdma_clean_swqe(qp, wqe); - rvt_put_qp_swqe(qp, wqe); rvt_qp_wqe_unreserve(qp, wqe); - s_last = qp->s_last; - trace_hfi1_qp_send_completion(qp, wqe, s_last); - if (++s_last >= qp->s_size) - s_last = 0; - qp->s_last = s_last; - /* see post_send() */ - barrier(); - rvt_qp_swqe_complete(qp, + trace_hfi1_qp_send_completion(qp, wqe, qp->s_last); + rvt_qp_complete_swqe(qp, wqe, ib_hfi1_wc_opcode[wqe->wr.opcode], IB_WC_SUCCESS); @@ -3015,8 +3008,7 @@ send_last: wc.dlid_path_bits = 0; wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - ib_bth_is_solicited(ohdr)); + rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr)); break; case OP(RDMA_WRITE_ONLY): diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index b0110728f541..2395fd4233a7 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -405,19 +405,33 @@ static void sdma_flush(struct sdma_engine *sde) struct sdma_txreq *txp, *txp_next; LIST_HEAD(flushlist); unsigned long flags; + uint seq; /* flush from head to tail */ sdma_flush_descq(sde); spin_lock_irqsave(&sde->flushlist_lock, flags); /* copy flush list */ - list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) { - list_del_init(&txp->list); - list_add_tail(&txp->list, &flushlist); - } + list_splice_init(&sde->flushlist, &flushlist); spin_unlock_irqrestore(&sde->flushlist_lock, flags); /* flush from flush list */ list_for_each_entry_safe(txp, txp_next, &flushlist, list) complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED); + /* wakeup QPs orphaned on the dmawait list */ + do { + struct iowait *w, *nw; + + seq = read_seqbegin(&sde->waitlock); + if (!list_empty(&sde->dmawait)) { + write_seqlock(&sde->waitlock); + list_for_each_entry_safe(w, nw, &sde->dmawait, list) { + if (w->wakeup) { + w->wakeup(w, SDMA_AVAIL_REASON); + list_del_init(&w->list); + } + } + write_sequnlock(&sde->waitlock); + } + } while 
(read_seqretry(&sde->waitlock, seq)); } /* @@ -855,14 +869,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, { struct sdma_rht_node *rht_node; struct sdma_engine *sde = NULL; - const struct cpumask *current_mask = &current->cpus_allowed; unsigned long cpu_id; /* * To ensure that always the same sdma engine(s) will be * selected make sure the process is pinned to this CPU only. */ - if (cpumask_weight(current_mask) != 1) + if (current->nr_cpus_allowed != 1) goto out; cpu_id = smp_processor_id(); @@ -2413,7 +2426,7 @@ unlock_noconn: list_add_tail(&tx->list, &sde->flushlist); spin_unlock(&sde->flushlist_lock); iowait_inc_wait_count(wait, tx->num_desc); - schedule_work(&sde->flush_worker); + queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker); ret = -ECOMM; goto unlock; nodesc: @@ -2511,7 +2524,7 @@ unlock_noconn: iowait_inc_wait_count(wait, tx->num_desc); } spin_unlock(&sde->flushlist_lock); - schedule_work(&sde->flush_worker); + queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker); ret = -ECOMM; goto update_tail; nodesc: diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 6fb93032fbef..92acccaaaa86 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -312,9 +312,7 @@ static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi, if (qp->ibqp.qp_num == 0) ctxt = 0; else - ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) % - (dd->n_krcv_queues - 1)) + 1; - + ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift); return dd->rcd[ctxt]; } @@ -477,7 +475,7 @@ static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd, * Must hold the qp s_lock and the exp_lock. * * Return: - * false if either of the conditions below are statisfied: + * false if either of the conditions below are satisfied: * 1. The list is empty or * 2. The indicated qp is at the head of the list and the * HFI1_S_WAIT_TID_SPACE bit is set in qp->s_flags. 
@@ -2026,7 +2024,6 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet, trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req); if (e->opcode == TID_OP(READ_REQ)) { struct ib_reth *reth; - u32 offset; u32 len; u32 rkey; u64 vaddr; @@ -2038,7 +2035,6 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet, * The requester always restarts from the start of the original * request. */ - offset = delta_psn(psn, e->psn) * qp->pmtu; len = be32_to_cpu(reth->length); if (psn != e->psn || len != req->total_len) goto unlock; @@ -4552,7 +4548,7 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) struct rvt_swqe *wqe; struct tid_rdma_request *req; struct tid_rdma_flow *flow; - u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn; + u32 aeth, psn, req_psn, ack_psn, resync_psn, ack_kpsn; unsigned long flags; u16 fidx; @@ -4756,7 +4752,6 @@ done: IB_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ flow = &req->flows[req->acked_tail]; - fspsn = full_flow_psn(flow, flow->flow_state.spsn); trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow); req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2])); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index d1372cc66de6..2f84290a88ca 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -79,6 +79,8 @@ __print_symbolic(opcode, \ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ ib_opcode_name(RC_COMPARE_SWAP), \ ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \ + ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \ ib_opcode_name(TID_RDMA_WRITE_REQ), \ ib_opcode_name(TID_RDMA_WRITE_RESP), \ ib_opcode_name(TID_RDMA_WRITE_DATA), \ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 4ed4fcfabd6c..0c77f18120ed 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -476,8 +476,7 @@ last_imm: wc.dlid_path_bits = 0; 
wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - ib_bth_is_solicited(ohdr)); + rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr)); break; case OP(RDMA_WRITE_FIRST): diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index f88ad425664a..e804af71b629 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -87,7 +87,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) rcu_read_lock(); qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp, - swqe->ud_wr.remote_qpn); + rvt_get_swqe_remote_qpn(swqe)); if (!qp) { ibp->rvp.n_pkt_drops++; rcu_read_unlock(); @@ -105,7 +105,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) goto drop; } - ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr; + ah_attr = rvt_get_swqe_ah_attr(swqe); ppd = ppd_from_ibp(ibp); if (qp->ibqp.qp_num > 1) { @@ -135,8 +135,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (qp->ibqp.qp_num) { u32 qkey; - qkey = (int)swqe->ud_wr.remote_qkey < 0 ? - sqp->qkey : swqe->ud_wr.remote_qkey; + qkey = (int)rvt_get_swqe_remote_qkey(swqe) < 0 ? 
+ sqp->qkey : rvt_get_swqe_remote_qkey(swqe); if (unlikely(qkey != qp->qkey)) goto drop; /* silently drop per IBTA spec */ } @@ -240,7 +240,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) { if (sqp->ibqp.qp_type == IB_QPT_GSI || sqp->ibqp.qp_type == IB_QPT_SMI) - wc.pkey_index = swqe->ud_wr.pkey_index; + wc.pkey_index = rvt_get_swqe_pkey_index(swqe); else wc.pkey_index = sqp->s_pkey_index; } else { @@ -255,8 +255,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - swqe->wr.send_flags & IB_SEND_SOLICITED); + rvt_recv_cq(qp, &wc, swqe->wr.send_flags & IB_SEND_SOLICITED); ibp->rvp.n_loop_pkts++; bail_unlock: spin_unlock_irqrestore(&qp->r_lock, flags); @@ -283,20 +282,21 @@ static void hfi1_make_bth_deth(struct rvt_qp *qp, struct rvt_swqe *wqe, bth0 |= IB_BTH_SOLICITED; bth0 |= extra_bytes << 20; if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) - *pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); + *pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe)); else *pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); if (!bypass) bth0 |= *pkey; ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); + ohdr->bth[1] = cpu_to_be32(rvt_get_swqe_remote_qpn(wqe)); ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn)); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? - qp->qkey : wqe->ud_wr.remote_qkey); + ohdr->u.ud.deth[0] = + cpu_to_be32((int)rvt_get_swqe_remote_qkey(wqe) < 0 ? 
qp->qkey : + rvt_get_swqe_remote_qkey(wqe)); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); } @@ -316,7 +316,7 @@ void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; + ah_attr = rvt_get_swqe_ah_attr(wqe); extra_bytes = -wqe->length & 3; nwords = ((wqe->length + extra_bytes) >> 2) + SIZE_OF_CRC; @@ -380,7 +380,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; u32 dlid, slid, nwords, extra_bytes; - u32 dest_qp = wqe->ud_wr.remote_qpn; + u32 dest_qp = rvt_get_swqe_remote_qpn(wqe); u32 src_qp = qp->ibqp.qp_num; u16 len, pkey; u8 l4, sc5; @@ -388,7 +388,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; + ah_attr = rvt_get_swqe_ah_attr(wqe); /* * Build 16B Management Packet if either the destination @@ -450,7 +450,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (is_mgmt) { l4 = OPA_16B_L4_FM; - pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); + pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe)); hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt, dest_qp, src_qp); } else { @@ -515,7 +515,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /* Construct the header. 
*/ ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; + ah_attr = rvt_get_swqe_ah_attr(wqe); priv->hdr_type = hfi1_get_hdr_type(ppd->lid, ah_attr); if ((!hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) || (rdma_ah_get_dlid(ah_attr) == be32_to_cpu(OPA_LID_PERMISSIVE))) { @@ -683,7 +683,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); - if (pbuf) { + if (!IS_ERR_OR_NULL(pbuf)) { trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); @@ -738,7 +738,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); - if (pbuf) { + if (!IS_ERR_OR_NULL(pbuf)) { trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); @@ -1061,7 +1061,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. 
*/ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, solicited); + rvt_recv_cq(qp, &wc, solicited); return; drop: diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 02eee8eff1db..b89a9b9aef7a 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -118,13 +118,10 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, size_t npages, bool dirty) { - size_t i; - - for (i = 0; i < npages; i++) { - if (dirty) - set_page_dirty_lock(p[i]); - put_page(p[i]); - } + if (dirty) + put_user_pages_dirty_lock(p, npages); + else + put_user_pages(p, npages); if (mm) { /* during close after signal, mm can be NULL */ atomic64_sub(npages, &mm->pinned_vm); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8bfbc6d7ea34..fd754a16475a 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -130,20 +130,16 @@ static int defer_packet_queue( { struct hfi1_user_sdma_pkt_q *pq = container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy); - struct user_sdma_txreq *tx = - container_of(txreq, struct user_sdma_txreq, txreq); - if (sdma_progress(sde, seq, txreq)) { - if (tx->busycount++ < MAX_DEFER_RETRY_COUNT) - goto eagain; - } + write_seqlock(&sde->waitlock); + if (sdma_progress(sde, seq, txreq)) + goto eagain; /* * We are assuming that if the list is enqueued somewhere, it * is to the dmawait list since that is the only place where * it is supposed to be enqueued. 
*/ xchg(&pq->state, SDMA_PKT_Q_DEFERRED); - write_seqlock(&sde->waitlock); if (list_empty(&pq->busy.list)) { iowait_get_priority(&pq->busy); iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); @@ -151,6 +147,7 @@ static int defer_packet_queue( write_sequnlock(&sde->waitlock); return -EBUSY; eagain: + write_sequnlock(&sde->waitlock); return -EAGAIN; } @@ -804,7 +801,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts) tx->flags = 0; tx->req = req; - tx->busycount = 0; INIT_LIST_HEAD(&tx->list); /* diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 14dfd757dafd..4d8510b0fc38 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -245,7 +245,6 @@ struct user_sdma_txreq { struct list_head list; struct user_sdma_request *req; u16 flags; - unsigned int busycount; u16 seqnum; }; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index a2b26a635baf..c4b243f50c76 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -638,6 +638,8 @@ static void verbs_sdma_complete( struct hfi1_opa_header *hdr; hdr = &tx->phdr.hdr; + if (unlikely(status == SDMA_TXREQ_S_ABORTED)) + hfi1_rc_verbs_aborted(qp, hdr); hfi1_rc_send_complete(qp, hdr); } spin_unlock(&qp->s_lock); @@ -1037,10 +1039,10 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (cb) iowait_pio_inc(&priv->s_iowait); pbuf = sc_buffer_alloc(sc, plen, cb, qp); - if (unlikely(!pbuf)) { + if (unlikely(IS_ERR_OR_NULL(pbuf))) { if (cb) verbs_pio_complete(qp, 0); - if (ppd->host_link_state != HLS_UP_ACTIVE) { + if (IS_ERR(pbuf)) { /* * If we have filled the PIO buffers to capacity and are * not in an active state this request is not going to @@ -1095,15 +1097,15 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); pio_bail: + spin_lock_irqsave(&qp->s_lock, flags); 
if (qp->s_wqe) { - spin_lock_irqsave(&qp->s_lock, flags); rvt_send_complete(qp, qp->s_wqe, wc_status); - spin_unlock_irqrestore(&qp->s_lock, flags); } else if (qp->ibqp.qp_type == IB_QPT_RC) { - spin_lock_irqsave(&qp->s_lock, flags); + if (unlikely(wc_status == IB_WC_GENERAL_ERR)) + hfi1_rc_verbs_aborted(qp, &ps->s_txreq->phdr.hdr); hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr); - spin_unlock_irqrestore(&qp->s_lock, flags); } + spin_unlock_irqrestore(&qp->s_lock, flags); ret = 0; @@ -1777,6 +1779,9 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, } static const struct ib_device_ops hfi1_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_HFI1, + .alloc_hw_stats = alloc_hw_stats, .alloc_rdma_netdev = hfi1_vnic_alloc_rn, .get_dev_fw_str = hfi1_get_dev_fw_str, @@ -1827,7 +1832,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) */ if (!ib_hfi1_sys_image_guid) ib_hfi1_sys_image_guid = ibdev->node_guid; - ibdev->owner = THIS_MODULE; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; @@ -1921,7 +1925,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &ib_hfi1_attr_group); - ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1); + ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) goto err_verbs_txreq; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 7ecb8ed4a1d9..ae9582ddbc8f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -416,6 +416,7 @@ void hfi1_rc_hdrerr( u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); +void hfi1_rc_verbs_aborted(struct rvt_qp *qp, struct hfi1_opa_header *opah); void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah); void hfi1_ud_rcv(struct hfi1_packet *packet); diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index 
c4ab2d5b4502..8f766dd3f61c 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -100,7 +100,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { struct hfi1_qp_priv *priv; - tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); + tx = kmem_cache_alloc(dev->verbs_txreq_cache, VERBS_TXREQ_GFP); if (tx) goto out; priv = qp->priv; diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index b002e96eb335..bfa6e081cb56 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -72,6 +72,7 @@ struct hfi1_ibdev; struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp); +#define VERBS_TXREQ_GFP (GFP_ATOMIC | __GFP_NOWARN) static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) __must_hold(&qp->slock) @@ -79,7 +80,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, struct verbs_txreq *tx; struct hfi1_qp_priv *priv = qp->priv; - tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC); + tx = kmem_cache_alloc(dev->verbs_txreq_cache, VERBS_TXREQ_GFP); if (unlikely(!tx)) { /* call slow path to get the lock */ tx = __get_txreq(dev, qp); |