aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c220
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h3
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c139
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h9
-rw-r--r--drivers/infiniband/hw/hfi1/common.h8
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c184
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c42
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c185
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.h4
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c63
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h46
-rw-r--r--drivers/infiniband/hw/hfi1/init.c50
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c33
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c20
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h2
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c258
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c32
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c36
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c34
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.h3
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c146
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c10
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c377
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h13
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c103
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c31
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ctxts.h13
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h14
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rx.h4
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c15
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c61
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c45
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c60
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h93
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.c2
35 files changed, 1540 insertions, 818 deletions
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 79575ee873f2..a26a9a0bfc41 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -47,7 +47,7 @@
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>
-#include <linux/cpumask.h>
+#include <linux/interrupt.h>
#include "hfi.h"
#include "affinity.h"
@@ -56,7 +56,7 @@
struct hfi1_affinity_node_list node_affinity = {
.list = LIST_HEAD_INIT(node_affinity.list),
- .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock),
+ .lock = __MUTEX_INITIALIZER(node_affinity.lock)
};
/* Name of IRQ types, indexed by enum irq_type */
@@ -160,14 +160,14 @@ void node_affinity_destroy(void)
struct list_head *pos, *q;
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
list_for_each_safe(pos, q, &node_affinity.list) {
entry = list_entry(pos, struct hfi1_affinity_node,
list);
list_del(pos);
kfree(entry);
}
- spin_unlock(&node_affinity.lock);
+ mutex_unlock(&node_affinity.lock);
kfree(hfi1_per_node_cntr);
}
@@ -234,9 +234,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
/*
* If this is the first time this NUMA node's affinity is used,
@@ -247,6 +246,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
+ mutex_unlock(&node_affinity.lock);
return -ENOMEM;
}
init_cpu_mask_set(&entry->def_intr);
@@ -303,15 +303,113 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
&entry->general_intr_mask);
}
- spin_lock(&node_affinity.lock);
node_affinity_add_tail(entry);
- spin_unlock(&node_affinity.lock);
}
-
+ mutex_unlock(&node_affinity.lock);
return 0;
}
-int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+/*
+ * Function updates the irq affinity hint for msix after it has been changed
+ * by the user using the /proc/irq interface. This function only accepts
+ * one cpu in the mask.
+ */
+static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
+{
+ struct sdma_engine *sde = msix->arg;
+ struct hfi1_devdata *dd = sde->dd;
+ struct hfi1_affinity_node *entry;
+ struct cpu_mask_set *set;
+ int i, old_cpu;
+
+ if (cpu > num_online_cpus() || cpu == sde->cpu)
+ return;
+
+ mutex_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ if (!entry)
+ goto unlock;
+
+ old_cpu = sde->cpu;
+ sde->cpu = cpu;
+ cpumask_clear(&msix->mask);
+ cpumask_set_cpu(cpu, &msix->mask);
+ dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n",
+ msix->msix.vector, irq_type_names[msix->type],
+ sde->this_idx, cpu);
+ irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+
+ /*
+ * Set the new cpu in the hfi1_affinity_node and clean
+ * the old cpu if it is not used by any other IRQ
+ */
+ set = &entry->def_intr;
+ cpumask_set_cpu(cpu, &set->mask);
+ cpumask_set_cpu(cpu, &set->used);
+ for (i = 0; i < dd->num_msix_entries; i++) {
+ struct hfi1_msix_entry *other_msix;
+
+ other_msix = &dd->msix_entries[i];
+ if (other_msix->type != IRQ_SDMA || other_msix == msix)
+ continue;
+
+ if (cpumask_test_cpu(old_cpu, &other_msix->mask))
+ goto unlock;
+ }
+ cpumask_clear_cpu(old_cpu, &set->mask);
+ cpumask_clear_cpu(old_cpu, &set->used);
+unlock:
+ mutex_unlock(&node_affinity.lock);
+}
+
+static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ int cpu = cpumask_first(mask);
+ struct hfi1_msix_entry *msix = container_of(notify,
+ struct hfi1_msix_entry,
+ notify);
+
+ /* Only one CPU configuration supported currently */
+ hfi1_update_sdma_affinity(msix, cpu);
+}
+
+static void hfi1_irq_notifier_release(struct kref *ref)
+{
+ /*
+ * This is required by affinity notifier. We don't have anything to
+ * free here.
+ */
+}
+
+static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
+{
+ struct irq_affinity_notify *notify = &msix->notify;
+
+ notify->irq = msix->msix.vector;
+ notify->notify = hfi1_irq_notifier_notify;
+ notify->release = hfi1_irq_notifier_release;
+
+ if (irq_set_affinity_notifier(notify->irq, notify))
+ pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
+ notify->irq);
+}
+
+static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
+{
+ struct irq_affinity_notify *notify = &msix->notify;
+
+ if (irq_set_affinity_notifier(notify->irq, NULL))
+ pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
+ notify->irq);
+}
+
+/*
+ * Function sets the irq affinity for msix.
+ * It *must* be called with node_affinity.lock held.
+ */
+static int get_irq_affinity(struct hfi1_devdata *dd,
+ struct hfi1_msix_entry *msix)
{
int ret;
cpumask_var_t diff;
@@ -329,9 +427,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
if (!ret)
return -ENOMEM;
- spin_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
switch (msix->type) {
case IRQ_SDMA:
@@ -361,7 +457,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
* finds its CPU here.
*/
if (cpu == -1 && set) {
- spin_lock(&node_affinity.lock);
if (cpumask_equal(&set->mask, &set->used)) {
/*
* We've used up all the CPUs, bump up the generation
@@ -373,17 +468,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
cpumask_andnot(diff, &set->mask, &set->used);
cpu = cpumask_first(diff);
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&node_affinity.lock);
- }
-
- switch (msix->type) {
- case IRQ_SDMA:
- sde->cpu = cpu;
- break;
- case IRQ_GENERAL:
- case IRQ_RCVCTXT:
- case IRQ_OTHER:
- break;
}
cpumask_set_cpu(cpu, &msix->mask);
@@ -392,10 +476,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
extra, cpu);
irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+ if (msix->type == IRQ_SDMA) {
+ sde->cpu = cpu;
+ hfi1_setup_sdma_notifier(msix);
+ }
+
free_cpumask_var(diff);
return 0;
}
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+{
+ int ret;
+
+ mutex_lock(&node_affinity.lock);
+ ret = get_irq_affinity(dd, msix);
+ mutex_unlock(&node_affinity.lock);
+ return ret;
+}
+
void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_msix_entry *msix)
{
@@ -403,13 +502,13 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_ctxtdata *rcd;
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
switch (msix->type) {
case IRQ_SDMA:
set = &entry->def_intr;
+ hfi1_cleanup_sdma_notifier(msix);
break;
case IRQ_GENERAL:
/* Don't do accounting for general contexts */
@@ -421,21 +520,21 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
set = &entry->rcv_intr;
break;
default:
+ mutex_unlock(&node_affinity.lock);
return;
}
if (set) {
- spin_lock(&node_affinity.lock);
cpumask_andnot(&set->used, &set->used, &msix->mask);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&node_affinity.lock);
}
irq_set_affinity_hint(msix->msix.vector, NULL);
cpumask_clear(&msix->mask);
+ mutex_unlock(&node_affinity.lock);
}
/* This should be called with node_affinity.lock held */
@@ -536,7 +635,7 @@ int hfi1_get_proc_affinity(int node)
if (!ret)
goto free_available_mask;
- spin_lock(&affinity->lock);
+ mutex_lock(&affinity->lock);
/*
* If we've used all available HW threads, clear the mask and start
* overloading.
@@ -644,7 +743,8 @@ int hfi1_get_proc_affinity(int node)
cpu = -1;
else
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&affinity->lock);
+
+ mutex_unlock(&affinity->lock);
hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);
free_cpumask_var(intrs_mask);
@@ -665,49 +765,53 @@ void hfi1_put_proc_affinity(int cpu)
if (cpu < 0)
return;
- spin_lock(&affinity->lock);
+
+ mutex_lock(&affinity->lock);
cpumask_clear_cpu(cpu, &set->used);
hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&affinity->lock);
+ mutex_unlock(&affinity->lock);
}
-/* Prevents concurrent reads and writes of the sdma_affinity attrib */
-static DEFINE_MUTEX(sdma_affinity_mutex);
-
int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
size_t count)
{
struct hfi1_affinity_node *entry;
- struct cpumask mask;
+ cpumask_var_t mask;
int ret, i;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
- if (!entry)
- return -EINVAL;
+ if (!entry) {
+ ret = -EINVAL;
+ goto unlock;
+ }
- ret = cpulist_parse(buf, &mask);
+ ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+ if (!ret) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = cpulist_parse(buf, mask);
if (ret)
- return ret;
+ goto out;
- if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) {
+ if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) {
dd_dev_warn(dd, "Invalid CPU mask\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
- mutex_lock(&sdma_affinity_mutex);
/* reset the SDMA interrupt affinity details */
init_cpu_mask_set(&entry->def_intr);
- cpumask_copy(&entry->def_intr.mask, &mask);
- /*
- * Reassign the affinity for each SDMA interrupt.
- */
+ cpumask_copy(&entry->def_intr.mask, mask);
+
+ /* Reassign the affinity for each SDMA interrupt. */
for (i = 0; i < dd->num_msix_entries; i++) {
struct hfi1_msix_entry *msix;
@@ -715,13 +819,15 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
if (msix->type != IRQ_SDMA)
continue;
- ret = hfi1_get_irq_affinity(dd, msix);
+ ret = get_irq_affinity(dd, msix);
if (ret)
break;
}
-
- mutex_unlock(&sdma_affinity_mutex);
+out:
+ free_cpumask_var(mask);
+unlock:
+ mutex_unlock(&node_affinity.lock);
return ret ? ret : strnlen(buf, PAGE_SIZE);
}
@@ -729,15 +835,15 @@ int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
{
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
- if (!entry)
+ if (!entry) {
+ mutex_unlock(&node_affinity.lock);
return -EINVAL;
+ }
- mutex_lock(&sdma_affinity_mutex);
cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
- mutex_unlock(&sdma_affinity_mutex);
+ mutex_unlock(&node_affinity.lock);
return strnlen(buf, PAGE_SIZE);
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 8879cf7a8cac..b89ea3c0ee1a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -121,8 +121,7 @@ struct hfi1_affinity_node_list {
int num_core_siblings;
int num_online_nodes;
int num_online_cpus;
- /* protect affinity node list */
- spinlock_t lock;
+ struct mutex lock; /* protects affinity nodes */
};
int node_affinity_init(void);
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index b32638d58ae8..9bf5f23544d4 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -971,7 +971,9 @@ static struct flag_table dc8051_info_err_flags[] = {
FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT),
- FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT)
+ FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT),
+ FLAG_ENTRY0("External Device Request Timeout",
+ EXTERNAL_DEVICE_REQ_TIMEOUT),
};
/*
@@ -6825,7 +6827,6 @@ void handle_link_up(struct work_struct *work)
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
OPA_LINKDOWN_REASON_SPEED_POLICY);
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
}
}
@@ -6998,12 +6999,10 @@ void handle_link_down(struct work_struct *work)
* If there is no cable attached, turn the DC off. Otherwise,
* start the link bring up.
*/
- if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) {
+ if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd))
dc_shutdown(ppd->dd);
- } else {
- tune_serdes(ppd);
+ else
start_link(ppd);
- }
}
void handle_link_bounce(struct work_struct *work)
@@ -7016,7 +7015,6 @@ void handle_link_bounce(struct work_struct *work)
*/
if (ppd->host_link_state & HLS_UP) {
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
} else {
dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
@@ -7531,7 +7529,6 @@ done:
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
OPA_LINKDOWN_REASON_WIDTH_POLICY);
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
}
}
@@ -9161,6 +9158,12 @@ set_local_link_attributes_fail:
*/
int start_link(struct hfi1_pportdata *ppd)
{
+ /*
+ * Tune the SerDes to a ballpark setting for optimal signal and bit
+ * error rate. Needs to be done before starting the link.
+ */
+ tune_serdes(ppd);
+
if (!ppd->link_enabled) {
dd_dev_info(ppd->dd,
"%s: stopping link start because link is disabled\n",
@@ -9401,8 +9404,6 @@ void qsfp_event(struct work_struct *work)
*/
set_qsfp_int_n(ppd, 1);
- tune_serdes(ppd);
-
start_link(ppd);
}
@@ -9490,6 +9491,73 @@ static void init_lcb(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00);
}
+/*
+ * Perform a test read on the QSFP. Return 0 on success, -ERRNO
+ * on error.
+ */
+static int test_qsfp_read(struct hfi1_pportdata *ppd)
+{
+ int ret;
+ u8 status;
+
+ /* report success if not a QSFP */
+ if (ppd->port_type != PORT_TYPE_QSFP)
+ return 0;
+
+ /* read byte 2, the status byte */
+ ret = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1);
+ if (ret < 0)
+ return ret;
+ if (ret != 1)
+ return -EIO;
+
+ return 0; /* success */
+}
+
+/*
+ * Values for QSFP retry.
+ *
+ * Give up after 10s (20 x 500ms). The overall timeout was empirically
+ * arrived at from experience on a large cluster.
+ */
+#define MAX_QSFP_RETRIES 20
+#define QSFP_RETRY_WAIT 500 /* msec */
+
+/*
+ * Try a QSFP read. If it fails, schedule a retry for later.
+ * Called on first link activation after driver load.
+ */
+static void try_start_link(struct hfi1_pportdata *ppd)
+{
+ if (test_qsfp_read(ppd)) {
+ /* read failed */
+ if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) {
+ dd_dev_err(ppd->dd, "QSFP not responding, giving up\n");
+ return;
+ }
+ dd_dev_info(ppd->dd,
+ "QSFP not responding, waiting and retrying %d\n",
+ (int)ppd->qsfp_retry_count);
+ ppd->qsfp_retry_count++;
+ queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work,
+ msecs_to_jiffies(QSFP_RETRY_WAIT));
+ return;
+ }
+ ppd->qsfp_retry_count = 0;
+
+ start_link(ppd);
+}
+
+/*
+ * Workqueue function to start the link after a delay.
+ */
+void handle_start_link(struct work_struct *work)
+{
+ struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+ start_link_work.work);
+ try_start_link(ppd);
+}
+
int bringup_serdes(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
@@ -9525,14 +9593,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
set_qsfp_int_n(ppd, 1);
}
- /*
- * Tune the SerDes to a ballpark setting for
- * optimal signal and bit error rate
- * Needs to be done before starting the link
- */
- tune_serdes(ppd);
-
- return start_link(ppd);
+ try_start_link(ppd);
+ return 0;
}
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
@@ -9549,6 +9611,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
ppd->driver_link_ready = 0;
ppd->link_enabled = 0;
+ ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */
+ flush_delayed_work(&ppd->start_link_work);
+ cancel_delayed_work_sync(&ppd->start_link_work);
+
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
@@ -9648,12 +9714,12 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
}
-struct hfi1_message_header *hfi1_get_msgheader(
- struct hfi1_devdata *dd, __le32 *rhf_addr)
+struct ib_header *hfi1_get_msgheader(
+ struct hfi1_devdata *dd, __le32 *rhf_addr)
{
u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
- return (struct hfi1_message_header *)
+ return (struct ib_header *)
(rhf_addr - dd->rhf_offset + offset);
}
@@ -11489,10 +11555,10 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
!(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
- rcd->rcvhdrq_phys);
+ rcd->rcvhdrq_dma);
if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- rcd->rcvhdrqtailaddr_phys);
+ rcd->rcvhdrqtailaddr_dma);
rcd->seq_cnt = 1;
/* reset the cached receive header queue head value */
@@ -11557,9 +11623,9 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
* update with a dummy tail address and then disable
* receive context.
*/
- if (dd->rcvhdrtail_dummy_physaddr) {
+ if (dd->rcvhdrtail_dummy_dma) {
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
/* Enabling RcvCtxtCtrl.TailUpd is intentional. */
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
}
@@ -11570,7 +11636,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
+ if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
@@ -11642,7 +11708,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
* so it doesn't contain an address that is invalid.
*/
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
}
u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
@@ -12865,7 +12931,7 @@ fail:
*/
static int set_up_context_variables(struct hfi1_devdata *dd)
{
- int num_kernel_contexts;
+ unsigned long num_kernel_contexts;
int total_contexts;
int ret;
unsigned ngroups;
@@ -12894,9 +12960,9 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
*/
if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
dd_dev_err(dd,
- "Reducing # kernel rcv contexts to: %d, from %d\n",
+ "Reducing # kernel rcv contexts to: %d, from %lu\n",
(int)(dd->chip_send_contexts - num_vls - 1),
- (int)num_kernel_contexts);
+ num_kernel_contexts);
num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
}
/*
@@ -13319,9 +13385,9 @@ static void init_rbufs(struct hfi1_devdata *dd)
/*
* Give up after 1ms - maximum wait time.
*
- * RBuf size is 148KiB. Slowest possible is PCIe Gen1 x1 at
+ * RBuf size is 136KiB. Slowest possible is PCIe Gen1 x1 at
* 250MB/s bandwidth. Lower rate to 66% for overhead to get:
- * 148 KB / (66% * 250MB/s) = 920us
+ * 136 KB / (66% * 250MB/s) = 844us
*/
if (count++ > 500) {
dd_dev_err(dd,
@@ -14500,6 +14566,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ /* call before get_platform_config(), after init_chip_resources() */
+ ret = eprom_init(dd);
+ if (ret)
+ goto bail_free_rcverr;
+
/* Needs to be called before hfi1_firmware_init */
get_platform_config(dd);
@@ -14620,10 +14691,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_free_cntrs;
- ret = eprom_init(dd);
- if (ret)
- goto bail_free_rcverr;
-
goto bail;
bail_free_rcverr:
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index ed11107c50fe..92345259a8f4 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -82,7 +82,7 @@
*/
#define CM_VAU 3
/* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */
-#define CM_GLOBAL_CREDITS 0x940
+#define CM_GLOBAL_CREDITS 0x880
/* Number of PKey entries in the HW */
#define MAX_PKEY_VALUES 16
@@ -254,12 +254,14 @@
#define FAILED_LNI_VERIFY_CAP2 BIT(10)
#define FAILED_LNI_CONFIGLT BIT(11)
#define HOST_HANDSHAKE_TIMEOUT BIT(12)
+#define EXTERNAL_DEVICE_REQ_TIMEOUT BIT(13)
#define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \
| FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \
| FAILED_LNI_VERIFY_CAP1 \
| FAILED_LNI_VERIFY_CAP2 \
- | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT)
+ | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT \
+ | EXTERNAL_DEVICE_REQ_TIMEOUT)
/* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */
#define HOST_REQ_DONE BIT(0)
@@ -706,6 +708,7 @@ void handle_link_up(struct work_struct *work);
void handle_link_down(struct work_struct *work);
void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work);
+void handle_start_link(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
void reset_qsfp(struct hfi1_pportdata *ppd);
void qsfp_event(struct work_struct *work);
@@ -1335,7 +1338,7 @@ enum {
u64 get_all_cpu_total(u64 __percpu *cntr);
void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
-struct hfi1_message_header *hfi1_get_msgheader(
+struct ib_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr);
int hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index fcc9c217a97a..da7be21bedb4 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -320,14 +320,6 @@ struct diag_pkt {
/* RHF receive type error - bypass packet errors */
#define RHF_RTE_BYPASS_NO_ERR 0x0
-/*
- * This structure contains the first field common to all protocols
- * that employ this chip.
- */
-struct hfi1_message_header {
- __be16 lrh[4];
-};
-
/* IB - LRH header constants */
#define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index dbab9d9cc288..632ba21759ab 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -59,6 +59,40 @@
static struct dentry *hfi1_dbg_root;
+/* wrappers to enforce srcu in seq file */
+static ssize_t hfi1_seq_read(
+ struct file *file,
+ char __user *buf,
+ size_t size,
+ loff_t *ppos)
+{
+ struct dentry *d = file->f_path.dentry;
+ int srcu_idx;
+ ssize_t r;
+
+ r = debugfs_use_file_start(d, &srcu_idx);
+ if (likely(!r))
+ r = seq_read(file, buf, size, ppos);
+ debugfs_use_file_finish(srcu_idx);
+ return r;
+}
+
+static loff_t hfi1_seq_lseek(
+ struct file *file,
+ loff_t offset,
+ int whence)
+{
+ struct dentry *d = file->f_path.dentry;
+ int srcu_idx;
+ loff_t r;
+
+ r = debugfs_use_file_start(d, &srcu_idx);
+ if (likely(!r))
+ r = seq_lseek(file, offset, whence);
+ debugfs_use_file_finish(srcu_idx);
+ return r;
+}
+
#define private2dd(file) (file_inode(file)->i_private)
#define private2ppd(file) (file_inode(file)->i_private)
@@ -87,8 +121,8 @@ static int _##name##_open(struct inode *inode, struct file *s) \
static const struct file_operations _##name##_file_ops = { \
.owner = THIS_MODULE, \
.open = _##name##_open, \
- .read = seq_read, \
- .llseek = seq_lseek, \
+ .read = hfi1_seq_read, \
+ .llseek = hfi1_seq_lseek, \
.release = seq_release \
}
@@ -105,11 +139,9 @@ do { \
DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
struct hfi1_opcode_stats_perctx *opstats;
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(opstats->stats))
return NULL;
return pos;
@@ -126,9 +158,7 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _opcode_stats_seq_show(struct seq_file *s, void *v)
@@ -223,28 +253,32 @@ DEBUGFS_SEQ_FILE_OPEN(ctx_stats)
DEBUGFS_FILE_OPS(ctx_stats);
static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
+ __acquires(RCU)
{
struct qp_iter *iter;
loff_t n = *pos;
- rcu_read_lock();
iter = qp_iter_init(s->private);
+
+ /* stop calls rcu_read_unlock */
+ rcu_read_lock();
+
if (!iter)
return NULL;
- while (n--) {
+ do {
if (qp_iter_next(iter)) {
kfree(iter);
return NULL;
}
- }
+ } while (n--);
return iter;
}
static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
loff_t *pos)
+ __must_hold(RCU)
{
struct qp_iter *iter = iter_ptr;
@@ -259,7 +293,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
}
static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
-__releases(RCU)
+ __releases(RCU)
{
rcu_read_unlock();
}
@@ -281,12 +315,10 @@ DEBUGFS_SEQ_FILE_OPEN(qp_stats)
DEBUGFS_FILE_OPS(qp_stats);
static void *_sdes_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
struct hfi1_ibdev *ibd;
struct hfi1_devdata *dd;
- rcu_read_lock();
ibd = (struct hfi1_ibdev *)s->private;
dd = dd_from_dev(ibd);
if (!dd->per_sdma || *pos >= dd->num_sdma)
@@ -306,9 +338,7 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _sdes_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _sdes_seq_show(struct seq_file *s, void *v)
@@ -335,11 +365,9 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_cntrs(dd, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
- rcu_read_unlock();
return rval;
}
@@ -352,11 +380,9 @@ static ssize_t dev_names_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_cntrs(dd, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
- rcu_read_unlock();
return rval;
}
@@ -379,11 +405,9 @@ static ssize_t portnames_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_portcntrs(dd->pport, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
- rcu_read_unlock();
return rval;
}
@@ -396,11 +420,9 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf,
struct hfi1_pportdata *ppd;
ssize_t rval;
- rcu_read_lock();
ppd = private2ppd(file);
avail = hfi1_read_portcntrs(ppd, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
- rcu_read_unlock();
return rval;
}
@@ -430,16 +452,13 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf,
int used;
int i;
- rcu_read_lock();
ppd = private2ppd(file);
dd = ppd->dd;
size = PAGE_SIZE;
used = 0;
tmp = kmalloc(size, GFP_KERNEL);
- if (!tmp) {
- rcu_read_unlock();
+ if (!tmp)
return -ENOMEM;
- }
scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
used += scnprintf(tmp + used, size - used,
@@ -466,7 +485,6 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf,
used += scnprintf(tmp + used, size - used, "Write bits to clear\n");
ret = simple_read_from_buffer(buf, count, ppos, tmp, used);
- rcu_read_unlock();
kfree(tmp);
return ret;
}
@@ -482,15 +500,12 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
u64 scratch0;
u64 clear;
- rcu_read_lock();
ppd = private2ppd(file);
dd = ppd->dd;
buff = kmalloc(count + 1, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto do_return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
@@ -523,8 +538,6 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
do_free:
kfree(buff);
- do_return:
- rcu_read_unlock();
return ret;
}
@@ -538,18 +551,14 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
char *tmp;
int ret;
- rcu_read_lock();
ppd = private2ppd(file);
tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!tmp) {
- rcu_read_unlock();
+ if (!tmp)
return -ENOMEM;
- }
ret = qsfp_dump(ppd, tmp, PAGE_SIZE);
if (ret > 0)
ret = simple_read_from_buffer(buf, count, ppos, tmp, ret);
- rcu_read_unlock();
kfree(tmp);
return ret;
}
@@ -565,7 +574,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
int offset;
int total_written;
- rcu_read_lock();
ppd = private2ppd(file);
/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -573,16 +581,12 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
offset = *ppos & 0xffff;
/* explicitly reject invalid address 0 to catch cp and cat */
- if (i2c_addr == 0) {
- ret = -EINVAL;
- goto _return;
- }
+ if (i2c_addr == 0)
+ return -EINVAL;
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
@@ -602,8 +606,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -632,7 +634,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
int offset;
int total_read;
- rcu_read_lock();
ppd = private2ppd(file);
/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -640,16 +641,12 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
offset = *ppos & 0xffff;
/* explicitly reject invalid address 0 to catch cp and cat */
- if (i2c_addr == 0) {
- ret = -EINVAL;
- goto _return;
- }
+ if (i2c_addr == 0)
+ return -EINVAL;
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count);
if (total_read < 0) {
@@ -669,8 +666,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -697,26 +692,20 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
int ret;
int total_written;
- rcu_read_lock();
- if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
- ret = -EINVAL;
- goto _return;
- }
+ if (*ppos + count > QSFP_PAGESIZE * 4) /* base page + page00-page03 */
+ return -EINVAL;
ppd = private2ppd(file);
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
ret = -EFAULT;
goto _free;
}
-
total_written = qsfp_write(ppd, target, *ppos, buff, count);
if (total_written < 0) {
ret = total_written;
@@ -729,8 +718,6 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -757,7 +744,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
int ret;
int total_read;
- rcu_read_lock();
if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
ret = -EINVAL;
goto _return;
@@ -790,7 +776,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
_free:
kfree(buff);
_return:
- rcu_read_unlock();
return ret;
}
@@ -948,6 +933,43 @@ static const struct counter_info port_cntr_ops[] = {
DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
};
+static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos)
+{
+ if (*pos >= num_online_cpus())
+ return NULL;
+
+ return pos;
+}
+
+static void *_sdma_cpu_list_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ ++*pos;
+ if (*pos >= num_online_cpus())
+ return NULL;
+
+ return pos;
+}
+
+static void _sdma_cpu_list_seq_stop(struct seq_file *s, void *v)
+{
+ /* nothing allocated */
+}
+
+static int _sdma_cpu_list_seq_show(struct seq_file *s, void *v)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+ loff_t *spos = v;
+ loff_t i = *spos;
+
+ sdma_seqfile_dump_cpu_list(s, dd, (unsigned long)i);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
+DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
+DEBUGFS_FILE_OPS(sdma_cpu_list);
+
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@@ -976,6 +998,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
+ DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
/* dev counter files */
for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
DEBUGFS_FILE_CREATE(cntr_ops[i].name,
@@ -1006,7 +1029,6 @@ void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
ibd->hfi1_ibdev_dbg = NULL;
- synchronize_rcu();
}
/*
@@ -1031,9 +1053,7 @@ static const char * const hfi1_statnames[] = {
};
static void *_driver_stats_names_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
@@ -1051,9 +1071,7 @@ static void *_driver_stats_names_seq_next(
}
static void _driver_stats_names_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _driver_stats_names_seq_show(struct seq_file *s, void *v)
@@ -1069,9 +1087,7 @@ DEBUGFS_SEQ_FILE_OPEN(driver_stats_names)
DEBUGFS_FILE_OPS(driver_stats_names);
static void *_driver_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
@@ -1086,9 +1102,7 @@ static void *_driver_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _driver_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static u64 hfi1_sps_ints(void)
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 8246dc7d0573..6563e4d38b80 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -276,7 +276,7 @@ inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
struct hfi1_packet *packet)
{
- struct hfi1_message_header *rhdr = packet->hdr;
+ struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -288,10 +288,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (packet->rhf & RHF_TID_ERR) {
/* For TIDERR and RC QPs preemptively schedule a NAK */
- struct hfi1_ib_header *hdr = (struct hfi1_ib_header *)rhdr;
- struct hfi1_other_headers *ohdr = NULL;
+ struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = be16_to_cpu(hdr->lrh[1]);
+ u16 lid = be16_to_cpu(rhdr->lrh[1]);
u32 qp_num;
u32 rcv_flags = 0;
@@ -301,14 +300,14 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
/* Check for GRH */
if (lnh == HFI1_LRH_BTH) {
- ohdr = &hdr->u.oth;
+ ohdr = &rhdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
u32 vtf;
- ohdr = &hdr->u.l.oth;
- if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ ohdr = &rhdr->u.l.oth;
+ if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
goto drop;
- vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+ vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
rcv_flags |= HFI1_HAS_GRH;
@@ -344,7 +343,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
case IB_QPT_RC:
hfi1_rc_hdrerr(
rcd,
- hdr,
+ rhdr,
rcv_flags,
qp);
break;
@@ -452,8 +451,8 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_ib_header *hdr = pkt->hdr;
- struct hfi1_other_headers *ohdr = pkt->ohdr;
+ struct ib_header *hdr = pkt->hdr;
+ struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = NULL;
u32 rqpn = 0, bth1;
u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
@@ -487,7 +486,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
return;
}
- sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf);
+ sc = hdr2sc(hdr, pkt->rhf);
bth1 = be32_to_cpu(ohdr->bth[1]);
if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
@@ -599,8 +598,8 @@ static void __prescan_rxq(struct hfi1_packet *packet)
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
dd->rhf_offset;
struct rvt_qp *qp;
- struct hfi1_ib_header *hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header *hdr;
+ struct ib_other_headers *ohdr;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
@@ -616,8 +615,8 @@ static void __prescan_rxq(struct hfi1_packet *packet)
if (etype != RHF_RCV_TYPE_IB)
goto next;
- hdr = (struct hfi1_ib_header *)
- hfi1_get_msgheader(dd, rhf_addr);
+ hdr = hfi1_get_msgheader(dd, rhf_addr);
+
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh == HFI1_LRH_BTH)
@@ -888,14 +887,15 @@ void set_all_slowpath(struct hfi1_devdata *dd)
}
static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
- struct hfi1_packet packet,
+ struct hfi1_packet *packet,
struct hfi1_devdata *dd)
{
struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
- struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd,
- packet.rhf_addr);
+ struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+ packet->rhf_addr);
+ u8 etype = rhf_rcv_type(packet->rhf);
- if (hdr2sc(hdr, packet.rhf) != 0xf) {
+ if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
int hwstate = read_logical_state(dd);
if (hwstate != LSTATE_ACTIVE) {
@@ -979,7 +979,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
/* Auto activate link on non-SC15 packet receive */
if (unlikely(rcd->ppd->host_link_state ==
HLS_UP_ARMED) &&
- set_armed_to_active(rcd, packet, dd))
+ set_armed_to_active(rcd, &packet, dd))
goto bail;
last = process_rcv_packet(&packet, thread);
}
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index 36b77943cbfd..e70c223801b4 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -49,7 +49,26 @@
#include "common.h"
#include "eprom.h"
+/*
+ * The EPROM is logically divided into three partitions:
+ * partition 0: the first 128K, visible from PCI ROM BAR
+ * partition 1: 4K config file (sector size)
+ * partition 2: the rest
+ */
+#define P0_SIZE (128 * 1024)
+#define P1_SIZE (4 * 1024)
+#define P1_START P0_SIZE
+#define P2_START (P0_SIZE + P1_SIZE)
+
+/* controller page size, in bytes */
+#define EP_PAGE_SIZE 256
+#define EP_PAGE_MASK (EP_PAGE_SIZE - 1)
+#define EP_PAGE_DWORDS (EP_PAGE_SIZE / sizeof(u32))
+
+/* controller commands */
#define CMD_SHIFT 24
+#define CMD_NOP (0)
+#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr)
#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT))
/* controller interface speeds */
@@ -61,6 +80,90 @@
* Double it for safety.
*/
#define EPROM_TIMEOUT 80000 /* ms */
+
+/*
+ * Read a 256 byte (64 dword) EPROM page.
+ * All callers have verified the offset is at a page boundary.
+ */
+static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
+{
+ int i;
+
+ write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
+ for (i = 0; i < EP_PAGE_DWORDS; i++)
+ result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
+ write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
+}
+
+/*
+ * Read length bytes starting at offset from the start of the EPROM.
+ */
+static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, void *dest)
+{
+ u32 buffer[EP_PAGE_DWORDS];
+ u32 end;
+ u32 start_offset;
+ u32 read_start;
+ u32 bytes;
+
+ if (len == 0)
+ return 0;
+
+ end = start + len;
+
+ /*
+ * Make sure the read range is not outside of the controller read
+ * command address range. Note that '>' is correct below - the end
+ * of the range is OK if it stops at the limit, but no higher.
+ */
+ if (end > (1 << CMD_SHIFT))
+ return -EINVAL;
+
+ /* read the first partial page */
+ start_offset = start & EP_PAGE_MASK;
+ if (start_offset) {
+ /* partial starting page */
+
+ /* align and read the page that contains the start */
+ read_start = start & ~EP_PAGE_MASK;
+ read_page(dd, read_start, buffer);
+
+ /* the rest of the page is available data */
+ bytes = EP_PAGE_SIZE - start_offset;
+
+ if (len <= bytes) {
+ /* end is within this page */
+ memcpy(dest, (u8 *)buffer + start_offset, len);
+ return 0;
+ }
+
+ memcpy(dest, (u8 *)buffer + start_offset, bytes);
+
+ start += bytes;
+ len -= bytes;
+ dest += bytes;
+ }
+ /* start is now page aligned */
+
+ /* read whole pages */
+ while (len >= EP_PAGE_SIZE) {
+ read_page(dd, start, buffer);
+ memcpy(dest, buffer, EP_PAGE_SIZE);
+
+ start += EP_PAGE_SIZE;
+ len -= EP_PAGE_SIZE;
+ dest += EP_PAGE_SIZE;
+ }
+
+ /* read the last partial page */
+ if (len) {
+ read_page(dd, start, buffer);
+ memcpy(dest, buffer, len);
+ }
+
+ return 0;
+}
+
/*
* Initialize the EPROM handler.
*/
@@ -100,3 +203,85 @@ int eprom_init(struct hfi1_devdata *dd)
done_asic:
return ret;
}
+
+/* magic character sequence that trails an image */
+#define IMAGE_TRAIL_MAGIC "egamiAPO"
+
+/*
+ * Read all of partition 1. The actual file is at the front. Adjust
+ * the returned size if a trailing image magic is found.
+ */
+static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
+ u32 *size)
+{
+ void *buffer;
+ void *p;
+ u32 length;
+ int ret;
+
+ buffer = kmalloc(P1_SIZE, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+ ret = read_length(dd, P1_START, P1_SIZE, buffer);
+ if (ret) {
+ kfree(buffer);
+ return ret;
+ }
+
+ /* scan for image magic that may trail the actual data */
+ p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
+ if (p)
+ length = p - buffer;
+ else
+ length = P1_SIZE;
+
+ *data = buffer;
+ *size = length;
+ return 0;
+}
+
+/*
+ * Read the platform configuration file from the EPROM.
+ *
+ * On success, an allocated buffer containing the data and its size are
+ * returned. It is up to the caller to free this buffer.
+ *
+ * Return value:
+ * 0 - success
+ * -ENXIO - no EPROM is available
+ * -EBUSY - not able to acquire access to the EPROM
+ * -ENOENT - no recognizable file written
+ * -ENOMEM - buffer could not be allocated
+ */
+int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
+{
+ u32 directory[EP_PAGE_DWORDS]; /* aligned buffer */
+ int ret;
+
+ if (!dd->eprom_available)
+ return -ENXIO;
+
+ ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
+ if (ret)
+ return -EBUSY;
+
+ /* read the last page of P0 for the EPROM format magic */
+ ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
+ if (ret)
+ goto done;
+
+ /* last dword of P0 contains a magic indicator */
+ if (directory[EP_PAGE_DWORDS - 1] == 0) {
+ /* partition format */
+ ret = read_partition_platform_config(dd, data, size);
+ goto done;
+ }
+
+ /* nothing recognized */
+ ret = -ENOENT;
+
+done:
+ release_chip_resource(dd, CR_EPROM);
+ return ret;
+}
diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index d41f0b1afb15..e774184f1643 100644
--- a/drivers/infiniband/hw/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
@@ -45,8 +45,8 @@
*
*/
-struct hfi1_cmd;
struct hfi1_devdata;
int eprom_init(struct hfi1_devdata *dd);
-int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd);
+int eprom_read_platform_config(struct hfi1_devdata *dd, void **buf_ret,
+ u32 *size_ret);
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 1ecbec192358..677efa0e8cd6 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -58,7 +58,6 @@
#include "trace.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
-#include "eprom.h"
#include "aspm.h"
#include "mmu_rb.h"
@@ -183,6 +182,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
if (fd) {
fd->rec_cpu_num = -1; /* no cpu affinity by default */
fd->mm = current->mm;
+ atomic_inc(&fd->mm->mm_count);
}
fp->private_data = fd;
@@ -222,7 +222,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
ret = assign_ctxt(fp, &uinfo);
if (ret < 0)
return ret;
- setup_ctxt(fp);
+ ret = setup_ctxt(fp);
if (ret)
return ret;
ret = user_init(fp);
@@ -439,9 +439,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd;
- unsigned long flags, pfn;
+ unsigned long flags;
u64 token = vma->vm_pgoff << PAGE_SHIFT,
memaddr = 0;
+ void *memvirt = NULL;
u8 subctxt, mapio = 0, vmf = 0, type;
ssize_t memlen = 0;
int ret = 0;
@@ -492,7 +493,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* second or third page allocated for credit returns (if number
* of enabled contexts > 64 and 128 respectively).
*/
- memaddr = dd->cr_base[uctxt->numa_id].pa +
+ memvirt = dd->cr_base[uctxt->numa_id].va;
+ memaddr = virt_to_phys(memvirt) +
(((u64)uctxt->sc->hw_free -
(u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
memlen = PAGE_SIZE;
@@ -507,8 +509,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
mapio = 1;
break;
case RCV_HDRQ:
- memaddr = uctxt->rcvhdrq_phys;
memlen = uctxt->rcvhdrq_size;
+ memvirt = uctxt->rcvhdrq;
break;
case RCV_EGRBUF: {
unsigned long addr;
@@ -532,14 +534,21 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vma->vm_flags &= ~VM_MAYWRITE;
addr = vma->vm_start;
for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
+ memlen = uctxt->egrbufs.buffers[i].len;
+ memvirt = uctxt->egrbufs.buffers[i].addr;
ret = remap_pfn_range(
vma, addr,
- uctxt->egrbufs.buffers[i].phys >> PAGE_SHIFT,
- uctxt->egrbufs.buffers[i].len,
+ /*
+ * virt_to_pfn() does the same, but
+ * it's not available on x86_64
+ * when CONFIG_MMU is enabled.
+ */
+ PFN_DOWN(__pa(memvirt)),
+ memlen,
vma->vm_page_prot);
if (ret < 0)
goto done;
- addr += uctxt->egrbufs.buffers[i].len;
+ addr += memlen;
}
ret = 0;
goto done;
@@ -595,8 +604,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EPERM;
goto done;
}
- memaddr = uctxt->rcvhdrqtailaddr_phys;
memlen = PAGE_SIZE;
+ memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
flags &= ~VM_MAYWRITE;
break;
case SUBCTXT_UREGS:
@@ -649,16 +658,24 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
"%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
vma->vm_end - vma->vm_start, vma->vm_flags);
- pfn = (unsigned long)(memaddr >> PAGE_SHIFT);
if (vmf) {
- vma->vm_pgoff = pfn;
+ vma->vm_pgoff = PFN_DOWN(memaddr);
vma->vm_ops = &vm_ops;
ret = 0;
} else if (mapio) {
- ret = io_remap_pfn_range(vma, vma->vm_start, pfn, memlen,
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(memaddr),
+ memlen,
vma->vm_page_prot);
+ } else if (memvirt) {
+ ret = remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(__pa(memvirt)),
+ memlen,
+ vma->vm_page_prot);
} else {
- ret = remap_pfn_range(vma, vma->vm_start, pfn, memlen,
+ ret = remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(memaddr),
+ memlen,
vma->vm_page_prot);
}
done:
@@ -779,6 +796,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
mutex_unlock(&hfi1_mutex);
hfi1_free_ctxtdata(dd, uctxt);
done:
+ mmdrop(fdata->mm);
kobject_put(&dd->kobj);
kfree(fdata);
return 0;
@@ -959,14 +977,16 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
*/
uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
uctxt->dd->node);
- if (!uctxt->sc)
- return -ENOMEM;
-
+ if (!uctxt->sc) {
+ ret = -ENOMEM;
+ goto ctxdata_free;
+ }
hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
uctxt->sc->hw_context);
ret = sc_enable(uctxt->sc);
if (ret)
- return ret;
+ goto ctxdata_free;
+
/*
* Setup shared context resources if the user-level has requested
* shared contexts and this is the 'master' process.
@@ -980,7 +1000,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
* send context because it will be done during file close
*/
if (ret)
- return ret;
+ goto ctxdata_free;
}
uctxt->userversion = uinfo->userversion;
uctxt->flags = hfi1_cap_mask; /* save current flag state */
@@ -1000,6 +1020,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
fd->uctxt = uctxt;
return 0;
+
+ctxdata_free:
+ dd->rcd[ctxt] = NULL;
+ hfi1_free_ctxtdata(dd, uctxt);
+ return ret;
}
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
@@ -1258,7 +1283,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
uctxt->rcvhdrq);
binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
fd->subctxt,
- uctxt->egrbufs.rcvtids[0].phys);
+ uctxt->egrbufs.rcvtids[0].dma);
binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
fd->subctxt, 0);
/*
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 1000e0fd96d9..7eef11b316ff 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -64,6 +64,8 @@
#include <linux/kthread.h>
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
+#include <rdma/ib_hdrs.h>
+#include <linux/rhashtable.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
@@ -171,12 +173,12 @@ struct ctxt_eager_bufs {
u32 threshold; /* head update threshold */
struct eager_buffer {
void *addr;
- dma_addr_t phys;
+ dma_addr_t dma;
ssize_t len;
} *buffers;
struct {
void *addr;
- dma_addr_t phys;
+ dma_addr_t dma;
} *rcvtids;
};
@@ -207,8 +209,8 @@ struct hfi1_ctxtdata {
/* size of each of the rcvhdrq entries */
u16 rcvhdrqentsize;
/* mmap of hdrq, must fit in 44 bits */
- dma_addr_t rcvhdrq_phys;
- dma_addr_t rcvhdrqtailaddr_phys;
+ dma_addr_t rcvhdrq_dma;
+ dma_addr_t rcvhdrqtailaddr_dma;
struct ctxt_eager_bufs egrbufs;
/* this receive context's assigned PIO ACK send context */
struct send_context *sc;
@@ -350,7 +352,7 @@ struct hfi1_packet {
struct hfi1_ctxtdata *rcd;
__le32 *rhf_addr;
struct rvt_qp *qp;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u64 rhf;
u32 maxcnt;
u32 rhqoff;
@@ -529,6 +531,7 @@ struct hfi1_msix_entry {
void *arg;
char name[MAX_NAME_SIZE];
cpumask_t mask;
+ struct irq_affinity_notify notify;
};
/* per-SL CCA information */
@@ -605,6 +608,7 @@ struct hfi1_pportdata {
struct work_struct freeze_work;
struct work_struct link_downgrade_work;
struct work_struct link_bounce_work;
+ struct delayed_work start_link_work;
/* host link state variables */
struct mutex hls_lock;
u32 host_link_state;
@@ -659,6 +663,7 @@ struct hfi1_pportdata {
u8 linkinit_reason;
u8 local_tx_rate; /* rate given to 8051 firmware */
u8 last_pstate; /* info only */
+ u8 qsfp_retry_count;
/* placeholders for IB MAD packet settings */
u8 overrun_threshold;
@@ -1058,8 +1063,6 @@ struct hfi1_devdata {
u8 psxmitwait_supported;
/* cycle length of PS* counters in HW (in picoseconds) */
u16 psxmitwait_check_rate;
- /* high volume overflow errors deferred to tasklet */
- struct tasklet_struct error_tasklet;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
@@ -1162,7 +1165,7 @@ struct hfi1_devdata {
/* receive context tail dummy address */
__le64 *rcvhdrtail_dummy_kvaddr;
- dma_addr_t rcvhdrtail_dummy_physaddr;
+ dma_addr_t rcvhdrtail_dummy_dma;
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
@@ -1173,6 +1176,7 @@ struct hfi1_devdata {
atomic_t aspm_disabled_cnt;
struct hfi1_affinity *affinity;
+ struct rhashtable sdma_rht;
struct kobject kobj;
};
@@ -1266,15 +1270,32 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
-static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
+static inline int hdr2sc(struct ib_header *hdr, u64 rhf)
{
return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
((!!(rhf_dc_info(rhf))) << 4);
}
+#define HFI1_JKEY_WIDTH 16
+#define HFI1_JKEY_MASK (BIT(16) - 1)
+#define HFI1_ADMIN_JKEY_RANGE 32
+
+/*
+ * J_KEYs are split and allocated in the following groups:
+ * 0 - 31 - users with administrator privileges
+ * 32 - 63 - kernel protocols using KDETH packets
+ * 64 - 65535 - all other users using KDETH packets
+ */
static inline u16 generate_jkey(kuid_t uid)
{
- return from_kuid(current_user_ns(), uid) & 0xffff;
+ u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK;
+
+ if (capable(CAP_SYS_ADMIN))
+ jkey &= HFI1_ADMIN_JKEY_RANGE - 1;
+ else if (jkey < 64)
+ jkey |= BIT(HFI1_JKEY_WIDTH - 1);
+
+ return jkey;
}
/*
@@ -1584,7 +1605,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp)
{
- struct hfi1_other_headers *ohdr = pkt->ohdr;
+ struct ib_other_headers *ohdr = pkt->ohdr;
u32 bth1;
bth1 = be32_to_cpu(ohdr->bth[1]);
@@ -1656,7 +1677,6 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
struct hfi1_devdata *hfi1_init_dd(struct pci_dev *,
const struct pci_device_id *);
void hfi1_free_devdata(struct hfi1_devdata *);
-void cc_state_reclaim(struct rcu_head *rcu);
struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
/* LED beaconing functions */
@@ -1788,7 +1808,7 @@ extern unsigned int hfi1_max_mtu;
extern unsigned int hfi1_cu;
extern unsigned int user_credit_return_threshold;
extern int num_user_contexts;
-extern unsigned n_krcvqs;
+extern unsigned long n_krcvqs;
extern uint krcvqs[];
extern int krcvqsset;
extern uint kdeth_qp;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index a358d23ecd54..60db61536fed 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -94,7 +94,7 @@ module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
/* computed based on above array */
-unsigned n_krcvqs;
+unsigned long n_krcvqs;
static unsigned hfi1_rcvarr_split = 25;
module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
@@ -336,6 +336,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
}
return rcd;
bail:
+ dd->rcd[ctxt] = NULL;
kfree(rcd->egrbufs.rcvtids);
kfree(rcd->egrbufs.buffers);
kfree(rcd);
@@ -500,6 +501,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
INIT_WORK(&ppd->sma_message_work, handle_sma_message);
INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
+ INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link);
INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
@@ -708,7 +710,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
/* allocate dummy tail memory for all receive contexts */
dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent(
&dd->pcidev->dev, sizeof(u64),
- &dd->rcvhdrtail_dummy_physaddr,
+ &dd->rcvhdrtail_dummy_dma,
GFP_KERNEL);
if (!dd->rcvhdrtail_dummy_kvaddr) {
@@ -941,12 +943,12 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
if (rcd->rcvhdrq) {
dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
- rcd->rcvhdrq, rcd->rcvhdrq_phys);
+ rcd->rcvhdrq, rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
if (rcd->rcvhdrtail_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
(void *)rcd->rcvhdrtail_kvaddr,
- rcd->rcvhdrqtailaddr_phys);
+ rcd->rcvhdrqtailaddr_dma);
rcd->rcvhdrtail_kvaddr = NULL;
}
}
@@ -955,11 +957,11 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
kfree(rcd->egrbufs.rcvtids);
for (e = 0; e < rcd->egrbufs.alloced; e++) {
- if (rcd->egrbufs.buffers[e].phys)
+ if (rcd->egrbufs.buffers[e].dma)
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[e].len,
rcd->egrbufs.buffers[e].addr,
- rcd->egrbufs.buffers[e].phys);
+ rcd->egrbufs.buffers[e].dma);
}
kfree(rcd->egrbufs.buffers);
@@ -1333,7 +1335,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
spin_unlock(&ppd->cc_state_lock);
if (cc_state)
- call_rcu(&cc_state->rcu, cc_state_reclaim);
+ kfree_rcu(cc_state, rcu);
}
free_credit_return(dd);
@@ -1353,7 +1355,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
if (dd->rcvhdrtail_dummy_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
(void *)dd->rcvhdrtail_dummy_kvaddr,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
dd->rcvhdrtail_dummy_kvaddr = NULL;
}
@@ -1576,7 +1578,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
u64 reg;
if (!rcd->rcvhdrq) {
- dma_addr_t phys_hdrqtail;
+ dma_addr_t dma_hdrqtail;
gfp_t gfp_flags;
/*
@@ -1589,7 +1591,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
GFP_USER : GFP_KERNEL;
rcd->rcvhdrq = dma_zalloc_coherent(
- &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
+ &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
gfp_flags | __GFP_COMP);
if (!rcd->rcvhdrq) {
@@ -1601,11 +1603,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
- &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
+ &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
gfp_flags);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
- rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
+ rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
}
rcd->rcvhdrq_size = amt;
@@ -1633,7 +1635,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
* before enabling any receive context
*/
write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
return 0;
@@ -1644,7 +1646,7 @@ bail_free:
vfree(rcd->user_event_mask);
rcd->user_event_mask = NULL;
dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
- rcd->rcvhdrq_phys);
+ rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
bail:
return -ENOMEM;
@@ -1705,15 +1707,15 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.buffers[idx].addr =
dma_zalloc_coherent(&dd->pcidev->dev,
rcd->egrbufs.rcvtid_size,
- &rcd->egrbufs.buffers[idx].phys,
+ &rcd->egrbufs.buffers[idx].dma,
gfp_flags);
if (rcd->egrbufs.buffers[idx].addr) {
rcd->egrbufs.buffers[idx].len =
rcd->egrbufs.rcvtid_size;
rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr =
rcd->egrbufs.buffers[idx].addr;
- rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].phys =
- rcd->egrbufs.buffers[idx].phys;
+ rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma =
+ rcd->egrbufs.buffers[idx].dma;
rcd->egrbufs.alloced++;
alloced_bytes += rcd->egrbufs.rcvtid_size;
idx++;
@@ -1754,14 +1756,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
for (i = 0, j = 0, offset = 0; j < idx; i++) {
if (i >= rcd->egrbufs.count)
break;
- rcd->egrbufs.rcvtids[i].phys =
- rcd->egrbufs.buffers[j].phys + offset;
+ rcd->egrbufs.rcvtids[i].dma =
+ rcd->egrbufs.buffers[j].dma + offset;
rcd->egrbufs.rcvtids[i].addr =
rcd->egrbufs.buffers[j].addr + offset;
rcd->egrbufs.alloced++;
- if ((rcd->egrbufs.buffers[j].phys + offset +
+ if ((rcd->egrbufs.buffers[j].dma + offset +
new_size) ==
- (rcd->egrbufs.buffers[j].phys +
+ (rcd->egrbufs.buffers[j].dma +
rcd->egrbufs.buffers[j].len)) {
j++;
offset = 0;
@@ -1813,7 +1815,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
- rcd->egrbufs.rcvtids[idx].phys, order);
+ rcd->egrbufs.rcvtids[idx].dma, order);
cond_resched();
}
goto bail;
@@ -1825,9 +1827,9 @@ bail_rcvegrbuf_phys:
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[idx].len,
rcd->egrbufs.buffers[idx].addr,
- rcd->egrbufs.buffers[idx].phys);
+ rcd->egrbufs.buffers[idx].dma);
rcd->egrbufs.buffers[idx].addr = NULL;
- rcd->egrbufs.buffers[idx].phys = 0;
+ rcd->egrbufs.buffers[idx].dma = 0;
rcd->egrbufs.buffers[idx].len = 0;
}
bail:
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 1263abe01999..9487c9bb8920 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1013,7 +1013,6 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
* offline.
*/
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
} else {
set_link_state(ppd, link_state);
@@ -1407,12 +1406,6 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
if (key == okey)
continue;
/*
- * Don't update pkeys[2], if an HFI port without MgmtAllowed
- * by neighbor is a switch.
- */
- if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
- continue;
- /*
* The SM gives us the complete PKey table. We have
* to ensure that we put the PKeys in the matching
* slots.
@@ -1819,6 +1812,11 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
u32 len = OPA_AM_CI_LEN(am) + 1;
int ret;
+ if (dd->pport->port_type != PORT_TYPE_QSFP) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
#define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
#define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
#define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
@@ -2599,7 +2597,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
u8 lq, num_vls;
u8 res_lli, res_ler;
u64 port_mask;
- unsigned long port_num;
+ u8 port_num;
unsigned long vl;
u32 vl_select_mask;
int vfi;
@@ -2633,9 +2631,9 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
- if ((u8)port_num != port) {
+ if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)pmp);
}
@@ -2837,7 +2835,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3010,7 +3008,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3247,7 +3245,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3398,7 +3396,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd)
spin_unlock(&ppd->cc_state_lock);
- call_rcu(&old_cc_state->rcu, cc_state_reclaim);
+ kfree_rcu(old_cc_state, rcu);
}
static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
@@ -3553,13 +3551,6 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
-void cc_state_reclaim(struct rcu_head *rcu)
-{
- struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu);
-
- kfree(cc_state);
-}
-
static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
u32 *resp_len)
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index ac1bf4a73571..50a3a36d9363 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -551,11 +551,11 @@ static inline u32 group_size(u32 group)
}
/*
- * Obtain the credit return addresses, kernel virtual and physical, for the
+ * Obtain the credit return addresses, kernel virtual and bus, for the
* given sc.
*
* To understand this routine:
- * o va and pa are arrays of struct credit_return. One for each physical
+ * o va and dma are arrays of struct credit_return. One for each physical
* send context, per NUMA.
* o Each send context always looks in its relative location in a struct
* credit_return for its credit return.
@@ -563,14 +563,14 @@ static inline u32 group_size(u32 group)
* with the same value. Use the address of the first send context in the
* group.
*/
-static void cr_group_addresses(struct send_context *sc, dma_addr_t *pa)
+static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma)
{
u32 gc = group_context(sc->hw_context, sc->group);
u32 index = sc->hw_context & 0x7;
sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index];
- *pa = (unsigned long)
- &((struct credit_return *)sc->dd->cr_base[sc->node].pa)[gc];
+ *dma = (unsigned long)
+ &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc];
}
/*
@@ -710,7 +710,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
{
struct send_context_info *sci;
struct send_context *sc = NULL;
- dma_addr_t pa;
+ dma_addr_t dma;
unsigned long flags;
u64 reg;
u32 thresh;
@@ -763,7 +763,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
sc->sw_index = sw_index;
sc->hw_context = hw_context;
- cr_group_addresses(sc, &pa);
+ cr_group_addresses(sc, &dma);
sc->credits = sci->credits;
/* PIO Send Memory Address details */
@@ -805,7 +805,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
/* set up credit return */
- reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
+ reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg);
/*
@@ -2064,7 +2064,7 @@ int init_credit_return(struct hfi1_devdata *dd)
dd->cr_base[i].va = dma_zalloc_coherent(
&dd->pcidev->dev,
bytes,
- &dd->cr_base[i].pa,
+ &dd->cr_base[i].dma,
GFP_KERNEL);
if (!dd->cr_base[i].va) {
set_dev_node(&dd->pcidev->dev, dd->node);
@@ -2097,7 +2097,7 @@ void free_credit_return(struct hfi1_devdata *dd)
TXE_NUM_CONTEXTS *
sizeof(struct credit_return),
dd->cr_base[i].va,
- dd->cr_base[i].pa);
+ dd->cr_base[i].dma);
}
}
kfree(dd->cr_base);
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 464cbd27b975..e709eaf743b5 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -154,7 +154,7 @@ struct credit_return {
/* NUMA indexed credit return array */
struct credit_return_base {
struct credit_return *va;
- dma_addr_t pa;
+ dma_addr_t dma;
};
/* send context configuration sizes (one per type) */
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 8c25e1b58849..aa7773643107 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -165,9 +165,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
preempt_enable();
}
-/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
-#define USE_SHIFTS 1
-#ifdef USE_SHIFTS
/*
* Handle carry bytes using shifts and masks.
*
@@ -187,150 +184,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
#define mshift(x) (8 * (x))
/*
- * Read nbytes bytes from "from" and return them in the LSB bytes
- * of pbuf->carry. Other bytes are zeroed. Any previous value
- * pbuf->carry is lost.
- *
- * NOTES:
- * o do not read from from if nbytes is zero
- * o from may _not_ be u64 aligned
- * o nbytes must not span a QW boundary
- */
-static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
- unsigned int nbytes)
-{
- unsigned long off;
-
- if (nbytes == 0) {
- pbuf->carry.val64 = 0;
- } else {
- /* align our pointer */
- off = (unsigned long)from & 0x7;
- from = (void *)((unsigned long)from & ~0x7l);
- pbuf->carry.val64 = ((*(u64 *)from)
- << zshift(nbytes + off))/* zero upper bytes */
- >> zshift(nbytes); /* place at bottom */
- }
- pbuf->carry_bytes = nbytes;
-}
-
-/*
- * Read nbytes bytes from "from" and put them at the next significant bytes
- * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra
- * read does not overfill carry.
- *
- * NOTES:
- * o from may _not_ be u64 aligned
- * o nbytes may span a QW boundary
- */
-static inline void read_extra_bytes(struct pio_buf *pbuf,
- const void *from, unsigned int nbytes)
-{
- unsigned long off = (unsigned long)from & 0x7;
- unsigned int room, xbytes;
-
- /* align our pointer */
- from = (void *)((unsigned long)from & ~0x7l);
-
- /* check count first - don't read anything if count is zero */
- while (nbytes) {
- /* find the number of bytes in this u64 */
- room = 8 - off; /* this u64 has room for this many bytes */
- xbytes = min(room, nbytes);
-
- /*
- * shift down to zero lower bytes, shift up to zero upper
- * bytes, shift back down to move into place
- */
- pbuf->carry.val64 |= (((*(u64 *)from)
- >> mshift(off))
- << zshift(xbytes))
- >> zshift(xbytes + pbuf->carry_bytes);
- off = 0;
- pbuf->carry_bytes += xbytes;
- nbytes -= xbytes;
- from += sizeof(u64);
- }
-}
-
-/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
- unsigned int remaining;
-
- if (zbytes == 0) /* nothing to do */
- return;
-
- remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */
-
- /* NOTE: zshift only guaranteed to work if remaining != 0 */
- if (remaining)
- pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining))
- >> zshift(remaining);
- else
- pbuf->carry.val64 = 0;
- pbuf->carry_bytes = remaining;
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the LSB bytes of
- * pbuf->carry with the upper bytes zeroed..
- *
- * NOTES:
- * o result must keep unused bytes zeroed
- * o src must be u64 aligned
- */
-static inline void merge_write8(
- struct pio_buf *pbuf,
- void __iomem *dest,
- const void *src)
-{
- u64 new, temp;
-
- new = *(u64 *)src;
- temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
- writeq(temp, dest);
- pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
-}
-
-/*
- * Write a quad word using all bytes of carry.
- */
-static inline void carry8_write8(union mix carry, void __iomem *dest)
-{
- writeq(carry.val64, dest);
-}
-
-/*
- * Write a quad word using all the valid bytes of carry. If carry
- * has zero valid bytes, nothing is written.
- * Returns 0 on nothing written, non-zero on quad word written.
- */
-static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
-{
- if (pbuf->carry_bytes) {
- /* unused bytes are always kept zeroed, so just write */
- writeq(pbuf->carry.val64, dest);
- return 1;
- }
-
- return 0;
-}
-
-#else /* USE_SHIFTS */
-/*
- * Handle carry bytes using byte copies.
- *
- * NOTE: the value the unused portion of carry is left uninitialized.
- */
-
-/*
* Jump copy - no-loop copy for < 8 bytes.
*/
static inline void jcopy(u8 *dest, const u8 *src, u32 n)
@@ -338,18 +191,25 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
switch (n) {
case 7:
*dest++ = *src++;
+ /* fall through */
case 6:
*dest++ = *src++;
+ /* fall through */
case 5:
*dest++ = *src++;
+ /* fall through */
case 4:
*dest++ = *src++;
+ /* fall through */
case 3:
*dest++ = *src++;
+ /* fall through */
case 2:
*dest++ = *src++;
+ /* fall through */
case 1:
*dest++ = *src++;
+ /* fall through */
}
}
@@ -365,6 +225,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
unsigned int nbytes)
{
+ pbuf->carry.val64 = 0;
jcopy(&pbuf->carry.val8[0], from, nbytes);
pbuf->carry_bytes = nbytes;
}
@@ -385,40 +246,31 @@ static inline void read_extra_bytes(struct pio_buf *pbuf,
}
/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * We do not care about the value of unused bytes in carry, so just
- * reduce the byte count.
+ * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
+ * Put the unused part of the next 8 bytes of src into the LSB bytes of
+ * pbuf->carry with the upper bytes zeroed..
*
* NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
- pbuf->carry_bytes -= zbytes;
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the low bytes of
- * pbuf->carry.
+ * o result must keep unused bytes zeroed
+ * o src must be u64 aligned
*/
static inline void merge_write8(
struct pio_buf *pbuf,
- void *dest,
+ void __iomem *dest,
const void *src)
{
- u32 remainder = 8 - pbuf->carry_bytes;
+ u64 new, temp;
- jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
- writeq(pbuf->carry.val64, dest);
- jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
+ new = *(u64 *)src;
+ temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
+ writeq(temp, dest);
+ pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
}
/*
* Write a quad word using all bytes of carry.
*/
-static inline void carry8_write8(union mix carry, void *dest)
+static inline void carry8_write8(union mix carry, void __iomem *dest)
{
writeq(carry.val64, dest);
}
@@ -428,20 +280,16 @@ static inline void carry8_write8(union mix carry, void *dest)
* has zero valid bytes, nothing is written.
* Returns 0 on nothing written, non-zero on quad word written.
*/
-static inline int carry_write8(struct pio_buf *pbuf, void *dest)
+static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
{
if (pbuf->carry_bytes) {
- u64 zero = 0;
-
- jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
- 8 - pbuf->carry_bytes);
+ /* unused bytes are always kept zeroed, so just write */
writeq(pbuf->carry.val64, dest);
return 1;
}
return 0;
}
-#endif /* USE_SHIFTS */
/*
* Segmented PIO Copy - start
@@ -550,8 +398,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
void __iomem *dend; /* 8-byte data end */
- unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
- unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;
+ unsigned long qw_to_write = nbytes >> 3;
+ unsigned long bytes_left = nbytes & 0x7;
/* calculate 8-byte data end */
dend = dest + (qw_to_write * sizeof(u64));
@@ -621,16 +469,46 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
dest += sizeof(u64);
}
- /* adjust carry */
- if (pbuf->carry_bytes < bytes_left) {
- /* need to read more */
- read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
+ pbuf->qw_written += qw_to_write;
+
+ /* handle carry and left-over bytes */
+ if (pbuf->carry_bytes + bytes_left >= 8) {
+ unsigned long nread;
+
+ /* there is enough to fill another qw - fill carry */
+ nread = 8 - pbuf->carry_bytes;
+ read_extra_bytes(pbuf, from, nread);
+
+ /*
+ * One more write - but need to make sure dest is correct.
+ * Check for wrap and the possibility the write
+ * should be in SOP space.
+ *
+ * The two checks immediately below cannot both be true, hence
+ * the else. If we have wrapped, we cannot still be within the
+ * first block. Conversely, if we are still in the first block,
+ * we cannot have wrapped. We do the wrap check first as that
+ * is more likely.
+ */
+ /* adjust if we have wrapped */
+ if (dest >= pbuf->end)
+ dest -= pbuf->size;
+ /* jump to the SOP range if within the first block */
+ else if (pbuf->qw_written < PIO_BLOCK_QWS)
+ dest += SOP_DISTANCE;
+
+ /* flush out full carry */
+ carry8_write8(pbuf->carry, dest);
+ pbuf->qw_written++;
+
+ /* now adjust and read the rest of the bytes into carry */
+ bytes_left -= nread;
+ from += nread; /* from is now not aligned */
+ read_low_bytes(pbuf, from, bytes_left);
} else {
- /* remove invalid bytes */
- zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
+ /* not enough to fill another qw, append the rest to carry */
+ read_extra_bytes(pbuf, from, bytes_left);
}
-
- pbuf->qw_written += qw_to_write;
}
/*
@@ -771,6 +649,9 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
read_extra_bytes(pbuf, from, to_fill);
from += to_fill;
nbytes -= to_fill;
+ /* may not be enough valid bytes left to align */
+ if (extra > nbytes)
+ extra = nbytes;
/* ...now write carry */
dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
@@ -798,6 +679,15 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
read_low_bytes(pbuf, from, extra);
from += extra;
nbytes -= extra;
+ /*
+ * If no bytes are left, return early - we are done.
+ * NOTE: This short-circuit is *required* because
+ * "extra" may have been reduced in size and "from"
+ * is not aligned, as required when leaving this
+ * if block.
+ */
+ if (nbytes == 0)
+ return;
}
/* at this point, from is QW aligned */
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 965c8aef0c60..202433178864 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -47,29 +47,39 @@
#include "hfi.h"
#include "efivar.h"
+#include "eprom.h"
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
unsigned long size = 0;
u8 *temp_platform_config = NULL;
+ u32 esize;
+
+ ret = eprom_read_platform_config(dd, (void **)&temp_platform_config,
+ &esize);
+ if (!ret) {
+ /* success */
+ size = esize;
+ goto success;
+ }
+ /* fail, try EFI variable */
ret = read_hfi1_efi_var(dd, "configuration", &size,
(void **)&temp_platform_config);
- if (ret) {
- dd_dev_info(dd,
- "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
- __func__);
- /* fall back to request firmware */
- platform_config_load = 1;
- goto bail;
- }
+ if (!ret)
+ goto success;
+
+ dd_dev_info(dd,
+ "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
+ __func__);
+ /* fall back to request firmware */
+ platform_config_load = 1;
+ return;
+success:
dd->platform_config.data = temp_platform_config;
dd->platform_config.size = size;
-
-bail:
- /* exit */;
}
void free_platform_config(struct hfi1_devdata *dd)
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index a5aa3517e7d5..9fc75e7e8781 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -202,8 +202,7 @@ static void flush_iowait(struct rvt_qp *qp)
write_seqlock_irqsave(&dev->iowait_lock, flags);
if (!list_empty(&priv->s_iowait.list)) {
list_del_init(&priv->s_iowait.list);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
write_sequnlock_irqrestore(&dev->iowait_lock, flags);
}
@@ -450,13 +449,14 @@ static void qp_pio_drain(struct rvt_qp *qp)
*/
void hfi1_schedule_send(struct rvt_qp *qp)
{
+ lockdep_assert_held(&qp->s_lock);
if (hfi1_send_ok(qp))
_hfi1_schedule_send(qp);
}
/**
- * hfi1_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
+ * hfi1_get_credit - handle credit in aeth
+ * @qp: the qp
* @aeth: the Acknowledge Extended Transport Header
*
* The QP s_lock should be held.
@@ -465,6 +465,7 @@ void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
{
u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
+ lockdep_assert_held(&qp->s_lock);
/*
* If the credit is invalid, we can send
* as many packets as we like. Otherwise, we have to
@@ -503,8 +504,7 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
}
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify hfi1_destroy_qp() if it is waiting. */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
static int iowait_sleep(
@@ -544,7 +544,7 @@ static int iowait_sleep(
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
list_add_tail(&priv->s_iowait.list, &sde->dmawait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY;
@@ -656,10 +656,6 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
iter->dev = dev;
iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
- if (qp_iter_next(iter)) {
- kfree(iter);
- return NULL;
- }
return iter;
}
@@ -812,6 +808,13 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
kfree(priv);
return ERR_PTR(-ENOMEM);
}
+ iowait_init(
+ &priv->s_iowait,
+ 1,
+ _hfi1_do_send,
+ iowait_sleep,
+ iowait_wakeup,
+ iowait_sdma_drained);
setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
qp->s_timer.function = hfi1_rc_timeout;
return priv;
@@ -852,6 +855,7 @@ unsigned free_all_qps(struct rvt_dev_info *rdi)
void flush_qp_waiters(struct rvt_qp *qp)
{
+ lockdep_assert_held(&qp->s_lock);
flush_iowait(qp);
hfi1_stop_rc_timers(qp);
}
@@ -877,13 +881,6 @@ void notify_qp_reset(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- iowait_init(
- &priv->s_iowait,
- 1,
- _hfi1_do_send,
- iowait_sleep,
- iowait_wakeup,
- iowait_sdma_drained);
priv->r_adefered = 0;
clear_ahg(qp);
}
@@ -967,8 +964,7 @@ void notify_error_qp(struct rvt_qp *qp)
if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
list_del_init(&priv->s_iowait.list);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index a207717ade2a..1869f639c3ae 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -161,7 +161,7 @@ static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd,
bus->algo.getsda = hfi1_getsda;
bus->algo.getscl = hfi1_getscl;
bus->algo.udelay = 5;
- bus->algo.timeout = usecs_to_jiffies(50);
+ bus->algo.timeout = usecs_to_jiffies(100000);
bus->algo.data = bus;
bus->adapter.owner = THIS_MODULE;
@@ -706,8 +706,8 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
u8 *data)
{
struct hfi1_pportdata *ppd;
- u32 excess_len = 0;
- int ret = 0;
+ u32 excess_len = len;
+ int ret = 0, offset = 0;
if (port_num > dd->num_pports || port_num < 1) {
dd_dev_info(dd, "%s: Invalid port number %d\n",
@@ -740,6 +740,34 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
}
memcpy(data, &ppd->qsfp_info.cache[addr], len);
+
+ if (addr <= QSFP_MONITOR_VAL_END &&
+ (addr + len) >= QSFP_MONITOR_VAL_START) {
+ /* Overlap with the dynamic channel monitor range */
+ if (addr < QSFP_MONITOR_VAL_START) {
+ if (addr + len <= QSFP_MONITOR_VAL_END)
+ len = addr + len - QSFP_MONITOR_VAL_START;
+ else
+ len = QSFP_MONITOR_RANGE;
+ offset = QSFP_MONITOR_VAL_START - addr;
+ addr = QSFP_MONITOR_VAL_START;
+ } else if (addr == QSFP_MONITOR_VAL_START) {
+ offset = 0;
+ if (addr + len > QSFP_MONITOR_VAL_END)
+ len = QSFP_MONITOR_RANGE;
+ } else {
+ offset = 0;
+ if (addr + len > QSFP_MONITOR_VAL_END)
+ len = QSFP_MONITOR_VAL_END - addr + 1;
+ }
+ /* Refresh the values of the dynamic monitors from the cable */
+ ret = one_qsfp_read(ppd, dd->hfi1_id, addr, data + offset, len);
+ if (ret != len) {
+ ret = -EAGAIN;
+ goto set_zeroes;
+ }
+ }
+
return 0;
set_zeroes:
diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index 69275ebd9597..36cf52359848 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
@@ -74,6 +74,9 @@
/* Defined fields that Intel requires of qualified cables */
/* Byte 0 is Identifier, not checked */
/* Byte 1 is reserved "status MSB" */
+#define QSFP_MONITOR_VAL_START 22
+#define QSFP_MONITOR_VAL_END 81
+#define QSFP_MONITOR_RANGE (QSFP_MONITOR_VAL_END - QSFP_MONITOR_VAL_START + 1)
#define QSFP_TX_CTRL_BYTE_OFFS 86
#define QSFP_PWR_CTRL_BYTE_OFFS 93
#define QSFP_CDR_CTRL_BYTE_OFFS 98
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 5da190e6011b..8bc5013f39a1 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -55,7 +55,7 @@
#include "trace.h"
/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_RC_##x
+#define OP(x) RC_OP(x)
/**
* hfi1_add_retry_timer - add/start a retry timer
@@ -68,6 +68,7 @@ static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */
qp->s_timer.expires = jiffies + qp->timeout_jiffies +
@@ -86,6 +87,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
{
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_WAIT_RNR;
qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
add_timer(&priv->s_rnr_timer);
@@ -103,6 +105,7 @@ static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */
mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
@@ -120,6 +123,7 @@ static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
{
int rval = 0;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from retry */
if (qp->s_flags & RVT_S_TIMER) {
qp->s_flags &= ~RVT_S_TIMER;
@@ -138,6 +142,7 @@ void hfi1_stop_rc_timers(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from all timers */
if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
@@ -158,6 +163,7 @@ static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
int rval = 0;
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from rnr timer */
if (qp->s_flags & RVT_S_WAIT_RNR) {
qp->s_flags &= ~RVT_S_WAIT_RNR;
@@ -178,18 +184,6 @@ void hfi1_del_timers_sync(struct rvt_qp *qp)
del_timer_sync(&priv->s_rnr_timer);
}
-/* only opcode mask for adaptive pio */
-const u32 rc_only_opcode =
- BIT(OP(SEND_ONLY) & 0x1f) |
- BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_READ_REQUEST & 0x1f)) |
- BIT(OP(ACKNOWLEDGE & 0x1f)) |
- BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) |
- BIT(OP(COMPARE_SWAP & 0x1f)) |
- BIT(OP(FETCH_ADD & 0x1f));
-
static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
@@ -216,7 +210,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
* Note the QP s_lock must be held.
*/
static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
- struct hfi1_other_headers *ohdr,
+ struct ib_other_headers *ohdr,
struct hfi1_pkt_state *ps)
{
struct rvt_ack_entry *e;
@@ -228,6 +222,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
u32 pmtu = qp->pmtu;
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Don't send an ACK if we aren't supposed to. */
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
goto bail;
@@ -299,10 +294,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = hfi1_compute_aeth(qp);
- ohdr->u.at.atomic_ack_eth[0] =
- cpu_to_be32(e->atomic_data >> 32);
- ohdr->u.at.atomic_ack_eth[1] =
- cpu_to_be32(e->atomic_data);
+ ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = mask_psn(e->psn);
e->sent = 1;
@@ -390,7 +382,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_sge_state *ss;
struct rvt_swqe *wqe;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -403,6 +395,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
int middle = 0;
int delta;
+ lockdep_assert_held(&qp->s_lock);
ps->s_txreq = get_txreq(ps->dev, qp);
if (IS_ERR(ps->s_txreq))
goto bail_no_tx;
@@ -566,8 +559,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
@@ -608,8 +602,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
}
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
@@ -640,20 +635,18 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.swap);
- ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
+ put_ib_ateth_swap(wqe->atomic_wr.swap,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
} else {
qp->s_state = OP(FETCH_ADD);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
- ohdr->u.atomic_eth.compare_data = 0;
+ put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
}
- ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->atomic_wr.remote_addr >> 32);
- ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->atomic_wr.remote_addr);
+ put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
+ &ohdr->u.atomic_eth);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
@@ -779,8 +772,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
* See restart_rc().
*/
len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr + len);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr + len,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
@@ -841,7 +835,7 @@ bail_no_tx:
*
* This is called from hfi1_rc_rcv() and handle_receive_interrupt().
* Note that RDMA reads and atomics are handled in the
- * send side QP state and tasklet.
+ * send side QP state and send engine.
*/
void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
int is_fecn)
@@ -856,8 +850,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
u32 vl, plen;
struct send_context *sc;
struct pio_buf *pbuf;
- struct hfi1_ib_header hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header hdr;
+ struct ib_other_headers *ohdr;
unsigned long flags;
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
@@ -917,7 +911,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
if (!pbuf) {
/*
* We have no room to send at the moment. Pass
- * responsibility for sending the ACK to the send tasklet
+ * responsibility for sending the ACK to the send engine
* so that when enough buffer space becomes available,
* the ACK is sent ahead of other outgoing packets.
*/
@@ -932,16 +926,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
return;
queue_ack:
- this_cpu_inc(*ibp->rvp.rc_qacks);
spin_lock_irqsave(&qp->s_lock, flags);
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
+ goto unlock;
+ this_cpu_inc(*ibp->rvp.rc_qacks);
qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
qp->s_nak_state = qp->r_nak_state;
qp->s_ack_psn = qp->r_ack_psn;
if (is_fecn)
qp->s_flags |= RVT_S_ECN;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
hfi1_schedule_send(qp);
+unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
}
@@ -960,6 +957,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
u32 opcode;
+ lockdep_assert_held(&qp->s_lock);
qp->s_cur = n;
/*
@@ -1027,7 +1025,7 @@ done:
qp->s_psn = psn;
/*
* Set RVT_S_WAIT_PSN as rc_complete() may start the timer
- * asynchronously before the send tasklet can get scheduled.
+ * asynchronously before the send engine can get scheduled.
* Doing it in hfi1_make_rc_req() is too late.
*/
if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
@@ -1045,6 +1043,8 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct hfi1_ibport *ibp;
+ lockdep_assert_held(&qp->r_lock);
+ lockdep_assert_held(&qp->s_lock);
if (qp->s_retry == 0) {
if (qp->s_mig_state == IB_MIG_ARMED) {
hfi1_migrate_qp(qp);
@@ -1121,6 +1121,7 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
struct rvt_swqe *wqe;
u32 n = qp->s_last;
+ lockdep_assert_held(&qp->s_lock);
/* Find the work request corresponding to the given PSN. */
for (;;) {
wqe = rvt_get_swqe_ptr(qp, n);
@@ -1141,15 +1142,16 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
/*
* This should be called with the QP s_lock held and interrupts disabled.
*/
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr)
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
struct ib_wc wc;
unsigned i;
u32 opcode;
u32 psn;
+ lockdep_assert_held(&qp->s_lock);
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@@ -1241,6 +1243,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct ib_wc wc;
unsigned i;
+ lockdep_assert_held(&qp->s_lock);
/*
* Don't decrement refcount and don't generate a
* completion if the SWQE is being resent until the send
@@ -1340,6 +1343,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
int diff;
unsigned long to;
+ lockdep_assert_held(&qp->s_lock);
/*
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
* requests and implicitly NAK RDMA read and atomic requests issued
@@ -1389,7 +1393,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait,
&rcd->qp_wait_list);
}
@@ -1555,6 +1559,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
{
struct rvt_swqe *wqe;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from retry timer */
hfi1_stop_rc_timers(qp);
@@ -1573,7 +1578,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1595,7 +1600,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
* Called at interrupt level.
*/
static void rc_rcv_resp(struct hfi1_ibport *ibp,
- struct hfi1_other_headers *ohdr,
+ struct ib_other_headers *ohdr,
void *data, u32 tlen, struct rvt_qp *qp,
u32 opcode, u32 psn, u32 hdrsize, u32 pmtu,
struct hfi1_ctxtdata *rcd)
@@ -1649,14 +1654,10 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
case OP(ATOMIC_ACKNOWLEDGE):
case OP(RDMA_READ_RESPONSE_FIRST):
aeth = be32_to_cpu(ohdr->u.aeth);
- if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
- __be32 *p = ohdr->u.at.atomic_ack_eth;
-
- val = ((u64)be32_to_cpu(p[0]) << 32) |
- be32_to_cpu(p[1]);
- } else {
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE))
+ val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
+ else
val = 0;
- }
if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
goto ack_done;
@@ -1782,7 +1783,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
{
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1796,8 +1797,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp)
return;
list_del_init(&qp->rspwait);
qp->r_flags &= ~RVT_R_RSP_NAK;
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
/**
@@ -1815,7 +1815,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp)
* Return 1 if no more processing is needed; otherwise return 0 to
* schedule a response to be sent.
*/
-static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
+static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
struct rvt_qp *qp, u32 opcode, u32 psn,
int diff, struct hfi1_ctxtdata *rcd)
{
@@ -1923,7 +1923,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
}
if (len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
@@ -1946,7 +1946,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
case OP(FETCH_ADD): {
/*
* If we didn't find the atomic request in the ack queue
- * or the send tasklet is already backed up to send an
+ * or the send engine is already backed up to send an
* earlier entry, we can ignore this request.
*/
if (!e || e->opcode != (u8)opcode || old_req)
@@ -2123,13 +2123,13 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
void hfi1_rc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
u32 psn;
@@ -2143,6 +2143,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
int copy_last = 0;
u32 rkey;
+ lockdep_assert_held(&qp->r_lock);
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
return;
@@ -2342,7 +2343,7 @@ send_last:
qp->r_sge.sg_list = NULL;
if (qp->r_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
/* Check rkey & NAK */
@@ -2397,7 +2398,7 @@ send_last:
len = be32_to_cpu(reth->length);
if (len) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
/* Check rkey & NAK */
@@ -2432,7 +2433,7 @@ send_last:
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
hfi1_schedule_send(qp);
@@ -2469,8 +2470,7 @@ send_last:
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
- vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) |
- be32_to_cpu(ateth->vaddr[1]);
+ vaddr = get_ib_ateth_vaddr(ateth);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
@@ -2481,11 +2481,11 @@ send_last:
goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
- sdata = be64_to_cpu(ateth->swap_data);
+ sdata = get_ib_ateth_swap(ateth);
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
(u64)atomic64_add_return(sdata, maddr) - sdata :
(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
- be64_to_cpu(ateth->compare_data),
+ get_ib_ateth_compare(ateth),
sdata);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
@@ -2499,7 +2499,7 @@ send_last:
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
hfi1_schedule_send(qp);
@@ -2575,12 +2575,12 @@ send_ack:
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
u32 rcv_flags,
struct rvt_qp *qp)
{
int has_grh = rcv_flags & HFI1_HAS_GRH;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
int diff;
u32 opcode;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 48d5094f98e2..a1576aea4756 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -262,7 +262,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
*
* The s_lock will be acquired around the hfi1_migrate_qp() call.
*/
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0)
{
__be64 guid;
@@ -352,7 +352,7 @@ err:
*
* This is called from hfi1_do_send() to
* forward a WQE addressed to the same HFI.
- * Note that although we are single threaded due to the tasklet, we still
+ * Note that although we are single threaded due to the send engine, we still
* have to protect against post_send(). We don't have to worry about
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
@@ -765,7 +765,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
}
}
-void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
struct hfi1_pkt_state *ps)
{
@@ -846,7 +846,7 @@ void _hfi1_do_send(struct work_struct *work)
* @work: contains a pointer to the QP
*
* Process entries in the send work queue until credit or queue is
- * exhausted. Only allow one CPU to send a packet per QP (tasklet).
+ * exhausted. Only allow one CPU to send a packet per QP.
* Otherwise, two threads could send packets out of order.
*/
void hfi1_do_send(struct rvt_qp *qp)
@@ -909,7 +909,7 @@ void hfi1_do_send(struct rvt_qp *qp)
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
/*
* If the packet cannot be sent now, return and
- * the send tasklet will be woken up later.
+ * the send engine will be woken up later.
*/
if (hfi1_verbs_send(qp, &ps))
return;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index f9befc05b349..fd39bcaa062d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -726,6 +726,34 @@ u16 sdma_get_descq_cnt(void)
}
/**
+ * sdma_engine_get_vl() - return vl for a given sdma engine
+ * @sde: sdma engine
+ *
+ * This function returns the vl mapped to a given engine, or an error if
+ * the mapping can't be found. The mapping fields are protected by RCU.
+ */
+int sdma_engine_get_vl(struct sdma_engine *sde)
+{
+ struct hfi1_devdata *dd = sde->dd;
+ struct sdma_vl_map *m;
+ u8 vl;
+
+ if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
+ return -EINVAL;
+
+ rcu_read_lock();
+ m = rcu_dereference(dd->sdma_map);
+ if (unlikely(!m)) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ vl = m->engine_to_vl[sde->this_idx];
+ rcu_read_unlock();
+
+ return vl;
+}
+
+/**
* sdma_select_engine_vl() - select sdma engine
* @dd: devdata
* @selector: a spreading factor
@@ -788,6 +816,326 @@ struct sdma_engine *sdma_select_engine_sc(
return sdma_select_engine_vl(dd, selector, vl);
}
+struct sdma_rht_map_elem {
+ u32 mask;
+ u8 ctr;
+ struct sdma_engine *sde[0];
+};
+
+struct sdma_rht_node {
+ unsigned long cpu_id;
+ struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
+ struct rhash_head node;
+};
+
+#define NR_CPUS_HINT 192
+
+static const struct rhashtable_params sdma_rht_params = {
+ .nelem_hint = NR_CPUS_HINT,
+ .head_offset = offsetof(struct sdma_rht_node, node),
+ .key_offset = offsetof(struct sdma_rht_node, cpu_id),
+ .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+ .max_size = NR_CPUS,
+ .min_size = 8,
+ .automatic_shrinking = true,
+};
+
+/*
+ * sdma_select_user_engine() - select sdma engine based on user setup
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @vl: this vl
+ *
+ * This function returns an sdma engine for a user sdma request.
+ * User defined sdma engine affinity setting is honored when applicable,
+ * otherwise system default sdma engine mapping is used. To ensure correct
+ * ordering, the mapping from <selector, vl> to sde must remain unchanged.
+ */
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+ u32 selector, u8 vl)
+{
+ struct sdma_rht_node *rht_node;
+ struct sdma_engine *sde = NULL;
+ const struct cpumask *current_mask = tsk_cpus_allowed(current);
+ unsigned long cpu_id;
+
+ /*
+ * To ensure that always the same sdma engine(s) will be
+ * selected make sure the process is pinned to this CPU only.
+ */
+ if (cpumask_weight(current_mask) != 1)
+ goto out;
+
+ cpu_id = smp_processor_id();
+ rcu_read_lock();
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
+ sdma_rht_params);
+
+ if (rht_node && rht_node->map[vl]) {
+ struct sdma_rht_map_elem *map = rht_node->map[vl];
+
+ sde = map->sde[selector & map->mask];
+ }
+ rcu_read_unlock();
+
+ if (sde)
+ return sde;
+
+out:
+ return sdma_select_engine_vl(dd, selector, vl);
+}
+
+static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
+{
+ int i;
+
+ for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
+ map->sde[map->ctr + i] = map->sde[i];
+}
+
+static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
+ struct sdma_engine *sde)
+{
+ unsigned int i, pow;
+
+ /* only need to check the first ctr entries for a match */
+ for (i = 0; i < map->ctr; i++) {
+ if (map->sde[i] == sde) {
+ memmove(&map->sde[i], &map->sde[i + 1],
+ (map->ctr - i - 1) * sizeof(map->sde[0]));
+ map->ctr--;
+ pow = roundup_pow_of_two(map->ctr ? : 1);
+ map->mask = pow - 1;
+ sdma_populate_sde_map(map);
+ break;
+ }
+ }
+}
+
+/*
+ * Prevents concurrent reads and writes of the sdma engine cpu_mask
+ */
+static DEFINE_MUTEX(process_to_sde_mutex);
+
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+ size_t count)
+{
+ struct hfi1_devdata *dd = sde->dd;
+ cpumask_var_t mask, new_mask;
+ unsigned long cpu;
+ int ret, vl, sz;
+
+ vl = sdma_engine_get_vl(sde);
+ if (unlikely(vl < 0))
+ return -EINVAL;
+
+ ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+ if (!ret)
+ return -ENOMEM;
+
+ ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
+ if (!ret) {
+ free_cpumask_var(mask);
+ return -ENOMEM;
+ }
+ ret = cpulist_parse(buf, mask);
+ if (ret)
+ goto out_free;
+
+ if (!cpumask_subset(mask, cpu_online_mask)) {
+ dd_dev_warn(sde->dd, "Invalid CPU mask\n");
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ sz = sizeof(struct sdma_rht_map_elem) +
+ (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));
+
+ mutex_lock(&process_to_sde_mutex);
+
+ for_each_cpu(cpu, mask) {
+ struct sdma_rht_node *rht_node;
+
+ /* Check if we have this already mapped */
+ if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
+ cpumask_set_cpu(cpu, new_mask);
+ continue;
+ }
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ sdma_rht_params);
+ if (!rht_node) {
+ rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
+ if (!rht_node) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+ if (!rht_node->map[vl]) {
+ kfree(rht_node);
+ ret = -ENOMEM;
+ goto out;
+ }
+ rht_node->cpu_id = cpu;
+ rht_node->map[vl]->mask = 0;
+ rht_node->map[vl]->ctr = 1;
+ rht_node->map[vl]->sde[0] = sde;
+
+ ret = rhashtable_insert_fast(&dd->sdma_rht,
+ &rht_node->node,
+ sdma_rht_params);
+ if (ret) {
+ kfree(rht_node->map[vl]);
+ kfree(rht_node);
+ dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
+ cpu);
+ goto out;
+ }
+
+ } else {
+ int ctr, pow;
+
+ /* Add new user mappings */
+ if (!rht_node->map[vl])
+ rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+
+ if (!rht_node->map[vl]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ rht_node->map[vl]->ctr++;
+ ctr = rht_node->map[vl]->ctr;
+ rht_node->map[vl]->sde[ctr - 1] = sde;
+ pow = roundup_pow_of_two(ctr);
+ rht_node->map[vl]->mask = pow - 1;
+
+ /* Populate the sde map table */
+ sdma_populate_sde_map(rht_node->map[vl]);
+ }
+ cpumask_set_cpu(cpu, new_mask);
+ }
+
+ /* Clean up old mappings */
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct sdma_rht_node *rht_node;
+
+ /* Don't cleanup sdes that are set in the new mask */
+ if (cpumask_test_cpu(cpu, mask))
+ continue;
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ sdma_rht_params);
+ if (rht_node) {
+ bool empty = true;
+ int i;
+
+ /* Remove mappings for old sde */
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ if (rht_node->map[i])
+ sdma_cleanup_sde_map(rht_node->map[i],
+ sde);
+
+ /* Free empty hash table entries */
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
+ if (!rht_node->map[i])
+ continue;
+
+ if (rht_node->map[i]->ctr) {
+ empty = false;
+ break;
+ }
+ }
+
+ if (empty) {
+ ret = rhashtable_remove_fast(&dd->sdma_rht,
+ &rht_node->node,
+ sdma_rht_params);
+ WARN_ON(ret);
+
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ kfree(rht_node->map[i]);
+
+ kfree(rht_node);
+ }
+ }
+ }
+
+ cpumask_copy(&sde->cpu_mask, new_mask);
+out:
+ mutex_unlock(&process_to_sde_mutex);
+out_free:
+ free_cpumask_var(mask);
+ free_cpumask_var(new_mask);
+ return ret ? : strnlen(buf, PAGE_SIZE);
+}
+
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+ mutex_lock(&process_to_sde_mutex);
+ if (cpumask_empty(&sde->cpu_mask))
+ snprintf(buf, PAGE_SIZE, "%s\n", "empty");
+ else
+ cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
+ mutex_unlock(&process_to_sde_mutex);
+ return strnlen(buf, PAGE_SIZE);
+}
+
+static void sdma_rht_free(void *ptr, void *arg)
+{
+ struct sdma_rht_node *rht_node = ptr;
+ int i;
+
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ kfree(rht_node->map[i]);
+
+ kfree(rht_node);
+}
+
+/**
+ * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings
+ * @s: seq file
+ * @dd: hfi1_devdata
+ * @cpuid: cpu id
+ *
+ * This routine dumps the process to sde mappings per cpu
+ */
+void sdma_seqfile_dump_cpu_list(struct seq_file *s,
+ struct hfi1_devdata *dd,
+ unsigned long cpuid)
+{
+ struct sdma_rht_node *rht_node;
+ int i, j;
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+ sdma_rht_params);
+ if (!rht_node)
+ return;
+
+ seq_printf(s, "cpu%3lu: ", cpuid);
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
+ if (!rht_node->map[i] || !rht_node->map[i]->ctr)
+ continue;
+
+ seq_printf(s, " vl%d: [", i);
+
+ for (j = 0; j < rht_node->map[i]->ctr; j++) {
+ if (!rht_node->map[i]->sde[j])
+ continue;
+
+ if (j > 0)
+ seq_puts(s, ",");
+
+ seq_printf(s, " sdma%2d",
+ rht_node->map[i]->sde[j]->this_idx);
+ }
+ seq_puts(s, " ]");
+ }
+
+ seq_puts(s, "\n");
+}
+
/*
* Free the indicated map struct
*/
@@ -1161,6 +1509,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->num_sdma = num_engines;
if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
goto bail;
+
+ if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+ goto bail;
+
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
@@ -1252,6 +1604,7 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
+ rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
/*
@@ -2086,6 +2439,11 @@ nodesc:
* @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
+ * @count: pointer to a u32 which, after return will contain the total number of
+ * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
+ * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
+ * which are added to SDMA engine flush list if the SDMA engine state is
+ * not running.
*
* The call submits the list into the ring.
*
@@ -2100,18 +2458,18 @@ nodesc:
* side locking.
*
* Return:
- * > 0 - Success (value is number of sdma_txreq's submitted),
+ * 0 - Success,
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
- struct list_head *tx_list)
+ struct list_head *tx_list, u32 *count_out)
{
struct sdma_txreq *tx, *tx_next;
int ret = 0;
unsigned long flags;
u16 tail = INVALID_TAIL;
- int count = 0;
+ u32 submit_count = 0, flush_count = 0, total_count;
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
@@ -2127,33 +2485,34 @@ retry:
}
list_del_init(&tx->list);
tail = submit_tx(sde, tx);
- count++;
+ submit_count++;
if (tail != INVALID_TAIL &&
- (count & SDMA_TAIL_UPDATE_THRESH) == 0) {
+ (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
sdma_update_tail(sde, tail);
tail = INVALID_TAIL;
}
}
update_tail:
+ total_count = submit_count + flush_count;
if (wait)
- iowait_sdma_add(wait, count);
+ iowait_sdma_add(wait, total_count);
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
spin_unlock_irqrestore(&sde->tail_lock, flags);
- return ret == 0 ? count : ret;
+ *count_out = total_count;
+ return ret;
unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
tx->wait = wait;
list_del_init(&tx->list);
- if (wait)
- iowait_sdma_inc(wait);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
list_add_tail(&tx->list, &sde->flushlist);
+ flush_count++;
if (wait) {
wait->tx_count++;
wait->count += tx->num_desc;
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 8f50c99fe711..56257ea3598f 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -413,6 +413,8 @@ struct sdma_engine {
spinlock_t flushlist_lock;
/* private: */
struct list_head flushlist;
+ struct cpumask cpu_mask;
+ struct kobject kobj;
};
int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -847,7 +849,8 @@ int sdma_send_txreq(struct sdma_engine *sde,
struct sdma_txreq *tx);
int sdma_send_txlist(struct sdma_engine *sde,
struct iowait *wait,
- struct list_head *tx_list);
+ struct list_head *tx_list,
+ u32 *count);
int sdma_ahg_alloc(struct sdma_engine *sde);
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
@@ -1058,7 +1061,15 @@ struct sdma_engine *sdma_select_engine_vl(
u32 selector,
u8 vl);
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+ u32 selector, u8 vl);
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf);
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+ size_t count);
+int sdma_engine_get_vl(struct sdma_engine *sde);
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *);
+void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd,
+ unsigned long cpuid);
#ifdef CONFIG_SDMA_VERBOSITY
void sdma_dumpstate(struct sdma_engine *);
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 74c84c655f7e..edba22461a9c 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -766,13 +766,95 @@ bail:
return ret;
}
+struct sde_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct sdma_engine *sde, char *buf);
+ ssize_t (*store)(struct sdma_engine *sde, const char *buf, size_t cnt);
+};
+
+static ssize_t sde_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct sde_attribute *sde_attr =
+ container_of(attr, struct sde_attribute, attr);
+ struct sdma_engine *sde =
+ container_of(kobj, struct sdma_engine, kobj);
+
+ if (!sde_attr->show)
+ return -EINVAL;
+
+ return sde_attr->show(sde, buf);
+}
+
+static ssize_t sde_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct sde_attribute *sde_attr =
+ container_of(attr, struct sde_attribute, attr);
+ struct sdma_engine *sde =
+ container_of(kobj, struct sdma_engine, kobj);
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!sde_attr->store)
+ return -EINVAL;
+
+ return sde_attr->store(sde, buf, count);
+}
+
+static const struct sysfs_ops sde_sysfs_ops = {
+ .show = sde_show,
+ .store = sde_store,
+};
+
+static struct kobj_type sde_ktype = {
+ .sysfs_ops = &sde_sysfs_ops,
+};
+
+#define SDE_ATTR(_name, _mode, _show, _store) \
+ struct sde_attribute sde_attr_##_name = \
+ __ATTR(_name, _mode, _show, _store)
+
+static ssize_t sde_show_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+ return sdma_get_cpu_to_sde_map(sde, buf);
+}
+
+static ssize_t sde_store_cpu_to_sde_map(struct sdma_engine *sde,
+ const char *buf, size_t count)
+{
+ return sdma_set_cpu_to_sde_map(sde, buf, count);
+}
+
+static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf)
+{
+ int vl;
+
+ vl = sdma_engine_get_vl(sde);
+ if (vl < 0)
+ return vl;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", vl);
+}
+
+static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO,
+ sde_show_cpu_to_sde_map,
+ sde_store_cpu_to_sde_map);
+static SDE_ATTR(vl, S_IRUGO, sde_show_vl, NULL);
+
+static struct sde_attribute *sde_attribs[] = {
+ &sde_attr_cpu_list,
+ &sde_attr_vl
+};
+
/*
* Register and create our files in /sys/class/infiniband.
*/
int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
{
struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
- int i, ret;
+ struct device *class_dev = &dev->dev;
+ int i, j, ret;
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
ret = device_create_file(&dev->dev, hfi1_attributes[i]);
@@ -780,10 +862,29 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
goto bail;
}
+ for (i = 0; i < dd->num_sdma; i++) {
+ ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
+ &sde_ktype, &class_dev->kobj,
+ "sdma%d", i);
+ if (ret)
+ goto bail;
+
+ for (j = 0; j < ARRAY_SIZE(sde_attribs); j++) {
+ ret = sysfs_create_file(&dd->per_sdma[i].kobj,
+ &sde_attribs[j]->attr);
+ if (ret)
+ goto bail;
+ }
+ }
+
return 0;
bail:
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
device_remove_file(&dev->dev, hfi1_attributes[i]);
+
+ for (i = 0; i < dd->num_sdma; i++)
+ kobject_del(&dd->per_sdma[i].kobj);
+
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 4cfb13771897..01f525cd985a 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -47,9 +47,9 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
+u8 ibhdr_exhdr_len(struct ib_header *hdr)
{
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u8 opcode;
u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
@@ -67,16 +67,11 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
#define IETH_PRN "ieth rkey 0x%.8x"
-#define ATOMICACKETH_PRN "origdata %lld"
-#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld"
+#define ATOMICACKETH_PRN "origdata %llx"
+#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
-static u64 ib_u64_get(__be32 *p)
-{
- return ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]);
-}
-
static const char *parse_syndrome(u8 syndrome)
{
switch (syndrome >> 5) {
@@ -113,8 +108,7 @@ const char *parse_everbs_hdrs(
case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, RETH_PRN " " IMM_PRN,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->rc.reth.vaddr),
+ get_ib_reth_vaddr(&eh->rc.reth),
be32_to_cpu(eh->rc.reth.rkey),
be32_to_cpu(eh->rc.reth.length),
be32_to_cpu(eh->rc.imm_data));
@@ -126,8 +120,7 @@ const char *parse_everbs_hdrs(
case OP(RC, RDMA_WRITE_ONLY):
case OP(UC, RDMA_WRITE_ONLY):
trace_seq_printf(p, RETH_PRN,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->rc.reth.vaddr),
+ get_ib_reth_vaddr(&eh->rc.reth),
be32_to_cpu(eh->rc.reth.rkey),
be32_to_cpu(eh->rc.reth.length));
break;
@@ -145,20 +138,16 @@ const char *parse_everbs_hdrs(
be32_to_cpu(eh->at.aeth) >> 24,
parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24),
be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK,
- (unsigned long long)
- ib_u64_get(eh->at.atomic_ack_eth));
+ ib_u64_get(&eh->at.atomic_ack_eth));
break;
/* atomiceth */
case OP(RC, COMPARE_SWAP):
case OP(RC, FETCH_ADD):
trace_seq_printf(p, ATOMICETH_PRN,
- (unsigned long long)ib_u64_get(
- eh->atomic_eth.vaddr),
+ get_ib_ateth_vaddr(&eh->atomic_eth),
eh->atomic_eth.rkey,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->atomic_eth.swap_data),
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->atomic_eth.compare_data));
+ get_ib_ateth_swap(&eh->atomic_eth),
+ get_ib_ateth_compare(&eh->atomic_eth));
break;
/* deth */
case OP(UD, SEND_ONLY):
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 31654bbac1cf..26ae789e47cf 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -67,9 +67,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__field(u64, hw_free)
__field(void __iomem *, piobase)
__field(u16, rcvhdrq_cnt)
- __field(u64, rcvhdrq_phys)
+ __field(u64, rcvhdrq_dma)
__field(u32, eager_cnt)
- __field(u64, rcvegr_phys)
+ __field(u64, rcvegr_dma)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = uctxt->ctxt;
@@ -77,10 +77,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
__entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
+ __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
- __entry->rcvegr_phys =
- uctxt->egrbufs.rcvtids[0].phys;
+ __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
),
TP_printk("[%s] ctxt %u " UCTXT_FMT,
__get_str(dev),
@@ -89,9 +88,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->hw_free,
__entry->piobase,
__entry->rcvhdrq_cnt,
- __entry->rcvhdrq_phys,
+ __entry->rcvhdrq_dma,
__entry->eager_cnt,
- __entry->rcvegr_phys
+ __entry->rcvegr_dma
)
);
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index c3e41aed0034..382fcda3a5f6 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -55,7 +55,7 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_ibhdrs
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
+u8 ibhdr_exhdr_len(struct ib_header *hdr);
const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
@@ -74,7 +74,7 @@ __print_symbolic(lrh, \
DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
TP_PROTO(struct hfi1_devdata *dd,
- struct hfi1_ib_header *hdr),
+ struct ib_header *hdr),
TP_ARGS(dd, hdr),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
@@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
__dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
),
TP_fast_assign(
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
DD_DEV_ASSIGN(dd);
/* LRH */
@@ -185,19 +185,19 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
);
DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
#endif /* __HFI1_TRACE_IBHDRS_H */
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 9ba1f615ec95..11e02b228922 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -260,7 +260,7 @@ TRACE_EVENT(hfi1_mmu_invalidate,
TRACE_EVENT(snoop_capture,
TP_PROTO(struct hfi1_devdata *dd,
int hdr_len,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
int data_len,
void *data),
TP_ARGS(dd, hdr_len, hdr, data_len, data),
@@ -279,7 +279,7 @@ TRACE_EVENT(snoop_capture,
__dynamic_array(u8, raw_pkt, data_len)
),
TP_fast_assign(
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
__entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
if (__entry->lnh == HFI1_LRH_BTH)
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index a726d96d185f..5e6d1bac4914 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -50,14 +50,7 @@
#include "qp.h"
/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_UC_##x
-
-/* only opcode mask for adaptive pio */
-const u32 uc_only_opcode =
- BIT(OP(SEND_ONLY) & 0x1f) |
- BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f));
+#define OP(x) UC_OP(x)
/**
* hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
@@ -70,7 +63,7 @@ const u32 uc_only_opcode =
int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
u32 hwords = 5;
u32 bth0 = 0;
@@ -304,12 +297,12 @@ bail_no_tx:
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
u32 psn;
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index f01e8e1d62d3..97ae24b6314c 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -271,7 +271,7 @@ drop:
int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
@@ -510,8 +510,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
u32 bth0, plen, vl, hwords = 5;
u16 lrh0;
u8 sl = ibp->sc_to_sl[sc5];
- struct hfi1_ib_header hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header hdr;
+ struct ib_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -559,8 +559,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
/*
* opa_smp_check() - Do the regular pkey checking, and the additional
- * checks for SMPs specified in OPAv1 rev 0.90, section 9.10.26
- * ("SMA Packet Checks").
+ * checks for SMPs specified in OPAv1 rev 1.0, 9/19/2016 update, section
+ * 9.10.25 ("SMA Packet Checks").
*
* Note that:
* - Checks are done using the pkey directly from the packet's BTH,
@@ -603,23 +603,28 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
/*
* SMPs fall into one of four (disjoint) categories:
- * SMA request, SMA response, trap, or trap repress.
- * Our response depends, in part, on which type of
- * SMP we're processing.
+ * SMA request, SMA response, SMA trap, or SMA trap repress.
+ * Our response depends, in part, on which type of SMP we're
+ * processing.
*
- * If this is not an SMA request, or trap repress:
- * - accept MAD if the port is running an SM
- * - pkey == FULL_MGMT_P_KEY =>
- * reply with unsupported method (i.e., just mark
- * the smp's status field here, and let it be
- * processed normally)
- * - pkey != LIM_MGMT_P_KEY =>
- * increment port recv constraint errors, drop MAD
- * If this is an SMA request or trap repress:
+ * If this is an SMA response, skip the check here.
+ *
+ * If this is an SMA request or SMA trap repress:
* - pkey != FULL_MGMT_P_KEY =>
* increment port recv constraint errors, drop MAD
+ *
+ * Otherwise:
+ * - accept if the port is running an SM
+ * - drop MAD if it's an SMA trap
+ * - pkey == FULL_MGMT_P_KEY =>
+ * reply with unsupported method
+ * - pkey != FULL_MGMT_P_KEY =>
+ * increment port recv constraint errors, drop MAD
*/
switch (smp->method) {
+ case IB_MGMT_METHOD_GET_RESP:
+ case IB_MGMT_METHOD_REPORT_RESP:
+ break;
case IB_MGMT_METHOD_GET:
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_REPORT:
@@ -629,23 +634,17 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
return 1;
}
break;
- case IB_MGMT_METHOD_SEND:
- case IB_MGMT_METHOD_TRAP:
- case IB_MGMT_METHOD_GET_RESP:
- case IB_MGMT_METHOD_REPORT_RESP:
+ default:
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return 0;
+ if (smp->method == IB_MGMT_METHOD_TRAP)
+ return 1;
if (pkey == FULL_MGMT_P_KEY) {
smp->status |= IB_SMP_UNSUP_METHOD;
return 0;
}
- if (pkey != LIM_MGMT_P_KEY) {
- ingress_pkey_table_fail(ppd, pkey, slid);
- return 1;
- }
- break;
- default:
- break;
+ ingress_pkey_table_fail(ppd, pkey, slid);
+ return 1;
}
return 0;
}
@@ -665,7 +664,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
*/
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
int opcode;
u32 hdrsize = packet->hlen;
struct ib_wc wc;
@@ -675,13 +674,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
int mgmt_pkey_idx = -1;
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
- u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
+ u8 sc5 = hdr2sc(hdr, packet->rhf);
u32 bth1;
u8 sl_from_sc, sl;
u16 slid;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 0ecf27903dc2..a761f804111e 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -114,6 +114,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
#define KDETH_HCRC_LOWER_SHIFT 24
#define KDETH_HCRC_LOWER_MASK 0xff
+#define AHG_KDETH_INTR_SHIFT 12
+
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
@@ -546,7 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
u8 opcode, sc, vl;
int req_queued = 0;
u16 dlid;
- u8 selector;
+ u32 selector;
if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
hfi1_cdbg(
@@ -751,12 +753,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
dlid = be16_to_cpu(req->hdr.lrh[1]);
selector = dlid_to_selector(dlid);
+ selector += uctxt->ctxt + fd->subctxt;
+ req->sde = sdma_select_user_engine(dd, selector, vl);
- /* Have to select the engine */
- req->sde = sdma_select_engine_vl(dd,
- (u32)(uctxt->ctxt + fd->subctxt +
- selector),
- vl);
if (!req->sde || !sdma_running(req->sde)) {
ret = -ECOMM;
goto free_req;
@@ -892,7 +891,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
{
- int ret = 0;
+ int ret = 0, count;
unsigned npkts = 0;
struct user_sdma_txreq *tx = NULL;
struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -1088,23 +1087,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
npkts++;
}
dosend:
- ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
- if (list_empty(&req->txps)) {
- req->seqsubmitted = req->seqnum;
- if (req->seqnum == req->info.npkts) {
- set_bit(SDMA_REQ_SEND_DONE, &req->flags);
- /*
- * The txreq has already been submitted to the HW queue
- * so we can free the AHG entry now. Corruption will not
- * happen due to the sequential manner in which
- * descriptors are processed.
- */
- if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
- sdma_ahg_free(req->sde, req->ahg_idx);
- }
- } else if (ret > 0) {
- req->seqsubmitted += ret;
- ret = 0;
+ ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+ req->seqsubmitted += count;
+ if (req->seqsubmitted == req->info.npkts) {
+ set_bit(SDMA_REQ_SEND_DONE, &req->flags);
+ /*
+ * The txreq has already been submitted to the HW queue
+ * so we can free the AHG entry now. Corruption will not
+ * happen due to the sequential manner in which
+ * descriptors are processed.
+ */
+ if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
+ sdma_ahg_free(req->sde, req->ahg_idx);
}
return ret;
@@ -1480,7 +1474,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
/* Clear KDETH.SH on last packet */
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) {
val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset,
- INTR) >> 16);
+ INTR) <<
+ AHG_KDETH_INTR_SHIFT);
val &= cpu_to_le16(~(1U << 13));
AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
} else {
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 2b359540901d..f2f6b5a78e0e 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -76,7 +76,7 @@ static unsigned int hfi1_max_ahs = 0xFFFF;
module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
-unsigned int hfi1_max_cqes = 0x2FFFF;
+unsigned int hfi1_max_cqes = 0x2FFFFF;
module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
"Maximum number of completion queue entries to support");
@@ -89,7 +89,7 @@ unsigned int hfi1_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
-unsigned int hfi1_max_qps = 16384;
+unsigned int hfi1_max_qps = 32768;
module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
@@ -335,7 +335,7 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4,
[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4,
[IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4,
- [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4,
+ [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8,
[IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28,
[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
@@ -403,6 +403,28 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_CNP] = &hfi1_cnp_rcv
};
+#define OPMASK 0x1f
+
+static const u32 pio_opmask[BIT(3)] = {
+ /* RC */
+ [IB_OPCODE_RC >> 5] =
+ BIT(RC_OP(SEND_ONLY) & OPMASK) |
+ BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) |
+ BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) |
+ BIT(RC_OP(ACKNOWLEDGE) & OPMASK) |
+ BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) |
+ BIT(RC_OP(COMPARE_SWAP) & OPMASK) |
+ BIT(RC_OP(FETCH_ADD) & OPMASK),
+ /* UC */
+ [IB_OPCODE_UC >> 5] =
+ BIT(UC_OP(SEND_ONLY) & OPMASK) |
+ BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) |
+ BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK),
+};
+
/*
* System image GUID.
*/
@@ -567,7 +589,7 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
void hfi1_ib_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -719,7 +741,7 @@ static void verbs_sdma_complete(
if (tx->wqe) {
hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
- struct hfi1_ib_header *hdr;
+ struct ib_header *hdr;
hdr = &tx->phdr.hdr;
hfi1_rc_send_complete(qp, hdr);
@@ -748,7 +770,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY;
@@ -959,7 +981,7 @@ static int pio_wait(struct rvt_qp *qp,
was_empty = list_empty(&sc->piowait);
list_add_tail(&priv->s_iowait.list, &sc->piowait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
if (was_empty)
hfi1_sc_wantpiobuf_intr(sc, 1);
@@ -1200,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_ib_header *h = &tx->phdr.hdr;
+ struct ib_header *h = &tx->phdr.hdr;
if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
return dd->process_pio_send;
@@ -1210,22 +1232,18 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
case IB_QPT_GSI:
case IB_QPT_UD:
break;
- case IB_QPT_RC:
- if (piothreshold &&
- qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
- (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
- iowait_sdma_pending(&priv->s_iowait) == 0 &&
- !sdma_txreq_built(&tx->txreq))
- return dd->process_pio_send;
- break;
case IB_QPT_UC:
+ case IB_QPT_RC: {
+ u8 op = get_opcode(h);
+
if (piothreshold &&
qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
- (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
+ (BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
iowait_sdma_pending(&priv->s_iowait) == 0 &&
!sdma_txreq_built(&tx->txreq))
return dd->process_pio_send;
break;
+ }
default:
break;
}
@@ -1244,8 +1262,8 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
- struct hfi1_ib_header *hdr;
+ struct ib_other_headers *ohdr;
+ struct ib_header *hdr;
send_routine sr;
int ret;
u8 lnh;
@@ -1754,7 +1772,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
struct rvt_qp *qp = packet->qp;
u32 lqpn, rqpn = 0;
u16 rlid = 0;
@@ -1781,7 +1799,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
return;
}
- sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
+ sc5 = hdr2sc(hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index d1b101c54828..1c3815d89eb7 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -60,6 +60,7 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
#include <rdma/rdmavt_cq.h>
@@ -80,16 +81,6 @@ struct hfi1_packet;
*/
#define HFI1_UVERBS_ABI_VERSION 2
-#define IB_SEQ_NAK (3 << 29)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK 0x20
-#define IB_NAK_PSN_ERROR 0x60
-#define IB_NAK_INVALID_REQUEST 0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST 0x64
-
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
@@ -104,80 +95,16 @@ struct hfi1_packet;
#define HFI1_VENDOR_IPG cpu_to_be16(0xFFA0)
-#define IB_BTH_REQ_ACK BIT(31)
-#define IB_BTH_SOLICITED BIT(23)
-#define IB_BTH_MIG_REQ BIT(22)
-
-#define IB_GRH_VERSION 6
-#define IB_GRH_VERSION_MASK 0xF
-#define IB_GRH_VERSION_SHIFT 28
-#define IB_GRH_TCLASS_MASK 0xFF
-#define IB_GRH_TCLASS_SHIFT 20
-#define IB_GRH_FLOW_MASK 0xFFFFF
-#define IB_GRH_FLOW_SHIFT 0
-#define IB_GRH_NEXT_HDR 0x1B
-
#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
+#define RC_OP(x) IB_OPCODE_RC_##x
+#define UC_OP(x) IB_OPCODE_UC_##x
+
/* flags passed by hfi1_ib_rcv() */
enum {
HFI1_HAS_GRH = (1 << 0),
};
-struct ib_reth {
- __be64 vaddr;
- __be32 rkey;
- __be32 length;
-} __packed;
-
-struct ib_atomic_eth {
- __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
- __be32 rkey;
- __be64 swap_data;
- __be64 compare_data;
-} __packed;
-
-union ib_ehdrs {
- struct {
- __be32 deth[2];
- __be32 imm_data;
- } ud;
- struct {
- struct ib_reth reth;
- __be32 imm_data;
- } rc;
- struct {
- __be32 aeth;
- __be32 atomic_ack_eth[2];
- } at;
- __be32 imm_data;
- __be32 aeth;
- __be32 ieth;
- struct ib_atomic_eth atomic_eth;
-} __packed;
-
-struct hfi1_other_headers {
- __be32 bth[3];
- union ib_ehdrs u;
-} __packed;
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data). Only the first 56 bytes of the IB header
- * will be in the eager header buffer. The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct hfi1_ib_header {
- __be16 lrh[4];
- union {
- struct {
- struct ib_grh grh;
- struct hfi1_other_headers oth;
- } l;
- struct hfi1_other_headers oth;
- } u;
-} __packed;
-
struct hfi1_ahg_info {
u32 ahgdesc[2];
u16 tx_flags;
@@ -187,7 +114,7 @@ struct hfi1_ahg_info {
struct hfi1_sdma_header {
__le64 pbc;
- struct hfi1_ib_header hdr;
+ struct ib_header hdr;
} __packed;
/*
@@ -386,7 +313,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet);
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
u32 rcv_flags,
struct rvt_qp *qp);
@@ -400,7 +327,7 @@ void hfi1_rc_timeout(unsigned long arg);
void hfi1_del_timers_sync(struct rvt_qp *qp);
void hfi1_stop_rc_timers(struct rvt_qp *qp);
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr);
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
@@ -423,7 +350,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;
-static inline u8 get_opcode(struct hfi1_ib_header *h)
+static inline u8 get_opcode(struct ib_header *h)
{
u16 lnh = be16_to_cpu(h->lrh[0]) & 3;
@@ -433,13 +360,13 @@ static inline u8 get_opcode(struct hfi1_ib_header *h)
return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
}
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
-void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
struct hfi1_pkt_state *ps);
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index d8fb056526f8..094ab829ec42 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -109,7 +109,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
qp->s_flags |= RVT_S_WAIT_TX;
list_add_tail(&priv->s_iowait.list, &dev->txwait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
qp->s_flags &= ~RVT_S_BUSY;
}