164 files changed, 11766 insertions, 63845 deletions
diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig
index ba8765063174..f1f3ecadf0fb 100644
--- a/drivers/staging/rdma/Kconfig
+++ b/drivers/staging/rdma/Kconfig
@@ -22,12 +22,6 @@ menuconfig STAGING_RDMA
 # Please keep entries in alphabetic order
 if STAGING_RDMA
 
-source "drivers/staging/rdma/amso1100/Kconfig"
-
-source "drivers/staging/rdma/ehca/Kconfig"
-
 source "drivers/staging/rdma/hfi1/Kconfig"
 
-source "drivers/staging/rdma/ipath/Kconfig"
-
 endif
diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile
index 139d78ef2c24..8c7fc1de48a7 100644
--- a/drivers/staging/rdma/Makefile
+++ b/drivers/staging/rdma/Makefile
@@ -1,5 +1,2 @@
 # Entries for RDMA_STAGING tree
-obj-$(CONFIG_INFINIBAND_AMSO1100)	+= amso1100/
-obj-$(CONFIG_INFINIBAND_EHCA)	+= ehca/
 obj-$(CONFIG_INFINIBAND_HFI1)	+= hfi1/
-obj-$(CONFIG_INFINIBAND_IPATH)	+= ipath/
diff --git a/drivers/staging/rdma/amso1100/Kbuild b/drivers/staging/rdma/amso1100/Kbuild
deleted file mode 100644
index 950dfabcd89d..000000000000
--- a/drivers/staging/rdma/amso1100/Kbuild
+++ /dev/null
@@ -1,6 +0,0 @@
-ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG
-
-obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
-
-iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
-	c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/staging/rdma/amso1100/Kconfig b/drivers/staging/rdma/amso1100/Kconfig
deleted file mode 100644
index e6ce5f209e47..000000000000
--- a/drivers/staging/rdma/amso1100/Kconfig
+++ /dev/null
@@ -1,15 +0,0 @@
-config INFINIBAND_AMSO1100
-	tristate "Ammasso 1100 HCA support"
-	depends on PCI && INET
-	---help---
-	  This is a low-level driver for the Ammasso 1100 host
-	  channel adapter (HCA).
-
-config INFINIBAND_AMSO1100_DEBUG
-	bool "Verbose debugging output"
-	depends on INFINIBAND_AMSO1100
-	default n
-	---help---
-	  This option causes the amso1100 driver to produce a bunch of
-	  debug messages.  Select this if you are developing the driver
-	  or trying to diagnose a problem.
diff --git a/drivers/staging/rdma/amso1100/TODO b/drivers/staging/rdma/amso1100/TODO
deleted file mode 100644
index 18b00a5cb549..000000000000
--- a/drivers/staging/rdma/amso1100/TODO
+++ /dev/null
@@ -1,4 +0,0 @@
-7/2015
-
-The amso1100 driver has been deprecated and moved to drivers/staging.
-It will be removed in the 4.6 merge window.
diff --git a/drivers/staging/rdma/amso1100/c2.c b/drivers/staging/rdma/amso1100/c2.c
deleted file mode 100644
index b46ebd1ae15a..000000000000
--- a/drivers/staging/rdma/amso1100/c2.c
+++ /dev/null
@@ -1,1240 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <linux/prefetch.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_smi.h>
-#include "c2.h"
-#include "c2_provider.h"
-
-MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
-MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
-
-static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
-    | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
-
-static int debug = -1;		/* defaults above */
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
-
-static int c2_up(struct net_device *netdev);
-static int c2_down(struct net_device *netdev);
-static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
-static void c2_tx_interrupt(struct net_device *netdev);
-static void c2_rx_interrupt(struct net_device *netdev);
-static irqreturn_t c2_interrupt(int irq, void *dev_id);
-static void c2_tx_timeout(struct net_device *netdev);
-static int c2_change_mtu(struct net_device *netdev, int new_mtu);
-static void c2_reset(struct c2_port *c2_port);
-
-static struct pci_device_id c2_pci_table[] = {
-	{ PCI_DEVICE(0x18b8, 0xb001) },
-	{ 0 }
-};
-
-MODULE_DEVICE_TABLE(pci, c2_pci_table);
-
-static void c2_set_rxbufsize(struct c2_port *c2_port)
-{
-	struct net_device *netdev = c2_port->netdev;
-
-	if (netdev->mtu > RX_BUF_SIZE)
-		c2_port->rx_buf_size =
-		    netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
-		    NET_IP_ALIGN;
-	else
-		c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
-}
-
-/*
- * Allocate TX ring elements and chain them together.
- * One-to-one association of adapter descriptors with ring elements.
- */
-static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
-			    dma_addr_t base, void __iomem * mmio_txp_ring)
-{
-	struct c2_tx_desc *tx_desc;
-	struct c2_txp_desc __iomem *txp_desc;
-	struct c2_element *elem;
-	int i;
-
-	tx_ring->start = kmalloc_array(tx_ring->count, sizeof(*elem),
-				       GFP_KERNEL);
-	if (!tx_ring->start)
-		return -ENOMEM;
-
-	elem = tx_ring->start;
-	tx_desc = vaddr;
-	txp_desc = mmio_txp_ring;
-	for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
-		tx_desc->len = 0;
-		tx_desc->status = 0;
-
-		/* Set TXP_HTXD_UNINIT */
-		__raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
-			     (void __iomem *) txp_desc + C2_TXP_ADDR);
-		__raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
-		__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
-			     (void __iomem *) txp_desc + C2_TXP_FLAGS);
-
-		elem->skb = NULL;
-		elem->ht_desc = tx_desc;
-		elem->hw_desc = txp_desc;
-
-		if (i == tx_ring->count - 1) {
-			elem->next = tx_ring->start;
-			tx_desc->next_offset = base;
-		} else {
-			elem->next = elem + 1;
-			tx_desc->next_offset =
-			    base + (i + 1) * sizeof(*tx_desc);
-		}
-	}
-
-	tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
-
-	return 0;
-}
-
-/*
- * Allocate RX ring elements and chain them together.
- * One-to-one association of adapter descriptors with ring elements.
- */
-static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
-			    dma_addr_t base, void __iomem * mmio_rxp_ring)
-{
-	struct c2_rx_desc *rx_desc;
-	struct c2_rxp_desc __iomem *rxp_desc;
-	struct c2_element *elem;
-	int i;
-
-	rx_ring->start = kmalloc_array(rx_ring->count, sizeof(*elem),
-				       GFP_KERNEL);
-	if (!rx_ring->start)
-		return -ENOMEM;
-
-	elem = rx_ring->start;
-	rx_desc = vaddr;
-	rxp_desc = mmio_rxp_ring;
-	for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
-		rx_desc->len = 0;
-		rx_desc->status = 0;
-
-		/* Set RXP_HRXD_UNINIT */
-		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_OK),
-		       (void __iomem *) rxp_desc + C2_RXP_STATUS);
-		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
-		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
-		__raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
-			     (void __iomem *) rxp_desc + C2_RXP_ADDR);
-		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
-			     (void __iomem *) rxp_desc + C2_RXP_FLAGS);
-
-		elem->skb = NULL;
-		elem->ht_desc = rx_desc;
-		elem->hw_desc = rxp_desc;
-
-		if (i == rx_ring->count - 1) {
-			elem->next = rx_ring->start;
-			rx_desc->next_offset = base;
-		} else {
-			elem->next = elem + 1;
-			rx_desc->next_offset =
-			    base + (i + 1) * sizeof(*rx_desc);
-		}
-	}
-
-	rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
-
-	return 0;
-}
-
-/* Setup buffer for receiving */
-static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
-{
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_rx_desc *rx_desc = elem->ht_desc;
-	struct sk_buff *skb;
-	dma_addr_t mapaddr;
-	u32 maplen;
-	struct c2_rxp_hdr *rxp_hdr;
-
-	skb = dev_alloc_skb(c2_port->rx_buf_size);
-	if (unlikely(!skb)) {
-		pr_debug("%s: out of memory for receive\n",
-			c2_port->netdev->name);
-		return -ENOMEM;
-	}
-
-	/* Zero out the rxp hdr in the sk_buff */
-	memset(skb->data, 0, sizeof(*rxp_hdr));
-
-	skb->dev = c2_port->netdev;
-
-	maplen = c2_port->rx_buf_size;
-	mapaddr =
-	    pci_map_single(c2dev->pcidev, skb->data, maplen,
-			   PCI_DMA_FROMDEVICE);
-
-	/* Set the sk_buff RXP_header to RXP_HRXD_READY */
-	rxp_hdr = (struct c2_rxp_hdr *) skb->data;
-	rxp_hdr->flags = RXP_HRXD_READY;
-
-	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-	__raw_writew((__force u16) cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
-		     elem->hw_desc + C2_RXP_LEN);
-	__raw_writeq((__force u64) cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
-	__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-		     elem->hw_desc + C2_RXP_FLAGS);
-
-	elem->skb = skb;
-	elem->mapaddr = mapaddr;
-	elem->maplen = maplen;
-	rx_desc->len = maplen;
-
-	return 0;
-}
-
-/*
- * Allocate buffers for the Rx ring
- * For receive:  rx_ring.to_clean is next received frame
- */
-static int c2_rx_fill(struct c2_port *c2_port)
-{
-	struct c2_ring *rx_ring = &c2_port->rx_ring;
-	struct c2_element *elem;
-	int ret = 0;
-
-	elem = rx_ring->start;
-	do {
-		if (c2_rx_alloc(c2_port, elem)) {
-			ret = 1;
-			break;
-		}
-	} while ((elem = elem->next) != rx_ring->start);
-
-	rx_ring->to_clean = rx_ring->start;
-	return ret;
-}
-
-/* Free all buffers in RX ring, assumes receiver stopped */
-static void c2_rx_clean(struct c2_port *c2_port)
-{
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_ring *rx_ring = &c2_port->rx_ring;
-	struct c2_element *elem;
-	struct c2_rx_desc *rx_desc;
-
-	elem = rx_ring->start;
-	do {
-		rx_desc = elem->ht_desc;
-		rx_desc->len = 0;
-
-		__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-		__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
-		__raw_writew(0, elem->hw_desc + C2_RXP_LEN);
-		__raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
-			     elem->hw_desc + C2_RXP_ADDR);
-		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
-			     elem->hw_desc + C2_RXP_FLAGS);
-
-		if (elem->skb) {
-			pci_unmap_single(c2dev->pcidev, elem->mapaddr,
-					 elem->maplen, PCI_DMA_FROMDEVICE);
-			dev_kfree_skb(elem->skb);
-			elem->skb = NULL;
-		}
-	} while ((elem = elem->next) != rx_ring->start);
-}
-
-static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
-{
-	struct c2_tx_desc *tx_desc = elem->ht_desc;
-
-	tx_desc->len = 0;
-
-	pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
-			 PCI_DMA_TODEVICE);
-
-	if (elem->skb) {
-		dev_kfree_skb_any(elem->skb);
-		elem->skb = NULL;
-	}
-
-	return 0;
-}
-
-/* Free all buffers in TX ring, assumes transmitter stopped */
-static void c2_tx_clean(struct c2_port *c2_port)
-{
-	struct c2_ring *tx_ring = &c2_port->tx_ring;
-	struct c2_element *elem;
-	struct c2_txp_desc txp_htxd;
-	int retry;
-	unsigned long flags;
-
-	spin_lock_irqsave(&c2_port->tx_lock, flags);
-
-	elem = tx_ring->start;
-
-	do {
-		retry = 0;
-		do {
-			txp_htxd.flags =
-			    readw(elem->hw_desc + C2_TXP_FLAGS);
-
-			if (txp_htxd.flags == TXP_HTXD_READY) {
-				retry = 1;
-				__raw_writew(0,
-					     elem->hw_desc + C2_TXP_LEN);
-				__raw_writeq(0,
-					     elem->hw_desc + C2_TXP_ADDR);
-				__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_DONE),
-					     elem->hw_desc + C2_TXP_FLAGS);
-				c2_port->netdev->stats.tx_dropped++;
-				break;
-			} else {
-				__raw_writew(0,
-					     elem->hw_desc + C2_TXP_LEN);
-				__raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
-					     elem->hw_desc + C2_TXP_ADDR);
-				__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
-					     elem->hw_desc + C2_TXP_FLAGS);
-			}
-
-			c2_tx_free(c2_port->c2dev, elem);
-
-		} while ((elem = elem->next) != tx_ring->start);
-	} while (retry);
-
-	c2_port->tx_avail = c2_port->tx_ring.count - 1;
-	c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
-
-	if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
-		netif_wake_queue(c2_port->netdev);
-
-	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-}
-
-/*
- * Process transmit descriptors marked 'DONE' by the firmware,
- * freeing up their unneeded sk_buffs.
- */
-static void c2_tx_interrupt(struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_ring *tx_ring = &c2_port->tx_ring;
-	struct c2_element *elem;
-	struct c2_txp_desc txp_htxd;
-
-	spin_lock(&c2_port->tx_lock);
-
-	for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
-	     elem = elem->next) {
-		txp_htxd.flags =
-		    be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_FLAGS));
-
-		if (txp_htxd.flags != TXP_HTXD_DONE)
-			break;
-
-		if (netif_msg_tx_done(c2_port)) {
-			/* PCI reads are expensive in fast path */
-			txp_htxd.len =
-			    be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_LEN));
-			pr_debug("%s: tx done slot %3Zu status 0x%x len "
-				"%5u bytes\n",
-				netdev->name, elem - tx_ring->start,
-				txp_htxd.flags, txp_htxd.len);
-		}
-
-		c2_tx_free(c2dev, elem);
-		++(c2_port->tx_avail);
-	}
-
-	tx_ring->to_clean = elem;
-
-	if (netif_queue_stopped(netdev)
-	    && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
-		netif_wake_queue(netdev);
-
-	spin_unlock(&c2_port->tx_lock);
-}
-
-static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
-{
-	struct c2_rx_desc *rx_desc = elem->ht_desc;
-	struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
-
-	if (rxp_hdr->status != RXP_HRXD_OK ||
-	    rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
-		pr_debug("BAD RXP_HRXD\n");
-		pr_debug("  rx_desc : %p\n", rx_desc);
-		pr_debug("    index : %Zu\n",
-			elem - c2_port->rx_ring.start);
-		pr_debug("    len   : %u\n", rx_desc->len);
-		pr_debug("  rxp_hdr : %p [PA %p]\n", rxp_hdr,
-			(void *) __pa((unsigned long) rxp_hdr));
-		pr_debug("    flags : 0x%x\n", rxp_hdr->flags);
-		pr_debug("    status: 0x%x\n", rxp_hdr->status);
-		pr_debug("    len   : %u\n", rxp_hdr->len);
-		pr_debug("    rsvd  : 0x%x\n", rxp_hdr->rsvd);
-	}
-
-	/* Setup the skb for reuse since we're dropping this pkt */
-	elem->skb->data = elem->skb->head;
-	skb_reset_tail_pointer(elem->skb);
-
-	/* Zero out the rxp hdr in the sk_buff */
-	memset(elem->skb->data, 0, sizeof(*rxp_hdr));
-
-	/* Write the descriptor to the adapter's rx ring */
-	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-	__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
-	__raw_writew((__force u16) cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
-		     elem->hw_desc + C2_RXP_LEN);
-	__raw_writeq((__force u64) cpu_to_be64(elem->mapaddr),
-		     elem->hw_desc + C2_RXP_ADDR);
-	__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-		     elem->hw_desc + C2_RXP_FLAGS);
-
-	pr_debug("packet dropped\n");
-	c2_port->netdev->stats.rx_dropped++;
-}
-
-static void c2_rx_interrupt(struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_ring *rx_ring = &c2_port->rx_ring;
-	struct c2_element *elem;
-	struct c2_rx_desc *rx_desc;
-	struct c2_rxp_hdr *rxp_hdr;
-	struct sk_buff *skb;
-	dma_addr_t mapaddr;
-	u32 maplen, buflen;
-	unsigned long flags;
-
-	spin_lock_irqsave(&c2dev->lock, flags);
-
-	/* Begin where we left off */
-	rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
-
-	for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
-	     elem = elem->next) {
-		rx_desc = elem->ht_desc;
-		mapaddr = elem->mapaddr;
-		maplen = elem->maplen;
-		skb = elem->skb;
-		rxp_hdr = (struct c2_rxp_hdr *) skb->data;
-
-		if (rxp_hdr->flags != RXP_HRXD_DONE)
-			break;
-		buflen = rxp_hdr->len;
-
-		/* Sanity check the RXP header */
-		if (rxp_hdr->status != RXP_HRXD_OK ||
-		    buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
-			c2_rx_error(c2_port, elem);
-			continue;
-		}
-
-		/*
-		 * Allocate and map a new skb for replenishing the host
-		 * RX desc
-		 */
-		if (c2_rx_alloc(c2_port, elem)) {
-			c2_rx_error(c2_port, elem);
-			continue;
-		}
-
-		/* Unmap the old skb */
-		pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
-				 PCI_DMA_FROMDEVICE);
-
-		prefetch(skb->data);
-
-		/*
-		 * Skip past the leading 8 bytes comprising of the
-		 * "struct c2_rxp_hdr", prepended by the adapter
-		 * to the usual Ethernet header ("struct ethhdr"),
-		 * to the start of the raw Ethernet packet.
-		 *
-		 * Fix up the various fields in the sk_buff before
-		 * passing it up to netif_rx(). The transfer size
-		 * (in bytes) specified by the adapter len field of
-		 * the "struct rxp_hdr_t" does NOT include the
-		 * "sizeof(struct c2_rxp_hdr)".
-		 */
-		skb->data += sizeof(*rxp_hdr);
-		skb_set_tail_pointer(skb, buflen);
-		skb->len = buflen;
-		skb->protocol = eth_type_trans(skb, netdev);
-
-		netif_rx(skb);
-
-		netdev->stats.rx_packets++;
-		netdev->stats.rx_bytes += buflen;
-	}
-
-	/* Save where we left off */
-	rx_ring->to_clean = elem;
-	c2dev->cur_rx = elem - rx_ring->start;
-	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
-
-	spin_unlock_irqrestore(&c2dev->lock, flags);
-}
-
-/*
- * Handle netisr0 TX & RX interrupts.
- */
-static irqreturn_t c2_interrupt(int irq, void *dev_id)
-{
-	unsigned int netisr0, dmaisr;
-	int handled = 0;
-	struct c2_dev *c2dev = dev_id;
-
-	/* Process CCILNET interrupts */
-	netisr0 = readl(c2dev->regs + C2_NISR0);
-	if (netisr0) {
-
-		/*
-		 * There is an issue with the firmware that always
-		 * provides the status of RX for both TX & RX
-		 * interrupts.  So process both queues here.
-		 */
-		c2_rx_interrupt(c2dev->netdev);
-		c2_tx_interrupt(c2dev->netdev);
-
-		/* Clear the interrupt */
-		writel(netisr0, c2dev->regs + C2_NISR0);
-		handled++;
-	}
-
-	/* Process RNIC interrupts */
-	dmaisr = readl(c2dev->regs + C2_DISR);
-	if (dmaisr) {
-		writel(dmaisr, c2dev->regs + C2_DISR);
-		c2_rnic_interrupt(c2dev);
-		handled++;
-	}
-
-	if (handled) {
-		return IRQ_HANDLED;
-	} else {
-		return IRQ_NONE;
-	}
-}
-
-static int c2_up(struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_element *elem;
-	struct c2_rxp_hdr *rxp_hdr;
-	struct in_device *in_dev;
-	size_t rx_size, tx_size;
-	int ret, i;
-	unsigned int netimr0;
-
-	if (netif_msg_ifup(c2_port))
-		pr_debug("%s: enabling interface\n", netdev->name);
-
-	/* Set the Rx buffer size based on MTU */
-	c2_set_rxbufsize(c2_port);
-
-	/* Allocate DMA'able memory for Tx/Rx host descriptor rings */
-	rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
-	tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
-
-	c2_port->mem_size = tx_size + rx_size;
-	c2_port->mem = pci_zalloc_consistent(c2dev->pcidev, c2_port->mem_size,
-					     &c2_port->dma);
-	if (c2_port->mem == NULL) {
-		pr_debug("Unable to allocate memory for "
-			"host descriptor rings\n");
-		return -ENOMEM;
-	}
-
-	/* Create the Rx host descriptor ring */
-	if ((ret =
-	     c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
-			      c2dev->mmio_rxp_ring))) {
-		pr_debug("Unable to create RX ring\n");
-		goto bail0;
-	}
-
-	/* Allocate Rx buffers for the host descriptor ring */
-	if (c2_rx_fill(c2_port)) {
-		pr_debug("Unable to fill RX ring\n");
-		goto bail1;
-	}
-
-	/* Create the Tx host descriptor ring */
-	if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
-				    c2_port->dma + rx_size,
-				    c2dev->mmio_txp_ring))) {
-		pr_debug("Unable to create TX ring\n");
-		goto bail1;
-	}
-
-	/* Set the TX pointer to where we left off */
-	c2_port->tx_avail = c2_port->tx_ring.count - 1;
-	c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
-	    c2_port->tx_ring.start + c2dev->cur_tx;
-
-	/* missing: Initialize MAC */
-
-	BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
-
-	/* Reset the adapter, ensures the driver is in sync with the RXP */
-	c2_reset(c2_port);
-
-	/* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
-	for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
-	     i++, elem++) {
-		rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
-		rxp_hdr->flags = 0;
-		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
-			     elem->hw_desc + C2_RXP_FLAGS);
-	}
-
-	/* Enable network packets */
-	netif_start_queue(netdev);
-
-	/* Enable IRQ */
-	writel(0, c2dev->regs + C2_IDIS);
-	netimr0 = readl(c2dev->regs + C2_NIMR0);
-	netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
-	writel(netimr0, c2dev->regs + C2_NIMR0);
-
-	/* Tell the stack to ignore arp requests for ipaddrs bound to
-	 * other interfaces.  This is needed to prevent the host stack
-	 * from responding to arp requests to the ipaddr bound on the
-	 * rdma interface.
-	 */
-	in_dev = in_dev_get(netdev);
-	IN_DEV_CONF_SET(in_dev, ARP_IGNORE, 1);
-	in_dev_put(in_dev);
-
-	return 0;
-
-bail1:
-	c2_rx_clean(c2_port);
-	kfree(c2_port->rx_ring.start);
-
-bail0:
-	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
-			    c2_port->dma);
-
-	return ret;
-}
-
-static int c2_down(struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-	struct c2_dev *c2dev = c2_port->c2dev;
-
-	if (netif_msg_ifdown(c2_port))
-		pr_debug("%s: disabling interface\n",
-			netdev->name);
-
-	/* Wait for all the queued packets to get sent */
-	c2_tx_interrupt(netdev);
-
-	/* Disable network packets */
-	netif_stop_queue(netdev);
-
-	/* Disable IRQs by clearing the interrupt mask */
-	writel(1, c2dev->regs + C2_IDIS);
-	writel(0, c2dev->regs + C2_NIMR0);
-
-	/* missing: Stop transmitter */
-
-	/* missing: Stop receiver */
-
-	/* Reset the adapter, ensures the driver is in sync with the RXP */
-	c2_reset(c2_port);
-
-	/* missing: Turn off LEDs here */
-
-	/* Free all buffers in the host descriptor rings */
-	c2_tx_clean(c2_port);
-	c2_rx_clean(c2_port);
-
-	/* Free the host descriptor rings */
-	kfree(c2_port->rx_ring.start);
-	kfree(c2_port->tx_ring.start);
-	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
-			    c2_port->dma);
-
-	return 0;
-}
-
-static void c2_reset(struct c2_port *c2_port)
-{
-	struct c2_dev *c2dev = c2_port->c2dev;
-	unsigned int cur_rx = c2dev->cur_rx;
-
-	/* Tell the hardware to quiesce */
-	C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
-
-	/*
-	 * The hardware will reset the C2_PCI_HRX_QUI bit once
-	 * the RXP is quiesced.  Wait 2 seconds for this.
-	 */
-	ssleep(2);
-
-	cur_rx = C2_GET_CUR_RX(c2dev);
-
-	if (cur_rx & C2_PCI_HRX_QUI)
-		pr_debug("c2_reset: failed to quiesce the hardware!\n");
-
-	cur_rx &= ~C2_PCI_HRX_QUI;
-
-	c2dev->cur_rx = cur_rx;
-
-	pr_debug("Current RX: %u\n", c2dev->cur_rx);
-}
-
-static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-	struct c2_dev *c2dev = c2_port->c2dev;
-	struct c2_ring *tx_ring = &c2_port->tx_ring;
-	struct c2_element *elem;
-	dma_addr_t mapaddr;
-	u32 maplen;
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&c2_port->tx_lock, flags);
-
-	if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
-		netif_stop_queue(netdev);
-		spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-
-		pr_debug("%s: Tx ring full when queue awake!\n",
-			netdev->name);
-		return NETDEV_TX_BUSY;
-	}
-
-	maplen = skb_headlen(skb);
-	mapaddr =
-	    pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
-
-	elem = tx_ring->to_use;
-	elem->skb = skb;
-	elem->mapaddr = mapaddr;
-	elem->maplen = maplen;
-
-	/* Tell HW to xmit */
-	__raw_writeq((__force u64) cpu_to_be64(mapaddr),
-		     elem->hw_desc + C2_TXP_ADDR);
-	__raw_writew((__force u16) cpu_to_be16(maplen),
-		     elem->hw_desc + C2_TXP_LEN);
-	__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
-		     elem->hw_desc + C2_TXP_FLAGS);
-
-	netdev->stats.tx_packets++;
-	netdev->stats.tx_bytes += maplen;
-
-	/* Loop thru additional data fragments and queue them */
-	if (skb_shinfo(skb)->nr_frags) {
-		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-			maplen = skb_frag_size(frag);
-			mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag,
-						   0, maplen, DMA_TO_DEVICE);
-			elem = elem->next;
-			elem->skb = NULL;
-			elem->mapaddr = mapaddr;
-			elem->maplen = maplen;
-
-			/* Tell HW to xmit */
-			__raw_writeq((__force u64) cpu_to_be64(mapaddr),
-				     elem->hw_desc + C2_TXP_ADDR);
-			__raw_writew((__force u16) cpu_to_be16(maplen),
-				     elem->hw_desc + C2_TXP_LEN);
-			__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
-				     elem->hw_desc + C2_TXP_FLAGS);
-
-			netdev->stats.tx_packets++;
-			netdev->stats.tx_bytes += maplen;
-		}
-	}
-
-	tx_ring->to_use = elem->next;
-	c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
-
-	if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
-		netif_stop_queue(netdev);
-		if (netif_msg_tx_queued(c2_port))
-			pr_debug("%s: transmit queue full\n",
-				netdev->name);
-	}
-
-	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
-
-	netdev->trans_start = jiffies;
-
-	return NETDEV_TX_OK;
-}
-
-static void c2_tx_timeout(struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-
-	if (netif_msg_timer(c2_port))
-		pr_debug("%s: tx timeout\n", netdev->name);
-
-	c2_tx_clean(c2_port);
-}
-
-static int c2_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	int ret = 0;
-
-	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-		return -EINVAL;
-
-	netdev->mtu = new_mtu;
-
-	if (netif_running(netdev)) {
-		c2_down(netdev);
-
-		c2_up(netdev);
-	}
-
-	return ret;
-}
-
-static const struct net_device_ops c2_netdev = {
-	.ndo_open 		= c2_up,
-	.ndo_stop 		= c2_down,
-	.ndo_start_xmit		= c2_xmit_frame,
-	.ndo_tx_timeout		= c2_tx_timeout,
-	.ndo_change_mtu		= c2_change_mtu,
-	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-/* Initialize network device */
-static struct net_device *c2_devinit(struct c2_dev *c2dev,
-				     void __iomem * mmio_addr)
-{
-	struct c2_port *c2_port = NULL;
-	struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
-
-	if (!netdev) {
-		pr_debug("c2_port etherdev alloc failed");
-		return NULL;
-	}
-
-	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
-
-	netdev->netdev_ops = &c2_netdev;
-	netdev->watchdog_timeo = C2_TX_TIMEOUT;
-	netdev->irq = c2dev->pcidev->irq;
-
-	c2_port = netdev_priv(netdev);
-	c2_port->netdev = netdev;
-	c2_port->c2dev = c2dev;
-	c2_port->msg_enable = netif_msg_init(debug, default_msg);
-	c2_port->tx_ring.count = C2_NUM_TX_DESC;
-	c2_port->rx_ring.count = C2_NUM_RX_DESC;
-
-	spin_lock_init(&c2_port->tx_lock);
-
-	/* Copy our 48-bit ethernet hardware address */
-	memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
-
-	/* Validate the MAC address */
-	if (!is_valid_ether_addr(netdev->dev_addr)) {
-		pr_debug("Invalid MAC Address\n");
-		pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name,
-			 netdev->dev_addr, netdev->irq);
-		free_netdev(netdev);
-		return NULL;
-	}
-
-	c2dev->netdev = netdev;
-
-	return netdev;
-}
-
-static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
-{
-	int ret = 0, i;
-	unsigned long reg0_start, reg0_flags, reg0_len;
-	unsigned long reg2_start, reg2_flags, reg2_len;
-	unsigned long reg4_start, reg4_flags, reg4_len;
-	unsigned kva_map_size;
-	struct net_device *netdev = NULL;
-	struct c2_dev *c2dev = NULL;
-	void __iomem *mmio_regs = NULL;
-
-	printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
-		DRV_VERSION);
-
-	/* Enable PCI device */
-	ret = pci_enable_device(pcidev);
-	if (ret) {
-		printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
-			pci_name(pcidev));
-		goto bail0;
-	}
-
-	reg0_start = pci_resource_start(pcidev, BAR_0);
-	reg0_len = pci_resource_len(pcidev, BAR_0);
-	reg0_flags = pci_resource_flags(pcidev, BAR_0);
-
-	reg2_start = pci_resource_start(pcidev, BAR_2);
-	reg2_len = pci_resource_len(pcidev, BAR_2);
-	reg2_flags = pci_resource_flags(pcidev, BAR_2);
-
-	reg4_start = pci_resource_start(pcidev, BAR_4);
-	reg4_len = pci_resource_len(pcidev, BAR_4);
-	reg4_flags = pci_resource_flags(pcidev, BAR_4);
-
-	pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
-	pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
-	pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
-
-	/* Make sure PCI base addr are MMIO */
-	if (!(reg0_flags & IORESOURCE_MEM) ||
-	    !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
-		printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
-		ret = -ENODEV;
-		goto bail1;
-	}
-
-	/* Check for weird/broken PCI region reporting */
-	if ((reg0_len < C2_REG0_SIZE) ||
-	    (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
-		printk(KERN_ERR PFX "Invalid PCI region sizes\n");
-		ret = -ENODEV;
-		goto bail1;
-	}
-
-	/* Reserve PCI I/O and memory resources */
-	ret = pci_request_regions(pcidev, DRV_NAME);
-	if (ret) {
-		printk(KERN_ERR PFX "%s: Unable to request regions\n",
-			pci_name(pcidev));
-		goto bail1;
-	}
-
-	if ((sizeof(dma_addr_t) > 4)) {
-		ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64));
-		if (ret < 0) {
-			printk(KERN_ERR PFX "64b DMA configuration failed\n");
-			goto bail2;
-		}
-	} else {
-		ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
-		if (ret < 0) {
-			printk(KERN_ERR PFX "32b DMA configuration failed\n");
-			goto bail2;
-		}
-	}
-
-	/* Enables bus-mastering on the device */
-	pci_set_master(pcidev);
-
-	/* Remap the adapter PCI registers in BAR4 */
-	mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
-				    sizeof(struct c2_adapter_pci_regs));
-	if (!mmio_regs) {
-		printk(KERN_ERR PFX
-			"Unable to remap adapter PCI registers in BAR4\n");
-		ret = -EIO;
-		goto bail2;
-	}
-
-	/* Validate PCI regs magic */
-	for (i = 0; i < sizeof(c2_magic); i++) {
-		if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
-			printk(KERN_ERR PFX "Downlevel Firmware boot loader "
-				"[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
-			       "utility to update your boot loader\n",
-				i + 1, sizeof(c2_magic),
-				readb(mmio_regs + C2_REGS_MAGIC + i),
-				c2_magic[i]);
-			printk(KERN_ERR PFX "Adapter not claimed\n");
-			iounmap(mmio_regs);
-			ret = -EIO;
-			goto bail2;
-		}
-	}
-
-	/* Validate the adapter version */
-	if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
-		printk(KERN_ERR PFX "Version mismatch "
-			"[fw=%u, c2=%u], Adapter not claimed\n",
-			be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)),
-			C2_VERSION);
-		ret = -EINVAL;
-		iounmap(mmio_regs);
-		goto bail2;
-	}
-
-	/* Validate the adapter IVN */
-	if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
-		printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using "
-		       "the OpenIB device support kit. "
-		       "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)),
-		       C2_IVN);
-		ret = -EINVAL;
-		iounmap(mmio_regs);
-		goto bail2;
-	}
-
-	/* Allocate hardware structure */
-	c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
-	if (!c2dev) {
-		printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
-			pci_name(pcidev));
-		ret = -ENOMEM;
-		iounmap(mmio_regs);
-		goto bail2;
-	}
-
-	memset(c2dev, 0, sizeof(*c2dev));
-	spin_lock_init(&c2dev->lock);
-	c2dev->pcidev = pcidev;
-	c2dev->cur_tx = 0;
-
-	/* Get the last RX index */
-	c2dev->cur_rx =
-	    (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_HRX_CUR)) -
-	     0xffffc000) / sizeof(struct c2_rxp_desc);
-
-	/* Request an interrupt line for the driver */
-	ret = request_irq(pcidev->irq, c2_interrupt, IRQF_SHARED, DRV_NAME, c2dev);
-	if (ret) {
-		printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
-			pci_name(pcidev), pcidev->irq);
-		iounmap(mmio_regs);
-		goto bail3;
-	}
-
-	/* Set driver specific data */
-	pci_set_drvdata(pcidev, c2dev);
-
-	/* Initialize network device */
-	if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
-		ret = -ENOMEM;
-		iounmap(mmio_regs);
-		goto bail4;
-	}
-
-	/* Save off the actual size prior to unmapping mmio_regs */
-	kva_map_size = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_PCI_WINSIZE));
-
-	/* Unmap the adapter PCI registers in BAR4 */
-	iounmap(mmio_regs);
-
-	/* Register network device */
-	ret = register_netdev(netdev);
-	if (ret) {
-		printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
-			ret);
-		goto bail5;
-	}
-
-	/* Disable network packets */
-	netif_stop_queue(netdev);
-
-	/* Remap the adapter HRXDQ PA space to kernel VA space */
-	c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
-					       C2_RXP_HRXDQ_SIZE);
-	if (!c2dev->mmio_rxp_ring) {
-		printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
-		ret = -EIO;
-		goto bail6;
-	}
-
-	/* Remap the adapter HTXDQ PA space to kernel VA space */
-	c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
-					       C2_TXP_HTXDQ_SIZE);
-	if (!c2dev->mmio_txp_ring) {
-		printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
-		ret = -EIO;
-		goto bail7;
-	}
-
-	/* Save off the current RX index in the last 4 bytes of the TXP Ring */
-	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
-
-	/* Remap the PCI registers in adapter BAR0 to kernel VA space */
-	c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
-	if (!c2dev->regs) {
-		printk(KERN_ERR PFX "Unable to remap BAR0\n");
-		ret = -EIO;
-		goto bail8;
-	}
-
-	/* Remap the PCI registers in adapter BAR4 to kernel VA space */
-	c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
-	c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
-				     kva_map_size);
-	if (!c2dev->kva) {
-		printk(KERN_ERR PFX "Unable to remap BAR4\n");
-		ret = -EIO;
-		goto bail9;
-	}
-
-	/* Print out the MAC address */
-	pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr,
-		 netdev->irq);
-
-	ret = c2_rnic_init(c2dev);
-	if (ret) {
-		printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
-		goto bail10;
-	}
-
-	ret = c2_register_device(c2dev);
-	if (ret)
-		goto bail10;
-
-	return 0;
-
- bail10:
-	iounmap(c2dev->kva);
-
- bail9:
-	iounmap(c2dev->regs);
-
- bail8:
-	iounmap(c2dev->mmio_txp_ring);
-
- bail7:
-	iounmap(c2dev->mmio_rxp_ring);
-
- bail6:
-	unregister_netdev(netdev);
-
- bail5:
-	free_netdev(netdev);
-
- bail4:
-	free_irq(pcidev->irq, c2dev);
-
- bail3:
-	ib_dealloc_device(&c2dev->ibdev);
-
- bail2:
-	pci_release_regions(pcidev);
-
- bail1:
-	pci_disable_device(pcidev);
-
- bail0:
-	return ret;
-}
-
-static void c2_remove(struct pci_dev *pcidev)
-{
-	struct c2_dev *c2dev = pci_get_drvdata(pcidev);
-	struct net_device *netdev = c2dev->netdev;
-
-	/* Unregister with OpenIB */
-	c2_unregister_device(c2dev);
-
-	/* Clean up the RNIC resources */
-	c2_rnic_term(c2dev);
-
-	/* Remove network device from the kernel */
-	unregister_netdev(netdev);
-
-	/* Free network device */
-	free_netdev(netdev);
-
-	/* Free the interrupt line */
-	free_irq(pcidev->irq, c2dev);
-
-	/* missing: Turn LEDs off here */
-
-	/* Unmap adapter PA space */
-	iounmap(c2dev->kva);
-	iounmap(c2dev->regs);
-	iounmap(c2dev->mmio_txp_ring);
-	iounmap(c2dev->mmio_rxp_ring);
-
-	/* Free the hardware structure */
-	ib_dealloc_device(&c2dev->ibdev);
-
-	/* Release reserved PCI I/O and memory resources */
-	pci_release_regions(pcidev);
-
-	/* Disable PCI device */
-	pci_disable_device(pcidev);
-
-	/* Clear driver specific data */
-	pci_set_drvdata(pcidev, NULL);
-}
-
-static struct pci_driver c2_pci_driver = {
-	.name = DRV_NAME,
-	.id_table = c2_pci_table,
-	.probe = c2_probe,
-	.remove = c2_remove,
-};
-
-module_pci_driver(c2_pci_driver);
diff --git a/drivers/staging/rdma/amso1100/c2.h b/drivers/staging/rdma/amso1100/c2.h
deleted file mode 100644
index 21b565a91fd6..000000000000
--- a/drivers/staging/rdma/amso1100/c2.h
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __C2_H
-#define __C2_H
-
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/idr.h>
-
-#include "c2_provider.h"
-#include "c2_mq.h"
-#include "c2_status.h"
-
-#define DRV_NAME     "c2"
-#define DRV_VERSION  "1.1"
-#define PFX          DRV_NAME ": "
-
-#define BAR_0                0
-#define BAR_2                2
-#define BAR_4                4
-
-#define RX_BUF_SIZE         (1536 + 8)
-#define ETH_JUMBO_MTU        9000
-#define C2_MAGIC            "CEPHEUS"
-#define C2_VERSION           4
-#define C2_IVN              (18 & 0x7fffffff)
-
-#define C2_REG0_SIZE        (16 * 1024)
-#define C2_REG2_SIZE        (2 * 1024 * 1024)
-#define C2_REG4_SIZE        (256 * 1024 * 1024)
-#define C2_NUM_TX_DESC       341
-#define C2_NUM_RX_DESC       256
-#define C2_PCI_REGS_OFFSET  (0x10000)
-#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
-#define C2_RXP_HRXDQ_SIZE   (4096)
-#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
-#define C2_TXP_HTXDQ_SIZE   (4096)
-#define C2_TX_TIMEOUT	    (6*HZ)
-
-/* CEPHEUS */
-static const u8 c2_magic[] = {
-	0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
-};
-
-enum adapter_pci_regs {
-	C2_REGS_MAGIC = 0x0000,
-	C2_REGS_VERS = 0x0008,
-	C2_REGS_IVN = 0x000C,
-	C2_REGS_PCI_WINSIZE = 0x0010,
-	C2_REGS_Q0_QSIZE = 0x0014,
-	C2_REGS_Q0_MSGSIZE = 0x0018,
-	C2_REGS_Q0_POOLSTART = 0x001C,
-	C2_REGS_Q0_SHARED = 0x0020,
-	C2_REGS_Q1_QSIZE = 0x0024,
-	C2_REGS_Q1_MSGSIZE = 0x0028,
-	C2_REGS_Q1_SHARED = 0x0030,
-	C2_REGS_Q2_QSIZE = 0x0034,
-	C2_REGS_Q2_MSGSIZE = 0x0038,
-	C2_REGS_Q2_SHARED = 0x0040,
-	C2_REGS_ENADDR = 0x004C,
-	C2_REGS_RDMA_ENADDR = 0x0054,
-	C2_REGS_HRX_CUR = 0x006C,
-};
-
-struct c2_adapter_pci_regs {
-	char reg_magic[8];
-	u32 version;
-	u32 ivn;
-	u32 pci_window_size;
-	u32 q0_q_size;
-	u32 q0_msg_size;
-	u32 q0_pool_start;
-	u32 q0_shared;
-	u32 q1_q_size;
-	u32 q1_msg_size;
-	u32 q1_pool_start;
-	u32 q1_shared;
-	u32 q2_q_size;
-	u32 q2_msg_size;
-	u32 q2_pool_start;
-	u32 q2_shared;
-	u32 log_start;
-	u32 log_size;
-	u8 host_enaddr[8];
-	u8 rdma_enaddr[8];
-	u32 crash_entry;
-	u32 crash_ready[2];
-	u32 fw_txd_cur;
-	u32 fw_hrxd_cur;
-	u32 fw_rxd_cur;
-};
-
-enum pci_regs {
-	C2_HISR = 0x0000,
-	C2_DISR = 0x0004,
-	C2_HIMR = 0x0008,
-	C2_DIMR = 0x000C,
-	C2_NISR0 = 0x0010,
-	C2_NISR1 = 0x0014,
-	C2_NIMR0 = 0x0018,
-	C2_NIMR1 = 0x001C,
-	C2_IDIS = 0x0020,
-};
-
-enum {
-	C2_PCI_HRX_INT = 1 << 8,
-	C2_PCI_HTX_INT = 1 << 17,
-	C2_PCI_HRX_QUI = 1 << 31,
-};
-
-/*
- * Cepheus registers in BAR0.
- */
-struct c2_pci_regs {
-	u32 hostisr;
-	u32 dmaisr;
-	u32 hostimr;
-	u32 dmaimr;
-	u32 netisr0;
-	u32 netisr1;
-	u32 netimr0;
-	u32 netimr1;
-	u32 int_disable;
-};
-
-/* TXP flags */
-enum c2_txp_flags {
-	TXP_HTXD_DONE = 0,
-	TXP_HTXD_READY = 1 << 0,
-	TXP_HTXD_UNINIT = 1 << 1,
-};
-
-/* RXP flags */
-enum c2_rxp_flags {
-	RXP_HRXD_UNINIT = 0,
-	RXP_HRXD_READY = 1 << 0,
-	RXP_HRXD_DONE = 1 << 1,
-};
-
-/* RXP status */
-enum c2_rxp_status {
-	RXP_HRXD_ZERO = 0,
-	RXP_HRXD_OK = 1 << 0,
-	RXP_HRXD_BUF_OV = 1 << 1,
-};
-
-/* TXP descriptor fields */
-enum txp_desc {
-	C2_TXP_FLAGS = 0x0000,
-	C2_TXP_LEN = 0x0002,
-	C2_TXP_ADDR = 0x0004,
-};
-
-/* RXP descriptor fields */
-enum rxp_desc {
-	C2_RXP_FLAGS = 0x0000,
-	C2_RXP_STATUS = 0x0002,
-	C2_RXP_COUNT = 0x0004,
-	C2_RXP_LEN = 0x0006,
-	C2_RXP_ADDR = 0x0008,
-};
-
-struct c2_txp_desc {
-	u16 flags;
-	u16 len;
-	u64 addr;
-} __attribute__ ((packed));
-
-struct c2_rxp_desc {
-	u16 flags;
-	u16 status;
-	u16 count;
-	u16 len;
-	u64 addr;
-} __attribute__ ((packed));
-
-struct c2_rxp_hdr {
-	u16 flags;
-	u16 status;
-	u16 len;
-	u16 rsvd;
-} __attribute__ ((packed));
-
-struct c2_tx_desc {
-	u32 len;
-	u32 status;
-	dma_addr_t next_offset;
-};
-
-struct c2_rx_desc {
-	u32 len;
-	u32 status;
-	dma_addr_t next_offset;
-};
-
-struct c2_alloc {
-	u32 last;
-	u32 max;
-	spinlock_t lock;
-	unsigned long *table;
-};
-
-struct c2_array {
-	struct {
-		void **page;
-		int used;
-	} *page_list;
-};
-
-/*
- * The MQ shared pointer pool is organized as a linked list of
- * chunks. Each chunk contains a linked list of free shared pointers
- * that can be allocated to a given user mode client.
- *
- */
-struct sp_chunk {
-	struct sp_chunk *next;
-	dma_addr_t dma_addr;
-	DEFINE_DMA_UNMAP_ADDR(mapping);
-	u16 head;
-	u16 shared_ptr[0];
-};
-
-struct c2_pd_table {
-	u32 last;
-	u32 max;
-	spinlock_t lock;
-	unsigned long *table;
-};
-
-struct c2_qp_table {
-	struct idr idr;
-	spinlock_t lock;
-};
-
-struct c2_element {
-	struct c2_element *next;
-	void *ht_desc;		/* host     descriptor */
-	void __iomem *hw_desc;	/* hardware descriptor */
-	struct sk_buff *skb;
-	dma_addr_t mapaddr;
-	u32 maplen;
-};
-
-struct c2_ring {
-	struct c2_element *to_clean;
-	struct c2_element *to_use;
-	struct c2_element *start;
-	unsigned long count;
-};
-
-struct c2_dev {
-	struct ib_device ibdev;
-	void __iomem *regs;
-	void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
-	void __iomem *mmio_rxp_ring;
-	spinlock_t lock;
-	struct pci_dev *pcidev;
-	struct net_device *netdev;
-	struct net_device *pseudo_netdev;
-	unsigned int cur_tx;
-	unsigned int cur_rx;
-	u32 adapter_handle;
-	int device_cap_flags;
-	void __iomem *kva;	/* KVA device memory */
-	unsigned long pa;	/* PA device memory */
-	void **qptr_array;
-
-	struct kmem_cache *host_msg_cache;
-
-	struct list_head cca_link;		/* adapter list */
-	struct list_head eh_wakeup_list;	/* event wakeup list */
-	wait_queue_head_t req_vq_wo;
-
-	/* Cached RNIC properties */
-	struct ib_device_attr props;
-
-	struct c2_pd_table pd_table;
-	struct c2_qp_table qp_table;
-	int ports;		/* num of GigE ports */
-	int devnum;
-	spinlock_t vqlock;	/* sync vbs req MQ */
-
-	/* Verbs Queues */
-	struct c2_mq req_vq;	/* Verbs Request MQ */
-	struct c2_mq rep_vq;	/* Verbs Reply MQ */
-	struct c2_mq aeq;	/* Async Events MQ */
-
-	/* Kernel client MQs */
-	struct sp_chunk *kern_mqsp_pool;
-
-	/* Device updates these values when posting messages to a host
-	 * target queue */
-	u16 req_vq_shared;
-	u16 rep_vq_shared;
-	u16 aeq_shared;
-	u16 irq_claimed;
-
-	/*
-	 * Shared host target pages for user-accessible MQs.
-	 */
-	int hthead;		/* index of first free entry */
-	void *htpages;		/* kernel vaddr */
-	int htlen;		/* length of htpages memory */
-	void *htuva;		/* user mapped vaddr */
-	spinlock_t htlock;	/* serialize allocation */
-
-	u64 adapter_hint_uva;	/* access to the activity FIFO */
-
-	//	spinlock_t aeq_lock;
-	//	spinlock_t rnic_lock;
-
-	__be16 *hint_count;
-	dma_addr_t hint_count_dma;
-	u16 hints_read;
-
-	int init;		/* TRUE if it's ready */
-	char ae_cache_name[16];
-	char vq_cache_name[16];
-};
-
-struct c2_port {
-	u32 msg_enable;
-	struct c2_dev *c2dev;
-	struct net_device *netdev;
-
-	spinlock_t tx_lock;
-	u32 tx_avail;
-	struct c2_ring tx_ring;
-	struct c2_ring rx_ring;
-
-	void *mem;		/* PCI memory for host rings */
-	dma_addr_t dma;
-	unsigned long mem_size;
-
-	u32 rx_buf_size;
-};
-
-/*
- * Activity FIFO registers in BAR0.
- */
-#define PCI_BAR0_HOST_HINT	0x100
-#define PCI_BAR0_ADAPTER_HINT	0x2000
-
-/*
- * Ammasso PCI vendor id and Cepheus PCI device id.
- */
-#define CQ_ARMED 	0x01
-#define CQ_WAIT_FOR_DMA	0x80
-
-/*
- * The format of a hint is as follows:
- * Lower 16 bits are the count of hints for the queue.
- * Next 15 bits are the qp_index
- * Upper most bit depends on who reads it:
- *    If read by producer, then it means Full (1) or Not-Full (0)
- *    If read by consumer, then it means Empty (1) or Not-Empty (0)
- */
-#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
-#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
-#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
-
-
-/*
- * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
- * struct.
- */
-#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
-
-#ifndef readq
-static inline u64 readq(const void __iomem * addr)
-{
-	u64 ret = readl(addr + 4);
-	ret <<= 32;
-	ret |= readl(addr);
-
-	return ret;
-}
-#endif
-
-#ifndef writeq
-static inline void __raw_writeq(u64 val, void __iomem * addr)
-{
-	__raw_writel((u32) (val), addr);
-	__raw_writel((u32) (val >> 32), (addr + 4));
-}
-#endif
-
-#define C2_SET_CUR_RX(c2dev, cur_rx) \
-	__raw_writel((__force u32) cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
-
-#define C2_GET_CUR_RX(c2dev) \
-	be32_to_cpu((__force __be32) readl(c2dev->mmio_txp_ring + 4092))
-
-static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
-{
-	return container_of(ibdev, struct c2_dev, ibdev);
-}
-
-static inline int c2_errno(void *reply)
-{
-	switch (c2_wr_get_result(reply)) {
-	case C2_OK:
-		return 0;
-	case CCERR_NO_BUFS:
-	case CCERR_INSUFFICIENT_RESOURCES:
-	case CCERR_ZERO_RDMA_READ_RESOURCES:
-		return -ENOMEM;
-	case CCERR_MR_IN_USE:
-	case CCERR_QP_IN_USE:
-		return -EBUSY;
-	case CCERR_ADDR_IN_USE:
-		return -EADDRINUSE;
-	case CCERR_ADDR_NOT_AVAIL:
-		return -EADDRNOTAVAIL;
-	case CCERR_CONN_RESET:
-		return -ECONNRESET;
-	case CCERR_NOT_IMPLEMENTED:
-	case CCERR_INVALID_WQE:
-		return -ENOSYS;
-	case CCERR_QP_NOT_PRIVILEGED:
-		return -EPERM;
-	case CCERR_STACK_ERROR:
-		return -EPROTO;
-	case CCERR_ACCESS_VIOLATION:
-	case CCERR_BASE_AND_BOUNDS_VIOLATION:
-		return -EFAULT;
-	case CCERR_STAG_STATE_NOT_INVALID:
-	case CCERR_INVALID_ADDRESS:
-	case CCERR_INVALID_CQ:
-	case CCERR_INVALID_EP:
-	case CCERR_INVALID_MODIFIER:
-	case CCERR_INVALID_MTU:
-	case CCERR_INVALID_PD_ID:
-	case CCERR_INVALID_QP:
-	case CCERR_INVALID_RNIC:
-	case CCERR_INVALID_STAG:
-		return -EINVAL;
-	default:
-		return -EAGAIN;
-	}
-}
-
-/* Device */
-int c2_register_device(struct c2_dev *c2dev);
-void c2_unregister_device(struct c2_dev *c2dev);
-int c2_rnic_init(struct c2_dev *c2dev);
-void c2_rnic_term(struct c2_dev *c2dev);
-void c2_rnic_interrupt(struct c2_dev *c2dev);
-int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
-int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
-
-/* QPs */
-int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
-		       struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
-void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
-struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
-int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
-			struct ib_qp_attr *attr, int attr_mask);
-int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
-				 int ord, int ird);
-int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-			struct ib_send_wr **bad_wr);
-int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-			   struct ib_recv_wr **bad_wr);
-void c2_init_qp_table(struct c2_dev *c2dev);
-void c2_cleanup_qp_table(struct c2_dev *c2dev);
-void c2_set_qp_state(struct c2_qp *, int);
-struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
-
-/* PDs */
-int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
-void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
-int c2_init_pd_table(struct c2_dev *c2dev);
-void c2_cleanup_pd_table(struct c2_dev *c2dev);
-
-/* CQs */
-int c2_init_cq(struct c2_dev *c2dev, int entries,
-		      struct c2_ucontext *ctx, struct c2_cq *cq);
-void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
-void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
-void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
-int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
-
-/* CM */
-int c2_llp_connect(struct iw_cm_id *cm_id,
-			  struct iw_cm_conn_param *iw_param);
-int c2_llp_accept(struct iw_cm_id *cm_id,
-			 struct iw_cm_conn_param *iw_param);
-int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
-			 u8 pdata_len);
-int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
-int c2_llp_service_destroy(struct iw_cm_id *cm_id);
-
-/* MM */
-int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
- 				      int page_size, int pbl_depth, u32 length,
- 				      u32 off, u64 *va, enum c2_acf acf,
-				      struct c2_mr *mr);
-int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
-
-/* AE */
-void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
-
-/* MQSP Allocator */
-int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
-			     struct sp_chunk **root);
-void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
-__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-			     dma_addr_t *dma_addr, gfp_t gfp_mask);
-void c2_free_mqsp(__be16* mqsp);
-#endif
diff --git a/drivers/staging/rdma/amso1100/c2_ae.c b/drivers/staging/rdma/amso1100/c2_ae.c
deleted file mode 100644
index eb7a92b2692f..000000000000
--- a/drivers/staging/rdma/amso1100/c2_ae.c
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include <rdma/iw_cm.h>
-#include "c2_status.h"
-#include "c2_ae.h"
-
-static int c2_convert_cm_status(u32 c2_status)
-{
-	switch (c2_status) {
-	case C2_CONN_STATUS_SUCCESS:
-		return 0;
-	case C2_CONN_STATUS_REJECTED:
-		return -ENETRESET;
-	case C2_CONN_STATUS_REFUSED:
-		return -ECONNREFUSED;
-	case C2_CONN_STATUS_TIMEDOUT:
-		return -ETIMEDOUT;
-	case C2_CONN_STATUS_NETUNREACH:
-		return -ENETUNREACH;
-	case C2_CONN_STATUS_HOSTUNREACH:
-		return -EHOSTUNREACH;
-	case C2_CONN_STATUS_INVALID_RNIC:
-		return -EINVAL;
-	case C2_CONN_STATUS_INVALID_QP:
-		return -EINVAL;
-	case C2_CONN_STATUS_INVALID_QP_STATE:
-		return -EINVAL;
-	case C2_CONN_STATUS_ADDR_NOT_AVAIL:
-		return -EADDRNOTAVAIL;
-	default:
-		printk(KERN_ERR PFX
-		       "%s - Unable to convert CM status: %d\n",
-		       __func__, c2_status);
-		return -EIO;
-	}
-}
-
-static const char* to_event_str(int event)
-{
-	static const char* event_str[] = {
-		"CCAE_REMOTE_SHUTDOWN",
-		"CCAE_ACTIVE_CONNECT_RESULTS",
-		"CCAE_CONNECTION_REQUEST",
-		"CCAE_LLP_CLOSE_COMPLETE",
-		"CCAE_TERMINATE_MESSAGE_RECEIVED",
-		"CCAE_LLP_CONNECTION_RESET",
-		"CCAE_LLP_CONNECTION_LOST",
-		"CCAE_LLP_SEGMENT_SIZE_INVALID",
-		"CCAE_LLP_INVALID_CRC",
-		"CCAE_LLP_BAD_FPDU",
-		"CCAE_INVALID_DDP_VERSION",
-		"CCAE_INVALID_RDMA_VERSION",
-		"CCAE_UNEXPECTED_OPCODE",
-		"CCAE_INVALID_DDP_QUEUE_NUMBER",
-		"CCAE_RDMA_READ_NOT_ENABLED",
-		"CCAE_RDMA_WRITE_NOT_ENABLED",
-		"CCAE_RDMA_READ_TOO_SMALL",
-		"CCAE_NO_L_BIT",
-		"CCAE_TAGGED_INVALID_STAG",
-		"CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
-		"CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
-		"CCAE_TAGGED_INVALID_PD",
-		"CCAE_WRAP_ERROR",
-		"CCAE_BAD_CLOSE",
-		"CCAE_BAD_LLP_CLOSE",
-		"CCAE_INVALID_MSN_RANGE",
-		"CCAE_INVALID_MSN_GAP",
-		"CCAE_IRRQ_OVERFLOW",
-		"CCAE_IRRQ_MSN_GAP",
-		"CCAE_IRRQ_MSN_RANGE",
-		"CCAE_IRRQ_INVALID_STAG",
-		"CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
-		"CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
-		"CCAE_IRRQ_INVALID_PD",
-		"CCAE_IRRQ_WRAP_ERROR",
-		"CCAE_CQ_SQ_COMPLETION_OVERFLOW",
-		"CCAE_CQ_RQ_COMPLETION_ERROR",
-		"CCAE_QP_SRQ_WQE_ERROR",
-		"CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
-		"CCAE_CQ_OVERFLOW",
-		"CCAE_CQ_OPERATION_ERROR",
-		"CCAE_SRQ_LIMIT_REACHED",
-		"CCAE_QP_RQ_LIMIT_REACHED",
-		"CCAE_SRQ_CATASTROPHIC_ERROR",
-		"CCAE_RNIC_CATASTROPHIC_ERROR"
-	};
-
-	if (event < CCAE_REMOTE_SHUTDOWN ||
-	    event > CCAE_RNIC_CATASTROPHIC_ERROR)
-		return "<invalid event>";
-
-	event -= CCAE_REMOTE_SHUTDOWN;
-	return event_str[event];
-}
-
-static const char *to_qp_state_str(int state)
-{
-	switch (state) {
-	case C2_QP_STATE_IDLE:
-		return "C2_QP_STATE_IDLE";
-	case C2_QP_STATE_CONNECTING:
-		return "C2_QP_STATE_CONNECTING";
-	case C2_QP_STATE_RTS:
-		return "C2_QP_STATE_RTS";
-	case C2_QP_STATE_CLOSING:
-		return "C2_QP_STATE_CLOSING";
-	case C2_QP_STATE_TERMINATE:
-		return "C2_QP_STATE_TERMINATE";
-	case C2_QP_STATE_ERROR:
-		return "C2_QP_STATE_ERROR";
-	default:
-		return "<invalid QP state>";
-	}
-}
-
-void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
-{
-	struct c2_mq *mq = c2dev->qptr_array[mq_index];
-	union c2wr *wr;
-	void *resource_user_context;
-	struct iw_cm_event cm_event;
-	struct ib_event ib_event;
-	enum c2_resource_indicator resource_indicator;
-	enum c2_event_id event_id;
-	unsigned long flags;
-	int status;
-	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_event.local_addr;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_event.remote_addr;
-
-	/*
-	 * retrieve the message
-	 */
-	wr = c2_mq_consume(mq);
-	if (!wr)
-		return;
-
-	memset(&ib_event, 0, sizeof(ib_event));
-	memset(&cm_event, 0, sizeof(cm_event));
-
-	event_id = c2_wr_get_id(wr);
-	resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
-	resource_user_context =
-	    (void *) (unsigned long) wr->ae.ae_generic.user_context;
-
-	status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
-
-	pr_debug("event received c2_dev=%p, event_id=%d, "
-		"resource_indicator=%d, user_context=%p, status = %d\n",
-		c2dev, event_id, resource_indicator, resource_user_context,
-		status);
-
-	switch (resource_indicator) {
-	case C2_RES_IND_QP:{
-
-		struct c2_qp *qp = resource_user_context;
-		struct iw_cm_id *cm_id = qp->cm_id;
-		struct c2wr_ae_active_connect_results *res;
-
-		if (!cm_id) {
-			pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
-				qp);
-			goto ignore_it;
-		}
-		pr_debug("%s: event = %s, user_context=%llx, "
-			"resource_type=%x, "
-			"resource=%x, qp_state=%s\n",
-			__func__,
-			to_event_str(event_id),
-			(unsigned long long) wr->ae.ae_generic.user_context,
-			be32_to_cpu(wr->ae.ae_generic.resource_type),
-			be32_to_cpu(wr->ae.ae_generic.resource),
-			to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
-
-		c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
-
-		switch (event_id) {
-		case CCAE_ACTIVE_CONNECT_RESULTS:
-			res = &wr->ae.ae_active_connect_results;
-			cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
-			laddr->sin_addr.s_addr = res->laddr;
-			raddr->sin_addr.s_addr = res->raddr;
-			laddr->sin_port = res->lport;
-			raddr->sin_port = res->rport;
-			if (status == 0) {
-				cm_event.private_data_len =
-					be32_to_cpu(res->private_data_length);
-				cm_event.private_data = res->private_data;
-			} else {
-				spin_lock_irqsave(&qp->lock, flags);
-				if (qp->cm_id) {
-					qp->cm_id->rem_ref(qp->cm_id);
-					qp->cm_id = NULL;
-				}
-				spin_unlock_irqrestore(&qp->lock, flags);
-				cm_event.private_data_len = 0;
-				cm_event.private_data = NULL;
-			}
-			if (cm_id->event_handler)
-				cm_id->event_handler(cm_id, &cm_event);
-			break;
-		case CCAE_TERMINATE_MESSAGE_RECEIVED:
-		case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
-			ib_event.device = &c2dev->ibdev;
-			ib_event.element.qp = &qp->ibqp;
-			ib_event.event = IB_EVENT_QP_REQ_ERR;
-
-			if (qp->ibqp.event_handler)
-				qp->ibqp.event_handler(&ib_event,
-						       qp->ibqp.
-						       qp_context);
-			break;
-		case CCAE_BAD_CLOSE:
-		case CCAE_LLP_CLOSE_COMPLETE:
-		case CCAE_LLP_CONNECTION_RESET:
-		case CCAE_LLP_CONNECTION_LOST:
-			BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
-
-			spin_lock_irqsave(&qp->lock, flags);
-			if (qp->cm_id) {
-				qp->cm_id->rem_ref(qp->cm_id);
-				qp->cm_id = NULL;
-			}
-			spin_unlock_irqrestore(&qp->lock, flags);
-			cm_event.event = IW_CM_EVENT_CLOSE;
-			cm_event.status = 0;
-			if (cm_id->event_handler)
-				cm_id->event_handler(cm_id, &cm_event);
-			break;
-		default:
-			BUG_ON(1);
-			pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
-				"CM_ID=%p\n",
-				__func__, __LINE__,
-				event_id, qp, cm_id);
-			break;
-		}
-		break;
-	}
-
-	case C2_RES_IND_EP:{
-
-		struct c2wr_ae_connection_request *req =
-			&wr->ae.ae_connection_request;
-		struct iw_cm_id *cm_id =
-			resource_user_context;
-
-		pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
-		if (event_id != CCAE_CONNECTION_REQUEST) {
-			pr_debug("%s: Invalid event_id: %d\n",
-				__func__, event_id);
-			break;
-		}
-		cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
-		cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
-		laddr->sin_addr.s_addr = req->laddr;
-		raddr->sin_addr.s_addr = req->raddr;
-		laddr->sin_port = req->lport;
-		raddr->sin_port = req->rport;
-		cm_event.private_data_len =
-			be32_to_cpu(req->private_data_length);
-		cm_event.private_data = req->private_data;
-		/*
-		 * Until ird/ord negotiation via MPAv2 support is added, send
-		 * max supported values
-		 */
-		cm_event.ird = cm_event.ord = 128;
-
-		if (cm_id->event_handler)
-			cm_id->event_handler(cm_id, &cm_event);
-		break;
-	}
-
-	case C2_RES_IND_CQ:{
-		struct c2_cq *cq =
-		    resource_user_context;
-
-		pr_debug("IB_EVENT_CQ_ERR\n");
-		ib_event.device = &c2dev->ibdev;
-		ib_event.element.cq = &cq->ibcq;
-		ib_event.event = IB_EVENT_CQ_ERR;
-
-		if (cq->ibcq.event_handler)
-			cq->ibcq.event_handler(&ib_event,
-					       cq->ibcq.cq_context);
-		break;
-	}
-
-	default:
-		printk("Bad resource indicator = %d\n",
-		       resource_indicator);
-		break;
-	}
-
- ignore_it:
-	c2_mq_free(mq);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_ae.h b/drivers/staging/rdma/amso1100/c2_ae.h
deleted file mode 100644
index 3a065c33b83b..000000000000
--- a/drivers/staging/rdma/amso1100/c2_ae.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_AE_H_
-#define _C2_AE_H_
-
-/*
- * WARNING: If you change this file, also bump C2_IVN_BASE
- * in common/include/clustercore/c2_ivn.h.
- */
-
-/*
- * Asynchronous Event Identifiers
- *
- * These start at 0x80 only so it's obvious from inspection that
- * they are not work-request statuses.  This isn't critical.
- *
- * NOTE: these event id's must fit in eight bits.
- */
-enum c2_event_id {
-	CCAE_REMOTE_SHUTDOWN = 0x80,
-	CCAE_ACTIVE_CONNECT_RESULTS,
-	CCAE_CONNECTION_REQUEST,
-	CCAE_LLP_CLOSE_COMPLETE,
-	CCAE_TERMINATE_MESSAGE_RECEIVED,
-	CCAE_LLP_CONNECTION_RESET,
-	CCAE_LLP_CONNECTION_LOST,
-	CCAE_LLP_SEGMENT_SIZE_INVALID,
-	CCAE_LLP_INVALID_CRC,
-	CCAE_LLP_BAD_FPDU,
-	CCAE_INVALID_DDP_VERSION,
-	CCAE_INVALID_RDMA_VERSION,
-	CCAE_UNEXPECTED_OPCODE,
-	CCAE_INVALID_DDP_QUEUE_NUMBER,
-	CCAE_RDMA_READ_NOT_ENABLED,
-	CCAE_RDMA_WRITE_NOT_ENABLED,
-	CCAE_RDMA_READ_TOO_SMALL,
-	CCAE_NO_L_BIT,
-	CCAE_TAGGED_INVALID_STAG,
-	CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
-	CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
-	CCAE_TAGGED_INVALID_PD,
-	CCAE_WRAP_ERROR,
-	CCAE_BAD_CLOSE,
-	CCAE_BAD_LLP_CLOSE,
-	CCAE_INVALID_MSN_RANGE,
-	CCAE_INVALID_MSN_GAP,
-	CCAE_IRRQ_OVERFLOW,
-	CCAE_IRRQ_MSN_GAP,
-	CCAE_IRRQ_MSN_RANGE,
-	CCAE_IRRQ_INVALID_STAG,
-	CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
-	CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
-	CCAE_IRRQ_INVALID_PD,
-	CCAE_IRRQ_WRAP_ERROR,
-	CCAE_CQ_SQ_COMPLETION_OVERFLOW,
-	CCAE_CQ_RQ_COMPLETION_ERROR,
-	CCAE_QP_SRQ_WQE_ERROR,
-	CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
-	CCAE_CQ_OVERFLOW,
-	CCAE_CQ_OPERATION_ERROR,
-	CCAE_SRQ_LIMIT_REACHED,
-	CCAE_QP_RQ_LIMIT_REACHED,
-	CCAE_SRQ_CATASTROPHIC_ERROR,
-	CCAE_RNIC_CATASTROPHIC_ERROR
-/* WARNING If you add more id's, make sure their values fit in eight bits. */
-};
-
-/*
- * Resource Indicators and Identifiers
- */
-enum c2_resource_indicator {
-	C2_RES_IND_QP = 1,
-	C2_RES_IND_EP,
-	C2_RES_IND_CQ,
-	C2_RES_IND_SRQ,
-};
-
-#endif /* _C2_AE_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_alloc.c b/drivers/staging/rdma/amso1100/c2_alloc.c
deleted file mode 100644
index 039872dfabbc..000000000000
--- a/drivers/staging/rdma/amso1100/c2_alloc.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/bitmap.h>
-
-#include "c2.h"
-
-static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
-			       struct sp_chunk **head)
-{
-	int i;
-	struct sp_chunk *new_head;
-	dma_addr_t dma_addr;
-
-	new_head = dma_alloc_coherent(&c2dev->pcidev->dev, PAGE_SIZE,
-				      &dma_addr, gfp_mask);
-	if (new_head == NULL)
-		return -ENOMEM;
-
-	new_head->dma_addr = dma_addr;
-	dma_unmap_addr_set(new_head, mapping, new_head->dma_addr);
-
-	new_head->next = NULL;
-	new_head->head = 0;
-
-	/* build list where each index is the next free slot */
-	for (i = 0;
-	     i < (PAGE_SIZE - sizeof(struct sp_chunk) -
-		  sizeof(u16)) / sizeof(u16) - 1;
-	     i++) {
-		new_head->shared_ptr[i] = i + 1;
-	}
-	/* terminate list */
-	new_head->shared_ptr[i] = 0xFFFF;
-
-	*head = new_head;
-	return 0;
-}
-
-int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
-		      struct sp_chunk **root)
-{
-	return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
-}
-
-void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
-{
-	struct sp_chunk *next;
-
-	while (root) {
-		next = root->next;
-		dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root,
-				  dma_unmap_addr(root, mapping));
-		root = next;
-	}
-}
-
-__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-		      dma_addr_t *dma_addr, gfp_t gfp_mask)
-{
-	u16 mqsp;
-
-	while (head) {
-		mqsp = head->head;
-		if (mqsp != 0xFFFF) {
-			head->head = head->shared_ptr[mqsp];
-			break;
-		} else if (head->next == NULL) {
-			if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
-			    0) {
-				head = head->next;
-				mqsp = head->head;
-				head->head = head->shared_ptr[mqsp];
-				break;
-			} else
-				return NULL;
-		} else
-			head = head->next;
-	}
-	if (head) {
-		*dma_addr = head->dma_addr +
-			    ((unsigned long) &(head->shared_ptr[mqsp]) -
-			     (unsigned long) head);
-		pr_debug("%s addr %p dma_addr %llx\n", __func__,
-			 &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
-		return (__force __be16 *) &(head->shared_ptr[mqsp]);
-	}
-	return NULL;
-}
-
-void c2_free_mqsp(__be16 *mqsp)
-{
-	struct sp_chunk *head;
-	u16 idx;
-
-	/* The chunk containing this ptr begins at the page boundary */
-	head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
-
-	/* Link head to new mqsp */
-	*mqsp = (__force __be16) head->head;
-
-	/* Compute the shared_ptr index */
-	idx = (offset_in_page(mqsp)) >> 1;
-	idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
-
-	/* Point this index at the head */
-	head->shared_ptr[idx] = head->head;
-
-	/* Point head at this index */
-	head->head = idx;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_cm.c b/drivers/staging/rdma/amso1100/c2_cm.c
deleted file mode 100644
index f8dbdb9e0f66..000000000000
--- a/drivers/staging/rdma/amso1100/c2_cm.c
+++ /dev/null
@@ -1,458 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#include <linux/slab.h>
-
-#include "c2.h"
-#include "c2_wr.h"
-#include "c2_vq.h"
-#include <rdma/iw_cm.h>
-
-int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-	struct c2_dev *c2dev = to_c2dev(cm_id->device);
-	struct ib_qp *ibqp;
-	struct c2_qp *qp;
-	struct c2wr_qp_connect_req *wr;	/* variable size needs a malloc. */
-	struct c2_vq_req *vq_req;
-	int err;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
-
-	if (cm_id->remote_addr.ss_family != AF_INET)
-		return -ENOSYS;
-
-	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
-	if (!ibqp)
-		return -EINVAL;
-	qp = to_c2qp(ibqp);
-
-	/* Associate QP <--> CM_ID */
-	cm_id->provider_data = qp;
-	cm_id->add_ref(cm_id);
-	qp->cm_id = cm_id;
-
-	/*
-	 * only support the max private_data length
-	 */
-	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
-		err = -EINVAL;
-		goto bail0;
-	}
-	/*
-	 * Set the rdma read limits
-	 */
-	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
-	if (err)
-		goto bail0;
-
-	/*
-	 * Create and send a WR_QP_CONNECT...
-	 */
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	c2_wr_set_id(wr, CCWR_QP_CONNECT);
-	wr->hdr.context = 0;
-	wr->rnic_handle = c2dev->adapter_handle;
-	wr->qp_handle = qp->adapter_handle;
-
-	wr->remote_addr = raddr->sin_addr.s_addr;
-	wr->remote_port = raddr->sin_port;
-
-	/*
-	 * Move any private data from the callers's buf into
-	 * the WR.
-	 */
-	if (iw_param->private_data) {
-		wr->private_data_length =
-			cpu_to_be32(iw_param->private_data_len);
-		memcpy(&wr->private_data[0], iw_param->private_data,
-		       iw_param->private_data_len);
-	} else
-		wr->private_data_length = 0;
-
-	/*
-	 * Send WR to adapter.  NOTE: There is no synch reply from
-	 * the adapter.
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) wr);
-	vq_req_free(c2dev, vq_req);
-
- bail1:
-	kfree(wr);
- bail0:
-	if (err) {
-		/*
-		 * If we fail, release reference on QP and
-		 * disassociate QP from CM_ID
-		 */
-		cm_id->provider_data = NULL;
-		qp->cm_id = NULL;
-		cm_id->rem_ref(cm_id);
-	}
-	return err;
-}
-
-int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
-{
-	struct c2_dev *c2dev;
-	struct c2wr_ep_listen_create_req wr;
-	struct c2wr_ep_listen_create_rep *reply;
-	struct c2_vq_req *vq_req;
-	int err;
-	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
-
-	if (cm_id->local_addr.ss_family != AF_INET)
-		return -ENOSYS;
-
-	c2dev = to_c2dev(cm_id->device);
-	if (c2dev == NULL)
-		return -EINVAL;
-
-	/*
-	 * Allocate verbs request.
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	/*
-	 * Build the WR
-	 */
-	c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
-	wr.hdr.context = (u64) (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.local_addr = laddr->sin_addr.s_addr;
-	wr.local_port = laddr->sin_port;
-	wr.backlog = cpu_to_be32(backlog);
-	wr.user_context = (u64) (unsigned long) cm_id;
-
-	/*
-	 * Reference the request struct.  Dereferenced in the int handler.
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	/*
-	 * Send WR to adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	/*
-	 * Wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail0;
-
-	/*
-	 * Process reply
-	 */
-	reply =
-	    (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	if ((err = c2_errno(reply)) != 0)
-		goto bail1;
-
-	/*
-	 * Keep the adapter handle. Used in subsequent destroy
-	 */
-	cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
-
-	/*
-	 * free vq stuff
-	 */
-	vq_repbuf_free(c2dev, reply);
-	vq_req_free(c2dev, vq_req);
-
-	return 0;
-
- bail1:
-	vq_repbuf_free(c2dev, reply);
- bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-
-int c2_llp_service_destroy(struct iw_cm_id *cm_id)
-{
-
-	struct c2_dev *c2dev;
-	struct c2wr_ep_listen_destroy_req wr;
-	struct c2wr_ep_listen_destroy_rep *reply;
-	struct c2_vq_req *vq_req;
-	int err;
-
-	c2dev = to_c2dev(cm_id->device);
-	if (c2dev == NULL)
-		return -EINVAL;
-
-	/*
-	 * Allocate verbs request.
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	/*
-	 * Build the WR
-	 */
-	c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
-
-	/*
-	 * reference the request struct.  dereferenced in the int handler.
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	/*
-	 * Send WR to adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	/*
-	 * Wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail0;
-
-	/*
-	 * Process reply
-	 */
-	reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	vq_repbuf_free(c2dev, reply);
- bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-	struct c2_dev *c2dev = to_c2dev(cm_id->device);
-	struct c2_qp *qp;
-	struct ib_qp *ibqp;
-	struct c2wr_cr_accept_req *wr;	/* variable length WR */
-	struct c2_vq_req *vq_req;
-	struct c2wr_cr_accept_rep *reply;	/* VQ Reply msg ptr. */
-	int err;
-
-	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
-	if (!ibqp)
-		return -EINVAL;
-	qp = to_c2qp(ibqp);
-
-	/* Set the RDMA read limits */
-	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
-	if (err)
-		goto bail0;
-
-	/* Allocate verbs request. */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-	vq_req->qp = qp;
-	vq_req->cm_id = cm_id;
-	vq_req->event = IW_CM_EVENT_ESTABLISHED;
-
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	/* Build the WR */
-	c2_wr_set_id(wr, CCWR_CR_ACCEPT);
-	wr->hdr.context = (unsigned long) vq_req;
-	wr->rnic_handle = c2dev->adapter_handle;
-	wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
-	wr->qp_handle = qp->adapter_handle;
-
-	/* Replace the cr_handle with the QP after accept */
-	cm_id->provider_data = qp;
-	cm_id->add_ref(cm_id);
-	qp->cm_id = cm_id;
-
-	cm_id->provider_data = qp;
-
-	/* Validate private_data length */
-	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
-		err = -EINVAL;
-		goto bail1;
-	}
-
-	if (iw_param->private_data) {
-		wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
-		memcpy(&wr->private_data[0],
-		       iw_param->private_data, iw_param->private_data_len);
-	} else
-		wr->private_data_length = 0;
-
-	/* Reference the request struct.  Dereferenced in the int handler. */
-	vq_req_get(c2dev, vq_req);
-
-	/* Send WR to adapter */
-	err = vq_send_wr(c2dev, (union c2wr *) wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	/* Wait for reply from adapter */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail1;
-
-	/* Check that reply is present */
-	reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	err = c2_errno(reply);
-	vq_repbuf_free(c2dev, reply);
-
-	if (!err)
-		c2_set_qp_state(qp, C2_QP_STATE_RTS);
- bail1:
-	kfree(wr);
-	vq_req_free(c2dev, vq_req);
- bail0:
-	if (err) {
-		/*
-		 * If we fail, release reference on QP and
-		 * disassociate QP from CM_ID
-		 */
-		cm_id->provider_data = NULL;
-		qp->cm_id = NULL;
-		cm_id->rem_ref(cm_id);
-	}
-	return err;
-}
-
-int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
-	struct c2_dev *c2dev;
-	struct c2wr_cr_reject_req wr;
-	struct c2_vq_req *vq_req;
-	struct c2wr_cr_reject_rep *reply;
-	int err;
-
-	c2dev = to_c2dev(cm_id->device);
-
-	/*
-	 * Allocate verbs request.
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	/*
-	 * Build the WR
-	 */
-	c2_wr_set_id(&wr, CCWR_CR_REJECT);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
-
-	/*
-	 * reference the request struct.  dereferenced in the int handler.
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	/*
-	 * Send WR to adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	/*
-	 * Wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail0;
-
-	/*
-	 * Process reply
-	 */
-	reply = (struct c2wr_cr_reject_rep *) (unsigned long)
-		vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-	err = c2_errno(reply);
-	/*
-	 * free vq stuff
-	 */
-	vq_repbuf_free(c2dev, reply);
-
- bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_cq.c b/drivers/staging/rdma/amso1100/c2_cq.c
deleted file mode 100644
index 7ad0c082485a..000000000000
--- a/drivers/staging/rdma/amso1100/c2_cq.c
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#include <linux/gfp.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-#include "c2_status.h"
-
-#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
-
-static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
-{
-	struct c2_cq *cq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&c2dev->lock, flags);
-	cq = c2dev->qptr_array[cqn];
-	if (!cq) {
-		spin_unlock_irqrestore(&c2dev->lock, flags);
-		return NULL;
-	}
-	atomic_inc(&cq->refcount);
-	spin_unlock_irqrestore(&c2dev->lock, flags);
-	return cq;
-}
-
-static void c2_cq_put(struct c2_cq *cq)
-{
-	if (atomic_dec_and_test(&cq->refcount))
-		wake_up(&cq->wait);
-}
-
-void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
-{
-	struct c2_cq *cq;
-
-	cq = c2_cq_get(c2dev, mq_index);
-	if (!cq) {
-		printk("discarding events on destroyed CQN=%d\n", mq_index);
-		return;
-	}
-
-	(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
-	c2_cq_put(cq);
-}
-
-void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
-{
-	struct c2_cq *cq;
-	struct c2_mq *q;
-
-	cq = c2_cq_get(c2dev, mq_index);
-	if (!cq)
-		return;
-
-	spin_lock_irq(&cq->lock);
-	q = &cq->mq;
-	if (q && !c2_mq_empty(q)) {
-		u16 priv = q->priv;
-		struct c2wr_ce *msg;
-
-		while (priv != be16_to_cpu(*q->shared)) {
-			msg = (struct c2wr_ce *)
-				(q->msg_pool.host + priv * q->msg_size);
-			if (msg->qp_user_context == (u64) (unsigned long) qp) {
-				msg->qp_user_context = (u64) 0;
-			}
-			priv = (priv + 1) % q->q_size;
-		}
-	}
-	spin_unlock_irq(&cq->lock);
-	c2_cq_put(cq);
-}
-
-static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
-{
-	switch (status) {
-	case C2_OK:
-		return IB_WC_SUCCESS;
-	case CCERR_FLUSHED:
-		return IB_WC_WR_FLUSH_ERR;
-	case CCERR_BASE_AND_BOUNDS_VIOLATION:
-		return IB_WC_LOC_PROT_ERR;
-	case CCERR_ACCESS_VIOLATION:
-		return IB_WC_LOC_ACCESS_ERR;
-	case CCERR_TOTAL_LENGTH_TOO_BIG:
-		return IB_WC_LOC_LEN_ERR;
-	case CCERR_INVALID_WINDOW:
-		return IB_WC_MW_BIND_ERR;
-	default:
-		return IB_WC_GENERAL_ERR;
-	}
-}
-
-
-static inline int c2_poll_one(struct c2_dev *c2dev,
-			      struct c2_cq *cq, struct ib_wc *entry)
-{
-	struct c2wr_ce *ce;
-	struct c2_qp *qp;
-	int is_recv = 0;
-
-	ce = c2_mq_consume(&cq->mq);
-	if (!ce) {
-		return -EAGAIN;
-	}
-
-	/*
-	 * if the qp returned is null then this qp has already
-	 * been freed and we are unable process the completion.
-	 * try pulling the next message
-	 */
-	while ((qp =
-		(struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
-		c2_mq_free(&cq->mq);
-		ce = c2_mq_consume(&cq->mq);
-		if (!ce)
-			return -EAGAIN;
-	}
-
-	entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
-	entry->wr_id = ce->hdr.context;
-	entry->qp = &qp->ibqp;
-	entry->wc_flags = 0;
-	entry->slid = 0;
-	entry->sl = 0;
-	entry->src_qp = 0;
-	entry->dlid_path_bits = 0;
-	entry->pkey_index = 0;
-
-	switch (c2_wr_get_id(ce)) {
-	case C2_WR_TYPE_SEND:
-		entry->opcode = IB_WC_SEND;
-		break;
-	case C2_WR_TYPE_RDMA_WRITE:
-		entry->opcode = IB_WC_RDMA_WRITE;
-		break;
-	case C2_WR_TYPE_RDMA_READ:
-		entry->opcode = IB_WC_RDMA_READ;
-		break;
-	case C2_WR_TYPE_RECV:
-		entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
-		entry->opcode = IB_WC_RECV;
-		is_recv = 1;
-		break;
-	default:
-		break;
-	}
-
-	/* consume the WQEs */
-	if (is_recv)
-		c2_mq_lconsume(&qp->rq_mq, 1);
-	else
-		c2_mq_lconsume(&qp->sq_mq,
-			       be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
-
-	/* free the message */
-	c2_mq_free(&cq->mq);
-
-	return 0;
-}
-
-int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-	struct c2_dev *c2dev = to_c2dev(ibcq->device);
-	struct c2_cq *cq = to_c2cq(ibcq);
-	unsigned long flags;
-	int npolled, err;
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	for (npolled = 0; npolled < num_entries; ++npolled) {
-
-		err = c2_poll_one(c2dev, cq, entry + npolled);
-		if (err)
-			break;
-	}
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	return npolled;
-}
-
-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-	struct c2_mq_shared __iomem *shared;
-	struct c2_cq *cq;
-	unsigned long flags;
-	int ret = 0;
-
-	cq = to_c2cq(ibcq);
-	shared = cq->mq.peer;
-
-	if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
-		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
-	else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
-		writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
-	else
-		return -EINVAL;
-
-	writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
-
-	/*
-	 * Now read back shared->armed to make the PCI
-	 * write synchronous.  This is necessary for
-	 * correct cq notification semantics.
-	 */
-	readb(&shared->armed);
-
-	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
-		spin_lock_irqsave(&cq->lock, flags);
-		ret = !c2_mq_empty(&cq->mq);
-		spin_unlock_irqrestore(&cq->lock, flags);
-	}
-
-	return ret;
-}
-
-static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
-{
-	dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size,
-			  mq->msg_pool.host, dma_unmap_addr(mq, mapping));
-}
-
-static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
-			   size_t q_size, size_t msg_size)
-{
-	u8 *pool_start;
-
-	if (q_size > SIZE_MAX / msg_size)
-		return -EINVAL;
-
-	pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
-					&mq->host_dma, GFP_KERNEL);
-	if (!pool_start)
-		return -ENOMEM;
-
-	c2_mq_rep_init(mq,
-		       0,		/* index (currently unknown) */
-		       q_size,
-		       msg_size,
-		       pool_start,
-		       NULL,	/* peer (currently unknown) */
-		       C2_MQ_HOST_TARGET);
-
-	dma_unmap_addr_set(mq, mapping, mq->host_dma);
-
-	return 0;
-}
-
-int c2_init_cq(struct c2_dev *c2dev, int entries,
-	       struct c2_ucontext *ctx, struct c2_cq *cq)
-{
-	struct c2wr_cq_create_req wr;
-	struct c2wr_cq_create_rep *reply;
-	unsigned long peer_pa;
-	struct c2_vq_req *vq_req;
-	int err;
-
-	might_sleep();
-
-	cq->ibcq.cqe = entries - 1;
-	cq->is_kernel = !ctx;
-
-	/* Allocate a shared pointer */
-	cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-				      &cq->mq.shared_dma, GFP_KERNEL);
-	if (!cq->mq.shared)
-		return -ENOMEM;
-
-	/* Allocate pages for the message pool */
-	err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
-	if (err)
-		goto bail0;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_CQ_CREATE);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.msg_size = cpu_to_be32(cq->mq.msg_size);
-	wr.depth = cpu_to_be32(cq->mq.q_size);
-	wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
-	wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
-	wr.user_context = (u64) (unsigned long) (cq);
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail2;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail2;
-
-	reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail2;
-	}
-
-	if ((err = c2_errno(reply)) != 0)
-		goto bail3;
-
-	cq->adapter_handle = reply->cq_handle;
-	cq->mq.index = be32_to_cpu(reply->mq_index);
-
-	peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
-	cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
-	if (!cq->mq.peer) {
-		err = -ENOMEM;
-		goto bail3;
-	}
-
-	vq_repbuf_free(c2dev, reply);
-	vq_req_free(c2dev, vq_req);
-
-	spin_lock_init(&cq->lock);
-	atomic_set(&cq->refcount, 1);
-	init_waitqueue_head(&cq->wait);
-
-	/*
-	 * Use the MQ index allocated by the adapter to
-	 * store the CQ in the qptr_array
-	 */
-	cq->cqn = cq->mq.index;
-	c2dev->qptr_array[cq->cqn] = cq;
-
-	return 0;
-
-bail3:
-	vq_repbuf_free(c2dev, reply);
-bail2:
-	vq_req_free(c2dev, vq_req);
-bail1:
-	c2_free_cq_buf(c2dev, &cq->mq);
-bail0:
-	c2_free_mqsp(cq->mq.shared);
-
-	return err;
-}
-
-void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
-{
-	int err;
-	struct c2_vq_req *vq_req;
-	struct c2wr_cq_destroy_req wr;
-	struct c2wr_cq_destroy_rep *reply;
-
-	might_sleep();
-
-	/* Clear CQ from the qptr array */
-	spin_lock_irq(&c2dev->lock);
-	c2dev->qptr_array[cq->mq.index] = NULL;
-	atomic_dec(&cq->refcount);
-	spin_unlock_irq(&c2dev->lock);
-
-	wait_event(cq->wait, !atomic_read(&cq->refcount));
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		goto bail0;
-	}
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.cq_handle = cq->adapter_handle;
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail1;
-
-	reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
-	if (reply)
-		vq_repbuf_free(c2dev, reply);
-bail1:
-	vq_req_free(c2dev, vq_req);
-bail0:
-	if (cq->is_kernel) {
-		c2_free_cq_buf(c2dev, &cq->mq);
-	}
-
-	return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_intr.c b/drivers/staging/rdma/amso1100/c2_intr.c
deleted file mode 100644
index 74b32a971124..000000000000
--- a/drivers/staging/rdma/amso1100/c2_intr.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include <rdma/iw_cm.h>
-#include "c2_vq.h"
-
-static void handle_mq(struct c2_dev *c2dev, u32 index);
-static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
-
-/*
- * Handle RNIC interrupts
- */
-void c2_rnic_interrupt(struct c2_dev *c2dev)
-{
-	unsigned int mq_index;
-
-	while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
-		mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
-		if (mq_index & 0x80000000) {
-			break;
-		}
-
-		c2dev->hints_read++;
-		handle_mq(c2dev, mq_index);
-	}
-
-}
-
-/*
- * Top level MQ handler
- */
-static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
-{
-	if (c2dev->qptr_array[mq_index] == NULL) {
-		pr_debug("handle_mq: stray activity for mq_index=%d\n",
-			 mq_index);
-		return;
-	}
-
-	switch (mq_index) {
-	case (0):
-		/*
-		 * An index of 0 in the activity queue
-		 * indicates the req vq now has messages
-		 * available...
-		 *
-		 * Wake up any waiters waiting on req VQ
-		 * message availability.
-		 */
-		wake_up(&c2dev->req_vq_wo);
-		break;
-	case (1):
-		handle_vq(c2dev, mq_index);
-		break;
-	case (2):
-		/* We have to purge the VQ in case there are pending
-		 * accept reply requests that would result in the
-		 * generation of an ESTABLISHED event. If we don't
-		 * generate these first, a CLOSE event could end up
-		 * being delivered before the ESTABLISHED event.
-		 */
-		handle_vq(c2dev, 1);
-
-		c2_ae_event(c2dev, mq_index);
-		break;
-	default:
-		/* There is no event synchronization between CQ events
-		 * and AE or CM events. In fact, CQE could be
-		 * delivered for all of the I/O up to and including the
-		 * FLUSH for a peer disconenct prior to the ESTABLISHED
-		 * event being delivered to the app. The reason for this
-		 * is that CM events are delivered on a thread, while AE
-		 * and CM events are delivered on interrupt context.
-		 */
-		c2_cq_event(c2dev, mq_index);
-		break;
-	}
-
-	return;
-}
-
-/*
- * Handles verbs WR replies.
- */
-static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
-{
-	void *adapter_msg, *reply_msg;
-	struct c2wr_hdr *host_msg;
-	struct c2wr_hdr tmp;
-	struct c2_mq *reply_vq;
-	struct c2_vq_req *req;
-	struct iw_cm_event cm_event;
-	int err;
-
-	reply_vq = c2dev->qptr_array[mq_index];
-
-	/*
-	 * get next msg from mq_index into adapter_msg.
-	 * don't free it yet.
-	 */
-	adapter_msg = c2_mq_consume(reply_vq);
-	if (adapter_msg == NULL) {
-		return;
-	}
-
-	host_msg = vq_repbuf_alloc(c2dev);
-
-	/*
-	 * If we can't get a host buffer, then we'll still
-	 * wakeup the waiter, we just won't give him the msg.
-	 * It is assumed the waiter will deal with this...
-	 */
-	if (!host_msg) {
-		pr_debug("handle_vq: no repbufs!\n");
-
-		/*
-		 * just copy the WR header into a local variable.
-		 * this allows us to still demux on the context
-		 */
-		host_msg = &tmp;
-		memcpy(host_msg, adapter_msg, sizeof(tmp));
-		reply_msg = NULL;
-	} else {
-		memcpy(host_msg, adapter_msg, reply_vq->msg_size);
-		reply_msg = host_msg;
-	}
-
-	/*
-	 * consume the msg from the MQ
-	 */
-	c2_mq_free(reply_vq);
-
-	/*
-	 * wakeup the waiter.
-	 */
-	req = (struct c2_vq_req *) (unsigned long) host_msg->context;
-	if (req == NULL) {
-		/*
-		 * We should never get here, as the adapter should
-		 * never send us a reply that we're not expecting.
-		 */
-		if (reply_msg != NULL)
-			vq_repbuf_free(c2dev, host_msg);
-		pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
-		return;
-	}
-
-	if (reply_msg)
-		err = c2_errno(reply_msg);
-	else
-		err = -ENOMEM;
-
-	if (!err) switch (req->event) {
-	case IW_CM_EVENT_ESTABLISHED:
-		c2_set_qp_state(req->qp,
-				C2_QP_STATE_RTS);
-		/*
-		 * Until ird/ord negotiation via MPAv2 support is added, send
-		 * max supported values
-		 */
-		cm_event.ird = cm_event.ord = 128;
-	case IW_CM_EVENT_CLOSE:
-
-		/*
-		 * Move the QP to RTS if this is
-		 * the established event
-		 */
-		cm_event.event = req->event;
-		cm_event.status = 0;
-		cm_event.local_addr = req->cm_id->local_addr;
-		cm_event.remote_addr = req->cm_id->remote_addr;
-		cm_event.private_data = NULL;
-		cm_event.private_data_len = 0;
-		req->cm_id->event_handler(req->cm_id, &cm_event);
-		break;
-	default:
-		break;
-	}
-
-	req->reply_msg = (u64) (unsigned long) (reply_msg);
-	atomic_set(&req->reply_ready, 1);
-	wake_up(&req->wait_object);
-
-	/*
-	 * If the request was cancelled, then this put will
-	 * free the vq_req memory...and reply_msg!!!
-	 */
-	vq_req_put(c2dev, req);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mm.c b/drivers/staging/rdma/amso1100/c2_mm.c
deleted file mode 100644
index 25081e2913de..000000000000
--- a/drivers/staging/rdma/amso1100/c2_mm.c
+++ /dev/null
@@ -1,377 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-
-#define PBL_VIRT 1
-#define PBL_PHYS 2
-
-/*
- * Send all the PBL messages to convey the remainder of the PBL
- * Wait for the adapter's reply on the last one.
- * This is indicated by setting the MEM_PBL_COMPLETE in the flags.
- *
- * NOTE:  vq_req is _not_ freed by this function.  The VQ Host
- *	  Reply buffer _is_ freed by this function.
- */
-static int
-send_pbl_messages(struct c2_dev *c2dev, __be32 stag_index,
-		  unsigned long va, u32 pbl_depth,
-		  struct c2_vq_req *vq_req, int pbl_type)
-{
-	u32 pbe_count;		/* amt that fits in a PBL msg */
-	u32 count;		/* amt in this PBL MSG. */
-	struct c2wr_nsmr_pbl_req *wr;	/* PBL WR ptr */
-	struct c2wr_nsmr_pbl_rep *reply;	/* reply ptr */
- 	int err, pbl_virt, pbl_index, i;
-
-	switch (pbl_type) {
-	case PBL_VIRT:
-		pbl_virt = 1;
-		break;
-	case PBL_PHYS:
-		pbl_virt = 0;
-		break;
-	default:
-		return -EINVAL;
-		break;
-	}
-
-	pbe_count = (c2dev->req_vq.msg_size -
-		     sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		return -ENOMEM;
-	}
-	c2_wr_set_id(wr, CCWR_NSMR_PBL);
-
-	/*
-	 * Only the last PBL message will generate a reply from the verbs,
-	 * so we set the context to 0 indicating there is no kernel verbs
-	 * handler blocked awaiting this reply.
-	 */
-	wr->hdr.context = 0;
-	wr->rnic_handle = c2dev->adapter_handle;
-	wr->stag_index = stag_index;	/* already swapped */
-	wr->flags = 0;
-	pbl_index = 0;
-	while (pbl_depth) {
-		count = min(pbe_count, pbl_depth);
-		wr->addrs_length = cpu_to_be32(count);
-
-		/*
-		 *  If this is the last message, then reference the
-		 *  vq request struct cuz we're gonna wait for a reply.
-		 *  also make this PBL msg as the last one.
-		 */
-		if (count == pbl_depth) {
-			/*
-			 * reference the request struct.  dereferenced in the
-			 * int handler.
-			 */
-			vq_req_get(c2dev, vq_req);
-			wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
-
-			/*
-			 * This is the last PBL message.
-			 * Set the context to our VQ Request Object so we can
-			 * wait for the reply.
-			 */
-			wr->hdr.context = (unsigned long) vq_req;
-		}
-
-		/*
-		 * If pbl_virt is set then va is a virtual address
-		 * that describes a virtually contiguous memory
-		 * allocation. The wr needs the start of each virtual page
-		 * to be converted to the corresponding physical address
-		 * of the page. If pbl_virt is not set then va is an array
-		 * of physical addresses and there is no conversion to do.
-		 * Just fill in the wr with what is in the array.
-		 */
-		for (i = 0; i < count; i++) {
-			if (pbl_virt) {
-				va += PAGE_SIZE;
-			} else {
- 				wr->paddrs[i] =
-				    cpu_to_be64(((u64 *)va)[pbl_index + i]);
-			}
-		}
-
-		/*
-		 * Send WR to adapter
-		 */
-		err = vq_send_wr(c2dev, (union c2wr *) wr);
-		if (err) {
-			if (count <= pbe_count) {
-				vq_req_put(c2dev, vq_req);
-			}
-			goto bail0;
-		}
-		pbl_depth -= count;
-		pbl_index += count;
-	}
-
-	/*
-	 *  Now wait for the reply...
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail0;
-	}
-
-	/*
-	 * Process reply
-	 */
-	reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	err = c2_errno(reply);
-
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	kfree(wr);
-	return err;
-}
-
-#define C2_PBL_MAX_DEPTH 131072
-int
-c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
- 			   int page_size, int pbl_depth, u32 length,
- 			   u32 offset, u64 *va, enum c2_acf acf,
-			   struct c2_mr *mr)
-{
-	struct c2_vq_req *vq_req;
-	struct c2wr_nsmr_register_req *wr;
-	struct c2wr_nsmr_register_rep *reply;
-	u16 flags;
-	int i, pbe_count, count;
-	int err;
-
-	if (!va || !length || !addr_list || !pbl_depth)
-		return -EINTR;
-
-	/*
-	 * Verify PBL depth is within rnic max
-	 */
-	if (pbl_depth > C2_PBL_MAX_DEPTH) {
-		return -EINTR;
-	}
-
-	/*
-	 * allocate verbs request object
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	/*
-	 * build the WR
-	 */
-	c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
-	wr->hdr.context = (unsigned long) vq_req;
-	wr->rnic_handle = c2dev->adapter_handle;
-
-	flags = (acf | MEM_VA_BASED | MEM_REMOTE);
-
-	/*
-	 * compute how many pbes can fit in the message
-	 */
-	pbe_count = (c2dev->req_vq.msg_size -
-		     sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
-
-	if (pbl_depth <= pbe_count) {
-		flags |= MEM_PBL_COMPLETE;
-	}
-	wr->flags = cpu_to_be16(flags);
-	wr->stag_key = 0;	//stag_key;
-	wr->va = cpu_to_be64(*va);
-	wr->pd_id = mr->pd->pd_id;
-	wr->pbe_size = cpu_to_be32(page_size);
-	wr->length = cpu_to_be32(length);
-	wr->pbl_depth = cpu_to_be32(pbl_depth);
-	wr->fbo = cpu_to_be32(offset);
-	count = min(pbl_depth, pbe_count);
-	wr->addrs_length = cpu_to_be32(count);
-
-	/*
-	 * fill out the PBL for this message
-	 */
-	for (i = 0; i < count; i++) {
-		wr->paddrs[i] = cpu_to_be64(addr_list[i]);
-	}
-
-	/*
-	 * regerence the request struct
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	/*
-	 * send the WR to the adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	/*
-	 * wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail1;
-	}
-
-	/*
-	 * process reply
-	 */
-	reply =
-	    (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-	if ((err = c2_errno(reply))) {
-		goto bail2;
-	}
-	//*p_pb_entries = be32_to_cpu(reply->pbl_depth);
-	mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
-	vq_repbuf_free(c2dev, reply);
-
-	/*
-	 * if there are still more PBEs we need to send them to
-	 * the adapter and wait for a reply on the final one.
-	 * reuse vq_req for this purpose.
-	 */
-	pbl_depth -= count;
-	if (pbl_depth) {
-
-		vq_req->reply_msg = (unsigned long) NULL;
-		atomic_set(&vq_req->reply_ready, 0);
-		err = send_pbl_messages(c2dev,
-					cpu_to_be32(mr->ibmr.lkey),
-					(unsigned long) &addr_list[i],
-					pbl_depth, vq_req, PBL_PHYS);
-		if (err) {
-			goto bail1;
-		}
-	}
-
-	vq_req_free(c2dev, vq_req);
-	kfree(wr);
-
-	return err;
-
-bail2:
-	vq_repbuf_free(c2dev, reply);
-bail1:
-	kfree(wr);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
-{
-	struct c2_vq_req *vq_req;	/* verbs request object */
-	struct c2wr_stag_dealloc_req wr;	/* work request */
-	struct c2wr_stag_dealloc_rep *reply;	/* WR reply  */
-	int err;
-
-
-	/*
-	 * allocate verbs request object
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		return -ENOMEM;
-	}
-
-	/*
-	 * Build the WR
-	 */
-	c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
-	wr.hdr.context = (u64) (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.stag_index = cpu_to_be32(stag_index);
-
-	/*
-	 * reference the request struct.  dereferenced in the int handler.
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	/*
-	 * Send WR to adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	/*
-	 * Wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail0;
-	}
-
-	/*
-	 * Process reply
-	 */
-	reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	err = c2_errno(reply);
-
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mq.c b/drivers/staging/rdma/amso1100/c2_mq.c
deleted file mode 100644
index 7827fb8bdb10..000000000000
--- a/drivers/staging/rdma/amso1100/c2_mq.c
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "c2.h"
-#include "c2_mq.h"
-
-void *c2_mq_alloc(struct c2_mq *q)
-{
-	BUG_ON(q->magic != C2_MQ_MAGIC);
-	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-	if (c2_mq_full(q)) {
-		return NULL;
-	} else {
-#ifdef DEBUG
-		struct c2wr_hdr *m =
-		    (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
-#ifdef CCMSGMAGIC
-		BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
-		m->magic = cpu_to_be32(CCWR_MAGIC);
-#endif
-		return m;
-#else
-		return q->msg_pool.host + q->priv * q->msg_size;
-#endif
-	}
-}
-
-void c2_mq_produce(struct c2_mq *q)
-{
-	BUG_ON(q->magic != C2_MQ_MAGIC);
-	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-	if (!c2_mq_full(q)) {
-		q->priv = (q->priv + 1) % q->q_size;
-		q->hint_count++;
-		/* Update peer's offset. */
-		__raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
-	}
-}
-
-void *c2_mq_consume(struct c2_mq *q)
-{
-	BUG_ON(q->magic != C2_MQ_MAGIC);
-	BUG_ON(q->type != C2_MQ_HOST_TARGET);
-
-	if (c2_mq_empty(q)) {
-		return NULL;
-	} else {
-#ifdef DEBUG
-		struct c2wr_hdr *m = (struct c2wr_hdr *)
-		    (q->msg_pool.host + q->priv * q->msg_size);
-#ifdef CCMSGMAGIC
-		BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
-#endif
-		return m;
-#else
-		return q->msg_pool.host + q->priv * q->msg_size;
-#endif
-	}
-}
-
-void c2_mq_free(struct c2_mq *q)
-{
-	BUG_ON(q->magic != C2_MQ_MAGIC);
-	BUG_ON(q->type != C2_MQ_HOST_TARGET);
-
-	if (!c2_mq_empty(q)) {
-
-#ifdef CCMSGMAGIC
-		{
-			struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
-			    (q->msg_pool.adapter + q->priv * q->msg_size);
-			__raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
-		}
-#endif
-		q->priv = (q->priv + 1) % q->q_size;
-		/* Update peer's offset. */
-		__raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
-	}
-}
-
-
-void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
-{
-	BUG_ON(q->magic != C2_MQ_MAGIC);
-	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
-
-	while (wqe_count--) {
-		BUG_ON(c2_mq_empty(q));
-		*q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
-	}
-}
-
-#if 0
-u32 c2_mq_count(struct c2_mq *q)
-{
-	s32 count;
-
-	if (q->type == C2_MQ_HOST_TARGET)
-		count = be16_to_cpu(*q->shared) - q->priv;
-	else
-		count = q->priv - be16_to_cpu(*q->shared);
-
-	if (count < 0)
-		count += q->q_size;
-
-	return (u32) count;
-}
-#endif  /*  0  */
-
-void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-		    u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
-{
-	BUG_ON(!q->shared);
-
-	/* This code assumes the byte swapping has already been done! */
-	q->index = index;
-	q->q_size = q_size;
-	q->msg_size = msg_size;
-	q->msg_pool.adapter = pool_start;
-	q->peer = (struct c2_mq_shared __iomem *) peer;
-	q->magic = C2_MQ_MAGIC;
-	q->type = type;
-	q->priv = 0;
-	q->hint_count = 0;
-	return;
-}
-
-void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-		    u8 *pool_start, u16 __iomem *peer, u32 type)
-{
-	BUG_ON(!q->shared);
-
-	/* This code assumes the byte swapping has already been done! */
-	q->index = index;
-	q->q_size = q_size;
-	q->msg_size = msg_size;
-	q->msg_pool.host = pool_start;
-	q->peer = (struct c2_mq_shared __iomem *) peer;
-	q->magic = C2_MQ_MAGIC;
-	q->type = type;
-	q->priv = 0;
-	q->hint_count = 0;
-	return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_mq.h b/drivers/staging/rdma/amso1100/c2_mq.h
deleted file mode 100644
index 8e1b4d13409e..000000000000
--- a/drivers/staging/rdma/amso1100/c2_mq.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _C2_MQ_H_
-#define _C2_MQ_H_
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-#include "c2_wr.h"
-
-enum c2_shared_regs {
-
-	C2_SHARED_ARMED = 0x10,
-	C2_SHARED_NOTIFY = 0x18,
-	C2_SHARED_SHARED = 0x40,
-};
-
-struct c2_mq_shared {
-	u16 unused1;
-	u8 armed;
-	u8 notification_type;
-	u32 unused2;
-	u16 shared;
-	/* Pad to 64 bytes. */
-	u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
-};
-
-enum c2_mq_type {
-	C2_MQ_HOST_TARGET = 1,
-	C2_MQ_ADAPTER_TARGET = 2,
-};
-
-/*
- * c2_mq_t is for kernel-mode MQs like the VQs Cand the AEQ.
- * c2_user_mq_t (which is the same format) is for user-mode MQs...
- */
-#define C2_MQ_MAGIC 0x4d512020	/* 'MQ  ' */
-struct c2_mq {
-	u32 magic;
-	union {
-		u8 *host;
-		u8 __iomem *adapter;
-	} msg_pool;
-	dma_addr_t host_dma;
-	DEFINE_DMA_UNMAP_ADDR(mapping);
-	u16 hint_count;
-	u16 priv;
-	struct c2_mq_shared __iomem *peer;
-	__be16 *shared;
-	dma_addr_t shared_dma;
-	u32 q_size;
-	u32 msg_size;
-	u32 index;
-	enum c2_mq_type type;
-};
-
-static __inline__ int c2_mq_empty(struct c2_mq *q)
-{
-	return q->priv == be16_to_cpu(*q->shared);
-}
-
-static __inline__ int c2_mq_full(struct c2_mq *q)
-{
-	return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
-}
-
-void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
-void *c2_mq_alloc(struct c2_mq *q);
-void c2_mq_produce(struct c2_mq *q);
-void *c2_mq_consume(struct c2_mq *q);
-void c2_mq_free(struct c2_mq *q);
-void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-		       u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
-void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
-			   u8 *pool_start, u16 __iomem *peer, u32 type);
-
-#endif				/* _C2_MQ_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_pd.c b/drivers/staging/rdma/amso1100/c2_pd.c
deleted file mode 100644
index f3e81dc357bb..000000000000
--- a/drivers/staging/rdma/amso1100/c2_pd.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-
-#include "c2.h"
-#include "c2_provider.h"
-
-int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
-{
-	u32 obj;
-	int ret = 0;
-
-	spin_lock(&c2dev->pd_table.lock);
-	obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
-				 c2dev->pd_table.last);
-	if (obj >= c2dev->pd_table.max)
-		obj = find_first_zero_bit(c2dev->pd_table.table,
-					  c2dev->pd_table.max);
-	if (obj < c2dev->pd_table.max) {
-		pd->pd_id = obj;
-		__set_bit(obj, c2dev->pd_table.table);
-		c2dev->pd_table.last = obj+1;
-		if (c2dev->pd_table.last >= c2dev->pd_table.max)
-			c2dev->pd_table.last = 0;
-	} else
-		ret = -ENOMEM;
-	spin_unlock(&c2dev->pd_table.lock);
-	return ret;
-}
-
-void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
-{
-	spin_lock(&c2dev->pd_table.lock);
-	__clear_bit(pd->pd_id, c2dev->pd_table.table);
-	spin_unlock(&c2dev->pd_table.lock);
-}
-
-int c2_init_pd_table(struct c2_dev *c2dev)
-{
-
-	c2dev->pd_table.last = 0;
-	c2dev->pd_table.max = c2dev->props.max_pd;
-	spin_lock_init(&c2dev->pd_table.lock);
-	c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
-					sizeof(long), GFP_KERNEL);
-	if (!c2dev->pd_table.table)
-		return -ENOMEM;
-	bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
-	return 0;
-}
-
-void c2_cleanup_pd_table(struct c2_dev *c2dev)
-{
-	kfree(c2dev->pd_table.table);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_provider.c b/drivers/staging/rdma/amso1100/c2_provider.c
deleted file mode 100644
index de8d10e1bde3..000000000000
--- a/drivers/staging/rdma/amso1100/c2_provider.c
+++ /dev/null
@@ -1,862 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/if_arp.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
-#include <rdma/ib_user_verbs.h>
-#include "c2.h"
-#include "c2_provider.h"
-#include "c2_user.h"
-
-static int c2_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-			   struct ib_udata *uhw)
-{
-	struct c2_dev *c2dev = to_c2dev(ibdev);
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-
-	*props = c2dev->props;
-	return 0;
-}
-
-static int c2_query_port(struct ib_device *ibdev,
-			 u8 port, struct ib_port_attr *props)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	props->max_mtu = IB_MTU_4096;
-	props->lid = 0;
-	props->lmc = 0;
-	props->sm_lid = 0;
-	props->sm_sl = 0;
-	props->state = IB_PORT_ACTIVE;
-	props->phys_state = 0;
-	props->port_cap_flags =
-	    IB_PORT_CM_SUP |
-	    IB_PORT_REINIT_SUP |
-	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
-	props->gid_tbl_len = 1;
-	props->pkey_tbl_len = 1;
-	props->qkey_viol_cntr = 0;
-	props->active_width = 1;
-	props->active_speed = IB_SPEED_SDR;
-
-	return 0;
-}
-
-static int c2_query_pkey(struct ib_device *ibdev,
-			 u8 port, u16 index, u16 * pkey)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	*pkey = 0;
-	return 0;
-}
-
-static int c2_query_gid(struct ib_device *ibdev, u8 port,
-			int index, union ib_gid *gid)
-{
-	struct c2_dev *c2dev = to_c2dev(ibdev);
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
-	memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
-
-	return 0;
-}
-
-/* Allocate the user context data structure. This keeps track
- * of all objects associated with a particular user-mode client.
- */
-static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
-					     struct ib_udata *udata)
-{
-	struct c2_ucontext *context;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	context = kmalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return ERR_PTR(-ENOMEM);
-
-	return &context->ibucontext;
-}
-
-static int c2_dealloc_ucontext(struct ib_ucontext *context)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	kfree(context);
-	return 0;
-}
-
-static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return -ENOSYS;
-}
-
-static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
-				 struct ib_ucontext *context,
-				 struct ib_udata *udata)
-{
-	struct c2_pd *pd;
-	int err;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
-		return ERR_PTR(-ENOMEM);
-
-	err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
-	if (err) {
-		kfree(pd);
-		return ERR_PTR(err);
-	}
-
-	if (context) {
-		if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
-			c2_pd_free(to_c2dev(ibdev), pd);
-			kfree(pd);
-			return ERR_PTR(-EFAULT);
-		}
-	}
-
-	return &pd->ibpd;
-}
-
-static int c2_dealloc_pd(struct ib_pd *pd)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
-	kfree(pd);
-
-	return 0;
-}
-
-static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return ERR_PTR(-ENOSYS);
-}
-
-static int c2_ah_destroy(struct ib_ah *ah)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return -ENOSYS;
-}
-
-static void c2_add_ref(struct ib_qp *ibqp)
-{
-	struct c2_qp *qp;
-	BUG_ON(!ibqp);
-	qp = to_c2qp(ibqp);
-	atomic_inc(&qp->refcount);
-}
-
-static void c2_rem_ref(struct ib_qp *ibqp)
-{
-	struct c2_qp *qp;
-	BUG_ON(!ibqp);
-	qp = to_c2qp(ibqp);
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-}
-
-struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
-{
-	struct c2_dev* c2dev = to_c2dev(device);
-	struct c2_qp *qp;
-
-	qp = c2_find_qpn(c2dev, qpn);
-	pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
-		__func__, qp, qpn, device,
-		(qp?atomic_read(&qp->refcount):0));
-
-	return (qp?&qp->ibqp:NULL);
-}
-
-static struct ib_qp *c2_create_qp(struct ib_pd *pd,
-				  struct ib_qp_init_attr *init_attr,
-				  struct ib_udata *udata)
-{
-	struct c2_qp *qp;
-	int err;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	if (init_attr->create_flags)
-		return ERR_PTR(-EINVAL);
-
-	switch (init_attr->qp_type) {
-	case IB_QPT_RC:
-		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
-		if (!qp) {
-			pr_debug("%s: Unable to allocate QP\n", __func__);
-			return ERR_PTR(-ENOMEM);
-		}
-		spin_lock_init(&qp->lock);
-		if (pd->uobject) {
-			/* userspace specific */
-		}
-
-		err = c2_alloc_qp(to_c2dev(pd->device),
-				  to_c2pd(pd), init_attr, qp);
-
-		if (err && pd->uobject) {
-			/* userspace specific */
-		}
-
-		break;
-	default:
-		pr_debug("%s: Invalid QP type: %d\n", __func__,
-			init_attr->qp_type);
-		return ERR_PTR(-EINVAL);
-	}
-
-	if (err) {
-		kfree(qp);
-		return ERR_PTR(err);
-	}
-
-	return &qp->ibqp;
-}
-
-static int c2_destroy_qp(struct ib_qp *ib_qp)
-{
-	struct c2_qp *qp = to_c2qp(ib_qp);
-
-	pr_debug("%s:%u qp=%p,qp->state=%d\n",
-		__func__, __LINE__, ib_qp, qp->state);
-	c2_free_qp(to_c2dev(ib_qp->device), qp);
-	kfree(qp);
-	return 0;
-}
-
-static struct ib_cq *c2_create_cq(struct ib_device *ibdev,
-				  const struct ib_cq_init_attr *attr,
-				  struct ib_ucontext *context,
-				  struct ib_udata *udata)
-{
-	int entries = attr->cqe;
-	struct c2_cq *cq;
-	int err;
-
-	if (attr->flags)
-		return ERR_PTR(-EINVAL);
-
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-	if (!cq) {
-		pr_debug("%s: Unable to allocate CQ\n", __func__);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
-	if (err) {
-		pr_debug("%s: error initializing CQ\n", __func__);
-		kfree(cq);
-		return ERR_PTR(err);
-	}
-
-	return &cq->ibcq;
-}
-
-static int c2_destroy_cq(struct ib_cq *ib_cq)
-{
-	struct c2_cq *cq = to_c2cq(ib_cq);
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	c2_free_cq(to_c2dev(ib_cq->device), cq);
-	kfree(cq);
-
-	return 0;
-}
-
-static inline u32 c2_convert_access(int acc)
-{
-	return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
-	    (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
-	    (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
-	    C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
-}
-
-static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
-{
-	struct c2_mr *mr;
-	u64 *page_list;
-	const u32 total_len = 0xffffffff;	/* AMSO1100 limit */
-	int err, page_shift, pbl_depth, i;
-	u64 kva = 0;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	/*
-	 * This is a map of all phy mem...use a 32k page_shift.
-	 */
-	page_shift = PAGE_SHIFT + 3;
-	pbl_depth = ALIGN(total_len, BIT(page_shift)) >> page_shift;
-
-	page_list = vmalloc(sizeof(u64) * pbl_depth);
-	if (!page_list) {
-		pr_debug("couldn't vmalloc page_list of size %zd\n",
-			(sizeof(u64) * pbl_depth));
-		return ERR_PTR(-ENOMEM);
-	}
-
-	for (i = 0; i < pbl_depth; i++)
-		page_list[i] = (i << page_shift);
-
-	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr) {
-		vfree(page_list);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	mr->pd = to_c2pd(pd);
-	mr->umem = NULL;
-	pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
-		"*iova_start %llx, first pa %llx, last pa %llx\n",
-		__func__, page_shift, pbl_depth, total_len,
-		(unsigned long long) kva,
-	       	(unsigned long long) page_list[0],
-	       	(unsigned long long) page_list[pbl_depth-1]);
-	err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), page_list,
-					 BIT(page_shift), pbl_depth,
-					 total_len, 0, &kva,
-					 c2_convert_access(acc), mr);
-	vfree(page_list);
-	if (err) {
-		kfree(mr);
-		return ERR_PTR(err);
-	}
-
-	return &mr->ibmr;
-}
-
-static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-				    u64 virt, int acc, struct ib_udata *udata)
-{
-	u64 *pages;
-	u64 kva = 0;
-	int shift, n, len;
-	int i, k, entry;
-	int err = 0;
-	struct scatterlist *sg;
-	struct c2_pd *c2pd = to_c2pd(pd);
-	struct c2_mr *c2mr;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
-	if (!c2mr)
-		return ERR_PTR(-ENOMEM);
-	c2mr->pd = c2pd;
-
-	c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
-	if (IS_ERR(c2mr->umem)) {
-		err = PTR_ERR(c2mr->umem);
-		kfree(c2mr);
-		return ERR_PTR(err);
-	}
-
-	shift = ffs(c2mr->umem->page_size) - 1;
-	n = c2mr->umem->nmap;
-
-	pages = kmalloc_array(n, sizeof(u64), GFP_KERNEL);
-	if (!pages) {
-		err = -ENOMEM;
-		goto err;
-	}
-
-	i = 0;
-	for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
-		len = sg_dma_len(sg) >> shift;
-		for (k = 0; k < len; ++k) {
-			pages[i++] =
-				sg_dma_address(sg) +
-				(c2mr->umem->page_size * k);
-		}
-	}
-
-	kva = virt;
-  	err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
-					 pages,
-					 c2mr->umem->page_size,
-					 i,
-					 length,
-					 ib_umem_offset(c2mr->umem),
-					 &kva,
-					 c2_convert_access(acc),
-					 c2mr);
-	kfree(pages);
-	if (err)
-		goto err;
-	return &c2mr->ibmr;
-
-err:
-	ib_umem_release(c2mr->umem);
-	kfree(c2mr);
-	return ERR_PTR(err);
-}
-
-static int c2_dereg_mr(struct ib_mr *ib_mr)
-{
-	struct c2_mr *mr = to_c2mr(ib_mr);
-	int err;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
-	if (err)
-		pr_debug("c2_stag_dealloc failed: %d\n", err);
-	else {
-		if (mr->umem)
-			ib_umem_release(mr->umem);
-		kfree(mr);
-	}
-
-	return err;
-}
-
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
-			char *buf)
-{
-	struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return sprintf(buf, "%x\n", c2dev->props.hw_ver);
-}
-
-static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
-			   char *buf)
-{
-	struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return sprintf(buf, "%x.%x.%x\n",
-		       (int) (c2dev->props.fw_ver >> 32),
-		       (int) (c2dev->props.fw_ver >> 16) & 0xffff,
-		       (int) (c2dev->props.fw_ver & 0xffff));
-}
-
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
-			char *buf)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return sprintf(buf, "AMSO1100\n");
-}
-
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct device_attribute *c2_dev_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_fw_ver,
-	&dev_attr_hca_type,
-	&dev_attr_board_id
-};
-
-static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-			int attr_mask, struct ib_udata *udata)
-{
-	int err;
-
-	err =
-	    c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
-			 attr_mask);
-
-	return err;
-}
-
-static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return -ENOSYS;
-}
-
-static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return -ENOSYS;
-}
-
-static int c2_process_mad(struct ib_device *ibdev,
-			  int mad_flags,
-			  u8 port_num,
-			  const struct ib_wc *in_wc,
-			  const struct ib_grh *in_grh,
-			  const struct ib_mad_hdr *in_mad,
-			  size_t in_mad_size,
-			  struct ib_mad_hdr *out_mad,
-			  size_t *out_mad_size,
-			  u16 *out_mad_pkey_index)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return -ENOSYS;
-}
-
-static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	/* Request a connection */
-	return c2_llp_connect(cm_id, iw_param);
-}
-
-static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	/* Accept the new connection */
-	return c2_llp_accept(cm_id, iw_param);
-}
-
-static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	return c2_llp_reject(cm_id, pdata, pdata_len);
-}
-
-static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
-{
-	int err;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	err = c2_llp_service_create(cm_id, backlog);
-	pr_debug("%s:%u err=%d\n",
-		__func__, __LINE__,
-		err);
-	return err;
-}
-
-static int c2_service_destroy(struct iw_cm_id *cm_id)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-
-	return c2_llp_service_destroy(cm_id);
-}
-
-static int c2_pseudo_up(struct net_device *netdev)
-{
-	struct in_device *ind;
-	struct c2_dev *c2dev = netdev->ml_priv;
-
-	ind = in_dev_get(netdev);
-	if (!ind)
-		return 0;
-
-	pr_debug("adding...\n");
-	for_ifa(ind) {
-#ifdef DEBUG
-		u8 *ip = (u8 *) & ifa->ifa_address;
-
-		pr_debug("%s: %d.%d.%d.%d\n",
-		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
-#endif
-		c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
-	}
-	endfor_ifa(ind);
-	in_dev_put(ind);
-
-	return 0;
-}
-
-static int c2_pseudo_down(struct net_device *netdev)
-{
-	struct in_device *ind;
-	struct c2_dev *c2dev = netdev->ml_priv;
-
-	ind = in_dev_get(netdev);
-	if (!ind)
-		return 0;
-
-	pr_debug("deleting...\n");
-	for_ifa(ind) {
-#ifdef DEBUG
-		u8 *ip = (u8 *) & ifa->ifa_address;
-
-		pr_debug("%s: %d.%d.%d.%d\n",
-		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
-#endif
-		c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
-	}
-	endfor_ifa(ind);
-	in_dev_put(ind);
-
-	return 0;
-}
-
-static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
-{
-	kfree_skb(skb);
-	return NETDEV_TX_OK;
-}
-
-static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-		return -EINVAL;
-
-	netdev->mtu = new_mtu;
-
-	/* TODO: Tell rnic about new rmda interface mtu */
-	return 0;
-}
-
-static const struct net_device_ops c2_pseudo_netdev_ops = {
-	.ndo_open 		= c2_pseudo_up,
-	.ndo_stop 		= c2_pseudo_down,
-	.ndo_start_xmit 	= c2_pseudo_xmit_frame,
-	.ndo_change_mtu 	= c2_pseudo_change_mtu,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-static void setup(struct net_device *netdev)
-{
-	netdev->netdev_ops = &c2_pseudo_netdev_ops;
-
-	netdev->watchdog_timeo = 0;
-	netdev->type = ARPHRD_ETHER;
-	netdev->mtu = 1500;
-	netdev->hard_header_len = ETH_HLEN;
-	netdev->addr_len = ETH_ALEN;
-	netdev->tx_queue_len = 0;
-	netdev->flags |= IFF_NOARP;
-}
-
-static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
-{
-	char name[IFNAMSIZ];
-	struct net_device *netdev;
-
-	/* change ethxxx to iwxxx */
-	strcpy(name, "iw");
-	strcat(name, &c2dev->netdev->name[3]);
-	netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, setup);
-	if (!netdev) {
-		printk(KERN_ERR PFX "%s -  etherdev alloc failed",
-			__func__);
-		return NULL;
-	}
-
-	netdev->ml_priv = c2dev;
-
-	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
-
-	memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
-
-	/* Print out the MAC address */
-	pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr);
-
-#if 0
-	/* Disable network packets */
-	netif_stop_queue(netdev);
-#endif
-	return netdev;
-}
-
-static int c2_port_immutable(struct ib_device *ibdev, u8 port_num,
-			     struct ib_port_immutable *immutable)
-{
-	struct ib_port_attr attr;
-	int err;
-
-	err = c2_query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
-
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
-	return 0;
-}
-
-int c2_register_device(struct c2_dev *dev)
-{
-	int ret = -ENOMEM;
-	int i;
-
-	/* Register pseudo network device */
-	dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
-	if (!dev->pseudo_netdev)
-		goto out;
-
-	ret = register_netdev(dev->pseudo_netdev);
-	if (ret)
-		goto out_free_netdev;
-
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
-	dev->ibdev.owner = THIS_MODULE;
-	dev->ibdev.uverbs_cmd_mask =
-	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
-	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
-	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
-	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
-	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
-	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
-	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
-	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
-	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
-	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
-	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
-	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
-	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
-
-	dev->ibdev.node_type = RDMA_NODE_RNIC;
-	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
-	memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
-	dev->ibdev.phys_port_cnt = 1;
-	dev->ibdev.num_comp_vectors = 1;
-	dev->ibdev.dma_device = &dev->pcidev->dev;
-	dev->ibdev.query_device = c2_query_device;
-	dev->ibdev.query_port = c2_query_port;
-	dev->ibdev.query_pkey = c2_query_pkey;
-	dev->ibdev.query_gid = c2_query_gid;
-	dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
-	dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
-	dev->ibdev.mmap = c2_mmap_uar;
-	dev->ibdev.alloc_pd = c2_alloc_pd;
-	dev->ibdev.dealloc_pd = c2_dealloc_pd;
-	dev->ibdev.create_ah = c2_ah_create;
-	dev->ibdev.destroy_ah = c2_ah_destroy;
-	dev->ibdev.create_qp = c2_create_qp;
-	dev->ibdev.modify_qp = c2_modify_qp;
-	dev->ibdev.destroy_qp = c2_destroy_qp;
-	dev->ibdev.create_cq = c2_create_cq;
-	dev->ibdev.destroy_cq = c2_destroy_cq;
-	dev->ibdev.poll_cq = c2_poll_cq;
-	dev->ibdev.get_dma_mr = c2_get_dma_mr;
-	dev->ibdev.reg_user_mr = c2_reg_user_mr;
-	dev->ibdev.dereg_mr = c2_dereg_mr;
-	dev->ibdev.get_port_immutable = c2_port_immutable;
-
-	dev->ibdev.alloc_fmr = NULL;
-	dev->ibdev.unmap_fmr = NULL;
-	dev->ibdev.dealloc_fmr = NULL;
-	dev->ibdev.map_phys_fmr = NULL;
-
-	dev->ibdev.attach_mcast = c2_multicast_attach;
-	dev->ibdev.detach_mcast = c2_multicast_detach;
-	dev->ibdev.process_mad = c2_process_mad;
-
-	dev->ibdev.req_notify_cq = c2_arm_cq;
-	dev->ibdev.post_send = c2_post_send;
-	dev->ibdev.post_recv = c2_post_receive;
-
-	dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
-	if (dev->ibdev.iwcm == NULL) {
-		ret = -ENOMEM;
-		goto out_unregister_netdev;
-	}
-	dev->ibdev.iwcm->add_ref = c2_add_ref;
-	dev->ibdev.iwcm->rem_ref = c2_rem_ref;
-	dev->ibdev.iwcm->get_qp = c2_get_qp;
-	dev->ibdev.iwcm->connect = c2_connect;
-	dev->ibdev.iwcm->accept = c2_accept;
-	dev->ibdev.iwcm->reject = c2_reject;
-	dev->ibdev.iwcm->create_listen = c2_service_create;
-	dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
-
-	ret = ib_register_device(&dev->ibdev, NULL);
-	if (ret)
-		goto out_free_iwcm;
-
-	for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
-		ret = device_create_file(&dev->ibdev.dev,
-					       c2_dev_attributes[i]);
-		if (ret)
-			goto out_unregister_ibdev;
-	}
-	goto out;
-
-out_unregister_ibdev:
-	ib_unregister_device(&dev->ibdev);
-out_free_iwcm:
-	kfree(dev->ibdev.iwcm);
-out_unregister_netdev:
-	unregister_netdev(dev->pseudo_netdev);
-out_free_netdev:
-	free_netdev(dev->pseudo_netdev);
-out:
-	pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
-	return ret;
-}
-
-void c2_unregister_device(struct c2_dev *dev)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	unregister_netdev(dev->pseudo_netdev);
-	free_netdev(dev->pseudo_netdev);
-	ib_unregister_device(&dev->ibdev);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_provider.h b/drivers/staging/rdma/amso1100/c2_provider.h
deleted file mode 100644
index bf189987711f..000000000000
--- a/drivers/staging/rdma/amso1100/c2_provider.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef C2_PROVIDER_H
-#define C2_PROVIDER_H
-#include <linux/inetdevice.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_pack.h>
-
-#include "c2_mq.h"
-#include <rdma/iw_cm.h>
-
-#define C2_MPT_FLAG_ATOMIC        (1 << 14)
-#define C2_MPT_FLAG_REMOTE_WRITE  (1 << 13)
-#define C2_MPT_FLAG_REMOTE_READ   (1 << 12)
-#define C2_MPT_FLAG_LOCAL_WRITE   (1 << 11)
-#define C2_MPT_FLAG_LOCAL_READ    (1 << 10)
-
-struct c2_buf_list {
-	void *buf;
-	DEFINE_DMA_UNMAP_ADDR(mapping);
-};
-
-
-/* The user context keeps track of objects allocated for a
- * particular user-mode client. */
-struct c2_ucontext {
-	struct ib_ucontext ibucontext;
-};
-
-struct c2_mtt;
-
-/* All objects associated with a PD are kept in the
- * associated user context if present.
- */
-struct c2_pd {
-	struct ib_pd ibpd;
-	u32 pd_id;
-};
-
-struct c2_mr {
-	struct ib_mr ibmr;
-	struct c2_pd *pd;
-	struct ib_umem *umem;
-};
-
-struct c2_av;
-
-enum c2_ah_type {
-	C2_AH_ON_HCA,
-	C2_AH_PCI_POOL,
-	C2_AH_KMALLOC
-};
-
-struct c2_ah {
-	struct ib_ah ibah;
-};
-
-struct c2_cq {
-	struct ib_cq ibcq;
-	spinlock_t lock;
-	atomic_t refcount;
-	int cqn;
-	int is_kernel;
-	wait_queue_head_t wait;
-
-	u32 adapter_handle;
-	struct c2_mq mq;
-};
-
-struct c2_wq {
-	spinlock_t lock;
-};
-struct iw_cm_id;
-struct c2_qp {
-	struct ib_qp ibqp;
-	struct iw_cm_id *cm_id;
-	spinlock_t lock;
-	atomic_t refcount;
-	wait_queue_head_t wait;
-	int qpn;
-
-	u32 adapter_handle;
-	u32 send_sgl_depth;
-	u32 recv_sgl_depth;
-	u32 rdma_write_sgl_depth;
-	u8 state;
-
-	struct c2_mq sq_mq;
-	struct c2_mq rq_mq;
-};
-
-struct c2_cr_query_attrs {
-	u32 local_addr;
-	u32 remote_addr;
-	u16 local_port;
-	u16 remote_port;
-};
-
-static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct c2_pd, ibpd);
-}
-
-static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
-{
-	return container_of(ibucontext, struct c2_ucontext, ibucontext);
-}
-
-static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct c2_mr, ibmr);
-}
-
-
-static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
-{
-	return container_of(ibah, struct c2_ah, ibah);
-}
-
-static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
-{
-	return container_of(ibcq, struct c2_cq, ibcq);
-}
-
-static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
-{
-	return container_of(ibqp, struct c2_qp, ibqp);
-}
-
-static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
-{
-	struct in_device *ind;
-	int ret = 0;
-
-	ind = in_dev_get(netdev);
-	if (!ind)
-		return 0;
-
-	for_ifa(ind) {
-		if (ifa->ifa_address == addr) {
-			ret = 1;
-			break;
-		}
-	}
-	endfor_ifa(ind);
-	in_dev_put(ind);
-	return ret;
-}
-#endif				/* C2_PROVIDER_H */
diff --git a/drivers/staging/rdma/amso1100/c2_qp.c b/drivers/staging/rdma/amso1100/c2_qp.c
deleted file mode 100644
index ca364dbe369c..000000000000
--- a/drivers/staging/rdma/amso1100/c2_qp.c
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/delay.h>
-#include <linux/gfp.h>
-
-#include "c2.h"
-#include "c2_vq.h"
-#include "c2_status.h"
-
-#define C2_MAX_ORD_PER_QP 128
-#define C2_MAX_IRD_PER_QP 128
-
-#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
-#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
-#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
-
-#define NO_SUPPORT -1
-static const u8 c2_opcode[] = {
-	[IB_WR_SEND] = C2_WR_TYPE_SEND,
-	[IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
-	[IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
-	[IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
-	[IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
-	[IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
-	[IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
-};
-
-static int to_c2_state(enum ib_qp_state ib_state)
-{
-	switch (ib_state) {
-	case IB_QPS_RESET:
-		return C2_QP_STATE_IDLE;
-	case IB_QPS_RTS:
-		return C2_QP_STATE_RTS;
-	case IB_QPS_SQD:
-		return C2_QP_STATE_CLOSING;
-	case IB_QPS_SQE:
-		return C2_QP_STATE_CLOSING;
-	case IB_QPS_ERR:
-		return C2_QP_STATE_ERROR;
-	default:
-		return -1;
-	}
-}
-
-static int to_ib_state(enum c2_qp_state c2_state)
-{
-	switch (c2_state) {
-	case C2_QP_STATE_IDLE:
-		return IB_QPS_RESET;
-	case C2_QP_STATE_CONNECTING:
-		return IB_QPS_RTR;
-	case C2_QP_STATE_RTS:
-		return IB_QPS_RTS;
-	case C2_QP_STATE_CLOSING:
-		return IB_QPS_SQD;
-	case C2_QP_STATE_ERROR:
-		return IB_QPS_ERR;
-	case C2_QP_STATE_TERMINATE:
-		return IB_QPS_SQE;
-	default:
-		return -1;
-	}
-}
-
-static const char *to_ib_state_str(int ib_state)
-{
-	static const char *state_str[] = {
-		"IB_QPS_RESET",
-		"IB_QPS_INIT",
-		"IB_QPS_RTR",
-		"IB_QPS_RTS",
-		"IB_QPS_SQD",
-		"IB_QPS_SQE",
-		"IB_QPS_ERR"
-	};
-	if (ib_state < IB_QPS_RESET ||
-	    ib_state > IB_QPS_ERR)
-		return "<invalid IB QP state>";
-
-	ib_state -= IB_QPS_RESET;
-	return state_str[ib_state];
-}
-
-void c2_set_qp_state(struct c2_qp *qp, int c2_state)
-{
-	int new_state = to_ib_state(c2_state);
-
-	pr_debug("%s: qp[%p] state modify %s --> %s\n",
-	       __func__,
-		qp,
-		to_ib_state_str(qp->state),
-		to_ib_state_str(new_state));
-	qp->state = new_state;
-}
-
-#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
-
-int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
-		 struct ib_qp_attr *attr, int attr_mask)
-{
-	struct c2wr_qp_modify_req wr;
-	struct c2wr_qp_modify_rep *reply;
-	struct c2_vq_req *vq_req;
-	unsigned long flags;
-	u8 next_state;
-	int err;
-
-	pr_debug("%s:%d qp=%p, %s --> %s\n",
-		__func__, __LINE__,
-		qp,
-		to_ib_state_str(qp->state),
-		to_ib_state_str(attr->qp_state));
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.qp_handle = qp->adapter_handle;
-	wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-	wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-
-	if (attr_mask & IB_QP_STATE) {
-		/* Ensure the state is valid */
-		if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) {
-			err = -EINVAL;
-			goto bail0;
-		}
-
-		wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
-
-		if (attr->qp_state == IB_QPS_ERR) {
-			spin_lock_irqsave(&qp->lock, flags);
-			if (qp->cm_id && qp->state == IB_QPS_RTS) {
-				pr_debug("Generating CLOSE event for QP-->ERR, "
-					"qp=%p, cm_id=%p\n",qp,qp->cm_id);
-				/* Generate an CLOSE event */
-				vq_req->cm_id = qp->cm_id;
-				vq_req->event = IW_CM_EVENT_CLOSE;
-			}
-			spin_unlock_irqrestore(&qp->lock, flags);
-		}
-		next_state =  attr->qp_state;
-
-	} else if (attr_mask & IB_QP_CUR_STATE) {
-
-		if (attr->cur_qp_state != IB_QPS_RTR &&
-		    attr->cur_qp_state != IB_QPS_RTS &&
-		    attr->cur_qp_state != IB_QPS_SQD &&
-		    attr->cur_qp_state != IB_QPS_SQE) {
-			err = -EINVAL;
-			goto bail0;
-		} else
-			wr.next_qp_state =
-			    cpu_to_be32(to_c2_state(attr->cur_qp_state));
-
-		next_state = attr->cur_qp_state;
-
-	} else {
-		err = 0;
-		goto bail0;
-	}
-
-	/* reference the request struct */
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail0;
-
-	reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	err = c2_errno(reply);
-	if (!err)
-		qp->state = next_state;
-#ifdef DEBUG
-	else
-		pr_debug("%s: c2_errno=%d\n", __func__, err);
-#endif
-	/*
-	 * If we're going to error and generating the event here, then
-	 * we need to remove the reference because there will be no
-	 * close event generated by the adapter
-	*/
-	spin_lock_irqsave(&qp->lock, flags);
-	if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
-		qp->cm_id->rem_ref(qp->cm_id);
-		qp->cm_id = NULL;
-	}
-	spin_unlock_irqrestore(&qp->lock, flags);
-
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-
-	pr_debug("%s:%d qp=%p, cur_state=%s\n",
-		__func__, __LINE__,
-		qp,
-		to_ib_state_str(qp->state));
-	return err;
-}
-
-int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
-			  int ord, int ird)
-{
-	struct c2wr_qp_modify_req wr;
-	struct c2wr_qp_modify_rep *reply;
-	struct c2_vq_req *vq_req;
-	int err;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.qp_handle = qp->adapter_handle;
-	wr.ord = cpu_to_be32(ord);
-	wr.ird = cpu_to_be32(ird);
-	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-	wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
-
-	/* reference the request struct */
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail0;
-
-	reply = (struct c2wr_qp_modify_rep *) (unsigned long)
-		vq_req->reply_msg;
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	err = c2_errno(reply);
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-	struct c2_vq_req *vq_req;
-	struct c2wr_qp_destroy_req wr;
-	struct c2wr_qp_destroy_rep *reply;
-	unsigned long flags;
-	int err;
-
-	/*
-	 * Allocate a verb request message
-	 */
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req) {
-		return -ENOMEM;
-	}
-
-	/*
-	 * Initialize the WR
-	 */
-	c2_wr_set_id(&wr, CCWR_QP_DESTROY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.qp_handle = qp->adapter_handle;
-
-	/*
-	 * reference the request struct.  dereferenced in the int handler.
-	 */
-	vq_req_get(c2dev, vq_req);
-
-	spin_lock_irqsave(&qp->lock, flags);
-	if (qp->cm_id && qp->state == IB_QPS_RTS) {
-		pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
-			"qp=%p, cm_id=%p\n",qp,qp->cm_id);
-		/* Generate an CLOSE event */
-		vq_req->qp = qp;
-		vq_req->cm_id = qp->cm_id;
-		vq_req->event = IW_CM_EVENT_CLOSE;
-	}
-	spin_unlock_irqrestore(&qp->lock, flags);
-
-	/*
-	 * Send WR to adapter
-	 */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	/*
-	 * Wait for reply from adapter
-	 */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail0;
-	}
-
-	/*
-	 * Process reply
-	 */
-	reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	spin_lock_irqsave(&qp->lock, flags);
-	if (qp->cm_id) {
-		qp->cm_id->rem_ref(qp->cm_id);
-		qp->cm_id = NULL;
-	}
-	spin_unlock_irqrestore(&qp->lock, flags);
-
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-	int ret;
-
-	idr_preload(GFP_KERNEL);
-	spin_lock_irq(&c2dev->qp_table.lock);
-
-	ret = idr_alloc_cyclic(&c2dev->qp_table.idr, qp, 0, 0, GFP_NOWAIT);
-	if (ret >= 0)
-		qp->qpn = ret;
-
-	spin_unlock_irq(&c2dev->qp_table.lock);
-	idr_preload_end();
-	return ret < 0 ? ret : 0;
-}
-
-static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
-{
-	spin_lock_irq(&c2dev->qp_table.lock);
-	idr_remove(&c2dev->qp_table.idr, qpn);
-	spin_unlock_irq(&c2dev->qp_table.lock);
-}
-
-struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
-{
-	unsigned long flags;
-	struct c2_qp *qp;
-
-	spin_lock_irqsave(&c2dev->qp_table.lock, flags);
-	qp = idr_find(&c2dev->qp_table.idr, qpn);
-	spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
-	return qp;
-}
-
-int c2_alloc_qp(struct c2_dev *c2dev,
-		struct c2_pd *pd,
-		struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
-{
-	struct c2wr_qp_create_req wr;
-	struct c2wr_qp_create_rep *reply;
-	struct c2_vq_req *vq_req;
-	struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
-	struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
-	unsigned long peer_pa;
-	u32 q_size, msg_size, mmap_size;
-	void __iomem *mmap;
-	int err;
-
-	err = c2_alloc_qpn(c2dev, qp);
-	if (err)
-		return err;
-	qp->ibqp.qp_num = qp->qpn;
-	qp->ibqp.qp_type = IB_QPT_RC;
-
-	/* Allocate the SQ and RQ shared pointers */
-	qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					 &qp->sq_mq.shared_dma, GFP_KERNEL);
-	if (!qp->sq_mq.shared) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					 &qp->rq_mq.shared_dma, GFP_KERNEL);
-	if (!qp->rq_mq.shared) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	/* Allocate the verbs request */
-	vq_req = vq_req_alloc(c2dev);
-	if (vq_req == NULL) {
-		err = -ENOMEM;
-		goto bail2;
-	}
-
-	/* Initialize the work request */
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_QP_CREATE);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-	wr.sq_cq_handle = send_cq->adapter_handle;
-	wr.rq_cq_handle = recv_cq->adapter_handle;
-	wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
-	wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
-	wr.srq_handle = 0;
-	wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
-			       QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
-	wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
-	wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
-	wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
-	wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
-	wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
-	wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
-	wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
-	wr.pd_id = pd->pd_id;
-	wr.user_context = (unsigned long) qp;
-
-	vq_req_get(c2dev, vq_req);
-
-	/* Send the WR to the adapter */
-	err = vq_send_wr(c2dev, (union c2wr *) & wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail3;
-	}
-
-	/* Wait for the verb reply  */
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail3;
-	}
-
-	/* Process the reply */
-	reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail3;
-	}
-
-	if ((err = c2_wr_get_result(reply)) != 0) {
-		goto bail4;
-	}
-
-	/* Fill in the kernel QP struct */
-	atomic_set(&qp->refcount, 1);
-	qp->adapter_handle = reply->qp_handle;
-	qp->state = IB_QPS_RESET;
-	qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
-	qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
-	qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
-	init_waitqueue_head(&qp->wait);
-
-	/* Initialize the SQ MQ */
-	q_size = be32_to_cpu(reply->sq_depth);
-	msg_size = be32_to_cpu(reply->sq_msg_size);
-	peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
-	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
-	mmap = ioremap_nocache(peer_pa, mmap_size);
-	if (!mmap) {
-		err = -ENOMEM;
-		goto bail5;
-	}
-
-	c2_mq_req_init(&qp->sq_mq,
-		       be32_to_cpu(reply->sq_mq_index),
-		       q_size,
-		       msg_size,
-		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
-		       mmap,				/* peer */
-		       C2_MQ_ADAPTER_TARGET);
-
-	/* Initialize the RQ mq */
-	q_size = be32_to_cpu(reply->rq_depth);
-	msg_size = be32_to_cpu(reply->rq_msg_size);
-	peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
-	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
-	mmap = ioremap_nocache(peer_pa, mmap_size);
-	if (!mmap) {
-		err = -ENOMEM;
-		goto bail6;
-	}
-
-	c2_mq_req_init(&qp->rq_mq,
-		       be32_to_cpu(reply->rq_mq_index),
-		       q_size,
-		       msg_size,
-		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
-		       mmap,				/* peer */
-		       C2_MQ_ADAPTER_TARGET);
-
-	vq_repbuf_free(c2dev, reply);
-	vq_req_free(c2dev, vq_req);
-
-	return 0;
-
-bail6:
-	iounmap(qp->sq_mq.peer);
-bail5:
-	destroy_qp(c2dev, qp);
-bail4:
-	vq_repbuf_free(c2dev, reply);
-bail3:
-	vq_req_free(c2dev, vq_req);
-bail2:
-	c2_free_mqsp(qp->rq_mq.shared);
-bail1:
-	c2_free_mqsp(qp->sq_mq.shared);
-bail0:
-	c2_free_qpn(c2dev, qp->qpn);
-	return err;
-}
-
-static inline void c2_lock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
-{
-	if (send_cq == recv_cq)
-		spin_lock_irq(&send_cq->lock);
-	else if (send_cq > recv_cq) {
-		spin_lock_irq(&send_cq->lock);
-		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
-	} else {
-		spin_lock_irq(&recv_cq->lock);
-		spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
-	}
-}
-
-static inline void c2_unlock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
-{
-	if (send_cq == recv_cq)
-		spin_unlock_irq(&send_cq->lock);
-	else if (send_cq > recv_cq) {
-		spin_unlock(&recv_cq->lock);
-		spin_unlock_irq(&send_cq->lock);
-	} else {
-		spin_unlock(&send_cq->lock);
-		spin_unlock_irq(&recv_cq->lock);
-	}
-}
-
-void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
-{
-	struct c2_cq *send_cq;
-	struct c2_cq *recv_cq;
-
-	send_cq = to_c2cq(qp->ibqp.send_cq);
-	recv_cq = to_c2cq(qp->ibqp.recv_cq);
-
-	/*
-	 * Lock CQs here, so that CQ polling code can do QP lookup
-	 * without taking a lock.
-	 */
-	c2_lock_cqs(send_cq, recv_cq);
-	c2_free_qpn(c2dev, qp->qpn);
-	c2_unlock_cqs(send_cq, recv_cq);
-
-	/*
-	 * Destroy qp in the rnic...
-	 */
-	destroy_qp(c2dev, qp);
-
-	/*
-	 * Mark any unreaped CQEs as null and void.
-	 */
-	c2_cq_clean(c2dev, qp, send_cq->cqn);
-	if (send_cq != recv_cq)
-		c2_cq_clean(c2dev, qp, recv_cq->cqn);
-	/*
-	 * Unmap the MQs and return the shared pointers
-	 * to the message pool.
-	 */
-	iounmap(qp->sq_mq.peer);
-	iounmap(qp->rq_mq.peer);
-	c2_free_mqsp(qp->sq_mq.shared);
-	c2_free_mqsp(qp->rq_mq.shared);
-
-	atomic_dec(&qp->refcount);
-	wait_event(qp->wait, !atomic_read(&qp->refcount));
-}
-
-/*
- * Function: move_sgl
- *
- * Description:
- * Move an SGL from the user's work request struct into a CCIL Work Request
- * message, swapping to WR byte order and ensure the total length doesn't
- * overflow.
- *
- * IN:
- * dst		- ptr to CCIL Work Request message SGL memory.
- * src		- ptr to the consumers SGL memory.
- *
- * OUT: none
- *
- * Return:
- * CCIL status codes.
- */
-static int
-move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
-	 u8 * actual_count)
-{
-	u32 tot = 0;		/* running total */
-	u8 acount = 0;		/* running total non-0 len sge's */
-
-	while (count > 0) {
-		/*
-		 * If the addition of this SGE causes the
-		 * total SGL length to exceed 2^32-1, then
-		 * fail-n-bail.
-		 *
-		 * If the current total plus the next element length
-		 * wraps, then it will go negative and be less than the
-		 * current total...
-		 */
-		if ((tot + src->length) < tot) {
-			return -EINVAL;
-		}
-		/*
-		 * Bug: 1456 (as well as 1498 & 1643)
-		 * Skip over any sge's supplied with len=0
-		 */
-		if (src->length) {
-			tot += src->length;
-			dst->stag = cpu_to_be32(src->lkey);
-			dst->to = cpu_to_be64(src->addr);
-			dst->length = cpu_to_be32(src->length);
-			dst++;
-			acount++;
-		}
-		src++;
-		count--;
-	}
-
-	if (acount == 0) {
-		/*
-		 * Bug: 1476 (as well as 1498, 1456 and 1643)
-		 * Setup the SGL in the WR to make it easier for the RNIC.
-		 * This way, the FW doesn't have to deal with special cases.
-		 * Setting length=0 should be sufficient.
-		 */
-		dst->stag = 0;
-		dst->to = 0;
-		dst->length = 0;
-	}
-
-	*p_len = tot;
-	*actual_count = acount;
-	return 0;
-}
-
-/*
- * Function: c2_activity (private function)
- *
- * Description:
- * Post an mq index to the host->adapter activity fifo.
- *
- * IN:
- * c2dev	- ptr to c2dev structure
- * mq_index	- mq index to post
- * shared	- value most recently written to shared
- *
- * OUT:
- *
- * Return:
- * none
- */
-static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
-{
-	/*
-	 * First read the register to see if the FIFO is full, and if so,
-	 * spin until it's not.  This isn't perfect -- there is no
-	 * synchronization among the clients of the register, but in
-	 * practice it prevents multiple CPU from hammering the bus
-	 * with PCI RETRY. Note that when this does happen, the card
-	 * cannot get on the bus and the card and system hang in a
-	 * deadlock -- thus the need for this code. [TOT]
-	 */
-	while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000)
-		udelay(10);
-
-	__raw_writel(C2_HINT_MAKE(mq_index, shared),
-		     c2dev->regs + PCI_BAR0_ADAPTER_HINT);
-}
-
-/*
- * Function: qp_wr_post
- *
- * Description:
- * This in-line function allocates a MQ msg, then moves the host-copy of
- * the completed WR into msg.  Then it posts the message.
- *
- * IN:
- * q		- ptr to user MQ.
- * wr		- ptr to host-copy of the WR.
- * qp		- ptr to user qp
- * size		- Number of bytes to post.  Assumed to be divisible by 4.
- *
- * OUT: none
- *
- * Return:
- * CCIL status codes.
- */
-static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
-{
-	union c2wr *msg;
-
-	msg = c2_mq_alloc(q);
-	if (msg == NULL) {
-		return -EINVAL;
-	}
-#ifdef CCMSGMAGIC
-	((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
-#endif
-
-	/*
-	 * Since all header fields in the WR are the same as the
-	 * CQE, set the following so the adapter need not.
-	 */
-	c2_wr_set_result(wr, CCERR_PENDING);
-
-	/*
-	 * Copy the wr down to the adapter
-	 */
-	memcpy((void *) msg, (void *) wr, size);
-
-	c2_mq_produce(q);
-	return 0;
-}
-
-
-int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-		 struct ib_send_wr **bad_wr)
-{
-	struct c2_dev *c2dev = to_c2dev(ibqp->device);
-	struct c2_qp *qp = to_c2qp(ibqp);
-	union c2wr wr;
-	unsigned long lock_flags;
-	int err = 0;
-
-	u32 flags;
-	u32 tot_len;
-	u8 actual_sge_count;
-	u32 msg_size;
-
-	if (qp->state > IB_QPS_RTS) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	while (ib_wr) {
-
-		flags = 0;
-		wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
-		if (ib_wr->send_flags & IB_SEND_SIGNALED) {
-			flags |= SQ_SIGNALED;
-		}
-
-		switch (ib_wr->opcode) {
-		case IB_WR_SEND:
-		case IB_WR_SEND_WITH_INV:
-			if (ib_wr->opcode == IB_WR_SEND) {
-				if (ib_wr->send_flags & IB_SEND_SOLICITED)
-					c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
-				else
-					c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
-				wr.sqwr.send.remote_stag = 0;
-			} else {
-				if (ib_wr->send_flags & IB_SEND_SOLICITED)
-					c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE_INV);
-				else
-					c2_wr_set_id(&wr, C2_WR_TYPE_SEND_INV);
-				wr.sqwr.send.remote_stag =
-					cpu_to_be32(ib_wr->ex.invalidate_rkey);
-			}
-
-			msg_size = sizeof(struct c2wr_send_req) +
-				sizeof(struct c2_data_addr) * ib_wr->num_sge;
-			if (ib_wr->num_sge > qp->send_sgl_depth) {
-				err = -EINVAL;
-				break;
-			}
-			if (ib_wr->send_flags & IB_SEND_FENCE) {
-				flags |= SQ_READ_FENCE;
-			}
-			err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
-				       ib_wr->sg_list,
-				       ib_wr->num_sge,
-				       &tot_len, &actual_sge_count);
-			wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
-			c2_wr_set_sge_count(&wr, actual_sge_count);
-			break;
-		case IB_WR_RDMA_WRITE:
-			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
-			msg_size = sizeof(struct c2wr_rdma_write_req) +
-			    (sizeof(struct c2_data_addr) * ib_wr->num_sge);
-			if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
-				err = -EINVAL;
-				break;
-			}
-			if (ib_wr->send_flags & IB_SEND_FENCE) {
-				flags |= SQ_READ_FENCE;
-			}
-			wr.sqwr.rdma_write.remote_stag =
-			    cpu_to_be32(rdma_wr(ib_wr)->rkey);
-			wr.sqwr.rdma_write.remote_to =
-			    cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
-			err = move_sgl((struct c2_data_addr *)
-				       & (wr.sqwr.rdma_write.data),
-				       ib_wr->sg_list,
-				       ib_wr->num_sge,
-				       &tot_len, &actual_sge_count);
-			wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
-			c2_wr_set_sge_count(&wr, actual_sge_count);
-			break;
-		case IB_WR_RDMA_READ:
-			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
-			msg_size = sizeof(struct c2wr_rdma_read_req);
-
-			/* IWarp only suppots 1 sge for RDMA reads */
-			if (ib_wr->num_sge > 1) {
-				err = -EINVAL;
-				break;
-			}
-
-			/*
-			 * Move the local and remote stag/to/len into the WR.
-			 */
-			wr.sqwr.rdma_read.local_stag =
-			    cpu_to_be32(ib_wr->sg_list->lkey);
-			wr.sqwr.rdma_read.local_to =
-			    cpu_to_be64(ib_wr->sg_list->addr);
-			wr.sqwr.rdma_read.remote_stag =
-			    cpu_to_be32(rdma_wr(ib_wr)->rkey);
-			wr.sqwr.rdma_read.remote_to =
-			    cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
-			wr.sqwr.rdma_read.length =
-			    cpu_to_be32(ib_wr->sg_list->length);
-			break;
-		default:
-			/* error */
-			msg_size = 0;
-			err = -EINVAL;
-			break;
-		}
-
-		/*
-		 * If we had an error on the last wr build, then
-		 * break out.  Possible errors include bogus WR
-		 * type, and a bogus SGL length...
-		 */
-		if (err) {
-			break;
-		}
-
-		/*
-		 * Store flags
-		 */
-		c2_wr_set_flags(&wr, flags);
-
-		/*
-		 * Post the puppy!
-		 */
-		spin_lock_irqsave(&qp->lock, lock_flags);
-		err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
-		if (err) {
-			spin_unlock_irqrestore(&qp->lock, lock_flags);
-			break;
-		}
-
-		/*
-		 * Enqueue mq index to activity FIFO.
-		 */
-		c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
-		spin_unlock_irqrestore(&qp->lock, lock_flags);
-
-		ib_wr = ib_wr->next;
-	}
-
-out:
-	if (err)
-		*bad_wr = ib_wr;
-	return err;
-}
-
-int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-		    struct ib_recv_wr **bad_wr)
-{
-	struct c2_dev *c2dev = to_c2dev(ibqp->device);
-	struct c2_qp *qp = to_c2qp(ibqp);
-	union c2wr wr;
-	unsigned long lock_flags;
-	int err = 0;
-
-	if (qp->state > IB_QPS_RTS) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	/*
-	 * Try and post each work request
-	 */
-	while (ib_wr) {
-		u32 tot_len;
-		u8 actual_sge_count;
-
-		if (ib_wr->num_sge > qp->recv_sgl_depth) {
-			err = -EINVAL;
-			break;
-		}
-
-		/*
-		 * Create local host-copy of the WR
-		 */
-		wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
-		c2_wr_set_id(&wr, CCWR_RECV);
-		c2_wr_set_flags(&wr, 0);
-
-		/* sge_count is limited to eight bits. */
-		BUG_ON(ib_wr->num_sge >= 256);
-		err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
-			       ib_wr->sg_list,
-			       ib_wr->num_sge, &tot_len, &actual_sge_count);
-		c2_wr_set_sge_count(&wr, actual_sge_count);
-
-		/*
-		 * If we had an error on the last wr build, then
-		 * break out.  Possible errors include bogus WR
-		 * type, and a bogus SGL length...
-		 */
-		if (err) {
-			break;
-		}
-
-		spin_lock_irqsave(&qp->lock, lock_flags);
-		err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
-		if (err) {
-			spin_unlock_irqrestore(&qp->lock, lock_flags);
-			break;
-		}
-
-		/*
-		 * Enqueue mq index to activity FIFO
-		 */
-		c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
-		spin_unlock_irqrestore(&qp->lock, lock_flags);
-
-		ib_wr = ib_wr->next;
-	}
-
-out:
-	if (err)
-		*bad_wr = ib_wr;
-	return err;
-}
-
-void c2_init_qp_table(struct c2_dev *c2dev)
-{
-	spin_lock_init(&c2dev->qp_table.lock);
-	idr_init(&c2dev->qp_table.idr);
-}
-
-void c2_cleanup_qp_table(struct c2_dev *c2dev)
-{
-	idr_destroy(&c2dev->qp_table.idr);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_rnic.c b/drivers/staging/rdma/amso1100/c2_rnic.c
deleted file mode 100644
index 5e65c6d07ca4..000000000000
--- a/drivers/staging/rdma/amso1100/c2_rnic.c
+++ /dev/null
@@ -1,652 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/inet.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-
-#include <linux/route.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-#include <rdma/ib_smi.h>
-#include "c2.h"
-#include "c2_vq.h"
-
-/* Device capabilities */
-#define C2_MIN_PAGESIZE  1024
-
-#define C2_MAX_MRS       32768
-#define C2_MAX_QPS       16000
-#define C2_MAX_WQE_SZ    256
-#define C2_MAX_QP_WR     ((128*1024)/C2_MAX_WQE_SZ)
-#define C2_MAX_SGES      4
-#define C2_MAX_SGE_RD    1
-#define C2_MAX_CQS       32768
-#define C2_MAX_CQES      4096
-#define C2_MAX_PDS       16384
-
-/*
- * Send the adapter INIT message to the amso1100
- */
-static int c2_adapter_init(struct c2_dev *c2dev)
-{
-	struct c2wr_init_req wr;
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_INIT);
-	wr.hdr.context = 0;
-	wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
-	wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
-	wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
-	wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
-	wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
-	wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
-
-	/* Post the init message */
-	return vq_send_wr(c2dev, (union c2wr *) & wr);
-}
-
-/*
- * Send the adapter TERM message to the amso1100
- */
-static void c2_adapter_term(struct c2_dev *c2dev)
-{
-	struct c2wr_init_req wr;
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_TERM);
-	wr.hdr.context = 0;
-
-	/* Post the init message */
-	vq_send_wr(c2dev, (union c2wr *) & wr);
-	c2dev->init = 0;
-
-	return;
-}
-
-/*
- * Query the adapter
- */
-static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
-{
-	struct c2_vq_req *vq_req;
-	struct c2wr_rnic_query_req wr;
-	struct c2wr_rnic_query_rep *reply;
-	int err;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
-	wr.hdr.context = (unsigned long) vq_req;
-	wr.rnic_handle = c2dev->adapter_handle;
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) &wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail1;
-
-	reply =
-	    (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply)
-		err = -ENOMEM;
-	else
-		err = c2_errno(reply);
-	if (err)
-		goto bail2;
-
-	props->fw_ver =
-		((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
-		((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
-		(be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
-	memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
-	props->max_mr_size         = 0xFFFFFFFF;
-	props->page_size_cap       = ~(C2_MIN_PAGESIZE-1);
-	props->vendor_id           = be32_to_cpu(reply->vendor_id);
-	props->vendor_part_id      = be32_to_cpu(reply->part_number);
-	props->hw_ver              = be32_to_cpu(reply->hw_version);
-	props->max_qp              = be32_to_cpu(reply->max_qps);
-	props->max_qp_wr           = be32_to_cpu(reply->max_qp_depth);
-	props->device_cap_flags    = c2dev->device_cap_flags;
-	props->max_sge             = C2_MAX_SGES;
-	props->max_sge_rd          = C2_MAX_SGE_RD;
-	props->max_cq              = be32_to_cpu(reply->max_cqs);
-	props->max_cqe             = be32_to_cpu(reply->max_cq_depth);
-	props->max_mr              = be32_to_cpu(reply->max_mrs);
-	props->max_pd              = be32_to_cpu(reply->max_pds);
-	props->max_qp_rd_atom      = be32_to_cpu(reply->max_qp_ird);
-	props->max_ee_rd_atom      = 0;
-	props->max_res_rd_atom     = be32_to_cpu(reply->max_global_ird);
-	props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
-	props->max_ee_init_rd_atom = 0;
-	props->atomic_cap          = IB_ATOMIC_NONE;
-	props->max_ee              = 0;
-	props->max_rdd             = 0;
-	props->max_mw              = be32_to_cpu(reply->max_mws);
-	props->max_raw_ipv6_qp     = 0;
-	props->max_raw_ethy_qp     = 0;
-	props->max_mcast_grp       = 0;
-	props->max_mcast_qp_attach = 0;
-	props->max_total_mcast_qp_attach = 0;
-	props->max_ah              = 0;
-	props->max_fmr             = 0;
-	props->max_map_per_fmr     = 0;
-	props->max_srq             = 0;
-	props->max_srq_wr          = 0;
-	props->max_srq_sge         = 0;
-	props->max_pkeys           = 0;
-	props->local_ca_ack_delay  = 0;
-
- bail2:
-	vq_repbuf_free(c2dev, reply);
-
- bail1:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-/*
- * Add an IP address to the RNIC interface
- */
-int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
-{
-	struct c2_vq_req *vq_req;
-	struct c2wr_rnic_setconfig_req *wr;
-	struct c2wr_rnic_setconfig_rep *reply;
-	struct c2_netaddr netaddr;
-	int err, len;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	len = sizeof(struct c2_netaddr);
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
-	wr->hdr.context = (unsigned long) vq_req;
-	wr->rnic_handle = c2dev->adapter_handle;
-	wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
-
-	netaddr.ip_addr = inaddr;
-	netaddr.netmask = inmask;
-	netaddr.mtu = 0;
-
-	memcpy(wr->data, &netaddr, len);
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail1;
-
-	reply =
-	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	err = c2_errno(reply);
-	vq_repbuf_free(c2dev, reply);
-
-bail1:
-	kfree(wr);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-/*
- * Delete an IP address from the RNIC interface
- */
-int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
-{
-	struct c2_vq_req *vq_req;
-	struct c2wr_rnic_setconfig_req *wr;
-	struct c2wr_rnic_setconfig_rep *reply;
-	struct c2_netaddr netaddr;
-	int err, len;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (!vq_req)
-		return -ENOMEM;
-
-	len = sizeof(struct c2_netaddr);
-	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
-	if (!wr) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
-	wr->hdr.context = (unsigned long) vq_req;
-	wr->rnic_handle = c2dev->adapter_handle;
-	wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
-
-	netaddr.ip_addr = inaddr;
-	netaddr.netmask = inmask;
-	netaddr.mtu = 0;
-
-	memcpy(wr->data, &netaddr, len);
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, (union c2wr *) wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail1;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err)
-		goto bail1;
-
-	reply =
-	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	err = c2_errno(reply);
-	vq_repbuf_free(c2dev, reply);
-
-bail1:
-	kfree(wr);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-/*
- * Open a single RNIC instance to use with all
- * low level openib calls
- */
-static int c2_rnic_open(struct c2_dev *c2dev)
-{
-	struct c2_vq_req *vq_req;
-	union c2wr wr;
-	struct c2wr_rnic_open_rep *reply;
-	int err;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (vq_req == NULL) {
-		return -ENOMEM;
-	}
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
-	wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
-	wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
-	wr.rnic_open.req.port_num = cpu_to_be16(0);
-	wr.rnic_open.req.user_context = (unsigned long) c2dev;
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, &wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail0;
-	}
-
-	reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	if ((err = c2_errno(reply)) != 0) {
-		goto bail1;
-	}
-
-	c2dev->adapter_handle = reply->rnic_handle;
-
-bail1:
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-/*
- * Close the RNIC instance
- */
-static int c2_rnic_close(struct c2_dev *c2dev)
-{
-	struct c2_vq_req *vq_req;
-	union c2wr wr;
-	struct c2wr_rnic_close_rep *reply;
-	int err;
-
-	vq_req = vq_req_alloc(c2dev);
-	if (vq_req == NULL) {
-		return -ENOMEM;
-	}
-
-	memset(&wr, 0, sizeof(wr));
-	c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
-	wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
-	wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
-
-	vq_req_get(c2dev, vq_req);
-
-	err = vq_send_wr(c2dev, &wr);
-	if (err) {
-		vq_req_put(c2dev, vq_req);
-		goto bail0;
-	}
-
-	err = vq_wait_for_reply(c2dev, vq_req);
-	if (err) {
-		goto bail0;
-	}
-
-	reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
-	if (!reply) {
-		err = -ENOMEM;
-		goto bail0;
-	}
-
-	if ((err = c2_errno(reply)) != 0) {
-		goto bail1;
-	}
-
-	c2dev->adapter_handle = 0;
-
-bail1:
-	vq_repbuf_free(c2dev, reply);
-bail0:
-	vq_req_free(c2dev, vq_req);
-	return err;
-}
-
-/*
- * Called by c2_probe to initialize the RNIC. This principally
- * involves initializing the various limits and resource pools that
- * comprise the RNIC instance.
- */
-int c2_rnic_init(struct c2_dev *c2dev)
-{
-	int err;
-	u32 qsize, msgsize;
-	void *q1_pages;
-	void *q2_pages;
-	void __iomem *mmio_regs;
-
-	/* Device capabilities */
-	c2dev->device_cap_flags =
-	    (IB_DEVICE_RESIZE_MAX_WR |
-	     IB_DEVICE_CURR_QP_STATE_MOD |
-	     IB_DEVICE_SYS_IMAGE_GUID |
-	     IB_DEVICE_LOCAL_DMA_LKEY |
-	     IB_DEVICE_MEM_WINDOW);
-
-	/* Allocate the qptr_array */
-	c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *));
-	if (!c2dev->qptr_array) {
-		return -ENOMEM;
-	}
-
-	/* Initialize the qptr_array */
-	c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
-	c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
-	c2dev->qptr_array[2] = (void *) &c2dev->aeq;
-
-	/* Initialize data structures */
-	init_waitqueue_head(&c2dev->req_vq_wo);
-	spin_lock_init(&c2dev->vqlock);
-	spin_lock_init(&c2dev->lock);
-
-	/* Allocate MQ shared pointer pool for kernel clients. User
-	 * mode client pools are hung off the user context
-	 */
-	err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
-	if (err) {
-		goto bail0;
-	}
-
-	/* Allocate shared pointers for Q0, Q1, and Q2 from
-	 * the shared pointer pool.
-	 */
-
-	c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					     &c2dev->hint_count_dma,
-					     GFP_KERNEL);
-	c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					     &c2dev->req_vq.shared_dma,
-					     GFP_KERNEL);
-	c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					     &c2dev->rep_vq.shared_dma,
-					     GFP_KERNEL);
-	c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
-					  &c2dev->aeq.shared_dma, GFP_KERNEL);
-	if (!c2dev->hint_count || !c2dev->req_vq.shared ||
-	    !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-
-	mmio_regs = c2dev->kva;
-	/* Initialize the Verbs Request Queue */
-	c2_mq_req_init(&c2dev->req_vq, 0,
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_QSIZE)),
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
-		       mmio_regs +
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
-		       mmio_regs +
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_SHARED)),
-		       C2_MQ_ADAPTER_TARGET);
-
-	/* Initialize the Verbs Reply Queue */
-	qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_QSIZE));
-	msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
-	q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
-				      &c2dev->rep_vq.host_dma, GFP_KERNEL);
-	if (!q1_pages) {
-		err = -ENOMEM;
-		goto bail1;
-	}
-	dma_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
-	pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages,
-		 (unsigned long long) c2dev->rep_vq.host_dma);
-	c2_mq_rep_init(&c2dev->rep_vq,
-		   1,
-		   qsize,
-		   msgsize,
-		   q1_pages,
-		   mmio_regs +
-		   be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_SHARED)),
-		   C2_MQ_HOST_TARGET);
-
-	/* Initialize the Asynchronus Event Queue */
-	qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_QSIZE));
-	msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
-	q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
-				      &c2dev->aeq.host_dma, GFP_KERNEL);
-	if (!q2_pages) {
-		err = -ENOMEM;
-		goto bail2;
-	}
-	dma_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
-	pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages,
-		 (unsigned long long) c2dev->aeq.host_dma);
-	c2_mq_rep_init(&c2dev->aeq,
-		       2,
-		       qsize,
-		       msgsize,
-		       q2_pages,
-		       mmio_regs +
-		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_SHARED)),
-		       C2_MQ_HOST_TARGET);
-
-	/* Initialize the verbs request allocator */
-	err = vq_init(c2dev);
-	if (err)
-		goto bail3;
-
-	/* Enable interrupts on the adapter */
-	writel(0, c2dev->regs + C2_IDIS);
-
-	/* create the WR init message */
-	err = c2_adapter_init(c2dev);
-	if (err)
-		goto bail4;
-	c2dev->init++;
-
-	/* open an adapter instance */
-	err = c2_rnic_open(c2dev);
-	if (err)
-		goto bail4;
-
-	/* Initialize cached the adapter limits */
-	err = c2_rnic_query(c2dev, &c2dev->props);
-	if (err)
-		goto bail5;
-
-	/* Initialize the PD pool */
-	err = c2_init_pd_table(c2dev);
-	if (err)
-		goto bail5;
-
-	/* Initialize the QP pool */
-	c2_init_qp_table(c2dev);
-	return 0;
-
-bail5:
-	c2_rnic_close(c2dev);
-bail4:
-	vq_term(c2dev);
-bail3:
-	dma_free_coherent(&c2dev->pcidev->dev,
-			  c2dev->aeq.q_size * c2dev->aeq.msg_size,
-			  q2_pages, dma_unmap_addr(&c2dev->aeq, mapping));
-bail2:
-	dma_free_coherent(&c2dev->pcidev->dev,
-			  c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
-			  q1_pages, dma_unmap_addr(&c2dev->rep_vq, mapping));
-bail1:
-	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
-bail0:
-	vfree(c2dev->qptr_array);
-
-	return err;
-}
-
-/*
- * Called by c2_remove to cleanup the RNIC resources.
- */
-void c2_rnic_term(struct c2_dev *c2dev)
-{
-
-	/* Close the open adapter instance */
-	c2_rnic_close(c2dev);
-
-	/* Send the TERM message to the adapter */
-	c2_adapter_term(c2dev);
-
-	/* Disable interrupts on the adapter */
-	writel(1, c2dev->regs + C2_IDIS);
-
-	/* Free the QP pool */
-	c2_cleanup_qp_table(c2dev);
-
-	/* Free the PD pool */
-	c2_cleanup_pd_table(c2dev);
-
-	/* Free the verbs request allocator */
-	vq_term(c2dev);
-
-	/* Free the asynchronus event queue */
-	dma_free_coherent(&c2dev->pcidev->dev,
-			  c2dev->aeq.q_size * c2dev->aeq.msg_size,
-			  c2dev->aeq.msg_pool.host,
-			  dma_unmap_addr(&c2dev->aeq, mapping));
-
-	/* Free the verbs reply queue */
-	dma_free_coherent(&c2dev->pcidev->dev,
-			  c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
-			  c2dev->rep_vq.msg_pool.host,
-			  dma_unmap_addr(&c2dev->rep_vq, mapping));
-
-	/* Free the MQ shared pointer pool */
-	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
-
-	/* Free the qptr_array */
-	vfree(c2dev->qptr_array);
-
-	return;
-}
diff --git a/drivers/staging/rdma/amso1100/c2_status.h b/drivers/staging/rdma/amso1100/c2_status.h
deleted file mode 100644
index 6ee4aa92d875..000000000000
--- a/drivers/staging/rdma/amso1100/c2_status.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef	_C2_STATUS_H_
-#define _C2_STATUS_H_
-
-/*
- * Verbs Status Codes
- */
-enum c2_status {
-	C2_OK = 0,		/* This must be zero */
-	CCERR_INSUFFICIENT_RESOURCES = 1,
-	CCERR_INVALID_MODIFIER = 2,
-	CCERR_INVALID_MODE = 3,
-	CCERR_IN_USE = 4,
-	CCERR_INVALID_RNIC = 5,
-	CCERR_INTERRUPTED_OPERATION = 6,
-	CCERR_INVALID_EH = 7,
-	CCERR_INVALID_CQ = 8,
-	CCERR_CQ_EMPTY = 9,
-	CCERR_NOT_IMPLEMENTED = 10,
-	CCERR_CQ_DEPTH_TOO_SMALL = 11,
-	CCERR_PD_IN_USE = 12,
-	CCERR_INVALID_PD = 13,
-	CCERR_INVALID_SRQ = 14,
-	CCERR_INVALID_ADDRESS = 15,
-	CCERR_INVALID_NETMASK = 16,
-	CCERR_INVALID_QP = 17,
-	CCERR_INVALID_QP_STATE = 18,
-	CCERR_TOO_MANY_WRS_POSTED = 19,
-	CCERR_INVALID_WR_TYPE = 20,
-	CCERR_INVALID_SGL_LENGTH = 21,
-	CCERR_INVALID_SQ_DEPTH = 22,
-	CCERR_INVALID_RQ_DEPTH = 23,
-	CCERR_INVALID_ORD = 24,
-	CCERR_INVALID_IRD = 25,
-	CCERR_QP_ATTR_CANNOT_CHANGE = 26,
-	CCERR_INVALID_STAG = 27,
-	CCERR_QP_IN_USE = 28,
-	CCERR_OUTSTANDING_WRS = 29,
-	CCERR_STAG_IN_USE = 30,
-	CCERR_INVALID_STAG_INDEX = 31,
-	CCERR_INVALID_SGL_FORMAT = 32,
-	CCERR_ADAPTER_TIMEOUT = 33,
-	CCERR_INVALID_CQ_DEPTH = 34,
-	CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
-	CCERR_INVALID_EP = 36,
-	CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
-	CCERR_FLUSHED = 38,
-	CCERR_INVALID_WQE = 39,
-	CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
-	CCERR_REMOTE_TERMINATION_ERROR = 41,
-	CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
-	CCERR_ACCESS_VIOLATION = 43,
-	CCERR_INVALID_PD_ID = 44,
-	CCERR_WRAP_ERROR = 45,
-	CCERR_INV_STAG_ACCESS_ERROR = 46,
-	CCERR_ZERO_RDMA_READ_RESOURCES = 47,
-	CCERR_QP_NOT_PRIVILEGED = 48,
-	CCERR_STAG_STATE_NOT_INVALID = 49,
-	CCERR_INVALID_PAGE_SIZE = 50,
-	CCERR_INVALID_BUFFER_SIZE = 51,
-	CCERR_INVALID_PBE = 52,
-	CCERR_INVALID_FBO = 53,
-	CCERR_INVALID_LENGTH = 54,
-	CCERR_INVALID_ACCESS_RIGHTS = 55,
-	CCERR_PBL_TOO_BIG = 56,
-	CCERR_INVALID_VA = 57,
-	CCERR_INVALID_REGION = 58,
-	CCERR_INVALID_WINDOW = 59,
-	CCERR_TOTAL_LENGTH_TOO_BIG = 60,
-	CCERR_INVALID_QP_ID = 61,
-	CCERR_ADDR_IN_USE = 62,
-	CCERR_ADDR_NOT_AVAIL = 63,
-	CCERR_NET_DOWN = 64,
-	CCERR_NET_UNREACHABLE = 65,
-	CCERR_CONN_ABORTED = 66,
-	CCERR_CONN_RESET = 67,
-	CCERR_NO_BUFS = 68,
-	CCERR_CONN_TIMEDOUT = 69,
-	CCERR_CONN_REFUSED = 70,
-	CCERR_HOST_UNREACHABLE = 71,
-	CCERR_INVALID_SEND_SGL_DEPTH = 72,
-	CCERR_INVALID_RECV_SGL_DEPTH = 73,
-	CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
-	CCERR_INSUFFICIENT_PRIVILEGES = 75,
-	CCERR_STACK_ERROR = 76,
-	CCERR_INVALID_VERSION = 77,
-	CCERR_INVALID_MTU = 78,
-	CCERR_INVALID_IMAGE = 79,
-	CCERR_PENDING = 98,	/* not an error; user internally by adapter */
-	CCERR_DEFER = 99,	/* not an error; used internally by adapter */
-	CCERR_FAILED_WRITE = 100,
-	CCERR_FAILED_ERASE = 101,
-	CCERR_FAILED_VERIFICATION = 102,
-	CCERR_NOT_FOUND = 103,
-
-};
-
-/*
- * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
- */
-enum c2_connect_status {
-	C2_CONN_STATUS_SUCCESS = C2_OK,
-	C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
-	C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
-	C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
-	C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
-	C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
-	C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
-	C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
-	C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
-	C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
-	C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
-};
-
-/*
- * Flash programming status codes.
- */
-enum c2_flash_status {
-	C2_FLASH_STATUS_SUCCESS = 0x0000,
-	C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
-	C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
-	C2_FLASH_STATUS_ECLBS = 0x0400,
-	C2_FLASH_STATUS_PSLBS = 0x0800,
-	C2_FLASH_STATUS_VPENS = 0x1000,
-};
-
-#endif				/* _C2_STATUS_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_user.h b/drivers/staging/rdma/amso1100/c2_user.h
deleted file mode 100644
index 7e9e7ad65467..000000000000
--- a/drivers/staging/rdma/amso1100/c2_user.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef C2_USER_H
-#define C2_USER_H
-
-#include <linux/types.h>
-
-/*
- * Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-
-struct c2_alloc_ucontext_resp {
-	__u32 qp_tab_size;
-	__u32 uarc_size;
-};
-
-struct c2_alloc_pd_resp {
-	__u32 pdn;
-	__u32 reserved;
-};
-
-struct c2_create_cq {
-	__u32 lkey;
-	__u32 pdn;
-	__u64 arm_db_page;
-	__u64 set_db_page;
-	__u32 arm_db_index;
-	__u32 set_db_index;
-};
-
-struct c2_create_cq_resp {
-	__u32 cqn;
-	__u32 reserved;
-};
-
-struct c2_create_qp {
-	__u32 lkey;
-	__u32 reserved;
-	__u64 sq_db_page;
-	__u64 rq_db_page;
-	__u32 sq_db_index;
-	__u32 rq_db_index;
-};
-
-#endif				/* C2_USER_H */
diff --git a/drivers/staging/rdma/amso1100/c2_vq.c b/drivers/staging/rdma/amso1100/c2_vq.c
deleted file mode 100644
index 2ec716fb2edb..000000000000
--- a/drivers/staging/rdma/amso1100/c2_vq.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-
-#include "c2_vq.h"
-#include "c2_provider.h"
-
-/*
- * Verbs Request Objects:
- *
- * VQ Request Objects are allocated by the kernel verbs handlers.
- * They contain a wait object, a refcnt, an atomic bool indicating that the
- * adapter has replied, and a copy of the verb reply work request.
- * A pointer to the VQ Request Object is passed down in the context
- * field of the work request message, and reflected back by the adapter
- * in the verbs reply message.  The function handle_vq() in the interrupt
- * path will use this pointer to:
- * 	1) append a copy of the verbs reply message
- * 	2) mark that the reply is ready
- * 	3) wake up the kernel verbs handler blocked awaiting the reply.
- *
- *
- * The kernel verbs handlers do a "get" to put a 2nd reference on the
- * VQ Request object.  If the kernel verbs handler exits before the adapter
- * can respond, this extra reference will keep the VQ Request object around
- * until the adapter's reply can be processed.  The reason we need this is
- * because a pointer to this object is stuffed into the context field of
- * the verbs work request message, and reflected back in the reply message.
- * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
- * kernel verb handler that is blocked awaiting the verb reply.
- * So handle_vq() will do a "put" on the object when it's done accessing it.
- * NOTE:  If we guarantee that the kernel verb handler will never bail before
- *        getting the reply, then we don't need these refcnts.
- *
- *
- * VQ Request objects are freed by the kernel verbs handlers only
- * after the verb has been processed, or when the adapter fails and
- * does not reply.
- *
- *
- * Verbs Reply Buffers:
- *
- * VQ Reply bufs are local host memory copies of a
- * outstanding Verb Request reply
- * message.  The are always allocated by the kernel verbs handlers, and _may_ be
- * freed by either the kernel verbs handler -or- the interrupt handler.  The
- * kernel verbs handler _must_ free the repbuf, then free the vq request object
- * in that order.
- */
-
-int vq_init(struct c2_dev *c2dev)
-{
-	sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
-		(char) ('0' + c2dev->devnum));
-	c2dev->host_msg_cache =
-	    kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
-			      SLAB_HWCACHE_ALIGN, NULL);
-	if (c2dev->host_msg_cache == NULL) {
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-void vq_term(struct c2_dev *c2dev)
-{
-	kmem_cache_destroy(c2dev->host_msg_cache);
-}
-
-/* vq_req_alloc - allocate a VQ Request Object and initialize it.
- * The refcnt is set to 1.
- */
-struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
-{
-	struct c2_vq_req *r;
-
-	r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
-	if (r) {
-		init_waitqueue_head(&r->wait_object);
-		r->reply_msg = 0;
-		r->event = 0;
-		r->cm_id = NULL;
-		r->qp = NULL;
-		atomic_set(&r->refcnt, 1);
-		atomic_set(&r->reply_ready, 0);
-	}
-	return r;
-}
-
-
-/* vq_req_free - free the VQ Request Object.  It is assumed the verbs handler
- * has already free the VQ Reply Buffer if it existed.
- */
-void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-	r->reply_msg = 0;
-	if (atomic_dec_and_test(&r->refcnt)) {
-		kfree(r);
-	}
-}
-
-/* vq_req_get - reference a VQ Request Object.  Done
- * only in the kernel verbs handlers.
- */
-void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-	atomic_inc(&r->refcnt);
-}
-
-
-/* vq_req_put - dereference and potentially free a VQ Request Object.
- *
- * This is only called by handle_vq() on the
- * interrupt when it is done processing
- * a verb reply message.  If the associated
- * kernel verbs handler has already bailed,
- * then this put will actually free the VQ
- * Request object _and_ the VQ Reply Buffer
- * if it exists.
- */
-void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
-{
-	if (atomic_dec_and_test(&r->refcnt)) {
-		if (r->reply_msg != 0)
-			vq_repbuf_free(c2dev,
-				       (void *) (unsigned long) r->reply_msg);
-		kfree(r);
-	}
-}
-
-
-/*
- * vq_repbuf_alloc - allocate a VQ Reply Buffer.
- */
-void *vq_repbuf_alloc(struct c2_dev *c2dev)
-{
-	return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC);
-}
-
-/*
- * vq_send_wr - post a verbs request message to the Verbs Request Queue.
- * If a message is not available in the MQ, then block until one is available.
- * NOTE: handle_mq() on the interrupt context will wake up threads blocked here.
- * When the adapter drains the Verbs Request Queue,
- * it inserts MQ index 0 in to the
- * adapter->host activity fifo and interrupts the host.
- */
-int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
-{
-	void *msg;
-	wait_queue_t __wait;
-
-	/*
-	 * grab adapter vq lock
-	 */
-	spin_lock(&c2dev->vqlock);
-
-	/*
-	 * allocate msg
-	 */
-	msg = c2_mq_alloc(&c2dev->req_vq);
-
-	/*
-	 * If we cannot get a msg, then we'll wait
-	 * When a messages are available, the int handler will wake_up()
-	 * any waiters.
-	 */
-	while (msg == NULL) {
-		pr_debug("%s:%d no available msg in VQ, waiting...\n",
-		       __func__, __LINE__);
-		init_waitqueue_entry(&__wait, current);
-		add_wait_queue(&c2dev->req_vq_wo, &__wait);
-		spin_unlock(&c2dev->vqlock);
-		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (!c2_mq_full(&c2dev->req_vq)) {
-				break;
-			}
-			if (!signal_pending(current)) {
-				schedule_timeout(1 * HZ);	/* 1 second... */
-				continue;
-			}
-			set_current_state(TASK_RUNNING);
-			remove_wait_queue(&c2dev->req_vq_wo, &__wait);
-			return -EINTR;
-		}
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&c2dev->req_vq_wo, &__wait);
-		spin_lock(&c2dev->vqlock);
-		msg = c2_mq_alloc(&c2dev->req_vq);
-	}
-
-	/*
-	 * copy wr into adapter msg
-	 */
-	memcpy(msg, wr, c2dev->req_vq.msg_size);
-
-	/*
-	 * post msg
-	 */
-	c2_mq_produce(&c2dev->req_vq);
-
-	/*
-	 * release adapter vq lock
-	 */
-	spin_unlock(&c2dev->vqlock);
-	return 0;
-}
-
-
-/*
- * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
- */
-int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
-{
-	if (!wait_event_timeout(req->wait_object,
-				atomic_read(&req->reply_ready),
-				60*HZ))
-		return -ETIMEDOUT;
-
-	return 0;
-}
-
-/*
- * vq_repbuf_free - Free a Verbs Reply Buffer.
- */
-void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
-{
-	kmem_cache_free(c2dev->host_msg_cache, reply);
-}
diff --git a/drivers/staging/rdma/amso1100/c2_vq.h b/drivers/staging/rdma/amso1100/c2_vq.h
deleted file mode 100644
index c1f6cef60213..000000000000
--- a/drivers/staging/rdma/amso1100/c2_vq.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_VQ_H_
-#define _C2_VQ_H_
-#include <linux/sched.h>
-#include "c2.h"
-#include "c2_wr.h"
-#include "c2_provider.h"
-
-struct c2_vq_req {
-	u64 reply_msg;		/* ptr to reply msg */
-	wait_queue_head_t wait_object;	/* wait object for vq reqs */
-	atomic_t reply_ready;	/* set when reply is ready */
-	atomic_t refcnt;	/* used to cancel WRs... */
-	int event;
-	struct iw_cm_id *cm_id;
-	struct c2_qp *qp;
-};
-
-int vq_init(struct c2_dev *c2dev);
-void vq_term(struct c2_dev *c2dev);
-
-struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
-void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
-void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
-void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
-int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
-
-void *vq_repbuf_alloc(struct c2_dev *c2dev);
-void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
-
-int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
-#endif				/* _C2_VQ_H_ */
diff --git a/drivers/staging/rdma/amso1100/c2_wr.h b/drivers/staging/rdma/amso1100/c2_wr.h
deleted file mode 100644
index 8d4b4ca463ca..000000000000
--- a/drivers/staging/rdma/amso1100/c2_wr.h
+++ /dev/null
@@ -1,1520 +0,0 @@
-/*
- * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _C2_WR_H_
-#define _C2_WR_H_
-
-#ifdef CCDEBUG
-#define CCWR_MAGIC		0xb07700b0
-#endif
-
-#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
-
-/* Maximum allowed size in bytes of private_data exchange
- * on connect.
- */
-#define C2_MAX_PRIVATE_DATA_SIZE 200
-
-/*
- * These types are shared among the adapter, host, and CCIL consumer.
- */
-enum c2_cq_notification_type {
-	C2_CQ_NOTIFICATION_TYPE_NONE = 1,
-	C2_CQ_NOTIFICATION_TYPE_NEXT,
-	C2_CQ_NOTIFICATION_TYPE_NEXT_SE
-};
-
-enum c2_setconfig_cmd {
-	C2_CFG_ADD_ADDR = 1,
-	C2_CFG_DEL_ADDR = 2,
-	C2_CFG_ADD_ROUTE = 3,
-	C2_CFG_DEL_ROUTE = 4
-};
-
-enum c2_getconfig_cmd {
-	C2_GETCONFIG_ROUTES = 1,
-	C2_GETCONFIG_ADDRS
-};
-
-/*
- *  CCIL Work Request Identifiers
- */
-enum c2wr_ids {
-	CCWR_RNIC_OPEN = 1,
-	CCWR_RNIC_QUERY,
-	CCWR_RNIC_SETCONFIG,
-	CCWR_RNIC_GETCONFIG,
-	CCWR_RNIC_CLOSE,
-	CCWR_CQ_CREATE,
-	CCWR_CQ_QUERY,
-	CCWR_CQ_MODIFY,
-	CCWR_CQ_DESTROY,
-	CCWR_QP_CONNECT,
-	CCWR_PD_ALLOC,
-	CCWR_PD_DEALLOC,
-	CCWR_SRQ_CREATE,
-	CCWR_SRQ_QUERY,
-	CCWR_SRQ_MODIFY,
-	CCWR_SRQ_DESTROY,
-	CCWR_QP_CREATE,
-	CCWR_QP_QUERY,
-	CCWR_QP_MODIFY,
-	CCWR_QP_DESTROY,
-	CCWR_NSMR_STAG_ALLOC,
-	CCWR_NSMR_REGISTER,
-	CCWR_NSMR_PBL,
-	CCWR_STAG_DEALLOC,
-	CCWR_NSMR_REREGISTER,
-	CCWR_SMR_REGISTER,
-	CCWR_MR_QUERY,
-	CCWR_MW_ALLOC,
-	CCWR_MW_QUERY,
-	CCWR_EP_CREATE,
-	CCWR_EP_GETOPT,
-	CCWR_EP_SETOPT,
-	CCWR_EP_DESTROY,
-	CCWR_EP_BIND,
-	CCWR_EP_CONNECT,
-	CCWR_EP_LISTEN,
-	CCWR_EP_SHUTDOWN,
-	CCWR_EP_LISTEN_CREATE,
-	CCWR_EP_LISTEN_DESTROY,
-	CCWR_EP_QUERY,
-	CCWR_CR_ACCEPT,
-	CCWR_CR_REJECT,
-	CCWR_CONSOLE,
-	CCWR_TERM,
-	CCWR_FLASH_INIT,
-	CCWR_FLASH,
-	CCWR_BUF_ALLOC,
-	CCWR_BUF_FREE,
-	CCWR_FLASH_WRITE,
-	CCWR_INIT,		/* WARNING: Don't move this ever again! */
-
-
-
-	/* Add new IDs here */
-
-
-
-	/*
-	 * WARNING: CCWR_LAST must always be the last verbs id defined!
-	 *          All the preceding IDs are fixed, and must not change.
-	 *          You can add new IDs, but must not remove or reorder
-	 *          any IDs. If you do, YOU will ruin any hope of
-	 *          compatibility between versions.
-	 */
-	CCWR_LAST,
-
-	/*
-	 * Start over at 1 so that arrays indexed by user wr id's
-	 * begin at 1.  This is OK since the verbs and user wr id's
-	 * are always used on disjoint sets of queues.
-	 */
-	/*
-	 * The order of the CCWR_SEND_XX verbs must
-	 * match the order of the RDMA_OPs
-	 */
-	CCWR_SEND = 1,
-	CCWR_SEND_INV,
-	CCWR_SEND_SE,
-	CCWR_SEND_SE_INV,
-	CCWR_RDMA_WRITE,
-	CCWR_RDMA_READ,
-	CCWR_RDMA_READ_INV,
-	CCWR_MW_BIND,
-	CCWR_NSMR_FASTREG,
-	CCWR_STAG_INVALIDATE,
-	CCWR_RECV,
-	CCWR_NOP,
-	CCWR_UNIMPL,
-/* WARNING: This must always be the last user wr id defined! */
-};
-#define RDMA_SEND_OPCODE_FROM_WR_ID(x)   (x+2)
-
-/*
- * SQ/RQ Work Request Types
- */
-enum c2_wr_type {
-	C2_WR_TYPE_SEND = CCWR_SEND,
-	C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
-	C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
-	C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
-	C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
-	C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
-	C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
-	C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
-	C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
-	C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
-	C2_WR_TYPE_RECV = CCWR_RECV,
-	C2_WR_TYPE_NOP = CCWR_NOP,
-};
-
-struct c2_netaddr {
-	__be32 ip_addr;
-	__be32 netmask;
-	u32 mtu;
-};
-
-struct c2_route {
-	u32 ip_addr;		/* 0 indicates the default route */
-	u32 netmask;		/* netmask associated with dst */
-	u32 flags;
-	union {
-		u32 ipaddr;	/* address of the nexthop interface */
-		u8 enaddr[6];
-	} nexthop;
-};
-
-/*
- * A Scatter Gather Entry.
- */
-struct c2_data_addr {
-	__be32 stag;
-	__be32 length;
-	__be64 to;
-};
-
-/*
- * MR and MW flags used by the consumer, RI, and RNIC.
- */
-enum c2_mm_flags {
-	MEM_REMOTE = 0x0001,	/* allow mw binds with remote access. */
-	MEM_VA_BASED = 0x0002,	/* Not Zero-based */
-	MEM_PBL_COMPLETE = 0x0004,	/* PBL array is complete in this msg */
-	MEM_LOCAL_READ = 0x0008,	/* allow local reads */
-	MEM_LOCAL_WRITE = 0x0010,	/* allow local writes */
-	MEM_REMOTE_READ = 0x0020,	/* allow remote reads */
-	MEM_REMOTE_WRITE = 0x0040,	/* allow remote writes */
-	MEM_WINDOW_BIND = 0x0080,	/* binds allowed */
-	MEM_SHARED = 0x0100,	/* set if MR is shared */
-	MEM_STAG_VALID = 0x0200	/* set if STAG is in valid state */
-};
-
-/*
- * CCIL API ACF flags defined in terms of the low level mem flags.
- * This minimizes translation needed in the user API
- */
-enum c2_acf {
-	C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
-	C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
-	C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
-	C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
-	C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
-};
-
-/*
- * Image types of objects written to flash
- */
-#define C2_FLASH_IMG_BITFILE 1
-#define C2_FLASH_IMG_OPTION_ROM 2
-#define C2_FLASH_IMG_VPD 3
-
-/*
- *  to fix bug 1815 we define the max size allowable of the
- *  terminate message (per the IETF spec).Refer to the IETF
- *  protocol specification, section 12.1.6, page 64)
- *  The message is prefixed by 20 types of DDP info.
- *
- *  Then the message has 6 bytes for the terminate control
- *  and DDP segment length info plus a DDP header (either
- *  14 or 18 byts) plus 28 bytes for the RDMA header.
- *  Thus the max size in:
- *  20 + (6 + 18 + 28) = 72
- */
-#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
-
-/*
- * Build String Length.  It must be the same as C2_BUILD_STR_LEN in ccil_api.h
- */
-#define WR_BUILD_STR_LEN 64
-
-/*
- * WARNING:  All of these structs need to align any 64bit types on
- * 64 bit boundaries!  64bit types include u64 and u64.
- */
-
-/*
- * Clustercore Work Request Header.  Be sensitive to field layout
- * and alignment.
- */
-struct c2wr_hdr {
-	/* wqe_count is part of the cqe.  It is put here so the
-	 * adapter can write to it while the wr is pending without
-	 * clobbering part of the wr.  This word need not be dma'd
-	 * from the host to adapter by libccil, but we copy it anyway
-	 * to make the memcpy to the adapter better aligned.
-	 */
-	__be32 wqe_count;
-
-	/* Put these fields next so that later 32- and 64-bit
-	 * quantities are naturally aligned.
-	 */
-	u8 id;
-	u8 result;		/* adapter -> host */
-	u8 sge_count;		/* host -> adapter */
-	u8 flags;		/* host -> adapter */
-
-	u64 context;
-#ifdef CCMSGMAGIC
-	u32 magic;
-	u32 pad;
-#endif
-} __attribute__((packed));
-
-/*
- *------------------------ RNIC ------------------------
- */
-
-/*
- * WR_RNIC_OPEN
- */
-
-/*
- * Flags for the RNIC WRs
- */
-enum c2_rnic_flags {
-	RNIC_IRD_STATIC = 0x0001,
-	RNIC_ORD_STATIC = 0x0002,
-	RNIC_QP_STATIC = 0x0004,
-	RNIC_SRQ_SUPPORTED = 0x0008,
-	RNIC_PBL_BLOCK_MODE = 0x0010,
-	RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
-	RNIC_CQ_OVF_DETECTED = 0x0040,
-	RNIC_PRIV_MODE = 0x0080
-};
-
-struct c2wr_rnic_open_req {
-	struct c2wr_hdr hdr;
-	u64 user_context;
-	__be16 flags;		/* See enum c2_rnic_flags */
-	__be16 port_num;
-} __attribute__((packed));
-
-struct c2wr_rnic_open_rep {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-} __attribute__((packed));
-
-union c2wr_rnic_open {
-	struct c2wr_rnic_open_req req;
-	struct c2wr_rnic_open_rep rep;
-} __attribute__((packed));
-
-struct c2wr_rnic_query_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-} __attribute__((packed));
-
-/*
- * WR_RNIC_QUERY
- */
-struct c2wr_rnic_query_rep {
-	struct c2wr_hdr hdr;
-	u64 user_context;
-	__be32 vendor_id;
-	__be32 part_number;
-	__be32 hw_version;
-	__be32 fw_ver_major;
-	__be32 fw_ver_minor;
-	__be32 fw_ver_patch;
-	char fw_ver_build_str[WR_BUILD_STR_LEN];
-	__be32 max_qps;
-	__be32 max_qp_depth;
-	u32 max_srq_depth;
-	u32 max_send_sgl_depth;
-	u32 max_rdma_sgl_depth;
-	__be32 max_cqs;
-	__be32 max_cq_depth;
-	u32 max_cq_event_handlers;
-	__be32 max_mrs;
-	u32 max_pbl_depth;
-	__be32 max_pds;
-	__be32 max_global_ird;
-	u32 max_global_ord;
-	__be32 max_qp_ird;
-	__be32 max_qp_ord;
-	u32 flags;
-	__be32 max_mws;
-	u32 pbe_range_low;
-	u32 pbe_range_high;
-	u32 max_srqs;
-	u32 page_size;
-} __attribute__((packed));
-
-union c2wr_rnic_query {
-	struct c2wr_rnic_query_req req;
-	struct c2wr_rnic_query_rep rep;
-} __attribute__((packed));
-
-/*
- * WR_RNIC_GETCONFIG
- */
-
-struct c2wr_rnic_getconfig_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 option;		/* see c2_getconfig_cmd_t */
-	u64 reply_buf;
-	u32 reply_buf_len;
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_getconfig_rep {
-	struct c2wr_hdr hdr;
-	u32 option;		/* see c2_getconfig_cmd_t */
-	u32 count_len;		/* length of the number of addresses configured */
-} __attribute__((packed)) ;
-
-union c2wr_rnic_getconfig {
-	struct c2wr_rnic_getconfig_req req;
-	struct c2wr_rnic_getconfig_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * WR_RNIC_SETCONFIG
- */
-struct c2wr_rnic_setconfig_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	__be32 option;		/* See c2_setconfig_cmd_t */
-	/* variable data and pad. See c2_netaddr and c2_route */
-	u8 data[0];
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_setconfig_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_rnic_setconfig {
-	struct c2wr_rnic_setconfig_req req;
-	struct c2wr_rnic_setconfig_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * WR_RNIC_CLOSE
- */
-struct c2wr_rnic_close_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_rnic_close_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_rnic_close {
-	struct c2wr_rnic_close_req req;
-	struct c2wr_rnic_close_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ CQ ------------------------
- */
-struct c2wr_cq_create_req {
-	struct c2wr_hdr hdr;
-	__be64 shared_ht;
-	u64 user_context;
-	__be64 msg_pool;
-	u32 rnic_handle;
-	__be32 msg_size;
-	__be32 depth;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_create_rep {
-	struct c2wr_hdr hdr;
-	__be32 mq_index;
-	__be32 adapter_shared;
-	u32 cq_handle;
-} __attribute__((packed)) ;
-
-union c2wr_cq_create {
-	struct c2wr_cq_create_req req;
-	struct c2wr_cq_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_modify_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 cq_handle;
-	u32 new_depth;
-	u64 new_msg_pool;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_modify_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_cq_modify {
-	struct c2wr_cq_modify_req req;
-	struct c2wr_cq_modify_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_destroy_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 cq_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_cq_destroy_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_cq_destroy {
-	struct c2wr_cq_destroy_req req;
-	struct c2wr_cq_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ PD ------------------------
- */
-struct c2wr_pd_alloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_alloc_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_pd_alloc {
-	struct c2wr_pd_alloc_req req;
-	struct c2wr_pd_alloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_dealloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_pd_dealloc_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_pd_dealloc {
-	struct c2wr_pd_dealloc_req req;
-	struct c2wr_pd_dealloc_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ SRQ ------------------------
- */
-struct c2wr_srq_create_req {
-	struct c2wr_hdr hdr;
-	u64 shared_ht;
-	u64 user_context;
-	u32 rnic_handle;
-	u32 srq_depth;
-	u32 srq_limit;
-	u32 sgl_depth;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_create_rep {
-	struct c2wr_hdr hdr;
-	u32 srq_depth;
-	u32 sgl_depth;
-	u32 msg_size;
-	u32 mq_index;
-	u32 mq_start;
-	u32 srq_handle;
-} __attribute__((packed)) ;
-
-union c2wr_srq_create {
-	struct c2wr_srq_create_req req;
-	struct c2wr_srq_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_destroy_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 srq_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_srq_destroy_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_srq_destroy {
-	struct c2wr_srq_destroy_req req;
-	struct c2wr_srq_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ QP ------------------------
- */
-enum c2wr_qp_flags {
-	QP_RDMA_READ = 0x00000001,	/* RDMA read enabled? */
-	QP_RDMA_WRITE = 0x00000002,	/* RDMA write enabled? */
-	QP_MW_BIND = 0x00000004,	/* MWs enabled */
-	QP_ZERO_STAG = 0x00000008,	/* enabled? */
-	QP_REMOTE_TERMINATION = 0x00000010,	/* remote end terminated */
-	QP_RDMA_READ_RESPONSE = 0x00000020	/* Remote RDMA read  */
-	    /* enabled? */
-};
-
-struct c2wr_qp_create_req {
-	struct c2wr_hdr hdr;
-	__be64 shared_sq_ht;
-	__be64 shared_rq_ht;
-	u64 user_context;
-	u32 rnic_handle;
-	u32 sq_cq_handle;
-	u32 rq_cq_handle;
-	__be32 sq_depth;
-	__be32 rq_depth;
-	u32 srq_handle;
-	u32 srq_limit;
-	__be32 flags;		/* see enum c2wr_qp_flags */
-	__be32 send_sgl_depth;
-	__be32 recv_sgl_depth;
-	__be32 rdma_write_sgl_depth;
-	__be32 ord;
-	__be32 ird;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_create_rep {
-	struct c2wr_hdr hdr;
-	__be32 sq_depth;
-	__be32 rq_depth;
-	u32 send_sgl_depth;
-	u32 recv_sgl_depth;
-	u32 rdma_write_sgl_depth;
-	u32 ord;
-	u32 ird;
-	__be32 sq_msg_size;
-	__be32 sq_mq_index;
-	__be32 sq_mq_start;
-	__be32 rq_msg_size;
-	__be32 rq_mq_index;
-	__be32 rq_mq_start;
-	u32 qp_handle;
-} __attribute__((packed)) ;
-
-union c2wr_qp_create {
-	struct c2wr_qp_create_req req;
-	struct c2wr_qp_create_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_query_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 qp_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_query_rep {
-	struct c2wr_hdr hdr;
-	u64 user_context;
-	u32 rnic_handle;
-	u32 sq_depth;
-	u32 rq_depth;
-	u32 send_sgl_depth;
-	u32 rdma_write_sgl_depth;
-	u32 recv_sgl_depth;
-	u32 ord;
-	u32 ird;
-	u16 qp_state;
-	u16 flags;		/* see c2wr_qp_flags_t */
-	u32 qp_id;
-	u32 local_addr;
-	u32 remote_addr;
-	u16 local_port;
-	u16 remote_port;
-	u32 terminate_msg_length;	/* 0 if not present */
-	u8 data[0];
-	/* Terminate Message in-line here. */
-} __attribute__((packed)) ;
-
-union c2wr_qp_query {
-	struct c2wr_qp_query_req req;
-	struct c2wr_qp_query_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_modify_req {
-	struct c2wr_hdr hdr;
-	u64 stream_msg;
-	u32 stream_msg_length;
-	u32 rnic_handle;
-	u32 qp_handle;
-	__be32 next_qp_state;
-	__be32 ord;
-	__be32 ird;
-	__be32 sq_depth;
-	__be32 rq_depth;
-	u32 llp_ep_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_modify_rep {
-	struct c2wr_hdr hdr;
-	u32 ord;
-	u32 ird;
-	u32 sq_depth;
-	u32 rq_depth;
-	u32 sq_msg_size;
-	u32 sq_mq_index;
-	u32 sq_mq_start;
-	u32 rq_msg_size;
-	u32 rq_mq_index;
-	u32 rq_mq_start;
-} __attribute__((packed)) ;
-
-union c2wr_qp_modify {
-	struct c2wr_qp_modify_req req;
-	struct c2wr_qp_modify_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_destroy_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 qp_handle;
-} __attribute__((packed)) ;
-
-struct c2wr_qp_destroy_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_qp_destroy {
-	struct c2wr_qp_destroy_req req;
-	struct c2wr_qp_destroy_rep rep;
-} __attribute__((packed)) ;
-
-/*
- * The CCWR_QP_CONNECT msg is posted on the verbs request queue.  It can
- * only be posted when a QP is in IDLE state.  After the connect request is
- * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
- * No synchronous reply from adapter to this WR.  The results of
- * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS
- * See c2wr_ae_active_connect_results_t
- */
-struct c2wr_qp_connect_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 qp_handle;
-	__be32 remote_addr;
-	__be16 remote_port;
-	u16 pad;
-	__be32 private_data_length;
-	u8 private_data[0];	/* Private data in-line. */
-} __attribute__((packed)) ;
-
-struct c2wr_qp_connect {
-	struct c2wr_qp_connect_req req;
-	/* no synchronous reply.         */
-} __attribute__((packed)) ;
-
-
-/*
- *------------------------ MM ------------------------
- */
-
-struct c2wr_nsmr_stag_alloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 pbl_depth;
-	u32 pd_id;
-	u32 flags;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_stag_alloc_rep {
-	struct c2wr_hdr hdr;
-	u32 pbl_depth;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_stag_alloc {
-	struct c2wr_nsmr_stag_alloc_req req;
-	struct c2wr_nsmr_stag_alloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_register_req {
-	struct c2wr_hdr hdr;
-	__be64 va;
-	u32 rnic_handle;
-	__be16 flags;
-	u8 stag_key;
-	u8 pad;
-	u32 pd_id;
-	__be32 pbl_depth;
-	__be32 pbe_size;
-	__be32 fbo;
-	__be32 length;
-	__be32 addrs_length;
-	/* array of paddrs (must be aligned on a 64bit boundary) */
-	__be64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_register_rep {
-	struct c2wr_hdr hdr;
-	u32 pbl_depth;
-	__be32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_register {
-	struct c2wr_nsmr_register_req req;
-	struct c2wr_nsmr_register_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_pbl_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	__be32 flags;
-	__be32 stag_index;
-	__be32 addrs_length;
-	/* array of paddrs (must be aligned on a 64bit boundary) */
-	__be64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_pbl_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_pbl {
-	struct c2wr_nsmr_pbl_req req;
-	struct c2wr_nsmr_pbl_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mr_query_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_mr_query_rep {
-	struct c2wr_hdr hdr;
-	u8 stag_key;
-	u8 pad[3];
-	u32 pd_id;
-	u32 flags;
-	u32 pbl_depth;
-} __attribute__((packed)) ;
-
-union c2wr_mr_query {
-	struct c2wr_mr_query_req req;
-	struct c2wr_mr_query_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_query_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_query_rep {
-	struct c2wr_hdr hdr;
-	u8 stag_key;
-	u8 pad[3];
-	u32 pd_id;
-	u32 flags;
-} __attribute__((packed)) ;
-
-union c2wr_mw_query {
-	struct c2wr_mw_query_req req;
-	struct c2wr_mw_query_rep rep;
-} __attribute__((packed)) ;
-
-
-struct c2wr_stag_dealloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	__be32 stag_index;
-} __attribute__((packed)) ;
-
-struct c2wr_stag_dealloc_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed)) ;
-
-union c2wr_stag_dealloc {
-	struct c2wr_stag_dealloc_req req;
-	struct c2wr_stag_dealloc_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_reregister_req {
-	struct c2wr_hdr hdr;
-	u64 va;
-	u32 rnic_handle;
-	u16 flags;
-	u8 stag_key;
-	u8 pad;
-	u32 stag_index;
-	u32 pd_id;
-	u32 pbl_depth;
-	u32 pbe_size;
-	u32 fbo;
-	u32 length;
-	u32 addrs_length;
-	u32 pad1;
-	/* array of paddrs (must be aligned on a 64bit boundary) */
-	u64 paddrs[0];
-} __attribute__((packed)) ;
-
-struct c2wr_nsmr_reregister_rep {
-	struct c2wr_hdr hdr;
-	u32 pbl_depth;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_nsmr_reregister {
-	struct c2wr_nsmr_reregister_req req;
-	struct c2wr_nsmr_reregister_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_smr_register_req {
-	struct c2wr_hdr hdr;
-	u64 va;
-	u32 rnic_handle;
-	u16 flags;
-	u8 stag_key;
-	u8 pad;
-	u32 stag_index;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_smr_register_rep {
-	struct c2wr_hdr hdr;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_smr_register {
-	struct c2wr_smr_register_req req;
-	struct c2wr_smr_register_rep rep;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_alloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 pd_id;
-} __attribute__((packed)) ;
-
-struct c2wr_mw_alloc_rep {
-	struct c2wr_hdr hdr;
-	u32 stag_index;
-} __attribute__((packed)) ;
-
-union c2wr_mw_alloc {
-	struct c2wr_mw_alloc_req req;
-	struct c2wr_mw_alloc_rep rep;
-} __attribute__((packed)) ;
-
-/*
- *------------------------ WRs -----------------------
- */
-
-struct c2wr_user_hdr {
-	struct c2wr_hdr hdr;		/* Has status and WR Type */
-} __attribute__((packed)) ;
-
-enum c2_qp_state {
-	C2_QP_STATE_IDLE = 0x01,
-	C2_QP_STATE_CONNECTING = 0x02,
-	C2_QP_STATE_RTS = 0x04,
-	C2_QP_STATE_CLOSING = 0x08,
-	C2_QP_STATE_TERMINATE = 0x10,
-	C2_QP_STATE_ERROR = 0x20,
-};
-
-/* Completion queue entry. */
-struct c2wr_ce {
-	struct c2wr_hdr hdr;		/* Has status and WR Type */
-	u64 qp_user_context;	/* c2_user_qp_t * */
-	u32 qp_state;		/* Current QP State */
-	u32 handle;		/* QPID or EP Handle */
-	__be32 bytes_rcvd;		/* valid for RECV WCs */
-	u32 stag;
-} __attribute__((packed)) ;
-
-
-/*
- * Flags used for all post-sq WRs.  These must fit in the flags
- * field of the struct c2wr_hdr (eight bits).
- */
-enum {
-	SQ_SIGNALED = 0x01,
-	SQ_READ_FENCE = 0x02,
-	SQ_FENCE = 0x04,
-};
-
-/*
- * Common fields for all post-sq WRs.  Namely the standard header and a
- * secondary header with fields common to all post-sq WRs.
- */
-struct c2_sq_hdr {
-	struct c2wr_user_hdr user_hdr;
-} __attribute__((packed));
-
-/*
- * Same as above but for post-rq WRs.
- */
-struct c2_rq_hdr {
-	struct c2wr_user_hdr user_hdr;
-} __attribute__((packed));
-
-/*
- * use the same struct for all sends.
- */
-struct c2wr_send_req {
-	struct c2_sq_hdr sq_hdr;
-	__be32 sge_len;
-	__be32 remote_stag;
-	u8 data[0];		/* SGE array */
-} __attribute__((packed));
-
-union c2wr_send {
-	struct c2wr_send_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_rdma_write_req {
-	struct c2_sq_hdr sq_hdr;
-	__be64 remote_to;
-	__be32 remote_stag;
-	__be32 sge_len;
-	u8 data[0];		/* SGE array */
-} __attribute__((packed));
-
-union c2wr_rdma_write {
-	struct c2wr_rdma_write_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_rdma_read_req {
-	struct c2_sq_hdr sq_hdr;
-	__be64 local_to;
-	__be64 remote_to;
-	__be32 local_stag;
-	__be32 remote_stag;
-	__be32 length;
-} __attribute__((packed));
-
-union c2wr_rdma_read {
-	struct c2wr_rdma_read_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_mw_bind_req {
-	struct c2_sq_hdr sq_hdr;
-	u64 va;
-	u8 stag_key;
-	u8 pad[3];
-	u32 mw_stag_index;
-	u32 mr_stag_index;
-	u32 length;
-	u32 flags;
-} __attribute__((packed));
-
-union c2wr_mw_bind {
-	struct c2wr_mw_bind_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_nsmr_fastreg_req {
-	struct c2_sq_hdr sq_hdr;
-	u64 va;
-	u8 stag_key;
-	u8 pad[3];
-	u32 stag_index;
-	u32 pbe_size;
-	u32 fbo;
-	u32 length;
-	u32 addrs_length;
-	/* array of paddrs (must be aligned on a 64bit boundary) */
-	u64 paddrs[0];
-} __attribute__((packed));
-
-union c2wr_nsmr_fastreg {
-	struct c2wr_nsmr_fastreg_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_stag_invalidate_req {
-	struct c2_sq_hdr sq_hdr;
-	u8 stag_key;
-	u8 pad[3];
-	u32 stag_index;
-} __attribute__((packed));
-
-union c2wr_stag_invalidate {
-	struct c2wr_stag_invalidate_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-union c2wr_sqwr {
-	struct c2_sq_hdr sq_hdr;
-	struct c2wr_send_req send;
-	struct c2wr_send_req send_se;
-	struct c2wr_send_req send_inv;
-	struct c2wr_send_req send_se_inv;
-	struct c2wr_rdma_write_req rdma_write;
-	struct c2wr_rdma_read_req rdma_read;
-	struct c2wr_mw_bind_req mw_bind;
-	struct c2wr_nsmr_fastreg_req nsmr_fastreg;
-	struct c2wr_stag_invalidate_req stag_inv;
-} __attribute__((packed));
-
-
-/*
- * RQ WRs
- */
-struct c2wr_rqwr {
-	struct c2_rq_hdr rq_hdr;
-	u8 data[0];		/* array of SGEs */
-} __attribute__((packed));
-
-union c2wr_recv {
-	struct c2wr_rqwr req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-/*
- * All AEs start with this header.  Most AEs only need to convey the
- * information in the header.  Some, like LLP connection events, need
- * more info.  The union typdef c2wr_ae_t has all the possible AEs.
- *
- * hdr.context is the user_context from the rnic_open WR.  NULL If this
- * is not affiliated with an rnic
- *
- * hdr.id is the AE identifier (eg;  CCAE_REMOTE_SHUTDOWN,
- * CCAE_LLP_CLOSE_COMPLETE)
- *
- * resource_type is one of:  C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
- *
- * user_context is the context passed down when the host created the resource.
- */
-struct c2wr_ae_hdr {
-	struct c2wr_hdr hdr;
-	u64 user_context;	/* user context for this res. */
-	__be32 resource_type;	/* see enum c2_resource_indicator */
-	__be32 resource;	/* handle for resource */
-	__be32 qp_state;	/* current QP State */
-} __attribute__((packed));
-
-/*
- * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
- * the adapter moves the QP into RTS state
- */
-struct c2wr_ae_active_connect_results {
-	struct c2wr_ae_hdr ae_hdr;
-	__be32 laddr;
-	__be32 raddr;
-	__be16 lport;
-	__be16 rport;
-	__be32 private_data_length;
-	u8 private_data[0];	/* data is in-line in the msg. */
-} __attribute__((packed));
-
-/*
- * When connections are established by the stack (and the private data
- * MPA frame is received), the adapter will generate an event to the host.
- * The details of the connection, any private data, and the new connection
- * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the
- * AE queue:
- */
-struct c2wr_ae_connection_request {
-	struct c2wr_ae_hdr ae_hdr;
-	u32 cr_handle;		/* connreq handle (sock ptr) */
-	__be32 laddr;
-	__be32 raddr;
-	__be16 lport;
-	__be16 rport;
-	__be32 private_data_length;
-	u8 private_data[0];	/* data is in-line in the msg. */
-} __attribute__((packed));
-
-union c2wr_ae {
-	struct c2wr_ae_hdr ae_generic;
-	struct c2wr_ae_active_connect_results ae_active_connect_results;
-	struct c2wr_ae_connection_request ae_connection_request;
-} __attribute__((packed));
-
-struct c2wr_init_req {
-	struct c2wr_hdr hdr;
-	__be64 hint_count;
-	__be64 q0_host_shared;
-	__be64 q1_host_shared;
-	__be64 q1_host_msg_pool;
-	__be64 q2_host_shared;
-	__be64 q2_host_msg_pool;
-} __attribute__((packed));
-
-struct c2wr_init_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_init {
-	struct c2wr_init_req req;
-	struct c2wr_init_rep rep;
-} __attribute__((packed));
-
-/*
- * For upgrading flash.
- */
-
-struct c2wr_flash_init_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-} __attribute__((packed));
-
-struct c2wr_flash_init_rep {
-	struct c2wr_hdr hdr;
-	u32 adapter_flash_buf_offset;
-	u32 adapter_flash_len;
-} __attribute__((packed));
-
-union c2wr_flash_init {
-	struct c2wr_flash_init_req req;
-	struct c2wr_flash_init_rep rep;
-} __attribute__((packed));
-
-struct c2wr_flash_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 len;
-} __attribute__((packed));
-
-struct c2wr_flash_rep {
-	struct c2wr_hdr hdr;
-	u32 status;
-} __attribute__((packed));
-
-union c2wr_flash {
-	struct c2wr_flash_req req;
-	struct c2wr_flash_rep rep;
-} __attribute__((packed));
-
-struct c2wr_buf_alloc_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 size;
-} __attribute__((packed));
-
-struct c2wr_buf_alloc_rep {
-	struct c2wr_hdr hdr;
-	u32 offset;		/* 0 if mem not available */
-	u32 size;		/* 0 if mem not available */
-} __attribute__((packed));
-
-union c2wr_buf_alloc {
-	struct c2wr_buf_alloc_req req;
-	struct c2wr_buf_alloc_rep rep;
-} __attribute__((packed));
-
-struct c2wr_buf_free_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 offset;		/* Must match value from alloc */
-	u32 size;		/* Must match value from alloc */
-} __attribute__((packed));
-
-struct c2wr_buf_free_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_buf_free {
-	struct c2wr_buf_free_req req;
-	struct c2wr_ce rep;
-} __attribute__((packed));
-
-struct c2wr_flash_write_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 offset;
-	u32 size;
-	u32 type;
-	u32 flags;
-} __attribute__((packed));
-
-struct c2wr_flash_write_rep {
-	struct c2wr_hdr hdr;
-	u32 status;
-} __attribute__((packed));
-
-union c2wr_flash_write {
-	struct c2wr_flash_write_req req;
-	struct c2wr_flash_write_rep rep;
-} __attribute__((packed));
-
-/*
- * Messages for LLP connection setup.
- */
-
-/*
- * Listen Request.  This allocates a listening endpoint to allow passive
- * connection setup.  Newly established LLP connections are passed up
- * via an AE.  See c2wr_ae_connection_request_t
- */
-struct c2wr_ep_listen_create_req {
-	struct c2wr_hdr hdr;
-	u64 user_context;	/* returned in AEs. */
-	u32 rnic_handle;
-	__be32 local_addr;		/* local addr, or 0  */
-	__be16 local_port;		/* 0 means "pick one" */
-	u16 pad;
-	__be32 backlog;		/* tradional tcp listen bl */
-} __attribute__((packed));
-
-struct c2wr_ep_listen_create_rep {
-	struct c2wr_hdr hdr;
-	u32 ep_handle;		/* handle to new listening ep */
-	u16 local_port;		/* resulting port... */
-	u16 pad;
-} __attribute__((packed));
-
-union c2wr_ep_listen_create {
-	struct c2wr_ep_listen_create_req req;
-	struct c2wr_ep_listen_create_rep rep;
-} __attribute__((packed));
-
-struct c2wr_ep_listen_destroy_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 ep_handle;
-} __attribute__((packed));
-
-struct c2wr_ep_listen_destroy_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_ep_listen_destroy {
-	struct c2wr_ep_listen_destroy_req req;
-	struct c2wr_ep_listen_destroy_rep rep;
-} __attribute__((packed));
-
-struct c2wr_ep_query_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 ep_handle;
-} __attribute__((packed));
-
-struct c2wr_ep_query_rep {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 local_addr;
-	u32 remote_addr;
-	u16 local_port;
-	u16 remote_port;
-} __attribute__((packed));
-
-union c2wr_ep_query {
-	struct c2wr_ep_query_req req;
-	struct c2wr_ep_query_rep rep;
-} __attribute__((packed));
-
-
-/*
- * The host passes this down to indicate acceptance of a pending iWARP
- * connection.  The cr_handle was obtained from the CONNECTION_REQUEST
- * AE passed up by the adapter.  See c2wr_ae_connection_request_t.
- */
-struct c2wr_cr_accept_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 qp_handle;		/* QP to bind to this LLP conn */
-	u32 ep_handle;		/* LLP  handle to accept */
-	__be32 private_data_length;
-	u8 private_data[0];	/* data in-line in msg. */
-} __attribute__((packed));
-
-/*
- * adapter sends reply when private data is successfully submitted to
- * the LLP.
- */
-struct c2wr_cr_accept_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_cr_accept {
-	struct c2wr_cr_accept_req req;
-	struct c2wr_cr_accept_rep rep;
-} __attribute__((packed));
-
-/*
- * The host sends this down if a given iWARP connection request was
- * rejected by the consumer.  The cr_handle was obtained from a
- * previous c2wr_ae_connection_request_t AE sent by the adapter.
- */
-struct  c2wr_cr_reject_req {
-	struct c2wr_hdr hdr;
-	u32 rnic_handle;
-	u32 ep_handle;		/* LLP handle to reject */
-} __attribute__((packed));
-
-/*
- * Dunno if this is needed, but we'll add it for now.  The adapter will
- * send the reject_reply after the LLP endpoint has been destroyed.
- */
-struct  c2wr_cr_reject_rep {
-	struct c2wr_hdr hdr;
-} __attribute__((packed));
-
-union c2wr_cr_reject {
-	struct c2wr_cr_reject_req req;
-	struct c2wr_cr_reject_rep rep;
-} __attribute__((packed));
-
-/*
- * console command.  Used to implement a debug console over the verbs
- * request and reply queues.
- */
-
-/*
- * Console request message.  It contains:
- *	- message hdr with id = CCWR_CONSOLE
- *	- the physaddr/len of host memory to be used for the reply.
- *	- the command string.  eg:  "netstat -s" or "zoneinfo"
- */
-struct c2wr_console_req {
-	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
-	u64 reply_buf;		/* pinned host buf for reply */
-	u32 reply_buf_len;	/* length of reply buffer */
-	u8 command[0];		/* NUL terminated ascii string */
-	/* containing the command req */
-} __attribute__((packed));
-
-/*
- * flags used in the console reply.
- */
-enum c2_console_flags {
-	CONS_REPLY_TRUNCATED = 0x00000001	/* reply was truncated */
-} __attribute__((packed));
-
-/*
- * Console reply message.
- * hdr.result contains the c2_status_t error if the reply was _not_ generated,
- * or C2_OK if the reply was generated.
- */
-struct c2wr_console_rep {
-	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
-	u32 flags;
-} __attribute__((packed));
-
-union c2wr_console {
-	struct c2wr_console_req req;
-	struct c2wr_console_rep rep;
-} __attribute__((packed));
-
-
-/*
- * Giant union with all WRs.  Makes life easier...
- */
-union c2wr {
-	struct c2wr_hdr hdr;
-	struct c2wr_user_hdr user_hdr;
-	union c2wr_rnic_open rnic_open;
-	union c2wr_rnic_query rnic_query;
-	union c2wr_rnic_getconfig rnic_getconfig;
-	union c2wr_rnic_setconfig rnic_setconfig;
-	union c2wr_rnic_close rnic_close;
-	union c2wr_cq_create cq_create;
-	union c2wr_cq_modify cq_modify;
-	union c2wr_cq_destroy cq_destroy;
-	union c2wr_pd_alloc pd_alloc;
-	union c2wr_pd_dealloc pd_dealloc;
-	union c2wr_srq_create srq_create;
-	union c2wr_srq_destroy srq_destroy;
-	union c2wr_qp_create qp_create;
-	union c2wr_qp_query qp_query;
-	union c2wr_qp_modify qp_modify;
-	union c2wr_qp_destroy qp_destroy;
-	struct c2wr_qp_connect qp_connect;
-	union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
-	union c2wr_nsmr_register nsmr_register;
-	union c2wr_nsmr_pbl nsmr_pbl;
-	union c2wr_mr_query mr_query;
-	union c2wr_mw_query mw_query;
-	union c2wr_stag_dealloc stag_dealloc;
-	union c2wr_sqwr sqwr;
-	struct c2wr_rqwr rqwr;
-	struct c2wr_ce ce;
-	union c2wr_ae ae;
-	union c2wr_init init;
-	union c2wr_ep_listen_create ep_listen_create;
-	union c2wr_ep_listen_destroy ep_listen_destroy;
-	union c2wr_cr_accept cr_accept;
-	union c2wr_cr_reject cr_reject;
-	union c2wr_console console;
-	union c2wr_flash_init flash_init;
-	union c2wr_flash flash;
-	union c2wr_buf_alloc buf_alloc;
-	union c2wr_buf_free buf_free;
-	union c2wr_flash_write flash_write;
-} __attribute__((packed));
-
-
-/*
- * Accessors for the wr fields that are packed together tightly to
- * reduce the wr message size.  The wr arguments are void* so that
- * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types
- * in the struct c2wr union can be passed in.
- */
-static __inline__ u8 c2_wr_get_id(void *wr)
-{
-	return ((struct c2wr_hdr *) wr)->id;
-}
-static __inline__ void c2_wr_set_id(void *wr, u8 id)
-{
-	((struct c2wr_hdr *) wr)->id = id;
-}
-static __inline__ u8 c2_wr_get_result(void *wr)
-{
-	return ((struct c2wr_hdr *) wr)->result;
-}
-static __inline__ void c2_wr_set_result(void *wr, u8 result)
-{
-	((struct c2wr_hdr *) wr)->result = result;
-}
-static __inline__ u8 c2_wr_get_flags(void *wr)
-{
-	return ((struct c2wr_hdr *) wr)->flags;
-}
-static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
-{
-	((struct c2wr_hdr *) wr)->flags = flags;
-}
-static __inline__ u8 c2_wr_get_sge_count(void *wr)
-{
-	return ((struct c2wr_hdr *) wr)->sge_count;
-}
-static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
-{
-	((struct c2wr_hdr *) wr)->sge_count = sge_count;
-}
-static __inline__ __be32 c2_wr_get_wqe_count(void *wr)
-{
-	return ((struct c2wr_hdr *) wr)->wqe_count;
-}
-static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
-{
-	((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
-}
-
-#endif				/* _C2_WR_H_ */
diff --git a/drivers/staging/rdma/ehca/Kconfig b/drivers/staging/rdma/ehca/Kconfig
deleted file mode 100644
index 3fadd2ad6426..000000000000
--- a/drivers/staging/rdma/ehca/Kconfig
+++ /dev/null
@@ -1,10 +0,0 @@
-config INFINIBAND_EHCA
-	tristate "eHCA support"
-	depends on IBMEBUS
-	---help---
-	This driver supports the deprecated IBM pSeries eHCA InfiniBand
-	adapter.
-
-	To compile the driver as a module, choose M here. The module
-	will be called ib_ehca.
-
diff --git a/drivers/staging/rdma/ehca/Makefile b/drivers/staging/rdma/ehca/Makefile
deleted file mode 100644
index 74d284e46a40..000000000000
--- a/drivers/staging/rdma/ehca/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-#  Authors: Heiko J Schick <schickhj@de.ibm.com>
-#           Christoph Raisch <raisch@de.ibm.com>
-#           Joachim Fenkes <fenkes@de.ibm.com>
-#
-#  Copyright (c) 2005 IBM Corporation
-#
-#  All rights reserved.
-#
-#  This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
-
-obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
-
-ib_ehca-objs  = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
-		ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
-		ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
-
diff --git a/drivers/staging/rdma/ehca/TODO b/drivers/staging/rdma/ehca/TODO
deleted file mode 100644
index 199a4a600142..000000000000
--- a/drivers/staging/rdma/ehca/TODO
+++ /dev/null
@@ -1,4 +0,0 @@
-9/2015
-
-The ehca driver has been deprecated and moved to drivers/staging/rdma.
-It will be removed in the 4.6 merge window.
diff --git a/drivers/staging/rdma/ehca/ehca_av.c b/drivers/staging/rdma/ehca/ehca_av.c
deleted file mode 100644
index 94e088c2d989..000000000000
--- a/drivers/staging/rdma/ehca/ehca_av.c
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  address vector functions
- *
- *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Khadija Souissi <souissik@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-static struct kmem_cache *av_cache;
-
-int ehca_calc_ipd(struct ehca_shca *shca, int port,
-		  enum ib_rate path_rate, u32 *ipd)
-{
-	int path = ib_rate_to_mult(path_rate);
-	int link, ret;
-	struct ib_port_attr pa;
-
-	if (path_rate == IB_RATE_PORT_CURRENT) {
-		*ipd = 0;
-		return 0;
-	}
-
-	if (unlikely(path < 0)) {
-		ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x",
-			 path_rate);
-		return -EINVAL;
-	}
-
-	ret = ehca_query_port(&shca->ib_device, port, &pa);
-	if (unlikely(ret < 0)) {
-		ehca_err(&shca->ib_device, "Failed to query port  ret=%i", ret);
-		return ret;
-	}
-
-	link = ib_width_enum_to_int(pa.active_width) * pa.active_speed;
-
-	if (path >= link)
-		/* no need to throttle if path faster than link */
-		*ipd = 0;
-	else
-		/* IPD = round((link / path) - 1) */
-		*ipd = ((link + (path >> 1)) / path) - 1;
-
-	return 0;
-}
-
-struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
-{
-	int ret;
-	struct ehca_av *av;
-	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-					      ib_device);
-
-	av = kmem_cache_alloc(av_cache, GFP_KERNEL);
-	if (!av) {
-		ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
-			 pd, ah_attr);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	av->av.sl = ah_attr->sl;
-	av->av.dlid = ah_attr->dlid;
-	av->av.slid_path_bits = ah_attr->src_path_bits;
-
-	if (ehca_static_rate < 0) {
-		u32 ipd;
-
-		if (ehca_calc_ipd(shca, ah_attr->port_num,
-				  ah_attr->static_rate, &ipd)) {
-			ret = -EINVAL;
-			goto create_ah_exit1;
-		}
-		av->av.ipd = ipd;
-	} else
-		av->av.ipd = ehca_static_rate;
-
-	av->av.lnh = ah_attr->ah_flags;
-	av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
-	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
-					    ah_attr->grh.traffic_class);
-	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
-					    ah_attr->grh.flow_label);
-	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
-					    ah_attr->grh.hop_limit);
-	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
-	/* set sgid in grh.word_1 */
-	if (ah_attr->ah_flags & IB_AH_GRH) {
-		int rc;
-		struct ib_port_attr port_attr;
-		union ib_gid gid;
-
-		memset(&port_attr, 0, sizeof(port_attr));
-		rc = ehca_query_port(pd->device, ah_attr->port_num,
-				     &port_attr);
-		if (rc) { /* invalid port number */
-			ret = -EINVAL;
-			ehca_err(pd->device, "Invalid port number "
-				 "ehca_query_port() returned %x "
-				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
-			goto create_ah_exit1;
-		}
-		memset(&gid, 0, sizeof(gid));
-		rc = ehca_query_gid(pd->device,
-				    ah_attr->port_num,
-				    ah_attr->grh.sgid_index, &gid);
-		if (rc) {
-			ret = -EINVAL;
-			ehca_err(pd->device, "Failed to retrieve sgid "
-				 "ehca_query_gid() returned %x "
-				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
-			goto create_ah_exit1;
-		}
-		memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
-	}
-	av->av.pmtu = shca->max_mtu;
-
-	/* dgid comes in grh.word_3 */
-	memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
-	       sizeof(ah_attr->grh.dgid));
-
-	return &av->ib_ah;
-
-create_ah_exit1:
-	kmem_cache_free(av_cache, av);
-
-	return ERR_PTR(ret);
-}
-
-int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
-{
-	struct ehca_av *av;
-	struct ehca_ud_av new_ehca_av;
-	struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
-					      ib_device);
-
-	memset(&new_ehca_av, 0, sizeof(new_ehca_av));
-	new_ehca_av.sl = ah_attr->sl;
-	new_ehca_av.dlid = ah_attr->dlid;
-	new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
-	new_ehca_av.ipd = ah_attr->static_rate;
-	new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
-					 (ah_attr->ah_flags & IB_AH_GRH) > 0);
-	new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK,
-						ah_attr->grh.traffic_class);
-	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
-						 ah_attr->grh.flow_label);
-	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
-						 ah_attr->grh.hop_limit);
-	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
-
-	/* set sgid in grh.word_1 */
-	if (ah_attr->ah_flags & IB_AH_GRH) {
-		int rc;
-		struct ib_port_attr port_attr;
-		union ib_gid gid;
-
-		memset(&port_attr, 0, sizeof(port_attr));
-		rc = ehca_query_port(ah->device, ah_attr->port_num,
-				     &port_attr);
-		if (rc) { /* invalid port number */
-			ehca_err(ah->device, "Invalid port number "
-				 "ehca_query_port() returned %x "
-				 "ah=%p ah_attr=%p port_num=%x",
-				 rc, ah, ah_attr, ah_attr->port_num);
-			return -EINVAL;
-		}
-		memset(&gid, 0, sizeof(gid));
-		rc = ehca_query_gid(ah->device,
-				    ah_attr->port_num,
-				    ah_attr->grh.sgid_index, &gid);
-		if (rc) {
-			ehca_err(ah->device, "Failed to retrieve sgid "
-				 "ehca_query_gid() returned %x "
-				 "ah=%p ah_attr=%p port_num=%x "
-				 "sgid_index=%x",
-				 rc, ah, ah_attr, ah_attr->port_num,
-				 ah_attr->grh.sgid_index);
-			return -EINVAL;
-		}
-		memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
-	}
-
-	new_ehca_av.pmtu = shca->max_mtu;
-
-	memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
-	       sizeof(ah_attr->grh.dgid));
-
-	av = container_of(ah, struct ehca_av, ib_ah);
-	av->av = new_ehca_av;
-
-	return 0;
-}
-
-int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
-{
-	struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
-
-	memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
-	       sizeof(ah_attr->grh.dgid));
-	ah_attr->sl = av->av.sl;
-
-	ah_attr->dlid = av->av.dlid;
-
-	ah_attr->src_path_bits = av->av.slid_path_bits;
-	ah_attr->static_rate = av->av.ipd;
-	ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh);
-	ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
-						    av->av.grh.word_0);
-	ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
-						av->av.grh.word_0);
-	ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
-						 av->av.grh.word_0);
-
-	return 0;
-}
-
-int ehca_destroy_ah(struct ib_ah *ah)
-{
-	kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
-
-	return 0;
-}
-
-int ehca_init_av_cache(void)
-{
-	av_cache = kmem_cache_create("ehca_cache_av",
-				   sizeof(struct ehca_av), 0,
-				   SLAB_HWCACHE_ALIGN,
-				   NULL);
-	if (!av_cache)
-		return -ENOMEM;
-	return 0;
-}
-
-void ehca_cleanup_av_cache(void)
-{
-	kmem_cache_destroy(av_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h
deleted file mode 100644
index e8c3387d7aaa..000000000000
--- a/drivers/staging/rdma/ehca/ehca_classes.h
+++ /dev/null
@@ -1,481 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Struct definition for eHCA internal structures
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_CLASSES_H__
-#define __EHCA_CLASSES_H__
-
-struct ehca_module;
-struct ehca_qp;
-struct ehca_cq;
-struct ehca_eq;
-struct ehca_mr;
-struct ehca_mw;
-struct ehca_pd;
-struct ehca_av;
-
-#include <linux/wait.h>
-#include <linux/mutex.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_user_verbs.h>
-
-#ifdef CONFIG_PPC64
-#include "ehca_classes_pSeries.h"
-#endif
-#include "ipz_pt_fn.h"
-#include "ehca_qes.h"
-#include "ehca_irq.h"
-
-#define EHCA_EQE_CACHE_SIZE 20
-#define EHCA_MAX_NUM_QUEUES 0xffff
-
-struct ehca_eqe_cache_entry {
-	struct ehca_eqe *eqe;
-	struct ehca_cq *cq;
-};
-
-struct ehca_eq {
-	u32 length;
-	struct ipz_queue ipz_queue;
-	struct ipz_eq_handle ipz_eq_handle;
-	struct work_struct work;
-	struct h_galpas galpas;
-	int is_initialized;
-	struct ehca_pfeq pf;
-	spinlock_t spinlock;
-	struct tasklet_struct interrupt_task;
-	u32 ist;
-	spinlock_t irq_spinlock;
-	struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
-};
-
-struct ehca_sma_attr {
-	u16 lid, lmc, sm_sl, sm_lid;
-	u16 pkey_tbl_len, pkeys[16];
-};
-
-struct ehca_sport {
-	struct ib_cq *ibcq_aqp1;
-	struct ib_qp *ibqp_sqp[2];
-	/* lock to serialze modify_qp() calls for sqp in normal
-	 * and irq path (when event PORT_ACTIVE is received first time)
-	 */
-	spinlock_t mod_sqp_lock;
-	enum ib_port_state port_state;
-	struct ehca_sma_attr saved_attr;
-	u32 pma_qp_nr;
-};
-
-#define HCA_CAP_MR_PGSIZE_4K  0x80000000
-#define HCA_CAP_MR_PGSIZE_64K 0x40000000
-#define HCA_CAP_MR_PGSIZE_1M  0x20000000
-#define HCA_CAP_MR_PGSIZE_16M 0x10000000
-
-struct ehca_shca {
-	struct ib_device ib_device;
-	struct platform_device *ofdev;
-	u8 num_ports;
-	int hw_level;
-	struct list_head shca_list;
-	struct ipz_adapter_handle ipz_hca_handle;
-	struct ehca_sport sport[2];
-	struct ehca_eq eq;
-	struct ehca_eq neq;
-	struct ehca_mr *maxmr;
-	struct ehca_pd *pd;
-	struct h_galpas galpas;
-	struct mutex modify_mutex;
-	u64 hca_cap;
-	/* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
-	u32 hca_cap_mr_pgsize;
-	int max_mtu;
-	int max_num_qps;
-	int max_num_cqs;
-	atomic_t num_cqs;
-	atomic_t num_qps;
-};
-
-struct ehca_pd {
-	struct ib_pd ib_pd;
-	struct ipz_pd fw_pd;
-	/* small queue mgmt */
-	struct mutex lock;
-	struct list_head free[2];
-	struct list_head full[2];
-};
-
-enum ehca_ext_qp_type {
-	EQPT_NORMAL    = 0,
-	EQPT_LLQP      = 1,
-	EQPT_SRQBASE   = 2,
-	EQPT_SRQ       = 3,
-};
-
-/* struct to cache modify_qp()'s parms for GSI/SMI qp */
-struct ehca_mod_qp_parm {
-	int mask;
-	struct ib_qp_attr attr;
-};
-
-#define EHCA_MOD_QP_PARM_MAX 4
-
-#define QMAP_IDX_MASK 0xFFFFULL
-
-/* struct for tracking if cqes have been reported to the application */
-struct ehca_qmap_entry {
-	u16 app_wr_id;
-	u8 reported;
-	u8 cqe_req;
-};
-
-struct ehca_queue_map {
-	struct ehca_qmap_entry *map;
-	unsigned int entries;
-	unsigned int tail;
-	unsigned int left_to_poll;
-	unsigned int next_wqe_idx;   /* Idx to first wqe to be flushed */
-};
-
-/* function to calculate the next index for the qmap */
-static inline unsigned int next_index(unsigned int cur_index, unsigned int limit)
-{
-	unsigned int temp = cur_index + 1;
-	return (temp == limit) ? 0 : temp;
-}
-
-struct ehca_qp {
-	union {
-		struct ib_qp ib_qp;
-		struct ib_srq ib_srq;
-	};
-	u32 qp_type;
-	enum ehca_ext_qp_type ext_type;
-	enum ib_qp_state state;
-	struct ipz_queue ipz_squeue;
-	struct ehca_queue_map sq_map;
-	struct ipz_queue ipz_rqueue;
-	struct ehca_queue_map rq_map;
-	struct h_galpas galpas;
-	u32 qkey;
-	u32 real_qp_num;
-	u32 token;
-	spinlock_t spinlock_s;
-	spinlock_t spinlock_r;
-	u32 sq_max_inline_data_size;
-	struct ipz_qp_handle ipz_qp_handle;
-	struct ehca_pfqp pf;
-	struct ib_qp_init_attr init_attr;
-	struct ehca_cq *send_cq;
-	struct ehca_cq *recv_cq;
-	unsigned int sqerr_purgeflag;
-	struct hlist_node list_entries;
-	/* array to cache modify_qp()'s parms for GSI/SMI qp */
-	struct ehca_mod_qp_parm *mod_qp_parm;
-	int mod_qp_parm_idx;
-	/* mmap counter for resources mapped into user space */
-	u32 mm_count_squeue;
-	u32 mm_count_rqueue;
-	u32 mm_count_galpa;
-	/* unsolicited ack circumvention */
-	int unsol_ack_circ;
-	int mtu_shift;
-	u32 message_count;
-	u32 packet_count;
-	atomic_t nr_events; /* events seen */
-	wait_queue_head_t wait_completion;
-	int mig_armed;
-	struct list_head sq_err_node;
-	struct list_head rq_err_node;
-};
-
-#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
-#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ)
-#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE)
-
-/* must be power of 2 */
-#define QP_HASHTAB_LEN 8
-
-struct ehca_cq {
-	struct ib_cq ib_cq;
-	struct ipz_queue ipz_queue;
-	struct h_galpas galpas;
-	spinlock_t spinlock;
-	u32 cq_number;
-	u32 token;
-	u32 nr_of_entries;
-	struct ipz_cq_handle ipz_cq_handle;
-	struct ehca_pfcq pf;
-	spinlock_t cb_lock;
-	struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
-	struct list_head entry;
-	u32 nr_callbacks;   /* #events assigned to cpu by scaling code */
-	atomic_t nr_events; /* #events seen */
-	wait_queue_head_t wait_completion;
-	spinlock_t task_lock;
-	/* mmap counter for resources mapped into user space */
-	u32 mm_count_queue;
-	u32 mm_count_galpa;
-	struct list_head sqp_err_list;
-	struct list_head rqp_err_list;
-};
-
-enum ehca_mr_flag {
-	EHCA_MR_FLAG_FMR = 0x80000000,	 /* FMR, created with ehca_alloc_fmr */
-	EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR                           */
-};
-
-struct ehca_mr {
-	union {
-		struct ib_mr ib_mr;	/* must always be first in ehca_mr */
-		struct ib_fmr ib_fmr;	/* must always be first in ehca_mr */
-	} ib;
-	struct ib_umem *umem;
-	spinlock_t mrlock;
-
-	enum ehca_mr_flag flags;
-	u32 num_kpages;		/* number of kernel pages */
-	u32 num_hwpages;	/* number of hw pages to form MR */
-	u64 hwpage_size;	/* hw page size used for this MR */
-	int acl;		/* ACL (stored here for usage in reregister) */
-	u64 *start;		/* virtual start address (stored here for */
-				/* usage in reregister) */
-	u64 size;		/* size (stored here for usage in reregister) */
-	u32 fmr_page_size;	/* page size for FMR */
-	u32 fmr_max_pages;	/* max pages for FMR */
-	u32 fmr_max_maps;	/* max outstanding maps for FMR */
-	u32 fmr_map_cnt;	/* map counter for FMR */
-	/* fw specific data */
-	struct ipz_mrmw_handle ipz_mr_handle;	/* MR handle for h-calls */
-	struct h_galpas galpas;
-};
-
-struct ehca_mw {
-	struct ib_mw ib_mw;	/* gen2 mw, must always be first in ehca_mw */
-	spinlock_t mwlock;
-
-	u8 never_bound;		/* indication MW was never bound */
-	struct ipz_mrmw_handle ipz_mw_handle;	/* MW handle for h-calls */
-	struct h_galpas galpas;
-};
-
-enum ehca_mr_pgi_type {
-	EHCA_MR_PGI_PHYS   = 1,  /* type of ehca_reg_phys_mr,
-				  * ehca_rereg_phys_mr,
-				  * ehca_reg_internal_maxmr */
-	EHCA_MR_PGI_USER   = 2,  /* type of ehca_reg_user_mr */
-	EHCA_MR_PGI_FMR    = 3   /* type of ehca_map_phys_fmr */
-};
-
-struct ehca_mr_pginfo {
-	enum ehca_mr_pgi_type type;
-	u64 num_kpages;
-	u64 kpage_cnt;
-	u64 hwpage_size;     /* hw page size used for this MR */
-	u64 num_hwpages;     /* number of hw pages */
-	u64 hwpage_cnt;      /* counter for hw pages */
-	u64 next_hwpage;     /* next hw page in buffer/chunk/listelem */
-
-	union {
-		struct { /* type EHCA_MR_PGI_PHYS section */
-			u64 addr;
-			u16 size;
-		} phy;
-		struct { /* type EHCA_MR_PGI_USER section */
-			struct ib_umem *region;
-			struct scatterlist *next_sg;
-			u64 next_nmap;
-		} usr;
-		struct { /* type EHCA_MR_PGI_FMR section */
-			u64 fmr_pgsize;
-			u64 *page_list;
-			u64 next_listelem;
-		} fmr;
-	} u;
-};
-
-/* output parameters for MR/FMR hipz calls */
-struct ehca_mr_hipzout_parms {
-	struct ipz_mrmw_handle handle;
-	u32 lkey;
-	u32 rkey;
-	u64 len;
-	u64 vaddr;
-	u32 acl;
-};
-
-/* output parameters for MW hipz calls */
-struct ehca_mw_hipzout_parms {
-	struct ipz_mrmw_handle handle;
-	u32 rkey;
-};
-
-struct ehca_av {
-	struct ib_ah ib_ah;
-	struct ehca_ud_av av;
-};
-
-struct ehca_ucontext {
-	struct ib_ucontext ib_ucontext;
-};
-
-int ehca_init_pd_cache(void);
-void ehca_cleanup_pd_cache(void);
-int ehca_init_cq_cache(void);
-void ehca_cleanup_cq_cache(void);
-int ehca_init_qp_cache(void);
-void ehca_cleanup_qp_cache(void);
-int ehca_init_av_cache(void);
-void ehca_cleanup_av_cache(void);
-int ehca_init_mrmw_cache(void);
-void ehca_cleanup_mrmw_cache(void);
-int ehca_init_small_qp_cache(void);
-void ehca_cleanup_small_qp_cache(void);
-
-extern rwlock_t ehca_qp_idr_lock;
-extern rwlock_t ehca_cq_idr_lock;
-extern struct idr ehca_qp_idr;
-extern struct idr ehca_cq_idr;
-extern spinlock_t shca_list_lock;
-
-extern int ehca_static_rate;
-extern int ehca_port_act_time;
-extern bool ehca_use_hp_mr;
-extern bool ehca_scaling_code;
-extern int ehca_lock_hcalls;
-extern int ehca_nr_ports;
-extern int ehca_max_cq;
-extern int ehca_max_qp;
-
-struct ipzu_queue_resp {
-	u32 qe_size;      /* queue entry size */
-	u32 act_nr_of_sg;
-	u32 queue_length; /* queue length allocated in bytes */
-	u32 pagesize;
-	u32 toggle_state;
-	u32 offset; /* save offset within a page for small_qp */
-};
-
-struct ehca_create_cq_resp {
-	u32 cq_number;
-	u32 token;
-	struct ipzu_queue_resp ipz_queue;
-	u32 fw_handle_ofs;
-	u32 dummy;
-};
-
-struct ehca_create_qp_resp {
-	u32 qp_num;
-	u32 token;
-	u32 qp_type;
-	u32 ext_type;
-	u32 qkey;
-	/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
-	u32 real_qp_num;
-	u32 fw_handle_ofs;
-	u32 dummy;
-	struct ipzu_queue_resp ipz_squeue;
-	struct ipzu_queue_resp ipz_rqueue;
-};
-
-struct ehca_alloc_cq_parms {
-	u32 nr_cqe;
-	u32 act_nr_of_entries;
-	u32 act_pages;
-	struct ipz_eq_handle eq_handle;
-};
-
-enum ehca_service_type {
-	ST_RC  = 0,
-	ST_UC  = 1,
-	ST_RD  = 2,
-	ST_UD  = 3,
-};
-
-enum ehca_ll_comp_flags {
-	LLQP_SEND_COMP = 0x20,
-	LLQP_RECV_COMP = 0x40,
-	LLQP_COMP_MASK = 0x60,
-};
-
-struct ehca_alloc_queue_parms {
-	/* input parameters */
-	int max_wr;
-	int max_sge;
-	int page_size;
-	int is_small;
-
-	/* output parameters */
-	u16 act_nr_wqes;
-	u8  act_nr_sges;
-	u32 queue_size; /* bytes for small queues, pages otherwise */
-};
-
-struct ehca_alloc_qp_parms {
-	struct ehca_alloc_queue_parms squeue;
-	struct ehca_alloc_queue_parms rqueue;
-
-	/* input parameters */
-	enum ehca_service_type servicetype;
-	int qp_storage;
-	int sigtype;
-	enum ehca_ext_qp_type ext_type;
-	enum ehca_ll_comp_flags ll_comp_flags;
-	int ud_av_l_key_ctl;
-
-	u32 token;
-	struct ipz_eq_handle eq_handle;
-	struct ipz_pd pd;
-	struct ipz_cq_handle send_cq_handle, recv_cq_handle;
-
-	u32 srq_qpn, srq_token, srq_limit;
-
-	/* output parameters */
-	u32 real_qp_num;
-	struct ipz_qp_handle qp_handle;
-	struct h_galpas galpas;
-};
-
-int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
-int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
-struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_classes_pSeries.h b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h
deleted file mode 100644
index 689c35786dd2..000000000000
--- a/drivers/staging/rdma/ehca/ehca_classes_pSeries.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  pSeries interface definitions
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_CLASSES_PSERIES_H__
-#define __EHCA_CLASSES_PSERIES_H__
-
-#include "hcp_phyp.h"
-#include "ipz_pt_fn.h"
-
-
-struct ehca_pfqp {
-	struct ipz_qpt sqpt;
-	struct ipz_qpt rqpt;
-};
-
-struct ehca_pfcq {
-	struct ipz_qpt qpt;
-	u32 cqnr;
-};
-
-struct ehca_pfeq {
-	struct ipz_qpt qpt;
-	struct h_galpa galpa;
-	u32 eqnr;
-};
-
-struct ipz_adapter_handle {
-	u64 handle;
-};
-
-struct ipz_cq_handle {
-	u64 handle;
-};
-
-struct ipz_eq_handle {
-	u64 handle;
-};
-
-struct ipz_qp_handle {
-	u64 handle;
-};
-struct ipz_mrmw_handle {
-	u64 handle;
-};
-
-struct ipz_pd {
-	u32 value;
-};
-
-struct hcp_modify_qp_control_block {
-	u32 qkey;                      /* 00 */
-	u32 rdd;                       /* reliable datagram domain */
-	u32 send_psn;                  /* 02 */
-	u32 receive_psn;               /* 03 */
-	u32 prim_phys_port;            /* 04 */
-	u32 alt_phys_port;             /* 05 */
-	u32 prim_p_key_idx;            /* 06 */
-	u32 alt_p_key_idx;             /* 07 */
-	u32 rdma_atomic_ctrl;          /* 08 */
-	u32 qp_state;                  /* 09 */
-	u32 reserved_10;               /* 10 */
-	u32 rdma_nr_atomic_resp_res;   /* 11 */
-	u32 path_migration_state;      /* 12 */
-	u32 rdma_atomic_outst_dest_qp; /* 13 */
-	u32 dest_qp_nr;                /* 14 */
-	u32 min_rnr_nak_timer_field;   /* 15 */
-	u32 service_level;             /* 16 */
-	u32 send_grh_flag;             /* 17 */
-	u32 retry_count;               /* 18 */
-	u32 timeout;                   /* 19 */
-	u32 path_mtu;                  /* 20 */
-	u32 max_static_rate;           /* 21 */
-	u32 dlid;                      /* 22 */
-	u32 rnr_retry_count;           /* 23 */
-	u32 source_path_bits;          /* 24 */
-	u32 traffic_class;             /* 25 */
-	u32 hop_limit;                 /* 26 */
-	u32 source_gid_idx;            /* 27 */
-	u32 flow_label;                /* 28 */
-	u32 reserved_29;               /* 29 */
-	union {                        /* 30 */
-		u64 dw[2];
-		u8 byte[16];
-	} dest_gid;
-	u32 service_level_al;          /* 34 */
-	u32 send_grh_flag_al;          /* 35 */
-	u32 retry_count_al;            /* 36 */
-	u32 timeout_al;                /* 37 */
-	u32 max_static_rate_al;        /* 38 */
-	u32 dlid_al;                   /* 39 */
-	u32 rnr_retry_count_al;        /* 40 */
-	u32 source_path_bits_al;       /* 41 */
-	u32 traffic_class_al;          /* 42 */
-	u32 hop_limit_al;              /* 43 */
-	u32 source_gid_idx_al;         /* 44 */
-	u32 flow_label_al;             /* 45 */
-	u32 reserved_46;               /* 46 */
-	u32 reserved_47;               /* 47 */
-	union {                        /* 48 */
-		u64 dw[2];
-		u8 byte[16];
-	} dest_gid_al;
-	u32 max_nr_outst_send_wr;      /* 52 */
-	u32 max_nr_outst_recv_wr;      /* 53 */
-	u32 disable_ete_credit_check;  /* 54 */
-	u32 qp_number;                 /* 55 */
-	u64 send_queue_handle;         /* 56 */
-	u64 recv_queue_handle;         /* 58 */
-	u32 actual_nr_sges_in_sq_wqe;  /* 60 */
-	u32 actual_nr_sges_in_rq_wqe;  /* 61 */
-	u32 qp_enable;                 /* 62 */
-	u32 curr_srq_limit;            /* 63 */
-	u64 qp_aff_asyn_ev_log_reg;    /* 64 */
-	u64 shared_rq_hndl;            /* 66 */
-	u64 trigg_doorbell_qp_hndl;    /* 68 */
-	u32 reserved_70_127[58];       /* 70 */
-};
-
-#define MQPCB_MASK_QKEY                         EHCA_BMASK_IBM( 0,  0)
-#define MQPCB_MASK_SEND_PSN                     EHCA_BMASK_IBM( 2,  2)
-#define MQPCB_MASK_RECEIVE_PSN                  EHCA_BMASK_IBM( 3,  3)
-#define MQPCB_MASK_PRIM_PHYS_PORT               EHCA_BMASK_IBM( 4,  4)
-#define MQPCB_PRIM_PHYS_PORT                    EHCA_BMASK_IBM(24, 31)
-#define MQPCB_MASK_ALT_PHYS_PORT                EHCA_BMASK_IBM( 5,  5)
-#define MQPCB_MASK_PRIM_P_KEY_IDX               EHCA_BMASK_IBM( 6,  6)
-#define MQPCB_PRIM_P_KEY_IDX                    EHCA_BMASK_IBM(24, 31)
-#define MQPCB_MASK_ALT_P_KEY_IDX                EHCA_BMASK_IBM( 7,  7)
-#define MQPCB_MASK_RDMA_ATOMIC_CTRL             EHCA_BMASK_IBM( 8,  8)
-#define MQPCB_MASK_QP_STATE                     EHCA_BMASK_IBM( 9,  9)
-#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES      EHCA_BMASK_IBM(11, 11)
-#define MQPCB_MASK_PATH_MIGRATION_STATE         EHCA_BMASK_IBM(12, 12)
-#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP    EHCA_BMASK_IBM(13, 13)
-#define MQPCB_MASK_DEST_QP_NR                   EHCA_BMASK_IBM(14, 14)
-#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD      EHCA_BMASK_IBM(15, 15)
-#define MQPCB_MASK_SERVICE_LEVEL                EHCA_BMASK_IBM(16, 16)
-#define MQPCB_MASK_SEND_GRH_FLAG                EHCA_BMASK_IBM(17, 17)
-#define MQPCB_MASK_RETRY_COUNT                  EHCA_BMASK_IBM(18, 18)
-#define MQPCB_MASK_TIMEOUT                      EHCA_BMASK_IBM(19, 19)
-#define MQPCB_MASK_PATH_MTU                     EHCA_BMASK_IBM(20, 20)
-#define MQPCB_MASK_MAX_STATIC_RATE              EHCA_BMASK_IBM(21, 21)
-#define MQPCB_MASK_DLID                         EHCA_BMASK_IBM(22, 22)
-#define MQPCB_MASK_RNR_RETRY_COUNT              EHCA_BMASK_IBM(23, 23)
-#define MQPCB_MASK_SOURCE_PATH_BITS             EHCA_BMASK_IBM(24, 24)
-#define MQPCB_MASK_TRAFFIC_CLASS                EHCA_BMASK_IBM(25, 25)
-#define MQPCB_MASK_HOP_LIMIT                    EHCA_BMASK_IBM(26, 26)
-#define MQPCB_MASK_SOURCE_GID_IDX               EHCA_BMASK_IBM(27, 27)
-#define MQPCB_MASK_FLOW_LABEL                   EHCA_BMASK_IBM(28, 28)
-#define MQPCB_MASK_DEST_GID                     EHCA_BMASK_IBM(30, 30)
-#define MQPCB_MASK_SERVICE_LEVEL_AL             EHCA_BMASK_IBM(31, 31)
-#define MQPCB_MASK_SEND_GRH_FLAG_AL             EHCA_BMASK_IBM(32, 32)
-#define MQPCB_MASK_RETRY_COUNT_AL               EHCA_BMASK_IBM(33, 33)
-#define MQPCB_MASK_TIMEOUT_AL                   EHCA_BMASK_IBM(34, 34)
-#define MQPCB_MASK_MAX_STATIC_RATE_AL           EHCA_BMASK_IBM(35, 35)
-#define MQPCB_MASK_DLID_AL                      EHCA_BMASK_IBM(36, 36)
-#define MQPCB_MASK_RNR_RETRY_COUNT_AL           EHCA_BMASK_IBM(37, 37)
-#define MQPCB_MASK_SOURCE_PATH_BITS_AL          EHCA_BMASK_IBM(38, 38)
-#define MQPCB_MASK_TRAFFIC_CLASS_AL             EHCA_BMASK_IBM(39, 39)
-#define MQPCB_MASK_HOP_LIMIT_AL                 EHCA_BMASK_IBM(40, 40)
-#define MQPCB_MASK_SOURCE_GID_IDX_AL            EHCA_BMASK_IBM(41, 41)
-#define MQPCB_MASK_FLOW_LABEL_AL                EHCA_BMASK_IBM(42, 42)
-#define MQPCB_MASK_DEST_GID_AL                  EHCA_BMASK_IBM(44, 44)
-#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR         EHCA_BMASK_IBM(45, 45)
-#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR         EHCA_BMASK_IBM(46, 46)
-#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK     EHCA_BMASK_IBM(47, 47)
-#define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48, 48)
-#define MQPCB_MASK_CURR_SRQ_LIMIT               EHCA_BMASK_IBM(49, 49)
-#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50, 50)
-#define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51, 51)
-
-#endif /* __EHCA_CLASSES_PSERIES_H__ */
diff --git a/drivers/staging/rdma/ehca/ehca_cq.c b/drivers/staging/rdma/ehca/ehca_cq.c
deleted file mode 100644
index 1aa7931fe860..000000000000
--- a/drivers/staging/rdma/ehca/ehca_cq.c
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Completion queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_iverbs.h"
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "hcp_if.h"
-
-static struct kmem_cache *cq_cache;
-
-int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
-{
-	unsigned int qp_num = qp->real_qp_num;
-	unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
-	unsigned long flags;
-
-	spin_lock_irqsave(&cq->spinlock, flags);
-	hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
-	spin_unlock_irqrestore(&cq->spinlock, flags);
-
-	ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
-		 cq->cq_number, qp_num);
-
-	return 0;
-}
-
-int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
-{
-	int ret = -EINVAL;
-	unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
-	struct hlist_node *iter;
-	struct ehca_qp *qp;
-	unsigned long flags;
-
-	spin_lock_irqsave(&cq->spinlock, flags);
-	hlist_for_each(iter, &cq->qp_hashtab[key]) {
-		qp = hlist_entry(iter, struct ehca_qp, list_entries);
-		if (qp->real_qp_num == real_qp_num) {
-			hlist_del(iter);
-			ehca_dbg(cq->ib_cq.device,
-				 "removed qp from cq .cq_num=%x real_qp_num=%x",
-				 cq->cq_number, real_qp_num);
-			ret = 0;
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&cq->spinlock, flags);
-	if (ret)
-		ehca_err(cq->ib_cq.device,
-			 "qp not found cq_num=%x real_qp_num=%x",
-			 cq->cq_number, real_qp_num);
-
-	return ret;
-}
-
-struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
-{
-	struct ehca_qp *ret = NULL;
-	unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
-	struct hlist_node *iter;
-	struct ehca_qp *qp;
-	hlist_for_each(iter, &cq->qp_hashtab[key]) {
-		qp = hlist_entry(iter, struct ehca_qp, list_entries);
-		if (qp->real_qp_num == real_qp_num) {
-			ret = qp;
-			break;
-		}
-	}
-	return ret;
-}
-
-struct ib_cq *ehca_create_cq(struct ib_device *device,
-			     const struct ib_cq_init_attr *attr,
-			     struct ib_ucontext *context,
-			     struct ib_udata *udata)
-{
-	int cqe = attr->cqe;
-	static const u32 additional_cqe = 20;
-	struct ib_cq *cq;
-	struct ehca_cq *my_cq;
-	struct ehca_shca *shca =
-		container_of(device, struct ehca_shca, ib_device);
-	struct ipz_adapter_handle adapter_handle;
-	struct ehca_alloc_cq_parms param; /* h_call's out parameters */
-	struct h_galpa gal;
-	void *vpage;
-	u32 counter;
-	u64 rpage, cqx_fec, h_ret;
-	int rc, i;
-	unsigned long flags;
-
-	if (attr->flags)
-		return ERR_PTR(-EINVAL);
-
-	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
-		return ERR_PTR(-EINVAL);
-
-	if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) {
-		ehca_err(device, "Unable to create CQ, max number of %i "
-			"CQs reached.", shca->max_num_cqs);
-		ehca_err(device, "To increase the maximum number of CQs "
-			"use the number_of_cqs module parameter.\n");
-		return ERR_PTR(-ENOSPC);
-	}
-
-	my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
-	if (!my_cq) {
-		ehca_err(device, "Out of memory for ehca_cq struct device=%p",
-			 device);
-		atomic_dec(&shca->num_cqs);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
-
-	spin_lock_init(&my_cq->spinlock);
-	spin_lock_init(&my_cq->cb_lock);
-	spin_lock_init(&my_cq->task_lock);
-	atomic_set(&my_cq->nr_events, 0);
-	init_waitqueue_head(&my_cq->wait_completion);
-
-	cq = &my_cq->ib_cq;
-
-	adapter_handle = shca->ipz_hca_handle;
-	param.eq_handle = shca->eq.ipz_eq_handle;
-
-	idr_preload(GFP_KERNEL);
-	write_lock_irqsave(&ehca_cq_idr_lock, flags);
-	rc = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT);
-	write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-	idr_preload_end();
-
-	if (rc < 0) {
-		cq = ERR_PTR(-ENOMEM);
-		ehca_err(device, "Can't allocate new idr entry. device=%p",
-			 device);
-		goto create_cq_exit1;
-	}
-	my_cq->token = rc;
-
-	/*
-	 * CQs maximum depth is 4GB-64, but we need additional 20 as buffer
-	 * for receiving errors CQEs.
-	 */
-	param.nr_cqe = cqe + additional_cqe;
-	h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
-
-	if (h_ret != H_SUCCESS) {
-		ehca_err(device, "hipz_h_alloc_resource_cq() failed "
-			 "h_ret=%lli device=%p", h_ret, device);
-		cq = ERR_PTR(ehca2ib_return_code(h_ret));
-		goto create_cq_exit2;
-	}
-
-	rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
-				EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
-	if (!rc) {
-		ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p",
-			 rc, device);
-		cq = ERR_PTR(-EINVAL);
-		goto create_cq_exit3;
-	}
-
-	for (counter = 0; counter < param.act_pages; counter++) {
-		vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
-		if (!vpage) {
-			ehca_err(device, "ipz_qpageit_get_inc() "
-				 "returns NULL device=%p", device);
-			cq = ERR_PTR(-EAGAIN);
-			goto create_cq_exit4;
-		}
-		rpage = __pa(vpage);
-
-		h_ret = hipz_h_register_rpage_cq(adapter_handle,
-						 my_cq->ipz_cq_handle,
-						 &my_cq->pf,
-						 0,
-						 0,
-						 rpage,
-						 1,
-						 my_cq->galpas.
-						 kernel);
-
-		if (h_ret < H_SUCCESS) {
-			ehca_err(device, "hipz_h_register_rpage_cq() failed "
-				 "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i "
-				 "act_pages=%i", my_cq, my_cq->cq_number,
-				 h_ret, counter, param.act_pages);
-			cq = ERR_PTR(-EINVAL);
-			goto create_cq_exit4;
-		}
-
-		if (counter == (param.act_pages - 1)) {
-			vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
-			if ((h_ret != H_SUCCESS) || vpage) {
-				ehca_err(device, "Registration of pages not "
-					 "complete ehca_cq=%p cq_num=%x "
-					 "h_ret=%lli", my_cq, my_cq->cq_number,
-					 h_ret);
-				cq = ERR_PTR(-EAGAIN);
-				goto create_cq_exit4;
-			}
-		} else {
-			if (h_ret != H_PAGE_REGISTERED) {
-				ehca_err(device, "Registration of page failed "
-					 "ehca_cq=%p cq_num=%x h_ret=%lli "
-					 "counter=%i act_pages=%i",
-					 my_cq, my_cq->cq_number,
-					 h_ret, counter, param.act_pages);
-				cq = ERR_PTR(-ENOMEM);
-				goto create_cq_exit4;
-			}
-		}
-	}
-
-	ipz_qeit_reset(&my_cq->ipz_queue);
-
-	gal = my_cq->galpas.kernel;
-	cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
-	ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx",
-		 my_cq, my_cq->cq_number, cqx_fec);
-
-	my_cq->ib_cq.cqe = my_cq->nr_of_entries =
-		param.act_nr_of_entries - additional_cqe;
-	my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
-
-	for (i = 0; i < QP_HASHTAB_LEN; i++)
-		INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
-
-	INIT_LIST_HEAD(&my_cq->sqp_err_list);
-	INIT_LIST_HEAD(&my_cq->rqp_err_list);
-
-	if (context) {
-		struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
-		struct ehca_create_cq_resp resp;
-		memset(&resp, 0, sizeof(resp));
-		resp.cq_number = my_cq->cq_number;
-		resp.token = my_cq->token;
-		resp.ipz_queue.qe_size = ipz_queue->qe_size;
-		resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
-		resp.ipz_queue.queue_length = ipz_queue->queue_length;
-		resp.ipz_queue.pagesize = ipz_queue->pagesize;
-		resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
-		resp.fw_handle_ofs = (u32)
-			(my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
-		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
-			ehca_err(device, "Copy to udata failed.");
-			cq = ERR_PTR(-EFAULT);
-			goto create_cq_exit4;
-		}
-	}
-
-	return cq;
-
-create_cq_exit4:
-	ipz_queue_dtor(NULL, &my_cq->ipz_queue);
-
-create_cq_exit3:
-	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
-	if (h_ret != H_SUCCESS)
-		ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
-			 "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret);
-
-create_cq_exit2:
-	write_lock_irqsave(&ehca_cq_idr_lock, flags);
-	idr_remove(&ehca_cq_idr, my_cq->token);
-	write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
-create_cq_exit1:
-	kmem_cache_free(cq_cache, my_cq);
-
-	atomic_dec(&shca->num_cqs);
-	return cq;
-}
-
-int ehca_destroy_cq(struct ib_cq *cq)
-{
-	u64 h_ret;
-	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-	int cq_num = my_cq->cq_number;
-	struct ib_device *device = cq->device;
-	struct ehca_shca *shca = container_of(device, struct ehca_shca,
-					      ib_device);
-	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-	unsigned long flags;
-
-	if (cq->uobject) {
-		if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
-			ehca_err(device, "Resources still referenced in "
-				 "user space cq_num=%x", my_cq->cq_number);
-			return -EINVAL;
-		}
-	}
-
-	/*
-	 * remove the CQ from the idr first to make sure
-	 * no more interrupt tasklets will touch this CQ
-	 */
-	write_lock_irqsave(&ehca_cq_idr_lock, flags);
-	idr_remove(&ehca_cq_idr, my_cq->token);
-	write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
-	/* now wait until all pending events have completed */
-	wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events));
-
-	/* nobody's using our CQ any longer -- we can destroy it */
-	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
-	if (h_ret == H_R_STATE) {
-		/* cq in err: read err data and destroy it forcibly */
-		ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err "
-			 "state. Try to delete it forcibly.",
-			 my_cq, cq_num, my_cq->ipz_cq_handle.handle);
-		ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
-		h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
-		if (h_ret == H_SUCCESS)
-			ehca_dbg(device, "cq_num=%x deleted successfully.",
-				 cq_num);
-	}
-	if (h_ret != H_SUCCESS) {
-		ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli "
-			 "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
-		return ehca2ib_return_code(h_ret);
-	}
-	ipz_queue_dtor(NULL, &my_cq->ipz_queue);
-	kmem_cache_free(cq_cache, my_cq);
-
-	atomic_dec(&shca->num_cqs);
-	return 0;
-}
-
-int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
-{
-	/* TODO: proper resize needs to be done */
-	ehca_err(cq->device, "not implemented yet");
-
-	return -EFAULT;
-}
-
-int ehca_init_cq_cache(void)
-{
-	cq_cache = kmem_cache_create("ehca_cache_cq",
-				     sizeof(struct ehca_cq), 0,
-				     SLAB_HWCACHE_ALIGN,
-				     NULL);
-	if (!cq_cache)
-		return -ENOMEM;
-	return 0;
-}
-
-void ehca_cleanup_cq_cache(void)
-{
-	kmem_cache_destroy(cq_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_eq.c b/drivers/staging/rdma/ehca/ehca_eq.c
deleted file mode 100644
index 90da6747d395..000000000000
--- a/drivers/staging/rdma/ehca/ehca_eq.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Event queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "ehca_iverbs.h"
-#include "ehca_qes.h"
-#include "hcp_if.h"
-#include "ipz_pt_fn.h"
-
-int ehca_create_eq(struct ehca_shca *shca,
-		   struct ehca_eq *eq,
-		   const enum ehca_eq_type type, const u32 length)
-{
-	int ret;
-	u64 h_ret;
-	u32 nr_pages;
-	u32 i;
-	void *vpage;
-	struct ib_device *ib_dev = &shca->ib_device;
-
-	spin_lock_init(&eq->spinlock);
-	spin_lock_init(&eq->irq_spinlock);
-	eq->is_initialized = 0;
-
-	if (type != EHCA_EQ && type != EHCA_NEQ) {
-		ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
-		return -EINVAL;
-	}
-	if (!length) {
-		ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
-		return -EINVAL;
-	}
-
-	h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
-					 &eq->pf,
-					 type,
-					 length,
-					 &eq->ipz_eq_handle,
-					 &eq->length,
-					 &nr_pages, &eq->ist);
-
-	if (h_ret != H_SUCCESS) {
-		ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
-		return -EINVAL;
-	}
-
-	ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages,
-			     EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0);
-	if (!ret) {
-		ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
-		goto create_eq_exit1;
-	}
-
-	for (i = 0; i < nr_pages; i++) {
-		u64 rpage;
-
-		vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-		if (!vpage)
-			goto create_eq_exit2;
-
-		rpage = __pa(vpage);
-		h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
-						 eq->ipz_eq_handle,
-						 &eq->pf,
-						 0, 0, rpage, 1);
-
-		if (i == (nr_pages - 1)) {
-			/* last page */
-			vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-			if (h_ret != H_SUCCESS || vpage)
-				goto create_eq_exit2;
-		} else {
-			if (h_ret != H_PAGE_REGISTERED)
-				goto create_eq_exit2;
-		}
-	}
-
-	ipz_qeit_reset(&eq->ipz_queue);
-
-	/* register interrupt handlers and initialize work queues */
-	if (type == EHCA_EQ) {
-		tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
-
-		ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq,
-					  0, "ehca_eq",
-					  (void *)shca);
-		if (ret < 0)
-			ehca_err(ib_dev, "Can't map interrupt handler.");
-	} else if (type == EHCA_NEQ) {
-		tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
-
-		ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq,
-					  0, "ehca_neq",
-					  (void *)shca);
-		if (ret < 0)
-			ehca_err(ib_dev, "Can't map interrupt handler.");
-	}
-
-	eq->is_initialized = 1;
-
-	return 0;
-
-create_eq_exit2:
-	ipz_queue_dtor(NULL, &eq->ipz_queue);
-
-create_eq_exit1:
-	hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
-
-	return -EINVAL;
-}
-
-void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
-{
-	unsigned long flags;
-	void *eqe;
-
-	spin_lock_irqsave(&eq->spinlock, flags);
-	eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
-	spin_unlock_irqrestore(&eq->spinlock, flags);
-
-	return eqe;
-}
-
-int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
-{
-	unsigned long flags;
-	u64 h_ret;
-
-	ibmebus_free_irq(eq->ist, (void *)shca);
-
-	spin_lock_irqsave(&shca_list_lock, flags);
-	eq->is_initialized = 0;
-	spin_unlock_irqrestore(&shca_list_lock, flags);
-
-	tasklet_kill(&eq->interrupt_task);
-
-	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
-
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't free EQ resources.");
-		return -EINVAL;
-	}
-	ipz_queue_dtor(NULL, &eq->ipz_queue);
-
-	return 0;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_hca.c b/drivers/staging/rdma/ehca/ehca_hca.c
deleted file mode 100644
index e8b1bb65797a..000000000000
--- a/drivers/staging/rdma/ehca/ehca_hca.c
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HCA query functions
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/gfp.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-static unsigned int limit_uint(unsigned int value)
-{
-	return min_t(unsigned int, value, INT_MAX);
-}
-
-int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-		      struct ib_udata *uhw)
-{
-	int i, ret = 0;
-	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-					      ib_device);
-	struct hipz_query_hca *rblock;
-
-	static const u32 cap_mapping[] = {
-		IB_DEVICE_RESIZE_MAX_WR,      HCA_CAP_WQE_RESIZE,
-		IB_DEVICE_BAD_PKEY_CNTR,      HCA_CAP_BAD_P_KEY_CTR,
-		IB_DEVICE_BAD_QKEY_CNTR,      HCA_CAP_Q_KEY_VIOL_CTR,
-		IB_DEVICE_RAW_MULTI,          HCA_CAP_RAW_PACKET_MCAST,
-		IB_DEVICE_AUTO_PATH_MIG,      HCA_CAP_AUTO_PATH_MIG,
-		IB_DEVICE_CHANGE_PHY_PORT,    HCA_CAP_SQD_RTS_PORT_CHANGE,
-		IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK,
-		IB_DEVICE_CURR_QP_STATE_MOD,  HCA_CAP_CUR_QP_STATE_MOD,
-		IB_DEVICE_SHUTDOWN_PORT,      HCA_CAP_SHUTDOWN_PORT,
-		IB_DEVICE_INIT_TYPE,          HCA_CAP_INIT_TYPE,
-		IB_DEVICE_PORT_ACTIVE_EVENT,  HCA_CAP_PORT_ACTIVE_EVENT,
-	};
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query device properties");
-		ret = -EINVAL;
-		goto query_device1;
-	}
-
-	memset(props, 0, sizeof(struct ib_device_attr));
-	props->page_size_cap   = shca->hca_cap_mr_pgsize;
-	props->fw_ver          = rblock->hw_ver;
-	props->max_mr_size     = rblock->max_mr_size;
-	props->vendor_id       = rblock->vendor_id >> 8;
-	props->vendor_part_id  = rblock->vendor_part_id >> 16;
-	props->hw_ver          = rblock->hw_ver;
-	props->max_qp          = limit_uint(rblock->max_qp);
-	props->max_qp_wr       = limit_uint(rblock->max_wqes_wq);
-	props->max_sge         = limit_uint(rblock->max_sge);
-	props->max_sge_rd      = limit_uint(rblock->max_sge_rd);
-	props->max_cq          = limit_uint(rblock->max_cq);
-	props->max_cqe         = limit_uint(rblock->max_cqe);
-	props->max_mr          = limit_uint(rblock->max_mr);
-	props->max_mw          = limit_uint(rblock->max_mw);
-	props->max_pd          = limit_uint(rblock->max_pd);
-	props->max_ah          = limit_uint(rblock->max_ah);
-	props->max_ee          = limit_uint(rblock->max_rd_ee_context);
-	props->max_rdd         = limit_uint(rblock->max_rd_domain);
-	props->max_fmr         = limit_uint(rblock->max_mr);
-	props->max_qp_rd_atom  = limit_uint(rblock->max_rr_qp);
-	props->max_ee_rd_atom  = limit_uint(rblock->max_rr_ee_context);
-	props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
-	props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp);
-	props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context);
-
-	if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
-		props->max_srq         = limit_uint(props->max_qp);
-		props->max_srq_wr      = limit_uint(props->max_qp_wr);
-		props->max_srq_sge     = 3;
-	}
-
-	props->max_pkeys           = 16;
-	/* Some FW versions say 0 here; insert sensible value in that case */
-	props->local_ca_ack_delay  = rblock->local_ca_ack_delay ?
-		min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
-	props->max_raw_ipv6_qp     = limit_uint(rblock->max_raw_ipv6_qp);
-	props->max_raw_ethy_qp     = limit_uint(rblock->max_raw_ethy_qp);
-	props->max_mcast_grp       = limit_uint(rblock->max_mcast_grp);
-	props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach);
-	props->max_total_mcast_qp_attach
-		= limit_uint(rblock->max_total_mcast_qp_attach);
-
-	/* translate device capabilities */
-	props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
-		IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ;
-	for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
-		if (rblock->hca_cap_indicators & cap_mapping[i + 1])
-			props->device_cap_flags |= cap_mapping[i];
-
-query_device1:
-	ehca_free_fw_ctrlblock(rblock);
-
-	return ret;
-}
-
-static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
-{
-	switch (fw_mtu) {
-	case 0x1:
-		return IB_MTU_256;
-	case 0x2:
-		return IB_MTU_512;
-	case 0x3:
-		return IB_MTU_1024;
-	case 0x4:
-		return IB_MTU_2048;
-	case 0x5:
-		return IB_MTU_4096;
-	default:
-		ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
-			 fw_mtu);
-		return 0;
-	}
-}
-
-static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
-{
-	switch (vl_cap) {
-	case 0x1:
-		return 1;
-	case 0x2:
-		return 2;
-	case 0x3:
-		return 4;
-	case 0x4:
-		return 8;
-	case 0x5:
-		return 15;
-	default:
-		ehca_err(&shca->ib_device, "invalid Vl Capability: %x.",
-			 vl_cap);
-		return 0;
-	}
-}
-
-int ehca_query_port(struct ib_device *ibdev,
-		    u8 port, struct ib_port_attr *props)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-					      ib_device);
-	struct hipz_query_port *rblock;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query port properties");
-		ret = -EINVAL;
-		goto query_port1;
-	}
-
-	memset(props, 0, sizeof(struct ib_port_attr));
-
-	props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu);
-	props->port_cap_flags  = rblock->capability_mask;
-	props->gid_tbl_len     = rblock->gid_tbl_len;
-	if (rblock->max_msg_sz)
-		props->max_msg_sz      = rblock->max_msg_sz;
-	else
-		props->max_msg_sz      = 0x1 << 31;
-	props->bad_pkey_cntr   = rblock->bad_pkey_cntr;
-	props->qkey_viol_cntr  = rblock->qkey_viol_cntr;
-	props->pkey_tbl_len    = rblock->pkey_tbl_len;
-	props->lid             = rblock->lid;
-	props->sm_lid          = rblock->sm_lid;
-	props->lmc             = rblock->lmc;
-	props->sm_sl           = rblock->sm_sl;
-	props->subnet_timeout  = rblock->subnet_timeout;
-	props->init_type_reply = rblock->init_type_reply;
-	props->max_vl_num      = map_number_of_vls(shca, rblock->vl_cap);
-
-	if (rblock->state && rblock->phys_width) {
-		props->phys_state      = rblock->phys_pstate;
-		props->state           = rblock->phys_state;
-		props->active_width    = rblock->phys_width;
-		props->active_speed    = rblock->phys_speed;
-	} else {
-		/* old firmware releases don't report physical
-		 * port info, so use default values
-		 */
-		props->phys_state      = 5;
-		props->state           = rblock->state;
-		props->active_width    = IB_WIDTH_12X;
-		props->active_speed    = IB_SPEED_SDR;
-	}
-
-query_port1:
-	ehca_free_fw_ctrlblock(rblock);
-
-	return ret;
-}
-
-int ehca_query_sma_attr(struct ehca_shca *shca,
-			u8 port, struct ehca_sma_attr *attr)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct hipz_query_port *rblock;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query port properties");
-		ret = -EINVAL;
-		goto query_sma_attr1;
-	}
-
-	memset(attr, 0, sizeof(struct ehca_sma_attr));
-
-	attr->lid    = rblock->lid;
-	attr->lmc    = rblock->lmc;
-	attr->sm_sl  = rblock->sm_sl;
-	attr->sm_lid = rblock->sm_lid;
-
-	attr->pkey_tbl_len = rblock->pkey_tbl_len;
-	memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys));
-
-query_sma_attr1:
-	ehca_free_fw_ctrlblock(rblock);
-
-	return ret;
-}
-
-int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct ehca_shca *shca;
-	struct hipz_query_port *rblock;
-
-	shca = container_of(ibdev, struct ehca_shca, ib_device);
-	if (index > 16) {
-		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
-		return -EINVAL;
-	}
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query port properties");
-		ret = -EINVAL;
-		goto query_pkey1;
-	}
-
-	memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16));
-
-query_pkey1:
-	ehca_free_fw_ctrlblock(rblock);
-
-	return ret;
-}
-
-int ehca_query_gid(struct ib_device *ibdev, u8 port,
-		   int index, union ib_gid *gid)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
-					      ib_device);
-	struct hipz_query_port *rblock;
-
-	if (index < 0 || index > 255) {
-		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
-		return -EINVAL;
-	}
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query port properties");
-		ret = -EINVAL;
-		goto query_gid1;
-	}
-
-	memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
-	memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
-
-query_gid1:
-	ehca_free_fw_ctrlblock(rblock);
-
-	return ret;
-}
-
-static const u32 allowed_port_caps = (
-	IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
-	IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
-	IB_PORT_VENDOR_CLASS_SUP);
-
-int ehca_modify_port(struct ib_device *ibdev,
-		     u8 port, int port_modify_mask,
-		     struct ib_port_modify *props)
-{
-	int ret = 0;
-	struct ehca_shca *shca;
-	struct hipz_query_port *rblock;
-	u32 cap;
-	u64 hret;
-
-	shca = container_of(ibdev, struct ehca_shca, ib_device);
-	if ((props->set_port_cap_mask | props->clr_port_cap_mask)
-	    & ~allowed_port_caps) {
-		ehca_err(&shca->ib_device, "Non-changeable bits set in masks  "
-			 "set=%x  clr=%x  allowed=%x", props->set_port_cap_mask,
-			 props->clr_port_cap_mask, allowed_port_caps);
-		return -EINVAL;
-	}
-
-	if (mutex_lock_interruptible(&shca->modify_mutex))
-		return -ERESTARTSYS;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
-		ret = -ENOMEM;
-		goto modify_port1;
-	}
-
-	hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
-	if (hret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query port properties");
-		ret = -EINVAL;
-		goto modify_port2;
-	}
-
-	cap = (rblock->capability_mask | props->set_port_cap_mask)
-		& ~props->clr_port_cap_mask;
-
-	hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
-				  cap, props->init_type, port_modify_mask);
-	if (hret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Modify port failed  h_ret=%lli",
-			 hret);
-		ret = -EINVAL;
-	}
-
-modify_port2:
-	ehca_free_fw_ctrlblock(rblock);
-
-modify_port1:
-	mutex_unlock(&shca->modify_mutex);
-
-	return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_irq.c b/drivers/staging/rdma/ehca/ehca_irq.c
deleted file mode 100644
index 8615d7cf7e01..000000000000
--- a/drivers/staging/rdma/ehca/ehca_irq.c
+++ /dev/null
@@ -1,870 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Functions for EQs, NEQs and interrupts
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-#include <linux/smpboot.h>
-
-#include "ehca_classes.h"
-#include "ehca_irq.h"
-#include "ehca_iverbs.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-#include "ipz_pt_fn.h"
-
-#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM( 1,  1)
-#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM( 8, 31)
-#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM( 2,  7)
-#define EQE_CQ_NUMBER          EHCA_BMASK_IBM( 8, 31)
-#define EQE_QP_NUMBER          EHCA_BMASK_IBM( 8, 31)
-#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
-#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)
-
-#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM( 1,  1)
-#define NEQE_EVENT_CODE        EHCA_BMASK_IBM( 2,  7)
-#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM( 8, 15)
-#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
-#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)
-#define NEQE_SPECIFIC_EVENT    EHCA_BMASK_IBM(16, 23)
-
-#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
-#define ERROR_DATA_TYPE        EHCA_BMASK_IBM( 0,  7)
-
-static void queue_comp_task(struct ehca_cq *__cq);
-
-static struct ehca_comp_pool *pool;
-
-static inline void comp_event_callback(struct ehca_cq *cq)
-{
-	if (!cq->ib_cq.comp_handler)
-		return;
-
-	spin_lock(&cq->cb_lock);
-	cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
-	spin_unlock(&cq->cb_lock);
-
-	return;
-}
-
-static void print_error_data(struct ehca_shca *shca, void *data,
-			     u64 *rblock, int length)
-{
-	u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
-	u64 resource = rblock[1];
-
-	switch (type) {
-	case 0x1: /* Queue Pair */
-	{
-		struct ehca_qp *qp = (struct ehca_qp *)data;
-
-		/* only print error data if AER is set */
-		if (rblock[6] == 0)
-			return;
-
-		ehca_err(&shca->ib_device,
-			 "QP 0x%x (resource=%llx) has errors.",
-			 qp->ib_qp.qp_num, resource);
-		break;
-	}
-	case 0x4: /* Completion Queue */
-	{
-		struct ehca_cq *cq = (struct ehca_cq *)data;
-
-		ehca_err(&shca->ib_device,
-			 "CQ 0x%x (resource=%llx) has errors.",
-			 cq->cq_number, resource);
-		break;
-	}
-	default:
-		ehca_err(&shca->ib_device,
-			 "Unknown error type: %llx on %s.",
-			 type, shca->ib_device.name);
-		break;
-	}
-
-	ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);
-	ehca_err(&shca->ib_device, "EHCA ----- error data begin "
-		 "---------------------------------------------------");
-	ehca_dmp(rblock, length, "resource=%llx", resource);
-	ehca_err(&shca->ib_device, "EHCA ----- error data end "
-		 "----------------------------------------------------");
-
-	return;
-}
-
-int ehca_error_data(struct ehca_shca *shca, void *data,
-		    u64 resource)
-{
-
-	unsigned long ret;
-	u64 *rblock;
-	unsigned long block_count;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
-		ret = -ENOMEM;
-		goto error_data1;
-	}
-
-	/* rblock must be 4K aligned and should be 4K large */
-	ret = hipz_h_error_data(shca->ipz_hca_handle,
-				resource,
-				rblock,
-				&block_count);
-
-	if (ret == H_R_STATE)
-		ehca_err(&shca->ib_device,
-			 "No error data is available: %llx.", resource);
-	else if (ret == H_SUCCESS) {
-		int length;
-
-		length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
-
-		if (length > EHCA_PAGESIZE)
-			length = EHCA_PAGESIZE;
-
-		print_error_data(shca, data, rblock, length);
-	} else
-		ehca_err(&shca->ib_device,
-			 "Error data could not be fetched: %llx", resource);
-
-	ehca_free_fw_ctrlblock(rblock);
-
-error_data1:
-	return ret;
-
-}
-
-static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
-			      enum ib_event_type event_type)
-{
-	struct ib_event event;
-
-	/* PATH_MIG without the QP ever having been armed is false alarm */
-	if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
-		return;
-
-	event.device = &shca->ib_device;
-	event.event = event_type;
-
-	if (qp->ext_type == EQPT_SRQ) {
-		if (!qp->ib_srq.event_handler)
-			return;
-
-		event.element.srq = &qp->ib_srq;
-		qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
-	} else {
-		if (!qp->ib_qp.event_handler)
-			return;
-
-		event.element.qp = &qp->ib_qp;
-		qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
-	}
-}
-
-static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
-			      enum ib_event_type event_type, int fatal)
-{
-	struct ehca_qp *qp;
-	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
-
-	read_lock(&ehca_qp_idr_lock);
-	qp = idr_find(&ehca_qp_idr, token);
-	if (qp)
-		atomic_inc(&qp->nr_events);
-	read_unlock(&ehca_qp_idr_lock);
-
-	if (!qp)
-		return;
-
-	if (fatal)
-		ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
-
-	dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
-			  IB_EVENT_SRQ_ERR : event_type);
-
-	/*
-	 * eHCA only processes one WQE at a time for SRQ base QPs,
-	 * so the last WQE has been processed as soon as the QP enters
-	 * error state.
-	 */
-	if (fatal && qp->ext_type == EQPT_SRQBASE)
-		dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
-
-	if (atomic_dec_and_test(&qp->nr_events))
-		wake_up(&qp->wait_completion);
-	return;
-}
-
-static void cq_event_callback(struct ehca_shca *shca,
-			      u64 eqe)
-{
-	struct ehca_cq *cq;
-	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
-
-	read_lock(&ehca_cq_idr_lock);
-	cq = idr_find(&ehca_cq_idr, token);
-	if (cq)
-		atomic_inc(&cq->nr_events);
-	read_unlock(&ehca_cq_idr_lock);
-
-	if (!cq)
-		return;
-
-	ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
-
-	if (atomic_dec_and_test(&cq->nr_events))
-		wake_up(&cq->wait_completion);
-
-	return;
-}
-
-static void parse_identifier(struct ehca_shca *shca, u64 eqe)
-{
-	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
-
-	switch (identifier) {
-	case 0x02: /* path migrated */
-		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
-		break;
-	case 0x03: /* communication established */
-		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
-		break;
-	case 0x04: /* send queue drained */
-		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
-		break;
-	case 0x05: /* QP error */
-	case 0x06: /* QP error */
-		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
-		break;
-	case 0x07: /* CQ error */
-	case 0x08: /* CQ error */
-		cq_event_callback(shca, eqe);
-		break;
-	case 0x09: /* MRMWPTE error */
-		ehca_err(&shca->ib_device, "MRMWPTE error.");
-		break;
-	case 0x0A: /* port event */
-		ehca_err(&shca->ib_device, "Port event.");
-		break;
-	case 0x0B: /* MR access error */
-		ehca_err(&shca->ib_device, "MR access error.");
-		break;
-	case 0x0C: /* EQ error */
-		ehca_err(&shca->ib_device, "EQ error.");
-		break;
-	case 0x0D: /* P/Q_Key mismatch */
-		ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
-		break;
-	case 0x10: /* sampling complete */
-		ehca_err(&shca->ib_device, "Sampling complete.");
-		break;
-	case 0x11: /* unaffiliated access error */
-		ehca_err(&shca->ib_device, "Unaffiliated access error.");
-		break;
-	case 0x12: /* path migrating */
-		ehca_err(&shca->ib_device, "Path migrating.");
-		break;
-	case 0x13: /* interface trace stopped */
-		ehca_err(&shca->ib_device, "Interface trace stopped.");
-		break;
-	case 0x14: /* first error capture info available */
-		ehca_info(&shca->ib_device, "First error capture available");
-		break;
-	case 0x15: /* SRQ limit reached */
-		qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
-		break;
-	default:
-		ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
-			 identifier, shca->ib_device.name);
-		break;
-	}
-
-	return;
-}
-
-static void dispatch_port_event(struct ehca_shca *shca, int port_num,
-				enum ib_event_type type, const char *msg)
-{
-	struct ib_event event;
-
-	ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
-	event.device = &shca->ib_device;
-	event.event = type;
-	event.element.port_num = port_num;
-	ib_dispatch_event(&event);
-}
-
-static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
-{
-	struct ehca_sma_attr  new_attr;
-	struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
-
-	ehca_query_sma_attr(shca, port_num, &new_attr);
-
-	if (new_attr.sm_sl  != old_attr->sm_sl ||
-	    new_attr.sm_lid != old_attr->sm_lid)
-		dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
-				    "SM changed");
-
-	if (new_attr.lid != old_attr->lid ||
-	    new_attr.lmc != old_attr->lmc)
-		dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
-				    "LID changed");
-
-	if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
-	    memcmp(new_attr.pkeys, old_attr->pkeys,
-		   sizeof(u16) * new_attr.pkey_tbl_len))
-		dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
-				    "P_Key changed");
-
-	*old_attr = new_attr;
-}
-
-/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
-static int replay_modify_qp(struct ehca_sport *sport)
-{
-	int aqp1_destroyed;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-
-	aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];
-
-	if (sport->ibqp_sqp[IB_QPT_SMI])
-		ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
-	if (!aqp1_destroyed)
-		ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
-
-	spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-
-	return aqp1_destroyed;
-}
-
-static void parse_ec(struct ehca_shca *shca, u64 eqe)
-{
-	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
-	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
-	u8 spec_event;
-	struct ehca_sport *sport = &shca->sport[port - 1];
-
-	switch (ec) {
-	case 0x30: /* port availability change */
-		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
-			/* only replay modify_qp calls in autodetect mode;
-			 * if AQP1 was destroyed, the port is already down
-			 * again and we can drop the event.
-			 */
-			if (ehca_nr_ports < 0)
-				if (replay_modify_qp(sport))
-					break;
-
-			sport->port_state = IB_PORT_ACTIVE;
-			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
-					    "is active");
-			ehca_query_sma_attr(shca, port, &sport->saved_attr);
-		} else {
-			sport->port_state = IB_PORT_DOWN;
-			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
-					    "is inactive");
-		}
-		break;
-	case 0x31:
-		/* port configuration change
-		 * disruptive change is caused by
-		 * LID, PKEY or SM change
-		 */
-		if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
-			ehca_warn(&shca->ib_device, "disruptive port "
-				  "%d configuration change", port);
-
-			sport->port_state = IB_PORT_DOWN;
-			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
-					    "is inactive");
-
-			sport->port_state = IB_PORT_ACTIVE;
-			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
-					    "is active");
-			ehca_query_sma_attr(shca, port,
-					    &sport->saved_attr);
-		} else
-			notify_port_conf_change(shca, port);
-		break;
-	case 0x32: /* adapter malfunction */
-		ehca_err(&shca->ib_device, "Adapter malfunction.");
-		break;
-	case 0x33:  /* trace stopped */
-		ehca_err(&shca->ib_device, "Traced stopped.");
-		break;
-	case 0x34: /* util async event */
-		spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
-		if (spec_event == 0x80) /* client reregister required */
-			dispatch_port_event(shca, port,
-					    IB_EVENT_CLIENT_REREGISTER,
-					    "client reregister req.");
-		else
-			ehca_warn(&shca->ib_device, "Unknown util async "
-				  "event %x on port %x", spec_event, port);
-		break;
-	default:
-		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
-			 ec, shca->ib_device.name);
-		break;
-	}
-
-	return;
-}
-
-static inline void reset_eq_pending(struct ehca_cq *cq)
-{
-	u64 CQx_EP;
-	struct h_galpa gal = cq->galpas.kernel;
-
-	hipz_galpa_store_cq(gal, cqx_ep, 0x0);
-	CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
-
-	return;
-}
-
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
-{
-	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
-
-	tasklet_hi_schedule(&shca->neq.interrupt_task);
-
-	return IRQ_HANDLED;
-}
-
-void ehca_tasklet_neq(unsigned long data)
-{
-	struct ehca_shca *shca = (struct ehca_shca*)data;
-	struct ehca_eqe *eqe;
-	u64 ret;
-
-	eqe = ehca_poll_eq(shca, &shca->neq);
-
-	while (eqe) {
-		if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
-			parse_ec(shca, eqe->entry);
-
-		eqe = ehca_poll_eq(shca, &shca->neq);
-	}
-
-	ret = hipz_h_reset_event(shca->ipz_hca_handle,
-				 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
-
-	if (ret != H_SUCCESS)
-		ehca_err(&shca->ib_device, "Can't clear notification events.");
-
-	return;
-}
-
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
-{
-	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
-
-	tasklet_hi_schedule(&shca->eq.interrupt_task);
-
-	return IRQ_HANDLED;
-}
-
-
-static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
-{
-	u64 eqe_value;
-	u32 token;
-	struct ehca_cq *cq;
-
-	eqe_value = eqe->entry;
-	ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);
-	if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
-		ehca_dbg(&shca->ib_device, "Got completion event");
-		token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-		read_lock(&ehca_cq_idr_lock);
-		cq = idr_find(&ehca_cq_idr, token);
-		if (cq)
-			atomic_inc(&cq->nr_events);
-		read_unlock(&ehca_cq_idr_lock);
-		if (cq == NULL) {
-			ehca_err(&shca->ib_device,
-				 "Invalid eqe for non-existing cq token=%x",
-				 token);
-			return;
-		}
-		reset_eq_pending(cq);
-		if (ehca_scaling_code)
-			queue_comp_task(cq);
-		else {
-			comp_event_callback(cq);
-			if (atomic_dec_and_test(&cq->nr_events))
-				wake_up(&cq->wait_completion);
-		}
-	} else {
-		ehca_dbg(&shca->ib_device, "Got non completion event");
-		parse_identifier(shca, eqe_value);
-	}
-}
-
-void ehca_process_eq(struct ehca_shca *shca, int is_irq)
-{
-	struct ehca_eq *eq = &shca->eq;
-	struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
-	u64 eqe_value, ret;
-	int eqe_cnt, i;
-	int eq_empty = 0;
-
-	spin_lock(&eq->irq_spinlock);
-	if (is_irq) {
-		const int max_query_cnt = 100;
-		int query_cnt = 0;
-		int int_state = 1;
-		do {
-			int_state = hipz_h_query_int_state(
-				shca->ipz_hca_handle, eq->ist);
-			query_cnt++;
-			iosync();
-		} while (int_state && query_cnt < max_query_cnt);
-		if (unlikely((query_cnt == max_query_cnt)))
-			ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
-				 int_state, query_cnt);
-	}
-
-	/* read out all eqes */
-	eqe_cnt = 0;
-	do {
-		u32 token;
-		eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
-		if (!eqe_cache[eqe_cnt].eqe)
-			break;
-		eqe_value = eqe_cache[eqe_cnt].eqe->entry;
-		if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
-			token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-			read_lock(&ehca_cq_idr_lock);
-			eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
-			if (eqe_cache[eqe_cnt].cq)
-				atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
-			read_unlock(&ehca_cq_idr_lock);
-			if (!eqe_cache[eqe_cnt].cq) {
-				ehca_err(&shca->ib_device,
-					 "Invalid eqe for non-existing cq "
-					 "token=%x", token);
-				continue;
-			}
-		} else
-			eqe_cache[eqe_cnt].cq = NULL;
-		eqe_cnt++;
-	} while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
-	if (!eqe_cnt) {
-		if (is_irq)
-			ehca_dbg(&shca->ib_device,
-				 "No eqe found for irq event");
-		goto unlock_irq_spinlock;
-	} else if (!is_irq) {
-		ret = hipz_h_eoi(eq->ist);
-		if (ret != H_SUCCESS)
-			ehca_err(&shca->ib_device,
-				 "bad return code EOI -rc = %lld\n", ret);
-		ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
-	}
-	if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
-		ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
-	/* enable irq for new packets */
-	for (i = 0; i < eqe_cnt; i++) {
-		if (eq->eqe_cache[i].cq)
-			reset_eq_pending(eq->eqe_cache[i].cq);
-	}
-	/* check eq */
-	spin_lock(&eq->spinlock);
-	eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
-	spin_unlock(&eq->spinlock);
-	/* call completion handler for cached eqes */
-	for (i = 0; i < eqe_cnt; i++)
-		if (eq->eqe_cache[i].cq) {
-			if (ehca_scaling_code)
-				queue_comp_task(eq->eqe_cache[i].cq);
-			else {
-				struct ehca_cq *cq = eq->eqe_cache[i].cq;
-				comp_event_callback(cq);
-				if (atomic_dec_and_test(&cq->nr_events))
-					wake_up(&cq->wait_completion);
-			}
-		} else {
-			ehca_dbg(&shca->ib_device, "Got non completion event");
-			parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
-		}
-	/* poll eq if not empty */
-	if (eq_empty)
-		goto unlock_irq_spinlock;
-	do {
-		struct ehca_eqe *eqe;
-		eqe = ehca_poll_eq(shca, &shca->eq);
-		if (!eqe)
-			break;
-		process_eqe(shca, eqe);
-	} while (1);
-
-unlock_irq_spinlock:
-	spin_unlock(&eq->irq_spinlock);
-}
-
-void ehca_tasklet_eq(unsigned long data)
-{
-	ehca_process_eq((struct ehca_shca*)data, 1);
-}
-
-static int find_next_online_cpu(struct ehca_comp_pool *pool)
-{
-	int cpu;
-	unsigned long flags;
-
-	WARN_ON_ONCE(!in_interrupt());
-	if (ehca_debug_level >= 3)
-		ehca_dmp(cpu_online_mask, cpumask_size(), "");
-
-	spin_lock_irqsave(&pool->last_cpu_lock, flags);
-	do {
-		cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
-		if (cpu >= nr_cpu_ids)
-			cpu = cpumask_first(cpu_online_mask);
-		pool->last_cpu = cpu;
-	} while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
-	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
-
-	return cpu;
-}
-
-static void __queue_comp_task(struct ehca_cq *__cq,
-			      struct ehca_cpu_comp_task *cct,
-			      struct task_struct *thread)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cct->task_lock, flags);
-	spin_lock(&__cq->task_lock);
-
-	if (__cq->nr_callbacks == 0) {
-		__cq->nr_callbacks++;
-		list_add_tail(&__cq->entry, &cct->cq_list);
-		cct->cq_jobs++;
-		wake_up_process(thread);
-	} else
-		__cq->nr_callbacks++;
-
-	spin_unlock(&__cq->task_lock);
-	spin_unlock_irqrestore(&cct->task_lock, flags);
-}
-
-static void queue_comp_task(struct ehca_cq *__cq)
-{
-	int cpu_id;
-	struct ehca_cpu_comp_task *cct;
-	struct task_struct *thread;
-	int cq_jobs;
-	unsigned long flags;
-
-	cpu_id = find_next_online_cpu(pool);
-	BUG_ON(!cpu_online(cpu_id));
-
-	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-	thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
-	BUG_ON(!cct || !thread);
-
-	spin_lock_irqsave(&cct->task_lock, flags);
-	cq_jobs = cct->cq_jobs;
-	spin_unlock_irqrestore(&cct->task_lock, flags);
-	if (cq_jobs > 0) {
-		cpu_id = find_next_online_cpu(pool);
-		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-		thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
-		BUG_ON(!cct || !thread);
-	}
-	__queue_comp_task(__cq, cct, thread);
-}
-
-static void run_comp_task(struct ehca_cpu_comp_task *cct)
-{
-	struct ehca_cq *cq;
-
-	while (!list_empty(&cct->cq_list)) {
-		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-		spin_unlock_irq(&cct->task_lock);
-
-		comp_event_callback(cq);
-		if (atomic_dec_and_test(&cq->nr_events))
-			wake_up(&cq->wait_completion);
-
-		spin_lock_irq(&cct->task_lock);
-		spin_lock(&cq->task_lock);
-		cq->nr_callbacks--;
-		if (!cq->nr_callbacks) {
-			list_del_init(cct->cq_list.next);
-			cct->cq_jobs--;
-		}
-		spin_unlock(&cq->task_lock);
-	}
-}
-
-static void comp_task_park(unsigned int cpu)
-{
-	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-	struct ehca_cpu_comp_task *target;
-	struct task_struct *thread;
-	struct ehca_cq *cq, *tmp;
-	LIST_HEAD(list);
-
-	spin_lock_irq(&cct->task_lock);
-	cct->cq_jobs = 0;
-	cct->active = 0;
-	list_splice_init(&cct->cq_list, &list);
-	spin_unlock_irq(&cct->task_lock);
-
-	cpu = find_next_online_cpu(pool);
-	target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-	thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
-	spin_lock_irq(&target->task_lock);
-	list_for_each_entry_safe(cq, tmp, &list, entry) {
-		list_del(&cq->entry);
-		__queue_comp_task(cq, target, thread);
-	}
-	spin_unlock_irq(&target->task_lock);
-}
-
-static void comp_task_stop(unsigned int cpu, bool online)
-{
-	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
-	spin_lock_irq(&cct->task_lock);
-	cct->cq_jobs = 0;
-	cct->active = 0;
-	WARN_ON(!list_empty(&cct->cq_list));
-	spin_unlock_irq(&cct->task_lock);
-}
-
-static int comp_task_should_run(unsigned int cpu)
-{
-	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
-	return cct->cq_jobs;
-}
-
-static void comp_task(unsigned int cpu)
-{
-	struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
-	int cql_empty;
-
-	spin_lock_irq(&cct->task_lock);
-	cql_empty = list_empty(&cct->cq_list);
-	if (!cql_empty) {
-		__set_current_state(TASK_RUNNING);
-		run_comp_task(cct);
-	}
-	spin_unlock_irq(&cct->task_lock);
-}
-
-static struct smp_hotplug_thread comp_pool_threads = {
-	.thread_should_run	= comp_task_should_run,
-	.thread_fn		= comp_task,
-	.thread_comm		= "ehca_comp/%u",
-	.cleanup		= comp_task_stop,
-	.park			= comp_task_park,
-};
-
-int ehca_create_comp_pool(void)
-{
-	int cpu, ret = -ENOMEM;
-
-	if (!ehca_scaling_code)
-		return 0;
-
-	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
-	if (pool == NULL)
-		return -ENOMEM;
-
-	spin_lock_init(&pool->last_cpu_lock);
-	pool->last_cpu = cpumask_any(cpu_online_mask);
-
-	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
-	if (!pool->cpu_comp_tasks)
-		goto out_pool;
-
-	pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
-	if (!pool->cpu_comp_threads)
-		goto out_tasks;
-
-	for_each_present_cpu(cpu) {
-		struct ehca_cpu_comp_task *cct;
-
-		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-		spin_lock_init(&cct->task_lock);
-		INIT_LIST_HEAD(&cct->cq_list);
-	}
-
-	comp_pool_threads.store = pool->cpu_comp_threads;
-	ret = smpboot_register_percpu_thread(&comp_pool_threads);
-	if (ret)
-		goto out_threads;
-
-	pr_info("eHCA scaling code enabled\n");
-	return ret;
-
-out_threads:
-	free_percpu(pool->cpu_comp_threads);
-out_tasks:
-	free_percpu(pool->cpu_comp_tasks);
-out_pool:
-	kfree(pool);
-	return ret;
-}
-
-void ehca_destroy_comp_pool(void)
-{
-	if (!ehca_scaling_code)
-		return;
-
-	smpboot_unregister_percpu_thread(&comp_pool_threads);
-
-	free_percpu(pool->cpu_comp_threads);
-	free_percpu(pool->cpu_comp_tasks);
-	kfree(pool);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_irq.h b/drivers/staging/rdma/ehca/ehca_irq.h
deleted file mode 100644
index 5370199f08c7..000000000000
--- a/drivers/staging/rdma/ehca/ehca_irq.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Function definitions and structs for EQs, NEQs and interrupts
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Khadija Souissi <souissi@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_IRQ_H
-#define __EHCA_IRQ_H
-
-
-struct ehca_shca;
-
-#include <linux/interrupt.h>
-#include <linux/types.h>
-
-int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
-
-irqreturn_t ehca_interrupt_neq(int irq, void *dev_id);
-void ehca_tasklet_neq(unsigned long data);
-
-irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
-void ehca_tasklet_eq(unsigned long data);
-void ehca_process_eq(struct ehca_shca *shca, int is_irq);
-
-struct ehca_cpu_comp_task {
-	struct list_head cq_list;
-	spinlock_t task_lock;
-	int cq_jobs;
-	int active;
-};
-
-struct ehca_comp_pool {
-	struct ehca_cpu_comp_task __percpu *cpu_comp_tasks;
-	struct task_struct * __percpu *cpu_comp_threads;
-	int last_cpu;
-	spinlock_t last_cpu_lock;
-};
-
-int ehca_create_comp_pool(void);
-void ehca_destroy_comp_pool(void);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h
deleted file mode 100644
index cca5933fcda6..000000000000
--- a/drivers/staging/rdma/ehca/ehca_iverbs.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Function definitions for internal functions
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Dietmar Decker <ddecker@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __EHCA_IVERBS_H__
-#define __EHCA_IVERBS_H__
-
-#include "ehca_classes.h"
-
-int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-		      struct ib_udata *uhw);
-
-int ehca_query_port(struct ib_device *ibdev, u8 port,
-		    struct ib_port_attr *props);
-
-enum rdma_protocol_type
-ehca_query_protocol(struct ib_device *device, u8 port_num);
-
-int ehca_query_sma_attr(struct ehca_shca *shca, u8 port,
-			struct ehca_sma_attr *attr);
-
-int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
-
-int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
-		   union ib_gid *gid);
-
-int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
-		     struct ib_port_modify *props);
-
-struct ib_pd *ehca_alloc_pd(struct ib_device *device,
-			    struct ib_ucontext *context,
-			    struct ib_udata *udata);
-
-int ehca_dealloc_pd(struct ib_pd *pd);
-
-struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
-
-int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
-
-int ehca_destroy_ah(struct ib_ah *ah);
-
-struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
-
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			       u64 virt, int mr_access_flags,
-			       struct ib_udata *udata);
-
-int ehca_dereg_mr(struct ib_mr *mr);
-
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-
-int ehca_dealloc_mw(struct ib_mw *mw);
-
-struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
-			      int mr_access_flags,
-			      struct ib_fmr_attr *fmr_attr);
-
-int ehca_map_phys_fmr(struct ib_fmr *fmr,
-		      u64 *page_list, int list_len, u64 iova);
-
-int ehca_unmap_fmr(struct list_head *fmr_list);
-
-int ehca_dealloc_fmr(struct ib_fmr *fmr);
-
-enum ehca_eq_type {
-	EHCA_EQ = 0, /* Event Queue              */
-	EHCA_NEQ     /* Notification Event Queue */
-};
-
-int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
-		   enum ehca_eq_type type, const u32 length);
-
-int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-
-void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-
-
-struct ib_cq *ehca_create_cq(struct ib_device *device,
-			     const struct ib_cq_init_attr *attr,
-			     struct ib_ucontext *context,
-			     struct ib_udata *udata);
-
-int ehca_destroy_cq(struct ib_cq *cq);
-
-int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
-
-int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
-
-int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
-
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags);
-
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-			     struct ib_qp_init_attr *init_attr,
-			     struct ib_udata *udata);
-
-int ehca_destroy_qp(struct ib_qp *qp);
-
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-		   struct ib_udata *udata);
-
-int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
-		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
-
-int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
-		   struct ib_send_wr **bad_send_wr);
-
-int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
-		   struct ib_recv_wr **bad_recv_wr);
-
-int ehca_post_srq_recv(struct ib_srq *srq,
-		       struct ib_recv_wr *recv_wr,
-		       struct ib_recv_wr **bad_recv_wr);
-
-struct ib_srq *ehca_create_srq(struct ib_pd *pd,
-			       struct ib_srq_init_attr *init_attr,
-			       struct ib_udata *udata);
-
-int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
-		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
-
-int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-
-int ehca_destroy_srq(struct ib_srq *srq);
-
-u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
-		    struct ib_qp_init_attr *qp_init_attr);
-
-int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
-int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
-struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
-					struct ib_udata *udata);
-
-int ehca_dealloc_ucontext(struct ib_ucontext *context);
-
-int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		     const struct ib_mad_hdr *in, size_t in_mad_size,
-		     struct ib_mad_hdr *out, size_t *out_mad_size,
-		     u16 *out_mad_pkey_index);
-
-void ehca_poll_eqs(unsigned long data);
-
-int ehca_calc_ipd(struct ehca_shca *shca, int port,
-		  enum ib_rate path_rate, u32 *ipd);
-
-void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
-
-#ifdef CONFIG_PPC_64K_PAGES
-void *ehca_alloc_fw_ctrlblock(gfp_t flags);
-void ehca_free_fw_ctrlblock(void *ptr);
-#else
-#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags))
-#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
-#endif
-
-void ehca_recover_sqp(struct ib_qp *sqp);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c
deleted file mode 100644
index 832f22f40862..000000000000
--- a/drivers/staging/rdma/ehca/ehca_main.c
+++ /dev/null
@@ -1,1118 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  module start stop, hca detection
- *
- *  Authors: Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef CONFIG_PPC_64K_PAGES
-#include <linux/slab.h>
-#endif
-
-#include <linux/notifier.h>
-#include <linux/memory.h>
-#include <rdma/ib_mad.h>
-#include "ehca_classes.h"
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-
-#define HCAD_VERSION "0029"
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
-MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION(HCAD_VERSION);
-
-static bool ehca_open_aqp1    = 0;
-static int ehca_hw_level      = 0;
-static bool ehca_poll_all_eqs = 1;
-
-int ehca_debug_level   = 0;
-int ehca_nr_ports      = -1;
-bool ehca_use_hp_mr    = 0;
-int ehca_port_act_time = 30;
-int ehca_static_rate   = -1;
-bool ehca_scaling_code = 0;
-int ehca_lock_hcalls   = -1;
-int ehca_max_cq        = -1;
-int ehca_max_qp        = -1;
-
-module_param_named(open_aqp1,     ehca_open_aqp1,     bool, S_IRUGO);
-module_param_named(debug_level,   ehca_debug_level,   int,  S_IRUGO);
-module_param_named(hw_level,      ehca_hw_level,      int,  S_IRUGO);
-module_param_named(nr_ports,      ehca_nr_ports,      int,  S_IRUGO);
-module_param_named(use_hp_mr,     ehca_use_hp_mr,     bool, S_IRUGO);
-module_param_named(port_act_time, ehca_port_act_time, int,  S_IRUGO);
-module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  bool, S_IRUGO);
-module_param_named(static_rate,   ehca_static_rate,   int,  S_IRUGO);
-module_param_named(scaling_code,  ehca_scaling_code,  bool, S_IRUGO);
-module_param_named(lock_hcalls,   ehca_lock_hcalls,   bint, S_IRUGO);
-module_param_named(number_of_cqs, ehca_max_cq,        int,  S_IRUGO);
-module_param_named(number_of_qps, ehca_max_qp,        int,  S_IRUGO);
-
-MODULE_PARM_DESC(open_aqp1,
-		 "Open AQP1 on startup (default: no)");
-MODULE_PARM_DESC(debug_level,
-		 "Amount of debug output (0: none (default), 1: traces, "
-		 "2: some dumps, 3: lots)");
-MODULE_PARM_DESC(hw_level,
-		 "Hardware level (0: autosensing (default), "
-		 "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
-MODULE_PARM_DESC(nr_ports,
-		 "number of connected ports (-1: autodetect (default), "
-		 "1: port one only, 2: two ports)");
-MODULE_PARM_DESC(use_hp_mr,
-		 "Use high performance MRs (default: no)");
-MODULE_PARM_DESC(port_act_time,
-		 "Time to wait for port activation (default: 30 sec)");
-MODULE_PARM_DESC(poll_all_eqs,
-		 "Poll all event queues periodically (default: yes)");
-MODULE_PARM_DESC(static_rate,
-		 "Set permanent static rate (default: no static rate)");
-MODULE_PARM_DESC(scaling_code,
-		 "Enable scaling code (default: no)");
-MODULE_PARM_DESC(lock_hcalls,
-		 "Serialize all hCalls made by the driver "
-		 "(default: autodetect)");
-MODULE_PARM_DESC(number_of_cqs,
-		"Max number of CQs which can be allocated "
-		"(default: autodetect)");
-MODULE_PARM_DESC(number_of_qps,
-		"Max number of QPs which can be allocated "
-		"(default: autodetect)");
-
-DEFINE_RWLOCK(ehca_qp_idr_lock);
-DEFINE_RWLOCK(ehca_cq_idr_lock);
-DEFINE_IDR(ehca_qp_idr);
-DEFINE_IDR(ehca_cq_idr);
-
-static LIST_HEAD(shca_list); /* list of all registered ehcas */
-DEFINE_SPINLOCK(shca_list_lock);
-
-static struct timer_list poll_eqs_timer;
-
-#ifdef CONFIG_PPC_64K_PAGES
-static struct kmem_cache *ctblk_cache;
-
-void *ehca_alloc_fw_ctrlblock(gfp_t flags)
-{
-	void *ret = kmem_cache_zalloc(ctblk_cache, flags);
-	if (!ret)
-		ehca_gen_err("Out of memory for ctblk");
-	return ret;
-}
-
-void ehca_free_fw_ctrlblock(void *ptr)
-{
-	if (ptr)
-		kmem_cache_free(ctblk_cache, ptr);
-
-}
-#endif
-
-int ehca2ib_return_code(u64 ehca_rc)
-{
-	switch (ehca_rc) {
-	case H_SUCCESS:
-		return 0;
-	case H_RESOURCE:             /* Resource in use */
-	case H_BUSY:
-		return -EBUSY;
-	case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
-	case H_CONSTRAINED:          /* resource constraint */
-	case H_NO_MEM:
-		return -ENOMEM;
-	default:
-		return -EINVAL;
-	}
-}
-
-static int ehca_create_slab_caches(void)
-{
-	int ret;
-
-	ret = ehca_init_pd_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create PD SLAB cache.");
-		return ret;
-	}
-
-	ret = ehca_init_cq_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create CQ SLAB cache.");
-		goto create_slab_caches2;
-	}
-
-	ret = ehca_init_qp_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create QP SLAB cache.");
-		goto create_slab_caches3;
-	}
-
-	ret = ehca_init_av_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create AV SLAB cache.");
-		goto create_slab_caches4;
-	}
-
-	ret = ehca_init_mrmw_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create MR&MW SLAB cache.");
-		goto create_slab_caches5;
-	}
-
-	ret = ehca_init_small_qp_cache();
-	if (ret) {
-		ehca_gen_err("Cannot create small queue SLAB cache.");
-		goto create_slab_caches6;
-	}
-
-#ifdef CONFIG_PPC_64K_PAGES
-	ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
-					EHCA_PAGESIZE, H_CB_ALIGNMENT,
-					SLAB_HWCACHE_ALIGN,
-					NULL);
-	if (!ctblk_cache) {
-		ehca_gen_err("Cannot create ctblk SLAB cache.");
-		ehca_cleanup_small_qp_cache();
-		ret = -ENOMEM;
-		goto create_slab_caches6;
-	}
-#endif
-	return 0;
-
-create_slab_caches6:
-	ehca_cleanup_mrmw_cache();
-
-create_slab_caches5:
-	ehca_cleanup_av_cache();
-
-create_slab_caches4:
-	ehca_cleanup_qp_cache();
-
-create_slab_caches3:
-	ehca_cleanup_cq_cache();
-
-create_slab_caches2:
-	ehca_cleanup_pd_cache();
-
-	return ret;
-}
-
-static void ehca_destroy_slab_caches(void)
-{
-	ehca_cleanup_small_qp_cache();
-	ehca_cleanup_mrmw_cache();
-	ehca_cleanup_av_cache();
-	ehca_cleanup_qp_cache();
-	ehca_cleanup_cq_cache();
-	ehca_cleanup_pd_cache();
-#ifdef CONFIG_PPC_64K_PAGES
-	kmem_cache_destroy(ctblk_cache);
-#endif
-}
-
-#define EHCA_HCAAVER  EHCA_BMASK_IBM(32, 39)
-#define EHCA_REVID    EHCA_BMASK_IBM(40, 63)
-
-static struct cap_descr {
-	u64 mask;
-	char *descr;
-} hca_cap_descr[] = {
-	{ HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
-	{ HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
-	{ HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
-	{ HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
-	{ HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
-	{ HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
-	{ HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
-	{ HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
-	{ HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
-	{ HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
-	{ HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
-	{ HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
-	{ HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
-	{ HCA_CAP_SRQ, "HCA_CAP_SRQ" },
-	{ HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
-	{ HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
-	{ HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
-	{ HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" },
-};
-
-static int ehca_sense_attributes(struct ehca_shca *shca)
-{
-	int i, ret = 0;
-	u64 h_ret;
-	struct hipz_query_hca *rblock;
-	struct hipz_query_port *port;
-	const char *loc_code;
-
-	static const u32 pgsize_map[] = {
-		HCA_CAP_MR_PGSIZE_4K,  0x1000,
-		HCA_CAP_MR_PGSIZE_64K, 0x10000,
-		HCA_CAP_MR_PGSIZE_1M,  0x100000,
-		HCA_CAP_MR_PGSIZE_16M, 0x1000000,
-	};
-
-	ehca_gen_dbg("Probing adapter %s...",
-		     shca->ofdev->dev.of_node->full_name);
-	loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code",
-				   NULL);
-	if (loc_code)
-		ehca_gen_dbg(" ... location lode=%s", loc_code);
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_gen_err("Cannot allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
-	if (h_ret != H_SUCCESS) {
-		ehca_gen_err("Cannot query device properties. h_ret=%lli",
-			     h_ret);
-		ret = -EPERM;
-		goto sense_attributes1;
-	}
-
-	if (ehca_nr_ports == 1)
-		shca->num_ports = 1;
-	else
-		shca->num_ports = (u8)rblock->num_ports;
-
-	ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
-
-	if (ehca_hw_level == 0) {
-		u32 hcaaver;
-		u32 revid;
-
-		hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
-		revid   = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
-
-		ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
-
-		if (hcaaver == 1) {
-			if (revid <= 3)
-				shca->hw_level = 0x10 | (revid + 1);
-			else
-				shca->hw_level = 0x14;
-		} else if (hcaaver == 2) {
-			if (revid == 0)
-				shca->hw_level = 0x21;
-			else if (revid == 0x10)
-				shca->hw_level = 0x22;
-			else if (revid == 0x20 || revid == 0x21)
-				shca->hw_level = 0x23;
-		}
-
-		if (!shca->hw_level) {
-			ehca_gen_warn("unknown hardware version"
-				      " - assuming default level");
-			shca->hw_level = 0x22;
-		}
-	} else
-		shca->hw_level = ehca_hw_level;
-	ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
-
-	shca->hca_cap = rblock->hca_cap_indicators;
-	ehca_gen_dbg(" ... HCA capabilities:");
-	for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
-		if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
-			ehca_gen_dbg("   %s", hca_cap_descr[i].descr);
-
-	/* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is
-	 * a firmware property, so it's valid across all adapters
-	 */
-	if (ehca_lock_hcalls == -1)
-		ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC,
-					shca->hca_cap);
-
-	/* translate supported MR page sizes; always support 4K */
-	shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
-	for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
-		if (rblock->memory_page_size_supported & pgsize_map[i])
-			shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
-
-	/* Set maximum number of CQs and QPs to calculate EQ size */
-	if (shca->max_num_qps == -1)
-		shca->max_num_qps = min_t(int, rblock->max_qp,
-					  EHCA_MAX_NUM_QUEUES);
-	else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
-		ehca_gen_warn("The requested number of QPs is out of range "
-			      "(1 - %i) specified by HW. Value is set to %i",
-			      rblock->max_qp, rblock->max_qp);
-		shca->max_num_qps = rblock->max_qp;
-	}
-
-	if (shca->max_num_cqs == -1)
-		shca->max_num_cqs = min_t(int, rblock->max_cq,
-					  EHCA_MAX_NUM_QUEUES);
-	else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
-		ehca_gen_warn("The requested number of CQs is out of range "
-			      "(1 - %i) specified by HW. Value is set to %i",
-			      rblock->max_cq, rblock->max_cq);
-	}
-
-	/* query max MTU from first port -- it's the same for all ports */
-	port = (struct hipz_query_port *)rblock;
-	h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
-	if (h_ret != H_SUCCESS) {
-		ehca_gen_err("Cannot query port properties. h_ret=%lli",
-			     h_ret);
-		ret = -EPERM;
-		goto sense_attributes1;
-	}
-
-	shca->max_mtu = port->max_mtu;
-
-sense_attributes1:
-	ehca_free_fw_ctrlblock(rblock);
-	return ret;
-}
-
-static int init_node_guid(struct ehca_shca *shca)
-{
-	int ret = 0;
-	struct hipz_query_hca *rblock;
-
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!rblock) {
-		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
-		return -ENOMEM;
-	}
-
-	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "Can't query device properties");
-		ret = -EINVAL;
-		goto init_node_guid1;
-	}
-
-	memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
-
-init_node_guid1:
-	ehca_free_fw_ctrlblock(rblock);
-	return ret;
-}
-
-static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num,
-			       struct ib_port_immutable *immutable)
-{
-	struct ib_port_attr attr;
-	int err;
-
-	err = ehca_query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
-
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
-	return 0;
-}
-
-static int ehca_init_device(struct ehca_shca *shca)
-{
-	int ret;
-
-	ret = init_node_guid(shca);
-	if (ret)
-		return ret;
-
-	strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
-	shca->ib_device.owner               = THIS_MODULE;
-
-	shca->ib_device.uverbs_abi_ver	    = 8;
-	shca->ib_device.uverbs_cmd_mask	    =
-		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
-		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
-		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
-		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
-		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
-		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
-		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
-		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
-		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
-		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
-
-	shca->ib_device.node_type           = RDMA_NODE_IB_CA;
-	shca->ib_device.phys_port_cnt       = shca->num_ports;
-	shca->ib_device.num_comp_vectors    = 1;
-	shca->ib_device.dma_device          = &shca->ofdev->dev;
-	shca->ib_device.query_device        = ehca_query_device;
-	shca->ib_device.query_port          = ehca_query_port;
-	shca->ib_device.query_gid           = ehca_query_gid;
-	shca->ib_device.query_pkey          = ehca_query_pkey;
-	/* shca->in_device.modify_device    = ehca_modify_device    */
-	shca->ib_device.modify_port         = ehca_modify_port;
-	shca->ib_device.alloc_ucontext      = ehca_alloc_ucontext;
-	shca->ib_device.dealloc_ucontext    = ehca_dealloc_ucontext;
-	shca->ib_device.alloc_pd            = ehca_alloc_pd;
-	shca->ib_device.dealloc_pd          = ehca_dealloc_pd;
-	shca->ib_device.create_ah	    = ehca_create_ah;
-	/* shca->ib_device.modify_ah	    = ehca_modify_ah;	    */
-	shca->ib_device.query_ah	    = ehca_query_ah;
-	shca->ib_device.destroy_ah	    = ehca_destroy_ah;
-	shca->ib_device.create_qp	    = ehca_create_qp;
-	shca->ib_device.modify_qp	    = ehca_modify_qp;
-	shca->ib_device.query_qp	    = ehca_query_qp;
-	shca->ib_device.destroy_qp	    = ehca_destroy_qp;
-	shca->ib_device.post_send	    = ehca_post_send;
-	shca->ib_device.post_recv	    = ehca_post_recv;
-	shca->ib_device.create_cq	    = ehca_create_cq;
-	shca->ib_device.destroy_cq	    = ehca_destroy_cq;
-	shca->ib_device.resize_cq	    = ehca_resize_cq;
-	shca->ib_device.poll_cq		    = ehca_poll_cq;
-	/* shca->ib_device.peek_cq	    = ehca_peek_cq;	    */
-	shca->ib_device.req_notify_cq	    = ehca_req_notify_cq;
-	/* shca->ib_device.req_ncomp_notif  = ehca_req_ncomp_notif; */
-	shca->ib_device.get_dma_mr	    = ehca_get_dma_mr;
-	shca->ib_device.reg_user_mr	    = ehca_reg_user_mr;
-	shca->ib_device.dereg_mr	    = ehca_dereg_mr;
-	shca->ib_device.alloc_mw	    = ehca_alloc_mw;
-	shca->ib_device.dealloc_mw	    = ehca_dealloc_mw;
-	shca->ib_device.alloc_fmr	    = ehca_alloc_fmr;
-	shca->ib_device.map_phys_fmr	    = ehca_map_phys_fmr;
-	shca->ib_device.unmap_fmr	    = ehca_unmap_fmr;
-	shca->ib_device.dealloc_fmr	    = ehca_dealloc_fmr;
-	shca->ib_device.attach_mcast	    = ehca_attach_mcast;
-	shca->ib_device.detach_mcast	    = ehca_detach_mcast;
-	shca->ib_device.process_mad	    = ehca_process_mad;
-	shca->ib_device.mmap		    = ehca_mmap;
-	shca->ib_device.dma_ops		    = &ehca_dma_mapping_ops;
-	shca->ib_device.get_port_immutable  = ehca_port_immutable;
-
-	if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
-		shca->ib_device.uverbs_cmd_mask |=
-			(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
-			(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
-			(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
-			(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
-
-		shca->ib_device.create_srq          = ehca_create_srq;
-		shca->ib_device.modify_srq          = ehca_modify_srq;
-		shca->ib_device.query_srq           = ehca_query_srq;
-		shca->ib_device.destroy_srq         = ehca_destroy_srq;
-		shca->ib_device.post_srq_recv       = ehca_post_srq_recv;
-	}
-
-	return ret;
-}
-
-static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
-{
-	struct ehca_sport *sport = &shca->sport[port - 1];
-	struct ib_cq *ibcq;
-	struct ib_qp *ibqp;
-	struct ib_qp_init_attr qp_init_attr;
-	struct ib_cq_init_attr cq_attr = {};
-	int ret;
-
-	if (sport->ibcq_aqp1) {
-		ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
-		return -EPERM;
-	}
-
-	cq_attr.cqe = 10;
-	ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1),
-			    &cq_attr);
-	if (IS_ERR(ibcq)) {
-		ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
-		return PTR_ERR(ibcq);
-	}
-	sport->ibcq_aqp1 = ibcq;
-
-	if (sport->ibqp_sqp[IB_QPT_GSI]) {
-		ehca_err(&shca->ib_device, "AQP1 QP is already created.");
-		ret = -EPERM;
-		goto create_aqp1;
-	}
-
-	memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
-	qp_init_attr.send_cq          = ibcq;
-	qp_init_attr.recv_cq          = ibcq;
-	qp_init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
-	qp_init_attr.cap.max_send_wr  = 100;
-	qp_init_attr.cap.max_recv_wr  = 100;
-	qp_init_attr.cap.max_send_sge = 2;
-	qp_init_attr.cap.max_recv_sge = 1;
-	qp_init_attr.qp_type          = IB_QPT_GSI;
-	qp_init_attr.port_num         = port;
-	qp_init_attr.qp_context       = NULL;
-	qp_init_attr.event_handler    = NULL;
-	qp_init_attr.srq              = NULL;
-
-	ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
-	if (IS_ERR(ibqp)) {
-		ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
-		ret = PTR_ERR(ibqp);
-		goto create_aqp1;
-	}
-	sport->ibqp_sqp[IB_QPT_GSI] = ibqp;
-
-	return 0;
-
-create_aqp1:
-	ib_destroy_cq(sport->ibcq_aqp1);
-	return ret;
-}
-
-static int ehca_destroy_aqp1(struct ehca_sport *sport)
-{
-	int ret;
-
-	ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]);
-	if (ret) {
-		ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
-		return ret;
-	}
-
-	ret = ib_destroy_cq(sport->ibcq_aqp1);
-	if (ret)
-		ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret);
-
-	return ret;
-}
-
-static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level);
-}
-
-static ssize_t ehca_store_debug_level(struct device_driver *ddp,
-				      const char *buf, size_t count)
-{
-	int value = (*buf) - '0';
-	if (value >= 0 && value <= 9)
-		ehca_debug_level = value;
-	return 1;
-}
-
-static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
-		   ehca_show_debug_level, ehca_store_debug_level);
-
-static struct attribute *ehca_drv_attrs[] = {
-	&driver_attr_debug_level.attr,
-	NULL
-};
-
-static struct attribute_group ehca_drv_attr_grp = {
-	.attrs = ehca_drv_attrs
-};
-
-static const struct attribute_group *ehca_drv_attr_groups[] = {
-	&ehca_drv_attr_grp,
-	NULL,
-};
-
-#define EHCA_RESOURCE_ATTR(name)                                           \
-static ssize_t  ehca_show_##name(struct device *dev,                       \
-				 struct device_attribute *attr,            \
-				 char *buf)                                \
-{									   \
-	struct ehca_shca *shca;						   \
-	struct hipz_query_hca *rblock;				           \
-	int data;                                                          \
-									   \
-	shca = dev_get_drvdata(dev);					   \
-									   \
-	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);			   \
-	if (!rblock) {						           \
-		dev_err(dev, "Can't allocate rblock memory.\n");           \
-		return 0;						   \
-	}								   \
-									   \
-	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
-		dev_err(dev, "Can't query device properties\n");           \
-		ehca_free_fw_ctrlblock(rblock);			   	   \
-		return 0;					   	   \
-	}								   \
-									   \
-	data = rblock->name;                                               \
-	ehca_free_fw_ctrlblock(rblock);                                    \
-									   \
-	if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1))	   \
-		return snprintf(buf, 256, "1\n");			   \
-	else								   \
-		return snprintf(buf, 256, "%d\n", data);		   \
-									   \
-}									   \
-static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
-
-EHCA_RESOURCE_ATTR(num_ports);
-EHCA_RESOURCE_ATTR(hw_ver);
-EHCA_RESOURCE_ATTR(max_eq);
-EHCA_RESOURCE_ATTR(cur_eq);
-EHCA_RESOURCE_ATTR(max_cq);
-EHCA_RESOURCE_ATTR(cur_cq);
-EHCA_RESOURCE_ATTR(max_qp);
-EHCA_RESOURCE_ATTR(cur_qp);
-EHCA_RESOURCE_ATTR(max_mr);
-EHCA_RESOURCE_ATTR(cur_mr);
-EHCA_RESOURCE_ATTR(max_mw);
-EHCA_RESOURCE_ATTR(cur_mw);
-EHCA_RESOURCE_ATTR(max_pd);
-EHCA_RESOURCE_ATTR(max_ah);
-
-static ssize_t ehca_show_adapter_handle(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	struct ehca_shca *shca = dev_get_drvdata(dev);
-
-	return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle);
-
-}
-static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
-
-static struct attribute *ehca_dev_attrs[] = {
-	&dev_attr_adapter_handle.attr,
-	&dev_attr_num_ports.attr,
-	&dev_attr_hw_ver.attr,
-	&dev_attr_max_eq.attr,
-	&dev_attr_cur_eq.attr,
-	&dev_attr_max_cq.attr,
-	&dev_attr_cur_cq.attr,
-	&dev_attr_max_qp.attr,
-	&dev_attr_cur_qp.attr,
-	&dev_attr_max_mr.attr,
-	&dev_attr_cur_mr.attr,
-	&dev_attr_max_mw.attr,
-	&dev_attr_cur_mw.attr,
-	&dev_attr_max_pd.attr,
-	&dev_attr_max_ah.attr,
-	NULL
-};
-
-static struct attribute_group ehca_dev_attr_grp = {
-	.attrs = ehca_dev_attrs
-};
-
-static int ehca_probe(struct platform_device *dev)
-{
-	struct ehca_shca *shca;
-	const u64 *handle;
-	struct ib_pd *ibpd;
-	int ret, i, eq_size;
-	unsigned long flags;
-
-	handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL);
-	if (!handle) {
-		ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
-			     dev->dev.of_node->full_name);
-		return -ENODEV;
-	}
-
-	if (!(*handle)) {
-		ehca_gen_err("Wrong eHCA handle for adapter: %s.",
-			     dev->dev.of_node->full_name);
-		return -ENODEV;
-	}
-
-	shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
-	if (!shca) {
-		ehca_gen_err("Cannot allocate shca memory.");
-		return -ENOMEM;
-	}
-
-	mutex_init(&shca->modify_mutex);
-	atomic_set(&shca->num_cqs, 0);
-	atomic_set(&shca->num_qps, 0);
-	shca->max_num_qps = ehca_max_qp;
-	shca->max_num_cqs = ehca_max_cq;
-
-	for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
-		spin_lock_init(&shca->sport[i].mod_sqp_lock);
-
-	shca->ofdev = dev;
-	shca->ipz_hca_handle.handle = *handle;
-	dev_set_drvdata(&dev->dev, shca);
-
-	ret = ehca_sense_attributes(shca);
-	if (ret < 0) {
-		ehca_gen_err("Cannot sense eHCA attributes.");
-		goto probe1;
-	}
-
-	ret = ehca_init_device(shca);
-	if (ret) {
-		ehca_gen_err("Cannot init ehca  device struct");
-		goto probe1;
-	}
-
-	eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps;
-	/* create event queues */
-	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
-	if (ret) {
-		ehca_err(&shca->ib_device, "Cannot create EQ.");
-		goto probe1;
-	}
-
-	ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
-	if (ret) {
-		ehca_err(&shca->ib_device, "Cannot create NEQ.");
-		goto probe3;
-	}
-
-	/* create internal protection domain */
-	ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL);
-	if (IS_ERR(ibpd)) {
-		ehca_err(&shca->ib_device, "Cannot create internal PD.");
-		ret = PTR_ERR(ibpd);
-		goto probe4;
-	}
-
-	shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
-	shca->pd->ib_pd.device = &shca->ib_device;
-
-	/* create internal max MR */
-	ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
-
-	if (ret) {
-		ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i",
-			 ret);
-		goto probe5;
-	}
-
-	ret = ib_register_device(&shca->ib_device, NULL);
-	if (ret) {
-		ehca_err(&shca->ib_device,
-			 "ib_register_device() failed ret=%i", ret);
-		goto probe6;
-	}
-
-	/* create AQP1 for port 1 */
-	if (ehca_open_aqp1 == 1) {
-		shca->sport[0].port_state = IB_PORT_DOWN;
-		ret = ehca_create_aqp1(shca, 1);
-		if (ret) {
-			ehca_err(&shca->ib_device,
-				 "Cannot create AQP1 for port 1.");
-			goto probe7;
-		}
-	}
-
-	/* create AQP1 for port 2 */
-	if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
-		shca->sport[1].port_state = IB_PORT_DOWN;
-		ret = ehca_create_aqp1(shca, 2);
-		if (ret) {
-			ehca_err(&shca->ib_device,
-				 "Cannot create AQP1 for port 2.");
-			goto probe8;
-		}
-	}
-
-	ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp);
-	if (ret) /* only complain; we can live without attributes */
-		ehca_err(&shca->ib_device,
-			 "Cannot create device attributes  ret=%d", ret);
-
-	spin_lock_irqsave(&shca_list_lock, flags);
-	list_add(&shca->shca_list, &shca_list);
-	spin_unlock_irqrestore(&shca_list_lock, flags);
-
-	return 0;
-
-probe8:
-	ret = ehca_destroy_aqp1(&shca->sport[0]);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy AQP1 for port 1. ret=%i", ret);
-
-probe7:
-	ib_unregister_device(&shca->ib_device);
-
-probe6:
-	ret = ehca_dereg_internal_maxmr(shca);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy internal MR. ret=%x", ret);
-
-probe5:
-	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy internal PD. ret=%x", ret);
-
-probe4:
-	ret = ehca_destroy_eq(shca, &shca->neq);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy NEQ. ret=%x", ret);
-
-probe3:
-	ret = ehca_destroy_eq(shca, &shca->eq);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy EQ. ret=%x", ret);
-
-probe1:
-	ib_dealloc_device(&shca->ib_device);
-
-	return -EINVAL;
-}
-
-static int ehca_remove(struct platform_device *dev)
-{
-	struct ehca_shca *shca = dev_get_drvdata(&dev->dev);
-	unsigned long flags;
-	int ret;
-
-	sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);
-
-	if (ehca_open_aqp1 == 1) {
-		int i;
-		for (i = 0; i < shca->num_ports; i++) {
-			ret = ehca_destroy_aqp1(&shca->sport[i]);
-			if (ret)
-				ehca_err(&shca->ib_device,
-					 "Cannot destroy AQP1 for port %x "
-					 "ret=%i", ret, i);
-		}
-	}
-
-	ib_unregister_device(&shca->ib_device);
-
-	ret = ehca_dereg_internal_maxmr(shca);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy internal MR. ret=%i", ret);
-
-	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "Cannot destroy internal PD. ret=%i", ret);
-
-	ret = ehca_destroy_eq(shca, &shca->eq);
-	if (ret)
-		ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret);
-
-	ret = ehca_destroy_eq(shca, &shca->neq);
-	if (ret)
-		ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret);
-
-	ib_dealloc_device(&shca->ib_device);
-
-	spin_lock_irqsave(&shca_list_lock, flags);
-	list_del(&shca->shca_list);
-	spin_unlock_irqrestore(&shca_list_lock, flags);
-
-	return ret;
-}
-
-static struct of_device_id ehca_device_table[] =
-{
-	{
-		.name       = "lhca",
-		.compatible = "IBM,lhca",
-	},
-	{},
-};
-MODULE_DEVICE_TABLE(of, ehca_device_table);
-
-static struct platform_driver ehca_driver = {
-	.probe       = ehca_probe,
-	.remove      = ehca_remove,
-	.driver = {
-		.name = "ehca",
-		.owner = THIS_MODULE,
-		.groups = ehca_drv_attr_groups,
-		.of_match_table = ehca_device_table,
-	},
-};
-
-void ehca_poll_eqs(unsigned long data)
-{
-	struct ehca_shca *shca;
-
-	spin_lock(&shca_list_lock);
-	list_for_each_entry(shca, &shca_list, shca_list) {
-		if (shca->eq.is_initialized) {
-			/* call deadman proc only if eq ptr does not change */
-			struct ehca_eq *eq = &shca->eq;
-			int max = 3;
-			volatile u64 q_ofs, q_ofs2;
-			unsigned long flags;
-			spin_lock_irqsave(&eq->spinlock, flags);
-			q_ofs = eq->ipz_queue.current_q_offset;
-			spin_unlock_irqrestore(&eq->spinlock, flags);
-			do {
-				spin_lock_irqsave(&eq->spinlock, flags);
-				q_ofs2 = eq->ipz_queue.current_q_offset;
-				spin_unlock_irqrestore(&eq->spinlock, flags);
-				max--;
-			} while (q_ofs == q_ofs2 && max > 0);
-			if (q_ofs == q_ofs2)
-				ehca_process_eq(shca, 0);
-		}
-	}
-	mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ));
-	spin_unlock(&shca_list_lock);
-}
-
-static int ehca_mem_notifier(struct notifier_block *nb,
-			     unsigned long action, void *data)
-{
-	static unsigned long ehca_dmem_warn_time;
-	unsigned long flags;
-
-	switch (action) {
-	case MEM_CANCEL_OFFLINE:
-	case MEM_CANCEL_ONLINE:
-	case MEM_ONLINE:
-	case MEM_OFFLINE:
-		return NOTIFY_OK;
-	case MEM_GOING_ONLINE:
-	case MEM_GOING_OFFLINE:
-		/* only ok if no hca is attached to the lpar */
-		spin_lock_irqsave(&shca_list_lock, flags);
-		if (list_empty(&shca_list)) {
-			spin_unlock_irqrestore(&shca_list_lock, flags);
-			return NOTIFY_OK;
-		} else {
-			spin_unlock_irqrestore(&shca_list_lock, flags);
-			if (printk_timed_ratelimit(&ehca_dmem_warn_time,
-						   30 * 1000))
-				ehca_gen_err("DMEM operations are not allowed"
-					     "in conjunction with eHCA");
-			return NOTIFY_BAD;
-		}
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block ehca_mem_nb = {
-	.notifier_call = ehca_mem_notifier,
-};
-
-static int __init ehca_module_init(void)
-{
-	int ret;
-
-	printk(KERN_INFO "eHCA Infiniband Device Driver "
-	       "(Version " HCAD_VERSION ")\n");
-
-	ret = ehca_create_comp_pool();
-	if (ret) {
-		ehca_gen_err("Cannot create comp pool.");
-		return ret;
-	}
-
-	ret = ehca_create_slab_caches();
-	if (ret) {
-		ehca_gen_err("Cannot create SLAB caches");
-		ret = -ENOMEM;
-		goto module_init1;
-	}
-
-	ret = ehca_create_busmap();
-	if (ret) {
-		ehca_gen_err("Cannot create busmap.");
-		goto module_init2;
-	}
-
-	ret = ibmebus_register_driver(&ehca_driver);
-	if (ret) {
-		ehca_gen_err("Cannot register eHCA device driver");
-		ret = -EINVAL;
-		goto module_init3;
-	}
-
-	ret = register_memory_notifier(&ehca_mem_nb);
-	if (ret) {
-		ehca_gen_err("Failed registering memory add/remove notifier");
-		goto module_init4;
-	}
-
-	if (ehca_poll_all_eqs != 1) {
-		ehca_gen_err("WARNING!!!");
-		ehca_gen_err("It is possible to lose interrupts.");
-	} else {
-		init_timer(&poll_eqs_timer);
-		poll_eqs_timer.function = ehca_poll_eqs;
-		poll_eqs_timer.expires = jiffies + HZ;
-		add_timer(&poll_eqs_timer);
-	}
-
-	return 0;
-
-module_init4:
-	ibmebus_unregister_driver(&ehca_driver);
-
-module_init3:
-	ehca_destroy_busmap();
-
-module_init2:
-	ehca_destroy_slab_caches();
-
-module_init1:
-	ehca_destroy_comp_pool();
-	return ret;
-};
-
-static void __exit ehca_module_exit(void)
-{
-	if (ehca_poll_all_eqs == 1)
-		del_timer_sync(&poll_eqs_timer);
-
-	ibmebus_unregister_driver(&ehca_driver);
-
-	unregister_memory_notifier(&ehca_mem_nb);
-
-	ehca_destroy_busmap();
-
-	ehca_destroy_slab_caches();
-
-	ehca_destroy_comp_pool();
-
-	idr_destroy(&ehca_cq_idr);
-	idr_destroy(&ehca_qp_idr);
-};
-
-module_init(ehca_module_init);
-module_exit(ehca_module_exit);
diff --git a/drivers/staging/rdma/ehca/ehca_mcast.c b/drivers/staging/rdma/ehca/ehca_mcast.c
deleted file mode 100644
index cec181532924..000000000000
--- a/drivers/staging/rdma/ehca/ehca_mcast.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  mcast  functions
- *
- *  Authors: Khadija Souissi <souissik@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-#define MAX_MC_LID 0xFFFE
-#define MIN_MC_LID 0xC000	/* Multicast limits */
-#define EHCA_VALID_MULTICAST_GID(gid)  ((gid)[0] == 0xFF)
-#define EHCA_VALID_MULTICAST_LID(lid) \
-	(((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
-
-int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
-					      ib_device);
-	union ib_gid my_gid;
-	u64 subnet_prefix, interface_id, h_ret;
-
-	if (ibqp->qp_type != IB_QPT_UD) {
-		ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
-		return -EINVAL;
-	}
-
-	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
-		ehca_err(ibqp->device, "invalid mulitcast gid");
-		return -EINVAL;
-	} else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
-		ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
-		return -EINVAL;
-	}
-
-	memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
-
-	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
-	interface_id = be64_to_cpu(my_gid.global.interface_id);
-	h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
-				   my_qp->ipz_qp_handle,
-				   my_qp->galpas.kernel,
-				   lid, subnet_prefix, interface_id);
-	if (h_ret != H_SUCCESS)
-		ehca_err(ibqp->device,
-			 "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
-			 "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
-
-	return ehca2ib_return_code(h_ret);
-}
-
-int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-	struct ehca_shca *shca = container_of(ibqp->pd->device,
-					      struct ehca_shca, ib_device);
-	union ib_gid my_gid;
-	u64 subnet_prefix, interface_id, h_ret;
-
-	if (ibqp->qp_type != IB_QPT_UD) {
-		ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
-		return -EINVAL;
-	}
-
-	if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
-		ehca_err(ibqp->device, "invalid mulitcast gid");
-		return -EINVAL;
-	} else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
-		ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
-		return -EINVAL;
-	}
-
-	memcpy(&my_gid, gid->raw, sizeof(union ib_gid));
-
-	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
-	interface_id = be64_to_cpu(my_gid.global.interface_id);
-	h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
-				   my_qp->ipz_qp_handle,
-				   my_qp->galpas.kernel,
-				   lid, subnet_prefix, interface_id);
-	if (h_ret != H_SUCCESS)
-		ehca_err(ibqp->device,
-			 "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
-			 "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
-
-	return ehca2ib_return_code(h_ret);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c
deleted file mode 100644
index 3367205e3160..000000000000
--- a/drivers/staging/rdma/ehca/ehca_mrmw.c
+++ /dev/null
@@ -1,2202 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  MR/MW functions
- *
- *  Authors: Dietmar Decker <ddecker@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-#include <rdma/ib_umem.h>
-
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "hcp_if.h"
-#include "hipz_hw.h"
-
-#define NUM_CHUNKS(length, chunk_size) \
-	(((length) + (chunk_size - 1)) / (chunk_size))
-
-/* max number of rpages (per hcall register_rpages) */
-#define MAX_RPAGES 512
-
-/* DMEM toleration management */
-#define EHCA_SECTSHIFT        SECTION_SIZE_BITS
-#define EHCA_SECTSIZE          (1UL << EHCA_SECTSHIFT)
-#define EHCA_HUGEPAGESHIFT     34
-#define EHCA_HUGEPAGE_SIZE     (1UL << EHCA_HUGEPAGESHIFT)
-#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)
-#define EHCA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL
-#define EHCA_DIR_INDEX_SHIFT 13                   /* 8k Entries in 64k block */
-#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2)
-#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT)
-#define EHCA_TOP_MAP_SIZE (0x10000)               /* currently fixed map size */
-#define EHCA_DIR_MAP_SIZE (0x10000)
-#define EHCA_ENT_MAP_SIZE (0x10000)
-#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1)
-
-static unsigned long ehca_mr_len;
-
-/*
- * Memory map data structures
- */
-struct ehca_dir_bmap {
-	u64 ent[EHCA_MAP_ENTRIES];
-};
-struct ehca_top_bmap {
-	struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
-};
-struct ehca_bmap {
-	struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
-};
-
-static struct ehca_bmap *ehca_bmap;
-
-static struct kmem_cache *mr_cache;
-static struct kmem_cache *mw_cache;
-
-enum ehca_mr_pgsize {
-	EHCA_MR_PGSIZE4K  = 0x1000L,
-	EHCA_MR_PGSIZE64K = 0x10000L,
-	EHCA_MR_PGSIZE1M  = 0x100000L,
-	EHCA_MR_PGSIZE16M = 0x1000000L
-};
-
-#define EHCA_MR_PGSHIFT4K  12
-#define EHCA_MR_PGSHIFT64K 16
-#define EHCA_MR_PGSHIFT1M  20
-#define EHCA_MR_PGSHIFT16M 24
-
-static u64 ehca_map_vaddr(void *caddr);
-
-static u32 ehca_encode_hwpage_size(u32 pgsize)
-{
-	int log = ilog2(pgsize);
-	WARN_ON(log < 12 || log > 24 || log & 3);
-	return (log - 12) / 4;
-}
-
-static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
-{
-	return rounddown_pow_of_two(shca->hca_cap_mr_pgsize);
-}
-
-static struct ehca_mr *ehca_mr_new(void)
-{
-	struct ehca_mr *me;
-
-	me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
-	if (me)
-		spin_lock_init(&me->mrlock);
-	else
-		ehca_gen_err("alloc failed");
-
-	return me;
-}
-
-static void ehca_mr_delete(struct ehca_mr *me)
-{
-	kmem_cache_free(mr_cache, me);
-}
-
-static struct ehca_mw *ehca_mw_new(void)
-{
-	struct ehca_mw *me;
-
-	me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
-	if (me)
-		spin_lock_init(&me->mwlock);
-	else
-		ehca_gen_err("alloc failed");
-
-	return me;
-}
-
-static void ehca_mw_delete(struct ehca_mw *me)
-{
-	kmem_cache_free(mw_cache, me);
-}
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
-{
-	struct ib_mr *ib_mr;
-	int ret;
-	struct ehca_mr *e_maxmr;
-	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-	struct ehca_shca *shca =
-		container_of(pd->device, struct ehca_shca, ib_device);
-
-	if (shca->maxmr) {
-		e_maxmr = ehca_mr_new();
-		if (!e_maxmr) {
-			ehca_err(&shca->ib_device, "out of memory");
-			ib_mr = ERR_PTR(-ENOMEM);
-			goto get_dma_mr_exit0;
-		}
-
-		ret = ehca_reg_maxmr(shca, e_maxmr,
-				     (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
-				     mr_access_flags, e_pd,
-				     &e_maxmr->ib.ib_mr.lkey,
-				     &e_maxmr->ib.ib_mr.rkey);
-		if (ret) {
-			ehca_mr_delete(e_maxmr);
-			ib_mr = ERR_PTR(ret);
-			goto get_dma_mr_exit0;
-		}
-		ib_mr = &e_maxmr->ib.ib_mr;
-	} else {
-		ehca_err(&shca->ib_device, "no internal max-MR exist!");
-		ib_mr = ERR_PTR(-EINVAL);
-		goto get_dma_mr_exit0;
-	}
-
-get_dma_mr_exit0:
-	if (IS_ERR(ib_mr))
-		ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
-			 PTR_ERR(ib_mr), pd, mr_access_flags);
-	return ib_mr;
-} /* end ehca_get_dma_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			       u64 virt, int mr_access_flags,
-			       struct ib_udata *udata)
-{
-	struct ib_mr *ib_mr;
-	struct ehca_mr *e_mr;
-	struct ehca_shca *shca =
-		container_of(pd->device, struct ehca_shca, ib_device);
-	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-	struct ehca_mr_pginfo pginfo;
-	int ret, page_shift;
-	u32 num_kpages;
-	u32 num_hwpages;
-	u64 hwpage_size;
-
-	if (!pd) {
-		ehca_gen_err("bad pd=%p", pd);
-		return ERR_PTR(-EFAULT);
-	}
-
-	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-		/*
-		 * Remote Write Access requires Local Write Access
-		 * Remote Atomic Access requires Local Write Access
-		 */
-		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-			 mr_access_flags);
-		ib_mr = ERR_PTR(-EINVAL);
-		goto reg_user_mr_exit0;
-	}
-
-	if (length == 0 || virt + length < virt) {
-		ehca_err(pd->device, "bad input values: length=%llx "
-			 "virt_base=%llx", length, virt);
-		ib_mr = ERR_PTR(-EINVAL);
-		goto reg_user_mr_exit0;
-	}
-
-	e_mr = ehca_mr_new();
-	if (!e_mr) {
-		ehca_err(pd->device, "out of memory");
-		ib_mr = ERR_PTR(-ENOMEM);
-		goto reg_user_mr_exit0;
-	}
-
-	e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
-				 mr_access_flags, 0);
-	if (IS_ERR(e_mr->umem)) {
-		ib_mr = (void *)e_mr->umem;
-		goto reg_user_mr_exit1;
-	}
-
-	if (e_mr->umem->page_size != PAGE_SIZE) {
-		ehca_err(pd->device, "page size not supported, "
-			 "e_mr->umem->page_size=%x", e_mr->umem->page_size);
-		ib_mr = ERR_PTR(-EINVAL);
-		goto reg_user_mr_exit2;
-	}
-
-	/* determine number of MR pages */
-	num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
-	/* select proper hw_pgsize */
-	page_shift = PAGE_SHIFT;
-	if (e_mr->umem->hugetlb) {
-		/* determine page_shift, clamp between 4K and 16M */
-		page_shift = (fls64(length - 1) + 3) & ~3;
-		page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
-				 EHCA_MR_PGSHIFT16M);
-	}
-	hwpage_size = 1UL << page_shift;
-
-	/* now that we have the desired page size, shift until it's
-	 * supported, too. 4K is always supported, so this terminates.
-	 */
-	while (!(hwpage_size & shca->hca_cap_mr_pgsize))
-		hwpage_size >>= 4;
-
-reg_user_mr_fallback:
-	num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
-	/* register MR on HCA */
-	memset(&pginfo, 0, sizeof(pginfo));
-	pginfo.type = EHCA_MR_PGI_USER;
-	pginfo.hwpage_size = hwpage_size;
-	pginfo.num_kpages = num_kpages;
-	pginfo.num_hwpages = num_hwpages;
-	pginfo.u.usr.region = e_mr->umem;
-	pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
-	pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
-	ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
-			  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
-			  &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
-	if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
-		ehca_warn(pd->device, "failed to register mr "
-			  "with hwpage_size=%llx", hwpage_size);
-		ehca_info(pd->device, "try to register mr with "
-			  "kpage_size=%lx", PAGE_SIZE);
-		/*
-		 * this means kpages are not contiguous for a hw page
-		 * try kernel page size as fallback solution
-		 */
-		hwpage_size = PAGE_SIZE;
-		goto reg_user_mr_fallback;
-	}
-	if (ret) {
-		ib_mr = ERR_PTR(ret);
-		goto reg_user_mr_exit2;
-	}
-
-	/* successful registration of all pages */
-	return &e_mr->ib.ib_mr;
-
-reg_user_mr_exit2:
-	ib_umem_release(e_mr->umem);
-reg_user_mr_exit1:
-	ehca_mr_delete(e_mr);
-reg_user_mr_exit0:
-	if (IS_ERR(ib_mr))
-		ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
-			 PTR_ERR(ib_mr), pd, mr_access_flags, udata);
-	return ib_mr;
-} /* end ehca_reg_user_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dereg_mr(struct ib_mr *mr)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct ehca_shca *shca =
-		container_of(mr->device, struct ehca_shca, ib_device);
-	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-
-	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
-		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
-			 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
-		ret = -EINVAL;
-		goto dereg_mr_exit0;
-	} else if (e_mr == shca->maxmr) {
-		/* should be impossible, however reject to be sure */
-		ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
-			 "shca->maxmr=%p mr->lkey=%x",
-			 mr, shca->maxmr, mr->lkey);
-		ret = -EINVAL;
-		goto dereg_mr_exit0;
-	}
-
-	/* TODO: BUSY: MR still has bound window(s) */
-	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
-			 "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
-			 h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
-			 e_mr->ipz_mr_handle.handle, mr->lkey);
-		ret = ehca2ib_return_code(h_ret);
-		goto dereg_mr_exit0;
-	}
-
-	if (e_mr->umem)
-		ib_umem_release(e_mr->umem);
-
-	/* successful deregistration */
-	ehca_mr_delete(e_mr);
-
-dereg_mr_exit0:
-	if (ret)
-		ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
-	return ret;
-} /* end ehca_dereg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
-{
-	struct ib_mw *ib_mw;
-	u64 h_ret;
-	struct ehca_mw *e_mw;
-	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-	struct ehca_shca *shca =
-		container_of(pd->device, struct ehca_shca, ib_device);
-	struct ehca_mw_hipzout_parms hipzout;
-
-	if (type != IB_MW_TYPE_1)
-		return ERR_PTR(-EINVAL);
-
-	e_mw = ehca_mw_new();
-	if (!e_mw) {
-		ib_mw = ERR_PTR(-ENOMEM);
-		goto alloc_mw_exit0;
-	}
-
-	h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
-					 e_pd->fw_pd, &hipzout);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
-			 "shca=%p hca_hndl=%llx mw=%p",
-			 h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
-		ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
-		goto alloc_mw_exit1;
-	}
-	/* successful MW allocation */
-	e_mw->ipz_mw_handle = hipzout.handle;
-	e_mw->ib_mw.rkey    = hipzout.rkey;
-	return &e_mw->ib_mw;
-
-alloc_mw_exit1:
-	ehca_mw_delete(e_mw);
-alloc_mw_exit0:
-	if (IS_ERR(ib_mw))
-		ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
-	return ib_mw;
-} /* end ehca_alloc_mw() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dealloc_mw(struct ib_mw *mw)
-{
-	u64 h_ret;
-	struct ehca_shca *shca =
-		container_of(mw->device, struct ehca_shca, ib_device);
-	struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
-
-	h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
-			 "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
-			 h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
-			 e_mw->ipz_mw_handle.handle);
-		return ehca2ib_return_code(h_ret);
-	}
-	/* successful deallocation */
-	ehca_mw_delete(e_mw);
-	return 0;
-} /* end ehca_dealloc_mw() */
-
-/*----------------------------------------------------------------------*/
-
-struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
-			      int mr_access_flags,
-			      struct ib_fmr_attr *fmr_attr)
-{
-	struct ib_fmr *ib_fmr;
-	struct ehca_shca *shca =
-		container_of(pd->device, struct ehca_shca, ib_device);
-	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-	struct ehca_mr *e_fmr;
-	int ret;
-	u32 tmp_lkey, tmp_rkey;
-	struct ehca_mr_pginfo pginfo;
-	u64 hw_pgsize;
-
-	/* check other parameters */
-	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
-	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
-	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
-		/*
-		 * Remote Write Access requires Local Write Access
-		 * Remote Atomic Access requires Local Write Access
-		 */
-		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-			 mr_access_flags);
-		ib_fmr = ERR_PTR(-EINVAL);
-		goto alloc_fmr_exit0;
-	}
-	if (mr_access_flags & IB_ACCESS_MW_BIND) {
-		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
-			 mr_access_flags);
-		ib_fmr = ERR_PTR(-EINVAL);
-		goto alloc_fmr_exit0;
-	}
-	if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
-		ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
-			 "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
-			 fmr_attr->max_pages, fmr_attr->max_maps,
-			 fmr_attr->page_shift);
-		ib_fmr = ERR_PTR(-EINVAL);
-		goto alloc_fmr_exit0;
-	}
-
-	hw_pgsize = 1 << fmr_attr->page_shift;
-	if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
-		ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
-			 fmr_attr->page_shift);
-		ib_fmr = ERR_PTR(-EINVAL);
-		goto alloc_fmr_exit0;
-	}
-
-	e_fmr = ehca_mr_new();
-	if (!e_fmr) {
-		ib_fmr = ERR_PTR(-ENOMEM);
-		goto alloc_fmr_exit0;
-	}
-	e_fmr->flags |= EHCA_MR_FLAG_FMR;
-
-	/* register MR on HCA */
-	memset(&pginfo, 0, sizeof(pginfo));
-	pginfo.hwpage_size = hw_pgsize;
-	/*
-	 * pginfo.num_hwpages==0, ie register_rpages() will not be called
-	 * but deferred to map_phys_fmr()
-	 */
-	ret = ehca_reg_mr(shca, e_fmr, NULL,
-			  fmr_attr->max_pages * (1 << fmr_attr->page_shift),
-			  mr_access_flags, e_pd, &pginfo,
-			  &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
-	if (ret) {
-		ib_fmr = ERR_PTR(ret);
-		goto alloc_fmr_exit1;
-	}
-
-	/* successful */
-	e_fmr->hwpage_size = hw_pgsize;
-	e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
-	e_fmr->fmr_max_pages = fmr_attr->max_pages;
-	e_fmr->fmr_max_maps = fmr_attr->max_maps;
-	e_fmr->fmr_map_cnt = 0;
-	return &e_fmr->ib.ib_fmr;
-
-alloc_fmr_exit1:
-	ehca_mr_delete(e_fmr);
-alloc_fmr_exit0:
-	return ib_fmr;
-} /* end ehca_alloc_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_map_phys_fmr(struct ib_fmr *fmr,
-		      u64 *page_list,
-		      int list_len,
-		      u64 iova)
-{
-	int ret;
-	struct ehca_shca *shca =
-		container_of(fmr->device, struct ehca_shca, ib_device);
-	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
-	struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
-	struct ehca_mr_pginfo pginfo;
-	u32 tmp_lkey, tmp_rkey;
-
-	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
-			 e_fmr, e_fmr->flags);
-		ret = -EINVAL;
-		goto map_phys_fmr_exit0;
-	}
-	ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
-	if (ret)
-		goto map_phys_fmr_exit0;
-	if (iova % e_fmr->fmr_page_size) {
-		/* only whole-numbered pages */
-		ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
-			 iova, e_fmr->fmr_page_size);
-		ret = -EINVAL;
-		goto map_phys_fmr_exit0;
-	}
-	if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
-		/* HCAD does not limit the maps, however trace this anyway */
-		ehca_info(fmr->device, "map limit exceeded, fmr=%p "
-			  "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
-			  fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
-	}
-
-	memset(&pginfo, 0, sizeof(pginfo));
-	pginfo.type = EHCA_MR_PGI_FMR;
-	pginfo.num_kpages = list_len;
-	pginfo.hwpage_size = e_fmr->hwpage_size;
-	pginfo.num_hwpages =
-		list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
-	pginfo.u.fmr.page_list = page_list;
-	pginfo.next_hwpage =
-		(iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
-	pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
-
-	ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
-			    list_len * e_fmr->fmr_page_size,
-			    e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
-	if (ret)
-		goto map_phys_fmr_exit0;
-
-	/* successful reregistration */
-	e_fmr->fmr_map_cnt++;
-	e_fmr->ib.ib_fmr.lkey = tmp_lkey;
-	e_fmr->ib.ib_fmr.rkey = tmp_rkey;
-	return 0;
-
-map_phys_fmr_exit0:
-	if (ret)
-		ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
-			 "iova=%llx", ret, fmr, page_list, list_len, iova);
-	return ret;
-} /* end ehca_map_phys_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_unmap_fmr(struct list_head *fmr_list)
-{
-	int ret = 0;
-	struct ib_fmr *ib_fmr;
-	struct ehca_shca *shca = NULL;
-	struct ehca_shca *prev_shca;
-	struct ehca_mr *e_fmr;
-	u32 num_fmr = 0;
-	u32 unmap_fmr_cnt = 0;
-
-	/* check all FMR belong to same SHCA, and check internal flag */
-	list_for_each_entry(ib_fmr, fmr_list, list) {
-		prev_shca = shca;
-		shca = container_of(ib_fmr->device, struct ehca_shca,
-				    ib_device);
-		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
-		if ((shca != prev_shca) && prev_shca) {
-			ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
-				 "prev_shca=%p e_fmr=%p",
-				 shca, prev_shca, e_fmr);
-			ret = -EINVAL;
-			goto unmap_fmr_exit0;
-		}
-		if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-			ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
-				 "e_fmr->flags=%x", e_fmr, e_fmr->flags);
-			ret = -EINVAL;
-			goto unmap_fmr_exit0;
-		}
-		num_fmr++;
-	}
-
-	/* loop over all FMRs to unmap */
-	list_for_each_entry(ib_fmr, fmr_list, list) {
-		unmap_fmr_cnt++;
-		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
-		shca = container_of(ib_fmr->device, struct ehca_shca,
-				    ib_device);
-		ret = ehca_unmap_one_fmr(shca, e_fmr);
-		if (ret) {
-			/* unmap failed, stop unmapping of rest of FMRs */
-			ehca_err(&shca->ib_device, "unmap of one FMR failed, "
-				 "stop rest, e_fmr=%p num_fmr=%x "
-				 "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
-				 unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
-			goto unmap_fmr_exit0;
-		}
-	}
-
-unmap_fmr_exit0:
-	if (ret)
-		ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
-			     ret, fmr_list, num_fmr, unmap_fmr_cnt);
-	return ret;
-} /* end ehca_unmap_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dealloc_fmr(struct ib_fmr *fmr)
-{
-	int ret;
-	u64 h_ret;
-	struct ehca_shca *shca =
-		container_of(fmr->device, struct ehca_shca, ib_device);
-	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
-
-	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
-		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
-			 e_fmr, e_fmr->flags);
-		ret = -EINVAL;
-		goto free_fmr_exit0;
-	}
-
-	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
-			 "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
-			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
-			 e_fmr->ipz_mr_handle.handle, fmr->lkey);
-		ret = ehca2ib_return_code(h_ret);
-		goto free_fmr_exit0;
-	}
-	/* successful deregistration */
-	ehca_mr_delete(e_fmr);
-	return 0;
-
-free_fmr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
-	return ret;
-} /* end ehca_dealloc_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
-				   struct ehca_mr *e_mr,
-				   struct ehca_mr_pginfo *pginfo);
-
-int ehca_reg_mr(struct ehca_shca *shca,
-		struct ehca_mr *e_mr,
-		u64 *iova_start,
-		u64 size,
-		int acl,
-		struct ehca_pd *e_pd,
-		struct ehca_mr_pginfo *pginfo,
-		u32 *lkey, /*OUT*/
-		u32 *rkey, /*OUT*/
-		enum ehca_reg_type reg_type)
-{
-	int ret;
-	u64 h_ret;
-	u32 hipz_acl;
-	struct ehca_mr_hipzout_parms hipzout;
-
-	ehca_mrmw_map_acl(acl, &hipz_acl);
-	ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
-	if (ehca_use_hp_mr == 1)
-		hipz_acl |= 0x00000001;
-
-	h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
-					 (u64)iova_start, size, hipz_acl,
-					 e_pd->fw_pd, &hipzout);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
-			 "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
-		ret = ehca2ib_return_code(h_ret);
-		goto ehca_reg_mr_exit0;
-	}
-
-	e_mr->ipz_mr_handle = hipzout.handle;
-
-	if (reg_type == EHCA_REG_BUSMAP_MR)
-		ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
-	else if (reg_type == EHCA_REG_MR)
-		ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
-	else
-		ret = -EINVAL;
-
-	if (ret)
-		goto ehca_reg_mr_exit1;
-
-	/* successful registration */
-	e_mr->num_kpages = pginfo->num_kpages;
-	e_mr->num_hwpages = pginfo->num_hwpages;
-	e_mr->hwpage_size = pginfo->hwpage_size;
-	e_mr->start = iova_start;
-	e_mr->size = size;
-	e_mr->acl = acl;
-	*lkey = hipzout.lkey;
-	*rkey = hipzout.rkey;
-	return 0;
-
-ehca_reg_mr_exit1:
-	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
-			 "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
-			 "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
-			 h_ret, shca, e_mr, iova_start, size, acl, e_pd,
-			 hipzout.lkey, pginfo, pginfo->num_kpages,
-			 pginfo->num_hwpages, ret);
-		ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
-			 "not recoverable");
-	}
-ehca_reg_mr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
-			 "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
-			 "num_kpages=%llx num_hwpages=%llx",
-			 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
-			 pginfo->num_kpages, pginfo->num_hwpages);
-	return ret;
-} /* end ehca_reg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_mr_rpages(struct ehca_shca *shca,
-		       struct ehca_mr *e_mr,
-		       struct ehca_mr_pginfo *pginfo)
-{
-	int ret = 0;
-	u64 h_ret;
-	u32 rnum;
-	u64 rpage;
-	u32 i;
-	u64 *kpage;
-
-	if (!pginfo->num_hwpages) /* in case of fmr */
-		return 0;
-
-	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!kpage) {
-		ehca_err(&shca->ib_device, "kpage alloc failed");
-		ret = -ENOMEM;
-		goto ehca_reg_mr_rpages_exit0;
-	}
-
-	/* max MAX_RPAGES ehca mr pages per register call */
-	for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
-
-		if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
-			rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
-			if (rnum == 0)
-				rnum = MAX_RPAGES;      /* last shot is full */
-		} else
-			rnum = MAX_RPAGES;
-
-		ret = ehca_set_pagebuf(pginfo, rnum, kpage);
-		if (ret) {
-			ehca_err(&shca->ib_device, "ehca_set_pagebuf "
-				 "bad rc, ret=%i rnum=%x kpage=%p",
-				 ret, rnum, kpage);
-			goto ehca_reg_mr_rpages_exit1;
-		}
-
-		if (rnum > 1) {
-			rpage = __pa(kpage);
-			if (!rpage) {
-				ehca_err(&shca->ib_device, "kpage=%p i=%x",
-					 kpage, i);
-				ret = -EFAULT;
-				goto ehca_reg_mr_rpages_exit1;
-			}
-		} else
-			rpage = *kpage;
-
-		h_ret = hipz_h_register_rpage_mr(
-			shca->ipz_hca_handle, e_mr,
-			ehca_encode_hwpage_size(pginfo->hwpage_size),
-			0, rpage, rnum);
-
-		if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
-			/*
-			 * check for 'registration complete'==H_SUCCESS
-			 * and for 'page registered'==H_PAGE_REGISTERED
-			 */
-			if (h_ret != H_SUCCESS) {
-				ehca_err(&shca->ib_device, "last "
-					 "hipz_reg_rpage_mr failed, h_ret=%lli "
-					 "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
-					 " lkey=%x", h_ret, e_mr, i,
-					 shca->ipz_hca_handle.handle,
-					 e_mr->ipz_mr_handle.handle,
-					 e_mr->ib.ib_mr.lkey);
-				ret = ehca2ib_return_code(h_ret);
-				break;
-			} else
-				ret = 0;
-		} else if (h_ret != H_PAGE_REGISTERED) {
-			ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
-				 "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
-				 "mr_hndl=%llx", h_ret, e_mr, i,
-				 e_mr->ib.ib_mr.lkey,
-				 shca->ipz_hca_handle.handle,
-				 e_mr->ipz_mr_handle.handle);
-			ret = ehca2ib_return_code(h_ret);
-			break;
-		} else
-			ret = 0;
-	} /* end for(i) */
-
-
-ehca_reg_mr_rpages_exit1:
-	ehca_free_fw_ctrlblock(kpage);
-ehca_reg_mr_rpages_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
-			 "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
-			 pginfo, pginfo->num_kpages, pginfo->num_hwpages);
-	return ret;
-} /* end ehca_reg_mr_rpages() */
-
-/*----------------------------------------------------------------------*/
-
-inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
-				struct ehca_mr *e_mr,
-				u64 *iova_start,
-				u64 size,
-				u32 acl,
-				struct ehca_pd *e_pd,
-				struct ehca_mr_pginfo *pginfo,
-				u32 *lkey, /*OUT*/
-				u32 *rkey) /*OUT*/
-{
-	int ret;
-	u64 h_ret;
-	u32 hipz_acl;
-	u64 *kpage;
-	u64 rpage;
-	struct ehca_mr_pginfo pginfo_save;
-	struct ehca_mr_hipzout_parms hipzout;
-
-	ehca_mrmw_map_acl(acl, &hipz_acl);
-	ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
-
-	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!kpage) {
-		ehca_err(&shca->ib_device, "kpage alloc failed");
-		ret = -ENOMEM;
-		goto ehca_rereg_mr_rereg1_exit0;
-	}
-
-	pginfo_save = *pginfo;
-	ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
-	if (ret) {
-		ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
-			 "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
-			 "kpage=%p", e_mr, pginfo, pginfo->type,
-			 pginfo->num_kpages, pginfo->num_hwpages, kpage);
-		goto ehca_rereg_mr_rereg1_exit1;
-	}
-	rpage = __pa(kpage);
-	if (!rpage) {
-		ehca_err(&shca->ib_device, "kpage=%p", kpage);
-		ret = -EFAULT;
-		goto ehca_rereg_mr_rereg1_exit1;
-	}
-	h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
-				      (u64)iova_start, size, hipz_acl,
-				      e_pd->fw_pd, rpage, &hipzout);
-	if (h_ret != H_SUCCESS) {
-		/*
-		 * reregistration unsuccessful, try it again with the 3 hCalls,
-		 * e.g. this is required in case H_MR_CONDITION
-		 * (MW bound or MR is shared)
-		 */
-		ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
-			  "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
-		*pginfo = pginfo_save;
-		ret = -EAGAIN;
-	} else if ((u64 *)hipzout.vaddr != iova_start) {
-		ehca_err(&shca->ib_device, "PHYP changed iova_start in "
-			 "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
-			 "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
-			 hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
-			 e_mr->ib.ib_mr.lkey, hipzout.lkey);
-		ret = -EFAULT;
-	} else {
-		/*
-		 * successful reregistration
-		 * note: start and start_out are identical for eServer HCAs
-		 */
-		e_mr->num_kpages = pginfo->num_kpages;
-		e_mr->num_hwpages = pginfo->num_hwpages;
-		e_mr->hwpage_size = pginfo->hwpage_size;
-		e_mr->start = iova_start;
-		e_mr->size = size;
-		e_mr->acl = acl;
-		*lkey = hipzout.lkey;
-		*rkey = hipzout.rkey;
-	}
-
-ehca_rereg_mr_rereg1_exit1:
-	ehca_free_fw_ctrlblock(kpage);
-ehca_rereg_mr_rereg1_exit0:
-	if ( ret && (ret != -EAGAIN) )
-		ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
-			 "pginfo=%p num_kpages=%llx num_hwpages=%llx",
-			 ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
-			 pginfo->num_hwpages);
-	return ret;
-} /* end ehca_rereg_mr_rereg1() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_rereg_mr(struct ehca_shca *shca,
-		  struct ehca_mr *e_mr,
-		  u64 *iova_start,
-		  u64 size,
-		  int acl,
-		  struct ehca_pd *e_pd,
-		  struct ehca_mr_pginfo *pginfo,
-		  u32 *lkey,
-		  u32 *rkey)
-{
-	int ret = 0;
-	u64 h_ret;
-	int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
-	int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
-
-	/* first determine reregistration hCall(s) */
-	if ((pginfo->num_hwpages > MAX_RPAGES) ||
-	    (e_mr->num_hwpages > MAX_RPAGES) ||
-	    (pginfo->num_hwpages > e_mr->num_hwpages)) {
-		ehca_dbg(&shca->ib_device, "Rereg3 case, "
-			 "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
-			 pginfo->num_hwpages, e_mr->num_hwpages);
-		rereg_1_hcall = 0;
-		rereg_3_hcall = 1;
-	}
-
-	if (e_mr->flags & EHCA_MR_FLAG_MAXMR) {	/* check for max-MR */
-		rereg_1_hcall = 0;
-		rereg_3_hcall = 1;
-		e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
-		ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
-			 e_mr);
-	}
-
-	if (rereg_1_hcall) {
-		ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
-					   acl, e_pd, pginfo, lkey, rkey);
-		if (ret) {
-			if (ret == -EAGAIN)
-				rereg_3_hcall = 1;
-			else
-				goto ehca_rereg_mr_exit0;
-		}
-	}
-
-	if (rereg_3_hcall) {
-		struct ehca_mr save_mr;
-
-		/* first deregister old MR */
-		h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
-		if (h_ret != H_SUCCESS) {
-			ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-				 "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
-				 "mr->lkey=%x",
-				 h_ret, e_mr, shca->ipz_hca_handle.handle,
-				 e_mr->ipz_mr_handle.handle,
-				 e_mr->ib.ib_mr.lkey);
-			ret = ehca2ib_return_code(h_ret);
-			goto ehca_rereg_mr_exit0;
-		}
-		/* clean ehca_mr_t, without changing struct ib_mr and lock */
-		save_mr = *e_mr;
-		ehca_mr_deletenew(e_mr);
-
-		/* set some MR values */
-		e_mr->flags = save_mr.flags;
-		e_mr->hwpage_size = save_mr.hwpage_size;
-		e_mr->fmr_page_size = save_mr.fmr_page_size;
-		e_mr->fmr_max_pages = save_mr.fmr_max_pages;
-		e_mr->fmr_max_maps = save_mr.fmr_max_maps;
-		e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
-
-		ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
-				  e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
-		if (ret) {
-			u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
-			memcpy(&e_mr->flags, &(save_mr.flags),
-			       sizeof(struct ehca_mr) - offset);
-			goto ehca_rereg_mr_exit0;
-		}
-	}
-
-ehca_rereg_mr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
-			 "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
-			 "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
-			 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
-			 acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
-			 rereg_1_hcall, rereg_3_hcall);
-	return ret;
-} /* end ehca_rereg_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_unmap_one_fmr(struct ehca_shca *shca,
-		       struct ehca_mr *e_fmr)
-{
-	int ret = 0;
-	u64 h_ret;
-	struct ehca_pd *e_pd =
-		container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
-	struct ehca_mr save_fmr;
-	u32 tmp_lkey, tmp_rkey;
-	struct ehca_mr_pginfo pginfo;
-	struct ehca_mr_hipzout_parms hipzout;
-	struct ehca_mr save_mr;
-
-	if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
-		/*
-		 * note: after using rereg hcall with len=0,
-		 * rereg hcall must be used again for registering pages
-		 */
-		h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
-					      0, 0, e_pd->fw_pd, 0, &hipzout);
-		if (h_ret == H_SUCCESS) {
-			/* successful reregistration */
-			e_fmr->start = NULL;
-			e_fmr->size = 0;
-			tmp_lkey = hipzout.lkey;
-			tmp_rkey = hipzout.rkey;
-			return 0;
-		}
-		/*
-		 * should not happen, because length checked above,
-		 * FMRs are not shared and no MW bound to FMRs
-		 */
-		ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
-			 "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
-			 "mr_hndl=%llx lkey=%x lkey_out=%x",
-			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
-			 e_fmr->ipz_mr_handle.handle,
-			 e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
-		/* try free and rereg */
-	}
-
-	/* first free old FMR */
-	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-			 "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
-			 "lkey=%x",
-			 h_ret, e_fmr, shca->ipz_hca_handle.handle,
-			 e_fmr->ipz_mr_handle.handle,
-			 e_fmr->ib.ib_fmr.lkey);
-		ret = ehca2ib_return_code(h_ret);
-		goto ehca_unmap_one_fmr_exit0;
-	}
-	/* clean ehca_mr_t, without changing lock */
-	save_fmr = *e_fmr;
-	ehca_mr_deletenew(e_fmr);
-
-	/* set some MR values */
-	e_fmr->flags = save_fmr.flags;
-	e_fmr->hwpage_size = save_fmr.hwpage_size;
-	e_fmr->fmr_page_size = save_fmr.fmr_page_size;
-	e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
-	e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
-	e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
-	e_fmr->acl = save_fmr.acl;
-
-	memset(&pginfo, 0, sizeof(pginfo));
-	pginfo.type = EHCA_MR_PGI_FMR;
-	ret = ehca_reg_mr(shca, e_fmr, NULL,
-			  (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
-			  e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
-			  &tmp_rkey, EHCA_REG_MR);
-	if (ret) {
-		u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
-		memcpy(&e_fmr->flags, &(save_mr.flags),
-		       sizeof(struct ehca_mr) - offset);
-	}
-
-ehca_unmap_one_fmr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
-			 "fmr_max_pages=%x",
-			 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
-	return ret;
-} /* end ehca_unmap_one_fmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_smr(struct ehca_shca *shca,
-		 struct ehca_mr *e_origmr,
-		 struct ehca_mr *e_newmr,
-		 u64 *iova_start,
-		 int acl,
-		 struct ehca_pd *e_pd,
-		 u32 *lkey, /*OUT*/
-		 u32 *rkey) /*OUT*/
-{
-	int ret = 0;
-	u64 h_ret;
-	u32 hipz_acl;
-	struct ehca_mr_hipzout_parms hipzout;
-
-	ehca_mrmw_map_acl(acl, &hipz_acl);
-	ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
-
-	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
-				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
-				    &hipzout);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
-			 "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
-			 "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
-			 h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
-			 shca->ipz_hca_handle.handle,
-			 e_origmr->ipz_mr_handle.handle,
-			 e_origmr->ib.ib_mr.lkey);
-		ret = ehca2ib_return_code(h_ret);
-		goto ehca_reg_smr_exit0;
-	}
-	/* successful registration */
-	e_newmr->num_kpages = e_origmr->num_kpages;
-	e_newmr->num_hwpages = e_origmr->num_hwpages;
-	e_newmr->hwpage_size   = e_origmr->hwpage_size;
-	e_newmr->start = iova_start;
-	e_newmr->size = e_origmr->size;
-	e_newmr->acl = acl;
-	e_newmr->ipz_mr_handle = hipzout.handle;
-	*lkey = hipzout.lkey;
-	*rkey = hipzout.rkey;
-	return 0;
-
-ehca_reg_smr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
-			 "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
-			 ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
-	return ret;
-} /* end ehca_reg_smr() */
-
-/*----------------------------------------------------------------------*/
-static inline void *ehca_calc_sectbase(int top, int dir, int idx)
-{
-	unsigned long ret = idx;
-	ret |= dir << EHCA_DIR_INDEX_SHIFT;
-	ret |= top << EHCA_TOP_INDEX_SHIFT;
-	return __va(ret << SECTION_SIZE_BITS);
-}
-
-#define ehca_bmap_valid(entry) \
-	((u64)entry != (u64)EHCA_INVAL_ADDR)
-
-static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
-			       struct ehca_shca *shca, struct ehca_mr *mr,
-			       struct ehca_mr_pginfo *pginfo)
-{
-	u64 h_ret = 0;
-	unsigned long page = 0;
-	u64 rpage = __pa(kpage);
-	int page_count;
-
-	void *sectbase = ehca_calc_sectbase(top, dir, idx);
-	if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
-		ehca_err(&shca->ib_device, "reg_mr_section will probably fail:"
-					   "hwpage_size does not fit to "
-					   "section start address");
-	}
-	page_count = EHCA_SECTSIZE / pginfo->hwpage_size;
-
-	while (page < page_count) {
-		u64 rnum;
-		for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
-		     rnum++) {
-			void *pg = sectbase + ((page++) * pginfo->hwpage_size);
-			kpage[rnum] = __pa(pg);
-		}
-
-		h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
-			ehca_encode_hwpage_size(pginfo->hwpage_size),
-			0, rpage, rnum);
-
-		if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
-			ehca_err(&shca->ib_device, "register_rpage_mr failed");
-			return h_ret;
-		}
-	}
-	return h_ret;
-}
-
-static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
-				struct ehca_shca *shca, struct ehca_mr *mr,
-				struct ehca_mr_pginfo *pginfo)
-{
-	u64 hret = H_SUCCESS;
-	int idx;
-
-	for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
-		if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
-			continue;
-
-		hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
-					   pginfo);
-		if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
-				return hret;
-	}
-	return hret;
-}
-
-static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
-				    struct ehca_mr *mr,
-				    struct ehca_mr_pginfo *pginfo)
-{
-	u64 hret = H_SUCCESS;
-	int dir;
-
-	for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
-		if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-			continue;
-
-		hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
-		if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
-				return hret;
-	}
-	return hret;
-}
-
-/* register internal max-MR to internal SHCA */
-int ehca_reg_internal_maxmr(
-	struct ehca_shca *shca,
-	struct ehca_pd *e_pd,
-	struct ehca_mr **e_maxmr)  /*OUT*/
-{
-	int ret;
-	struct ehca_mr *e_mr;
-	u64 *iova_start;
-	u64 size_maxmr;
-	struct ehca_mr_pginfo pginfo;
-	u32 num_kpages;
-	u32 num_hwpages;
-	u64 hw_pgsize;
-
-	if (!ehca_bmap) {
-		ret = -EFAULT;
-		goto ehca_reg_internal_maxmr_exit0;
-	}
-
-	e_mr = ehca_mr_new();
-	if (!e_mr) {
-		ehca_err(&shca->ib_device, "out of memory");
-		ret = -ENOMEM;
-		goto ehca_reg_internal_maxmr_exit0;
-	}
-	e_mr->flags |= EHCA_MR_FLAG_MAXMR;
-
-	/* register internal max-MR on HCA */
-	size_maxmr = ehca_mr_len;
-	iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
-	num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
-				PAGE_SIZE);
-	hw_pgsize = ehca_get_max_hwpage_size(shca);
-	num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
-				 hw_pgsize);
-
-	memset(&pginfo, 0, sizeof(pginfo));
-	pginfo.type = EHCA_MR_PGI_PHYS;
-	pginfo.num_kpages = num_kpages;
-	pginfo.num_hwpages = num_hwpages;
-	pginfo.hwpage_size = hw_pgsize;
-	pginfo.u.phy.addr = 0;
-	pginfo.u.phy.size = size_maxmr;
-
-	ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
-			  &pginfo, &e_mr->ib.ib_mr.lkey,
-			  &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
-	if (ret) {
-		ehca_err(&shca->ib_device, "reg of internal max MR failed, "
-			 "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
-			 "num_hwpages=%x", e_mr, iova_start, size_maxmr,
-			 num_kpages, num_hwpages);
-		goto ehca_reg_internal_maxmr_exit1;
-	}
-
-	/* successful registration of all pages */
-	e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
-	e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
-	e_mr->ib.ib_mr.uobject = NULL;
-	atomic_inc(&(e_pd->ib_pd.usecnt));
-	*e_maxmr = e_mr;
-	return 0;
-
-ehca_reg_internal_maxmr_exit1:
-	ehca_mr_delete(e_mr);
-ehca_reg_internal_maxmr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
-			 ret, shca, e_pd, e_maxmr);
-	return ret;
-} /* end ehca_reg_internal_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_reg_maxmr(struct ehca_shca *shca,
-		   struct ehca_mr *e_newmr,
-		   u64 *iova_start,
-		   int acl,
-		   struct ehca_pd *e_pd,
-		   u32 *lkey,
-		   u32 *rkey)
-{
-	u64 h_ret;
-	struct ehca_mr *e_origmr = shca->maxmr;
-	u32 hipz_acl;
-	struct ehca_mr_hipzout_parms hipzout;
-
-	ehca_mrmw_map_acl(acl, &hipz_acl);
-	ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
-
-	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
-				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
-				    &hipzout);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
-			 "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
-			 h_ret, e_origmr, shca->ipz_hca_handle.handle,
-			 e_origmr->ipz_mr_handle.handle,
-			 e_origmr->ib.ib_mr.lkey);
-		return ehca2ib_return_code(h_ret);
-	}
-	/* successful registration */
-	e_newmr->num_kpages = e_origmr->num_kpages;
-	e_newmr->num_hwpages = e_origmr->num_hwpages;
-	e_newmr->hwpage_size = e_origmr->hwpage_size;
-	e_newmr->start = iova_start;
-	e_newmr->size = e_origmr->size;
-	e_newmr->acl = acl;
-	e_newmr->ipz_mr_handle = hipzout.handle;
-	*lkey = hipzout.lkey;
-	*rkey = hipzout.rkey;
-	return 0;
-} /* end ehca_reg_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
-{
-	int ret;
-	struct ehca_mr *e_maxmr;
-	struct ib_pd *ib_pd;
-
-	if (!shca->maxmr) {
-		ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
-		ret = -EINVAL;
-		goto ehca_dereg_internal_maxmr_exit0;
-	}
-
-	e_maxmr = shca->maxmr;
-	ib_pd = e_maxmr->ib.ib_mr.pd;
-	shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
-
-	ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
-	if (ret) {
-		ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
-			 "ret=%i e_maxmr=%p shca=%p lkey=%x",
-			 ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
-		shca->maxmr = e_maxmr;
-		goto ehca_dereg_internal_maxmr_exit0;
-	}
-
-	atomic_dec(&ib_pd->usecnt);
-
-ehca_dereg_internal_maxmr_exit0:
-	if (ret)
-		ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
-			 ret, shca, shca->maxmr);
-	return ret;
-} /* end ehca_dereg_internal_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-/* check page list of map FMR verb for validness */
-int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
-			     u64 *page_list,
-			     int list_len)
-{
-	u32 i;
-	u64 *page;
-
-	if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
-		ehca_gen_err("bad list_len, list_len=%x "
-			     "e_fmr->fmr_max_pages=%x fmr=%p",
-			     list_len, e_fmr->fmr_max_pages, e_fmr);
-		return -EINVAL;
-	}
-
-	/* each page must be aligned */
-	page = page_list;
-	for (i = 0; i < list_len; i++) {
-		if (*page % e_fmr->fmr_page_size) {
-			ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
-				     "fmr_page_size=%x", i, *page, page, e_fmr,
-				     e_fmr->fmr_page_size);
-			return -EINVAL;
-		}
-		page++;
-	}
-
-	return 0;
-} /* end ehca_fmr_check_page_list() */
-
-/*----------------------------------------------------------------------*/
-
-/* PAGE_SIZE >= pginfo->hwpage_size */
-static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
-				  u32 number,
-				  u64 *kpage)
-{
-	int ret = 0;
-	u64 pgaddr;
-	u32 j = 0;
-	int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
-	struct scatterlist **sg = &pginfo->u.usr.next_sg;
-
-	while (*sg != NULL) {
-		pgaddr = page_to_pfn(sg_page(*sg))
-			<< PAGE_SHIFT;
-		*kpage = pgaddr + (pginfo->next_hwpage *
-				   pginfo->hwpage_size);
-		if (!(*kpage)) {
-			ehca_gen_err("pgaddr=%llx "
-				     "sg_dma_address=%llx "
-				     "entry=%llx next_hwpage=%llx",
-				     pgaddr, (u64)sg_dma_address(*sg),
-				     pginfo->u.usr.next_nmap,
-				     pginfo->next_hwpage);
-			return -EFAULT;
-		}
-		(pginfo->hwpage_cnt)++;
-		(pginfo->next_hwpage)++;
-		kpage++;
-		if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
-			(pginfo->kpage_cnt)++;
-			(pginfo->u.usr.next_nmap)++;
-			pginfo->next_hwpage = 0;
-			*sg = sg_next(*sg);
-		}
-		j++;
-		if (j >= number)
-			break;
-	}
-
-	return ret;
-}
-
-/*
- * check given pages for contiguous layout
- * last page addr is returned in prev_pgaddr for further check
- */
-static int ehca_check_kpages_per_ate(struct scatterlist **sg,
-				     int num_pages,
-				     u64 *prev_pgaddr)
-{
-	for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
-		u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
-		if (ehca_debug_level >= 3)
-			ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
-				     *(u64 *)__va(pgaddr));
-		if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
-			ehca_gen_err("uncontiguous page found pgaddr=%llx "
-				     "prev_pgaddr=%llx entries_left_in_hwpage=%x",
-				     pgaddr, *prev_pgaddr, num_pages);
-			return -EINVAL;
-		}
-		*prev_pgaddr = pgaddr;
-	}
-	return 0;
-}
-
-/* PAGE_SIZE < pginfo->hwpage_size */
-static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
-				  u32 number,
-				  u64 *kpage)
-{
-	int ret = 0;
-	u64 pgaddr, prev_pgaddr;
-	u32 j = 0;
-	int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
-	int nr_kpages = kpages_per_hwpage;
-	struct scatterlist **sg = &pginfo->u.usr.next_sg;
-
-	while (*sg != NULL) {
-
-		if (nr_kpages == kpages_per_hwpage) {
-			pgaddr = (page_to_pfn(sg_page(*sg))
-				   << PAGE_SHIFT);
-			*kpage = pgaddr;
-			if (!(*kpage)) {
-				ehca_gen_err("pgaddr=%llx entry=%llx",
-					     pgaddr, pginfo->u.usr.next_nmap);
-				ret = -EFAULT;
-				return ret;
-			}
-			/*
-			 * The first page in a hwpage must be aligned;
-			 * the first MR page is exempt from this rule.
-			 */
-			if (pgaddr & (pginfo->hwpage_size - 1)) {
-				if (pginfo->hwpage_cnt) {
-					ehca_gen_err(
-						"invalid alignment "
-						"pgaddr=%llx entry=%llx "
-						"mr_pgsize=%llx",
-						pgaddr, pginfo->u.usr.next_nmap,
-						pginfo->hwpage_size);
-					ret = -EFAULT;
-					return ret;
-				}
-				/* first MR page */
-				pginfo->kpage_cnt =
-					(pgaddr &
-					 (pginfo->hwpage_size - 1)) >>
-					PAGE_SHIFT;
-				nr_kpages -= pginfo->kpage_cnt;
-				*kpage = pgaddr &
-					 ~(pginfo->hwpage_size - 1);
-			}
-			if (ehca_debug_level >= 3) {
-				u64 val = *(u64 *)__va(pgaddr);
-				ehca_gen_dbg("kpage=%llx page=%llx "
-					     "value=%016llx",
-					     *kpage, pgaddr, val);
-			}
-			prev_pgaddr = pgaddr;
-			*sg = sg_next(*sg);
-			pginfo->kpage_cnt++;
-			pginfo->u.usr.next_nmap++;
-			nr_kpages--;
-			if (!nr_kpages)
-				goto next_kpage;
-			continue;
-		}
-
-		ret = ehca_check_kpages_per_ate(sg, nr_kpages,
-						&prev_pgaddr);
-		if (ret)
-			return ret;
-		pginfo->kpage_cnt += nr_kpages;
-		pginfo->u.usr.next_nmap += nr_kpages;
-
-next_kpage:
-		nr_kpages = kpages_per_hwpage;
-		(pginfo->hwpage_cnt)++;
-		kpage++;
-		j++;
-		if (j >= number)
-			break;
-	}
-
-	return ret;
-}
-
-static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
-				 u32 number, u64 *kpage)
-{
-	int ret = 0;
-	u64 addr = pginfo->u.phy.addr;
-	u64 size = pginfo->u.phy.size;
-	u64 num_hw, offs_hw;
-	u32 i = 0;
-
-	num_hw  = NUM_CHUNKS((addr % pginfo->hwpage_size) + size,
-				pginfo->hwpage_size);
-	offs_hw = (addr & ~(pginfo->hwpage_size - 1)) / pginfo->hwpage_size;
-
-	while (pginfo->next_hwpage < offs_hw + num_hw) {
-		/* sanity check */
-		if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
-		    (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
-			ehca_gen_err("kpage_cnt >= num_kpages, "
-				     "kpage_cnt=%llx num_kpages=%llx "
-				     "hwpage_cnt=%llx "
-				     "num_hwpages=%llx i=%x",
-				     pginfo->kpage_cnt,
-				     pginfo->num_kpages,
-				     pginfo->hwpage_cnt,
-				     pginfo->num_hwpages, i);
-			return -EFAULT;
-		}
-		*kpage = (addr & ~(pginfo->hwpage_size - 1)) +
-			 (pginfo->next_hwpage * pginfo->hwpage_size);
-		if ( !(*kpage) && addr ) {
-			ehca_gen_err("addr=%llx size=%llx "
-				     "next_hwpage=%llx", addr,
-				     size, pginfo->next_hwpage);
-			return -EFAULT;
-		}
-		(pginfo->hwpage_cnt)++;
-		(pginfo->next_hwpage)++;
-		if (PAGE_SIZE >= pginfo->hwpage_size) {
-			if (pginfo->next_hwpage %
-			    (PAGE_SIZE / pginfo->hwpage_size) == 0)
-				(pginfo->kpage_cnt)++;
-		} else
-			pginfo->kpage_cnt += pginfo->hwpage_size /
-				PAGE_SIZE;
-		kpage++;
-		i++;
-		if (i >= number) break;
-	}
-	if (pginfo->next_hwpage >= offs_hw + num_hw) {
-		pginfo->next_hwpage = 0;
-	}
-
-	return ret;
-}
-
-static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
-				u32 number, u64 *kpage)
-{
-	int ret = 0;
-	u64 *fmrlist;
-	u32 i;
-
-	/* loop over desired page_list entries */
-	fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
-	for (i = 0; i < number; i++) {
-		*kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) +
-			   pginfo->next_hwpage * pginfo->hwpage_size;
-		if ( !(*kpage) ) {
-			ehca_gen_err("*fmrlist=%llx fmrlist=%p "
-				     "next_listelem=%llx next_hwpage=%llx",
-				     *fmrlist, fmrlist,
-				     pginfo->u.fmr.next_listelem,
-				     pginfo->next_hwpage);
-			return -EFAULT;
-		}
-		(pginfo->hwpage_cnt)++;
-		if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
-			if (pginfo->next_hwpage %
-			    (pginfo->u.fmr.fmr_pgsize /
-			     pginfo->hwpage_size) == 0) {
-				(pginfo->kpage_cnt)++;
-				(pginfo->u.fmr.next_listelem)++;
-				fmrlist++;
-				pginfo->next_hwpage = 0;
-			} else
-				(pginfo->next_hwpage)++;
-		} else {
-			unsigned int cnt_per_hwpage = pginfo->hwpage_size /
-				pginfo->u.fmr.fmr_pgsize;
-			unsigned int j;
-			u64 prev = *kpage;
-			/* check if adrs are contiguous */
-			for (j = 1; j < cnt_per_hwpage; j++) {
-				u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1);
-				if (prev + pginfo->u.fmr.fmr_pgsize != p) {
-					ehca_gen_err("uncontiguous fmr pages "
-						     "found prev=%llx p=%llx "
-						     "idx=%x", prev, p, i + j);
-					return -EINVAL;
-				}
-				prev = p;
-			}
-			pginfo->kpage_cnt += cnt_per_hwpage;
-			pginfo->u.fmr.next_listelem += cnt_per_hwpage;
-			fmrlist += cnt_per_hwpage;
-		}
-		kpage++;
-	}
-	return ret;
-}
-
-/* setup page buffer from page info */
-int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
-		     u32 number,
-		     u64 *kpage)
-{
-	int ret;
-
-	switch (pginfo->type) {
-	case EHCA_MR_PGI_PHYS:
-		ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
-		break;
-	case EHCA_MR_PGI_USER:
-		ret = PAGE_SIZE >= pginfo->hwpage_size ?
-			ehca_set_pagebuf_user1(pginfo, number, kpage) :
-			ehca_set_pagebuf_user2(pginfo, number, kpage);
-		break;
-	case EHCA_MR_PGI_FMR:
-		ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
-		break;
-	default:
-		ehca_gen_err("bad pginfo->type=%x", pginfo->type);
-		ret = -EFAULT;
-		break;
-	}
-	return ret;
-} /* end ehca_set_pagebuf() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * check MR if it is a max-MR, i.e. uses whole memory
- * in case it's a max-MR 1 is returned, else 0
- */
-int ehca_mr_is_maxmr(u64 size,
-		     u64 *iova_start)
-{
-	/* a MR is treated as max-MR only if it fits following: */
-	if ((size == ehca_mr_len) &&
-	    (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
-		ehca_gen_dbg("this is a max-MR");
-		return 1;
-	} else
-		return 0;
-} /* end ehca_mr_is_maxmr() */
-
-/*----------------------------------------------------------------------*/
-
-/* map access control for MR/MW. This routine is used for MR and MW. */
-void ehca_mrmw_map_acl(int ib_acl,
-		       u32 *hipz_acl)
-{
-	*hipz_acl = 0;
-	if (ib_acl & IB_ACCESS_REMOTE_READ)
-		*hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
-	if (ib_acl & IB_ACCESS_REMOTE_WRITE)
-		*hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
-	if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
-		*hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
-	if (ib_acl & IB_ACCESS_LOCAL_WRITE)
-		*hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
-	if (ib_acl & IB_ACCESS_MW_BIND)
-		*hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
-} /* end ehca_mrmw_map_acl() */
-
-/*----------------------------------------------------------------------*/
-
-/* sets page size in hipz access control for MR/MW. */
-void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
-{
-	*hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
-} /* end ehca_mrmw_set_pgsize_hipz_acl() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * reverse map access control for MR/MW.
- * This routine is used for MR and MW.
- */
-void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
-			       int *ib_acl) /*OUT*/
-{
-	*ib_acl = 0;
-	if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
-		*ib_acl |= IB_ACCESS_REMOTE_READ;
-	if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
-		*ib_acl |= IB_ACCESS_REMOTE_WRITE;
-	if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
-		*ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
-	if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
-		*ib_acl |= IB_ACCESS_LOCAL_WRITE;
-	if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
-		*ib_acl |= IB_ACCESS_MW_BIND;
-} /* end ehca_mrmw_reverse_map_acl() */
-
-
-/*----------------------------------------------------------------------*/
-
-/*
- * MR destructor and constructor
- * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
- * except struct ib_mr and spinlock
- */
-void ehca_mr_deletenew(struct ehca_mr *mr)
-{
-	mr->flags = 0;
-	mr->num_kpages = 0;
-	mr->num_hwpages = 0;
-	mr->acl = 0;
-	mr->start = NULL;
-	mr->fmr_page_size = 0;
-	mr->fmr_max_pages = 0;
-	mr->fmr_max_maps = 0;
-	mr->fmr_map_cnt = 0;
-	memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
-	memset(&mr->galpas, 0, sizeof(mr->galpas));
-} /* end ehca_mr_deletenew() */
-
-int ehca_init_mrmw_cache(void)
-{
-	mr_cache = kmem_cache_create("ehca_cache_mr",
-				     sizeof(struct ehca_mr), 0,
-				     SLAB_HWCACHE_ALIGN,
-				     NULL);
-	if (!mr_cache)
-		return -ENOMEM;
-	mw_cache = kmem_cache_create("ehca_cache_mw",
-				     sizeof(struct ehca_mw), 0,
-				     SLAB_HWCACHE_ALIGN,
-				     NULL);
-	if (!mw_cache) {
-		kmem_cache_destroy(mr_cache);
-		mr_cache = NULL;
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-void ehca_cleanup_mrmw_cache(void)
-{
-	kmem_cache_destroy(mr_cache);
-	kmem_cache_destroy(mw_cache);
-}
-
-static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
-				     int dir)
-{
-	if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
-		ehca_top_bmap->dir[dir] =
-			kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
-		if (!ehca_top_bmap->dir[dir])
-			return -ENOMEM;
-		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-		memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
-	}
-	return 0;
-}
-
-static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
-{
-	if (!ehca_bmap_valid(ehca_bmap->top[top])) {
-		ehca_bmap->top[top] =
-			kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
-		if (!ehca_bmap->top[top])
-			return -ENOMEM;
-		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-		memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
-	}
-	return ehca_init_top_bmap(ehca_bmap->top[top], dir);
-}
-
-static inline int ehca_calc_index(unsigned long i, unsigned long s)
-{
-	return (i >> s) & EHCA_INDEX_MASK;
-}
-
-void ehca_destroy_busmap(void)
-{
-	int top, dir;
-
-	if (!ehca_bmap)
-		return;
-
-	for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
-		if (!ehca_bmap_valid(ehca_bmap->top[top]))
-			continue;
-		for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
-			if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-				continue;
-
-			kfree(ehca_bmap->top[top]->dir[dir]);
-		}
-
-		kfree(ehca_bmap->top[top]);
-	}
-
-	kfree(ehca_bmap);
-	ehca_bmap = NULL;
-}
-
-static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
-{
-	unsigned long i, start_section, end_section;
-	int top, dir, idx;
-
-	if (!nr_pages)
-		return 0;
-
-	if (!ehca_bmap) {
-		ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
-		if (!ehca_bmap)
-			return -ENOMEM;
-		/* Set map block to 0xFF according to EHCA_INVAL_ADDR */
-		memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
-	}
-
-	start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE;
-	end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
-	for (i = start_section; i < end_section; i++) {
-		int ret;
-		top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
-		dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
-		idx = i & EHCA_INDEX_MASK;
-
-		ret = ehca_init_bmap(ehca_bmap, top, dir);
-		if (ret) {
-			ehca_destroy_busmap();
-			return ret;
-		}
-		ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
-		ehca_mr_len += EHCA_SECTSIZE;
-	}
-	return 0;
-}
-
-static int ehca_is_hugepage(unsigned long pfn)
-{
-	int page_order;
-
-	if (pfn & EHCA_HUGEPAGE_PFN_MASK)
-		return 0;
-
-	page_order = compound_order(pfn_to_page(pfn));
-	if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
-		return 0;
-
-	return 1;
-}
-
-static int ehca_create_busmap_callback(unsigned long initial_pfn,
-				       unsigned long total_nr_pages, void *arg)
-{
-	int ret;
-	unsigned long pfn, start_pfn, end_pfn, nr_pages;
-
-	if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
-		return ehca_update_busmap(initial_pfn, total_nr_pages);
-
-	/* Given chunk is >= 16GB -> check for hugepages */
-	start_pfn = initial_pfn;
-	end_pfn = initial_pfn + total_nr_pages;
-	pfn = start_pfn;
-
-	while (pfn < end_pfn) {
-		if (ehca_is_hugepage(pfn)) {
-			/* Add mem found in front of the hugepage */
-			nr_pages = pfn - start_pfn;
-			ret = ehca_update_busmap(start_pfn, nr_pages);
-			if (ret)
-				return ret;
-			/* Skip the hugepage */
-			pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
-			start_pfn = pfn;
-		} else
-			pfn += (EHCA_SECTSIZE / PAGE_SIZE);
-	}
-
-	/* Add mem found behind the hugepage(s)  */
-	nr_pages = pfn - start_pfn;
-	return ehca_update_busmap(start_pfn, nr_pages);
-}
-
-int ehca_create_busmap(void)
-{
-	int ret;
-
-	ehca_mr_len = 0;
-	ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
-				   ehca_create_busmap_callback);
-	return ret;
-}
-
-static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
-				   struct ehca_mr *e_mr,
-				   struct ehca_mr_pginfo *pginfo)
-{
-	int top;
-	u64 hret, *kpage;
-
-	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!kpage) {
-		ehca_err(&shca->ib_device, "kpage alloc failed");
-		return -ENOMEM;
-	}
-	for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
-		if (!ehca_bmap_valid(ehca_bmap->top[top]))
-			continue;
-		hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
-		if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
-			break;
-	}
-
-	ehca_free_fw_ctrlblock(kpage);
-
-	if (hret == H_SUCCESS)
-		return 0; /* Everything is fine */
-	else {
-		ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
-				 "h_ret=%lli e_mr=%p top=%x lkey=%x "
-				 "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
-				 e_mr->ib.ib_mr.lkey,
-				 shca->ipz_hca_handle.handle,
-				 e_mr->ipz_mr_handle.handle);
-		return ehca2ib_return_code(hret);
-	}
-}
-
-static u64 ehca_map_vaddr(void *caddr)
-{
-	int top, dir, idx;
-	unsigned long abs_addr, offset;
-	u64 entry;
-
-	if (!ehca_bmap)
-		return EHCA_INVAL_ADDR;
-
-	abs_addr = __pa(caddr);
-	top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
-	if (!ehca_bmap_valid(ehca_bmap->top[top]))
-		return EHCA_INVAL_ADDR;
-
-	dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
-	if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
-		return EHCA_INVAL_ADDR;
-
-	idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
-
-	entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
-	if (ehca_bmap_valid(entry)) {
-		offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
-		return entry | offset;
-	} else
-		return EHCA_INVAL_ADDR;
-}
-
-static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-	return dma_addr == EHCA_INVAL_ADDR;
-}
-
-static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
-			       size_t size, enum dma_data_direction direction)
-{
-	if (cpu_addr)
-		return ehca_map_vaddr(cpu_addr);
-	else
-		return EHCA_INVAL_ADDR;
-}
-
-static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
-				  enum dma_data_direction direction)
-{
-	/* This is only a stub; nothing to be done here */
-}
-
-static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
-			     unsigned long offset, size_t size,
-			     enum dma_data_direction direction)
-{
-	u64 addr;
-
-	if (offset + size > PAGE_SIZE)
-		return EHCA_INVAL_ADDR;
-
-	addr = ehca_map_vaddr(page_address(page));
-	if (!ehca_dma_mapping_error(dev, addr))
-		addr += offset;
-
-	return addr;
-}
-
-static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
-				enum dma_data_direction direction)
-{
-	/* This is only a stub; nothing to be done here */
-}
-
-static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-			   int nents, enum dma_data_direction direction)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i) {
-		u64 addr;
-		addr = ehca_map_vaddr(sg_virt(sg));
-		if (ehca_dma_mapping_error(dev, addr))
-			return 0;
-
-		sg->dma_address = addr;
-		sg->dma_length = sg->length;
-	}
-	return nents;
-}
-
-static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
-			      int nents, enum dma_data_direction direction)
-{
-	/* This is only a stub; nothing to be done here */
-}
-
-static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
-					 size_t size,
-					 enum dma_data_direction dir)
-{
-	dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
-}
-
-static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
-					    size_t size,
-					    enum dma_data_direction dir)
-{
-	dma_sync_single_for_device(dev->dma_device, addr, size, dir);
-}
-
-static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
-				     u64 *dma_handle, gfp_t flag)
-{
-	struct page *p;
-	void *addr = NULL;
-	u64 dma_addr;
-
-	p = alloc_pages(flag, get_order(size));
-	if (p) {
-		addr = page_address(p);
-		dma_addr = ehca_map_vaddr(addr);
-		if (ehca_dma_mapping_error(dev, dma_addr)) {
-			free_pages((unsigned long)addr,	get_order(size));
-			return NULL;
-		}
-		if (dma_handle)
-			*dma_handle = dma_addr;
-		return addr;
-	}
-	return NULL;
-}
-
-static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
-				   void *cpu_addr, u64 dma_handle)
-{
-	if (cpu_addr && size)
-		free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-
-struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
-	.mapping_error          = ehca_dma_mapping_error,
-	.map_single             = ehca_dma_map_single,
-	.unmap_single           = ehca_dma_unmap_single,
-	.map_page               = ehca_dma_map_page,
-	.unmap_page             = ehca_dma_unmap_page,
-	.map_sg                 = ehca_dma_map_sg,
-	.unmap_sg               = ehca_dma_unmap_sg,
-	.sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
-	.sync_single_for_device = ehca_dma_sync_single_for_device,
-	.alloc_coherent         = ehca_dma_alloc_coherent,
-	.free_coherent          = ehca_dma_free_coherent,
-};
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h
deleted file mode 100644
index 52bfa95697f7..000000000000
--- a/drivers/staging/rdma/ehca/ehca_mrmw.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  MR/MW declarations and inline functions
- *
- *  Authors: Dietmar Decker <ddecker@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _EHCA_MRMW_H_
-#define _EHCA_MRMW_H_
-
-enum ehca_reg_type {
-	EHCA_REG_MR,
-	EHCA_REG_BUSMAP_MR
-};
-
-int ehca_reg_mr(struct ehca_shca *shca,
-		struct ehca_mr *e_mr,
-		u64 *iova_start,
-		u64 size,
-		int acl,
-		struct ehca_pd *e_pd,
-		struct ehca_mr_pginfo *pginfo,
-		u32 *lkey,
-		u32 *rkey,
-		enum ehca_reg_type reg_type);
-
-int ehca_reg_mr_rpages(struct ehca_shca *shca,
-		       struct ehca_mr *e_mr,
-		       struct ehca_mr_pginfo *pginfo);
-
-int ehca_rereg_mr(struct ehca_shca *shca,
-		  struct ehca_mr *e_mr,
-		  u64 *iova_start,
-		  u64 size,
-		  int mr_access_flags,
-		  struct ehca_pd *e_pd,
-		  struct ehca_mr_pginfo *pginfo,
-		  u32 *lkey,
-		  u32 *rkey);
-
-int ehca_unmap_one_fmr(struct ehca_shca *shca,
-		       struct ehca_mr *e_fmr);
-
-int ehca_reg_smr(struct ehca_shca *shca,
-		 struct ehca_mr *e_origmr,
-		 struct ehca_mr *e_newmr,
-		 u64 *iova_start,
-		 int acl,
-		 struct ehca_pd *e_pd,
-		 u32 *lkey,
-		 u32 *rkey);
-
-int ehca_reg_internal_maxmr(struct ehca_shca *shca,
-			    struct ehca_pd *e_pd,
-			    struct ehca_mr **maxmr);
-
-int ehca_reg_maxmr(struct ehca_shca *shca,
-		   struct ehca_mr *e_newmr,
-		   u64 *iova_start,
-		   int acl,
-		   struct ehca_pd *e_pd,
-		   u32 *lkey,
-		   u32 *rkey);
-
-int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
-
-int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
-			     u64 *page_list,
-			     int list_len);
-
-int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
-		     u32 number,
-		     u64 *kpage);
-
-int ehca_mr_is_maxmr(u64 size,
-		     u64 *iova_start);
-
-void ehca_mrmw_map_acl(int ib_acl,
-		       u32 *hipz_acl);
-
-void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl);
-
-void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
-			       int *ib_acl);
-
-void ehca_mr_deletenew(struct ehca_mr *mr);
-
-int ehca_create_busmap(void);
-
-void ehca_destroy_busmap(void);
-
-extern struct ib_dma_mapping_ops ehca_dma_mapping_ops;
-#endif  /*_EHCA_MRMW_H_*/
diff --git a/drivers/staging/rdma/ehca/ehca_pd.c b/drivers/staging/rdma/ehca/ehca_pd.c
deleted file mode 100644
index 2a8aae411941..000000000000
--- a/drivers/staging/rdma/ehca/ehca_pd.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  PD functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-
-static struct kmem_cache *pd_cache;
-
-struct ib_pd *ehca_alloc_pd(struct ib_device *device,
-			    struct ib_ucontext *context, struct ib_udata *udata)
-{
-	struct ehca_pd *pd;
-	int i;
-
-	pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
-	if (!pd) {
-		ehca_err(device, "device=%p context=%p out of memory",
-			 device, context);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	for (i = 0; i < 2; i++) {
-		INIT_LIST_HEAD(&pd->free[i]);
-		INIT_LIST_HEAD(&pd->full[i]);
-	}
-	mutex_init(&pd->lock);
-
-	/*
-	 * Kernel PD: when device = -1, 0
-	 * User   PD: when context != -1
-	 */
-	if (!context) {
-		/*
-		 * Kernel PDs after init reuses always
-		 * the one created in ehca_shca_reopen()
-		 */
-		struct ehca_shca *shca = container_of(device, struct ehca_shca,
-						      ib_device);
-		pd->fw_pd.value = shca->pd->fw_pd.value;
-	} else
-		pd->fw_pd.value = (u64)pd;
-
-	return &pd->ib_pd;
-}
-
-int ehca_dealloc_pd(struct ib_pd *pd)
-{
-	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
-	int i, leftovers = 0;
-	struct ipz_small_queue_page *page, *tmp;
-
-	for (i = 0; i < 2; i++) {
-		list_splice(&my_pd->full[i], &my_pd->free[i]);
-		list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
-			leftovers = 1;
-			free_page(page->page);
-			kmem_cache_free(small_qp_cache, page);
-		}
-	}
-
-	if (leftovers)
-		ehca_warn(pd->device,
-			  "Some small queue pages were not freed");
-
-	kmem_cache_free(pd_cache, my_pd);
-
-	return 0;
-}
-
-int ehca_init_pd_cache(void)
-{
-	pd_cache = kmem_cache_create("ehca_cache_pd",
-				     sizeof(struct ehca_pd), 0,
-				     SLAB_HWCACHE_ALIGN,
-				     NULL);
-	if (!pd_cache)
-		return -ENOMEM;
-	return 0;
-}
-
-void ehca_cleanup_pd_cache(void)
-{
-	kmem_cache_destroy(pd_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_qes.h b/drivers/staging/rdma/ehca/ehca_qes.h
deleted file mode 100644
index 90c4efa67586..000000000000
--- a/drivers/staging/rdma/ehca/ehca_qes.h
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Hardware request structures
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef _EHCA_QES_H_
-#define _EHCA_QES_H_
-
-#include "ehca_tools.h"
-
-/* virtual scatter gather entry to specify remote addresses with length */
-struct ehca_vsgentry {
-	u64 vaddr;
-	u32 lkey;
-	u32 length;
-};
-
-#define GRH_FLAG_MASK        EHCA_BMASK_IBM( 7,  7)
-#define GRH_IPVERSION_MASK   EHCA_BMASK_IBM( 0,  3)
-#define GRH_TCLASS_MASK      EHCA_BMASK_IBM( 4, 12)
-#define GRH_FLOWLABEL_MASK   EHCA_BMASK_IBM(13, 31)
-#define GRH_PAYLEN_MASK      EHCA_BMASK_IBM(32, 47)
-#define GRH_NEXTHEADER_MASK  EHCA_BMASK_IBM(48, 55)
-#define GRH_HOPLIMIT_MASK    EHCA_BMASK_IBM(56, 63)
-
-/*
- * Unreliable Datagram Address Vector Format
- * see IBTA Vol1 chapter 8.3 Global Routing Header
- */
-struct ehca_ud_av {
-	u8 sl;
-	u8 lnh;
-	u16 dlid;
-	u8 reserved1;
-	u8 reserved2;
-	u8 reserved3;
-	u8 slid_path_bits;
-	u8 reserved4;
-	u8 ipd;
-	u8 reserved5;
-	u8 pmtu;
-	u32 reserved6;
-	u64 reserved7;
-	union {
-		struct {
-			u64 word_0; /* always set to 6  */
-			/*should be 0x1B for IB transport */
-			u64 word_1;
-			u64 word_2;
-			u64 word_3;
-			u64 word_4;
-		} grh;
-		struct {
-			u32 wd_0;
-			u32 wd_1;
-			/* DWord_1 --> SGID */
-
-			u32 sgid_wd3;
-			u32 sgid_wd2;
-
-			u32 sgid_wd1;
-			u32 sgid_wd0;
-			/* DWord_3 --> DGID */
-
-			u32 dgid_wd3;
-			u32 dgid_wd2;
-
-			u32 dgid_wd1;
-			u32 dgid_wd0;
-		} grh_l;
-	};
-};
-
-/* maximum number of sg entries allowed in a WQE */
-#define MAX_WQE_SG_ENTRIES 252
-
-#define WQE_OPTYPE_SEND             0x80
-#define WQE_OPTYPE_RDMAREAD         0x40
-#define WQE_OPTYPE_RDMAWRITE        0x20
-#define WQE_OPTYPE_CMPSWAP          0x10
-#define WQE_OPTYPE_FETCHADD         0x08
-#define WQE_OPTYPE_BIND             0x04
-
-#define WQE_WRFLAG_REQ_SIGNAL_COM   0x80
-#define WQE_WRFLAG_FENCE            0x40
-#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
-#define WQE_WRFLAG_SOLIC_EVENT      0x10
-
-#define WQEF_CACHE_HINT             0x80
-#define WQEF_CACHE_HINT_RD_WR       0x40
-#define WQEF_TIMED_WQE              0x20
-#define WQEF_PURGE                  0x08
-#define WQEF_HIGH_NIBBLE            0xF0
-
-#define MW_BIND_ACCESSCTRL_R_WRITE   0x40
-#define MW_BIND_ACCESSCTRL_R_READ    0x20
-#define MW_BIND_ACCESSCTRL_R_ATOMIC  0x10
-
-struct ehca_wqe {
-	u64 work_request_id;
-	u8 optype;
-	u8 wr_flag;
-	u16 pkeyi;
-	u8 wqef;
-	u8 nr_of_data_seg;
-	u16 wqe_provided_slid;
-	u32 destination_qp_number;
-	u32 resync_psn_sqp;
-	u32 local_ee_context_qkey;
-	u32 immediate_data;
-	union {
-		struct {
-			u64 remote_virtual_address;
-			u32 rkey;
-			u32 reserved;
-			u64 atomic_1st_op_dma_len;
-			u64 atomic_2nd_op;
-			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-
-		} nud;
-		struct {
-			u64 ehca_ud_av_ptr;
-			u64 reserved1;
-			u64 reserved2;
-			u64 reserved3;
-			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-		} ud_avp;
-		struct {
-			struct ehca_ud_av ud_av;
-			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
-						     2];
-		} ud_av;
-		struct {
-			u64 reserved0;
-			u64 reserved1;
-			u64 reserved2;
-			u64 reserved3;
-			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
-		} all_rcv;
-
-		struct {
-			u64 reserved;
-			u32 rkey;
-			u32 old_rkey;
-			u64 reserved1;
-			u64 reserved2;
-			u64 virtual_address;
-			u32 reserved3;
-			u32 length;
-			u32 reserved4;
-			u16 reserved5;
-			u8 reserved6;
-			u8 lr_ctl;
-			u32 lkey;
-			u32 reserved7;
-			u64 reserved8;
-			u64 reserved9;
-			u64 reserved10;
-			u64 reserved11;
-		} bind;
-		struct {
-			u64 reserved12;
-			u64 reserved13;
-			u32 size;
-			u32 start;
-		} inline_data;
-	} u;
-
-};
-
-#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0)
-#define WC_IMM_DATA     EHCA_BMASK_IBM(1, 1)
-#define WC_GRH_PRESENT  EHCA_BMASK_IBM(2, 2)
-#define WC_SE_BIT       EHCA_BMASK_IBM(3, 3)
-#define WC_STATUS_ERROR_BIT 0x80000000
-#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
-#define WC_STATUS_PURGE_BIT 0x10
-#define WC_SEND_RECEIVE_BIT 0x80
-
-struct ehca_cqe {
-	u64 work_request_id;
-	u8 optype;
-	u8 w_completion_flags;
-	u16 reserved1;
-	u32 nr_bytes_transferred;
-	u32 immediate_data;
-	u32 local_qp_number;
-	u8 freed_resource_count;
-	u8 service_level;
-	u16 wqe_count;
-	u32 qp_token;
-	u32 qkey_ee_token;
-	u32 remote_qp_number;
-	u16 dlid;
-	u16 rlid;
-	u16 reserved2;
-	u16 pkey_index;
-	u32 cqe_timestamp;
-	u32 wqe_timestamp;
-	u8 wqe_timestamp_valid;
-	u8 reserved3;
-	u8 reserved4;
-	u8 cqe_flags;
-	u32 status;
-};
-
-struct ehca_eqe {
-	u64 entry;
-};
-
-struct ehca_mrte {
-	u64 starting_va;
-	u64 length; /* length of memory region in bytes*/
-	u32 pd;
-	u8 key_instance;
-	u8 pagesize;
-	u8 mr_control;
-	u8 local_remote_access_ctrl;
-	u8 reserved[0x20 - 0x18];
-	u64 at_pointer[4];
-};
-#endif /*_EHCA_QES_H_*/
diff --git a/drivers/staging/rdma/ehca/ehca_qp.c b/drivers/staging/rdma/ehca/ehca_qp.c
deleted file mode 100644
index 896c01f810f6..000000000000
--- a/drivers/staging/rdma/ehca/ehca_qp.c
+++ /dev/null
@@ -1,2256 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  QP functions
- *
- *  Authors: Joachim Fenkes <fenkes@de.ibm.com>
- *           Stefan Roscher <stefan.roscher@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-
-static struct kmem_cache *qp_cache;
-
-/*
- * attributes not supported by query qp
- */
-#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS       | \
-				     IB_QP_EN_SQD_ASYNC_NOTIFY)
-
-/*
- * ehca (internal) qp state values
- */
-enum ehca_qp_state {
-	EHCA_QPS_RESET = 1,
-	EHCA_QPS_INIT = 2,
-	EHCA_QPS_RTR = 3,
-	EHCA_QPS_RTS = 5,
-	EHCA_QPS_SQD = 6,
-	EHCA_QPS_SQE = 8,
-	EHCA_QPS_ERR = 128
-};
-
-/*
- * qp state transitions as defined by IB Arch Rel 1.1 page 431
- */
-enum ib_qp_statetrans {
-	IB_QPST_ANY2RESET,
-	IB_QPST_ANY2ERR,
-	IB_QPST_RESET2INIT,
-	IB_QPST_INIT2RTR,
-	IB_QPST_INIT2INIT,
-	IB_QPST_RTR2RTS,
-	IB_QPST_RTS2SQD,
-	IB_QPST_RTS2RTS,
-	IB_QPST_SQD2RTS,
-	IB_QPST_SQE2RTS,
-	IB_QPST_SQD2SQD,
-	IB_QPST_MAX	/* nr of transitions, this must be last!!! */
-};
-
-/*
- * ib2ehca_qp_state maps IB to ehca qp_state
- * returns ehca qp state corresponding to given ib qp state
- */
-static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state)
-{
-	switch (ib_qp_state) {
-	case IB_QPS_RESET:
-		return EHCA_QPS_RESET;
-	case IB_QPS_INIT:
-		return EHCA_QPS_INIT;
-	case IB_QPS_RTR:
-		return EHCA_QPS_RTR;
-	case IB_QPS_RTS:
-		return EHCA_QPS_RTS;
-	case IB_QPS_SQD:
-		return EHCA_QPS_SQD;
-	case IB_QPS_SQE:
-		return EHCA_QPS_SQE;
-	case IB_QPS_ERR:
-		return EHCA_QPS_ERR;
-	default:
-		ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state);
-		return -EINVAL;
-	}
-}
-
-/*
- * ehca2ib_qp_state maps ehca to IB qp_state
- * returns ib qp state corresponding to given ehca qp state
- */
-static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state
-						ehca_qp_state)
-{
-	switch (ehca_qp_state) {
-	case EHCA_QPS_RESET:
-		return IB_QPS_RESET;
-	case EHCA_QPS_INIT:
-		return IB_QPS_INIT;
-	case EHCA_QPS_RTR:
-		return IB_QPS_RTR;
-	case EHCA_QPS_RTS:
-		return IB_QPS_RTS;
-	case EHCA_QPS_SQD:
-		return IB_QPS_SQD;
-	case EHCA_QPS_SQE:
-		return IB_QPS_SQE;
-	case EHCA_QPS_ERR:
-		return IB_QPS_ERR;
-	default:
-		ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state);
-		return -EINVAL;
-	}
-}
-
-/*
- * ehca_qp_type used as index for req_attr and opt_attr of
- * struct ehca_modqp_statetrans
- */
-enum ehca_qp_type {
-	QPT_RC = 0,
-	QPT_UC = 1,
-	QPT_UD = 2,
-	QPT_SQP = 3,
-	QPT_MAX
-};
-
-/*
- * ib2ehcaqptype maps Ib to ehca qp_type
- * returns ehca qp type corresponding to ib qp type
- */
-static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype)
-{
-	switch (ibqptype) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		return QPT_SQP;
-	case IB_QPT_RC:
-		return QPT_RC;
-	case IB_QPT_UC:
-		return QPT_UC;
-	case IB_QPT_UD:
-		return QPT_UD;
-	default:
-		ehca_gen_err("Invalid ibqptype=%x", ibqptype);
-		return -EINVAL;
-	}
-}
-
-static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
-							 int ib_tostate)
-{
-	int index = -EINVAL;
-	switch (ib_tostate) {
-	case IB_QPS_RESET:
-		index = IB_QPST_ANY2RESET;
-		break;
-	case IB_QPS_INIT:
-		switch (ib_fromstate) {
-		case IB_QPS_RESET:
-			index = IB_QPST_RESET2INIT;
-			break;
-		case IB_QPS_INIT:
-			index = IB_QPST_INIT2INIT;
-			break;
-		}
-		break;
-	case IB_QPS_RTR:
-		if (ib_fromstate == IB_QPS_INIT)
-			index = IB_QPST_INIT2RTR;
-		break;
-	case IB_QPS_RTS:
-		switch (ib_fromstate) {
-		case IB_QPS_RTR:
-			index = IB_QPST_RTR2RTS;
-			break;
-		case IB_QPS_RTS:
-			index = IB_QPST_RTS2RTS;
-			break;
-		case IB_QPS_SQD:
-			index = IB_QPST_SQD2RTS;
-			break;
-		case IB_QPS_SQE:
-			index = IB_QPST_SQE2RTS;
-			break;
-		}
-		break;
-	case IB_QPS_SQD:
-		if (ib_fromstate == IB_QPS_RTS)
-			index = IB_QPST_RTS2SQD;
-		break;
-	case IB_QPS_SQE:
-		break;
-	case IB_QPS_ERR:
-		index = IB_QPST_ANY2ERR;
-		break;
-	default:
-		break;
-	}
-	return index;
-}
-
-/*
- * ibqptype2servicetype returns hcp service type corresponding to given
- * ib qp type used by create_qp()
- */
-static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
-{
-	switch (ibqptype) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		return ST_UD;
-	case IB_QPT_RC:
-		return ST_RC;
-	case IB_QPT_UC:
-		return ST_UC;
-	case IB_QPT_UD:
-		return ST_UD;
-	case IB_QPT_RAW_IPV6:
-		return -EINVAL;
-	case IB_QPT_RAW_ETHERTYPE:
-		return -EINVAL;
-	default:
-		ehca_gen_err("Invalid ibqptype=%x", ibqptype);
-		return -EINVAL;
-	}
-}
-
-/*
- * init userspace queue info from ipz_queue data
- */
-static inline void queue2resp(struct ipzu_queue_resp *resp,
-			      struct ipz_queue *queue)
-{
-	resp->qe_size = queue->qe_size;
-	resp->act_nr_of_sg = queue->act_nr_of_sg;
-	resp->queue_length = queue->queue_length;
-	resp->pagesize = queue->pagesize;
-	resp->toggle_state = queue->toggle_state;
-	resp->offset = queue->offset;
-}
-
-/*
- * init_qp_queue initializes/constructs r/squeue and registers queue pages.
- */
-static inline int init_qp_queue(struct ehca_shca *shca,
-				struct ehca_pd *pd,
-				struct ehca_qp *my_qp,
-				struct ipz_queue *queue,
-				int q_type,
-				u64 expected_hret,
-				struct ehca_alloc_queue_parms *parms,
-				int wqe_size)
-{
-	int ret, cnt, ipz_rc, nr_q_pages;
-	void *vpage;
-	u64 rpage, h_ret;
-	struct ib_device *ib_dev = &shca->ib_device;
-	struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
-
-	if (!parms->queue_size)
-		return 0;
-
-	if (parms->is_small) {
-		nr_q_pages = 1;
-		ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
-					128 << parms->page_size,
-					wqe_size, parms->act_nr_sges, 1);
-	} else {
-		nr_q_pages = parms->queue_size;
-		ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
-					EHCA_PAGESIZE, wqe_size,
-					parms->act_nr_sges, 0);
-	}
-
-	if (!ipz_rc) {
-		ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i",
-			 ipz_rc);
-		return -EBUSY;
-	}
-
-	/* register queue pages */
-	for (cnt = 0; cnt < nr_q_pages; cnt++) {
-		vpage = ipz_qpageit_get_inc(queue);
-		if (!vpage) {
-			ehca_err(ib_dev, "ipz_qpageit_get_inc() "
-				 "failed p_vpage= %p", vpage);
-			ret = -EINVAL;
-			goto init_qp_queue1;
-		}
-		rpage = __pa(vpage);
-
-		h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
-						 my_qp->ipz_qp_handle,
-						 NULL, 0, q_type,
-						 rpage, parms->is_small ? 0 : 1,
-						 my_qp->galpas.kernel);
-		if (cnt == (nr_q_pages - 1)) {	/* last page! */
-			if (h_ret != expected_hret) {
-				ehca_err(ib_dev, "hipz_qp_register_rpage() "
-					 "h_ret=%lli", h_ret);
-				ret = ehca2ib_return_code(h_ret);
-				goto init_qp_queue1;
-			}
-			vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
-			if (vpage) {
-				ehca_err(ib_dev, "ipz_qpageit_get_inc() "
-					 "should not succeed vpage=%p", vpage);
-				ret = -EINVAL;
-				goto init_qp_queue1;
-			}
-		} else {
-			if (h_ret != H_PAGE_REGISTERED) {
-				ehca_err(ib_dev, "hipz_qp_register_rpage() "
-					 "h_ret=%lli", h_ret);
-				ret = ehca2ib_return_code(h_ret);
-				goto init_qp_queue1;
-			}
-		}
-	}
-
-	ipz_qeit_reset(queue);
-
-	return 0;
-
-init_qp_queue1:
-	ipz_queue_dtor(pd, queue);
-	return ret;
-}
-
-static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
-{
-	if (is_llqp)
-		return 128 << act_nr_sge;
-	else
-		return offsetof(struct ehca_wqe,
-				u.nud.sg_list[act_nr_sge]);
-}
-
-static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
-				       int req_nr_sge, int is_llqp)
-{
-	u32 wqe_size, q_size;
-	int act_nr_sge = req_nr_sge;
-
-	if (!is_llqp)
-		/* round up #SGEs so WQE size is a power of 2 */
-		for (act_nr_sge = 4; act_nr_sge <= 252;
-		     act_nr_sge = 4 + 2 * act_nr_sge)
-			if (act_nr_sge >= req_nr_sge)
-				break;
-
-	wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
-	q_size = wqe_size * (queue->max_wr + 1);
-
-	if (q_size <= 512)
-		queue->page_size = 2;
-	else if (q_size <= 1024)
-		queue->page_size = 3;
-	else
-		queue->page_size = 0;
-
-	queue->is_small = (queue->page_size != 0);
-}
-
-/* needs to be called with cq->spinlock held */
-void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
-{
-	struct list_head *list, *node;
-
-	/* TODO: support low latency QPs */
-	if (qp->ext_type == EQPT_LLQP)
-		return;
-
-	if (on_sq) {
-		list = &qp->send_cq->sqp_err_list;
-		node = &qp->sq_err_node;
-	} else {
-		list = &qp->recv_cq->rqp_err_list;
-		node = &qp->rq_err_node;
-	}
-
-	if (list_empty(node))
-		list_add_tail(node, list);
-
-	return;
-}
-
-static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cq->spinlock, flags);
-
-	if (!list_empty(node))
-		list_del_init(node);
-
-	spin_unlock_irqrestore(&cq->spinlock, flags);
-}
-
-static void reset_queue_map(struct ehca_queue_map *qmap)
-{
-	int i;
-
-	qmap->tail = qmap->entries - 1;
-	qmap->left_to_poll = 0;
-	qmap->next_wqe_idx = 0;
-	for (i = 0; i < qmap->entries; i++) {
-		qmap->map[i].reported = 1;
-		qmap->map[i].cqe_req = 0;
-	}
-}
-
-/*
- * Create an ib_qp struct that is either a QP or an SRQ, depending on
- * the value of the is_srq parameter. If init_attr and srq_init_attr share
- * fields, the field out of init_attr is used.
- */
-static struct ehca_qp *internal_create_qp(
-	struct ib_pd *pd,
-	struct ib_qp_init_attr *init_attr,
-	struct ib_srq_init_attr *srq_init_attr,
-	struct ib_udata *udata, int is_srq)
-{
-	struct ehca_qp *my_qp, *my_srq = NULL;
-	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
-	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-					      ib_device);
-	struct ib_ucontext *context = NULL;
-	u64 h_ret;
-	int is_llqp = 0, has_srq = 0, is_user = 0;
-	int qp_type, max_send_sge, max_recv_sge, ret;
-
-	/* h_call's out parameters */
-	struct ehca_alloc_qp_parms parms;
-	u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
-	unsigned long flags;
-
-	if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) {
-		ehca_err(pd->device, "Unable to create QP, max number of %i "
-			 "QPs reached.", shca->max_num_qps);
-		ehca_err(pd->device, "To increase the maximum number of QPs "
-			 "use the number_of_qps module parameter.\n");
-		return ERR_PTR(-ENOSPC);
-	}
-
-	if (init_attr->create_flags) {
-		atomic_dec(&shca->num_qps);
-		return ERR_PTR(-EINVAL);
-	}
-
-	memset(&parms, 0, sizeof(parms));
-	qp_type = init_attr->qp_type;
-
-	if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
-		init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
-		ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
-			 init_attr->sq_sig_type);
-		atomic_dec(&shca->num_qps);
-		return ERR_PTR(-EINVAL);
-	}
-
-	/* save LLQP info */
-	if (qp_type & 0x80) {
-		is_llqp = 1;
-		parms.ext_type = EQPT_LLQP;
-		parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
-	}
-	qp_type &= 0x1F;
-	init_attr->qp_type &= 0x1F;
-
-	/* handle SRQ base QPs */
-	if (init_attr->srq) {
-		my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
-
-		if (qp_type == IB_QPT_UC) {
-			ehca_err(pd->device, "UC with SRQ not supported");
-			atomic_dec(&shca->num_qps);
-			return ERR_PTR(-EINVAL);
-		}
-
-		has_srq = 1;
-		parms.ext_type = EQPT_SRQBASE;
-		parms.srq_qpn = my_srq->real_qp_num;
-	}
-
-	if (is_llqp && has_srq) {
-		ehca_err(pd->device, "LLQPs can't have an SRQ");
-		atomic_dec(&shca->num_qps);
-		return ERR_PTR(-EINVAL);
-	}
-
-	/* handle SRQs */
-	if (is_srq) {
-		parms.ext_type = EQPT_SRQ;
-		parms.srq_limit = srq_init_attr->attr.srq_limit;
-		if (init_attr->cap.max_recv_sge > 3) {
-			ehca_err(pd->device, "no more than three SGEs "
-				 "supported for SRQ  pd=%p  max_sge=%x",
-				 pd, init_attr->cap.max_recv_sge);
-			atomic_dec(&shca->num_qps);
-			return ERR_PTR(-EINVAL);
-		}
-	}
-
-	/* check QP type */
-	if (qp_type != IB_QPT_UD &&
-	    qp_type != IB_QPT_UC &&
-	    qp_type != IB_QPT_RC &&
-	    qp_type != IB_QPT_SMI &&
-	    qp_type != IB_QPT_GSI) {
-		ehca_err(pd->device, "wrong QP Type=%x", qp_type);
-		atomic_dec(&shca->num_qps);
-		return ERR_PTR(-EINVAL);
-	}
-
-	if (is_llqp) {
-		switch (qp_type) {
-		case IB_QPT_RC:
-			if ((init_attr->cap.max_send_wr > 255) ||
-			    (init_attr->cap.max_recv_wr > 255)) {
-				ehca_err(pd->device,
-					 "Invalid Number of max_sq_wr=%x "
-					 "or max_rq_wr=%x for RC LLQP",
-					 init_attr->cap.max_send_wr,
-					 init_attr->cap.max_recv_wr);
-				atomic_dec(&shca->num_qps);
-				return ERR_PTR(-EINVAL);
-			}
-			break;
-		case IB_QPT_UD:
-			if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
-				ehca_err(pd->device, "UD LLQP not supported "
-					 "by this adapter");
-				atomic_dec(&shca->num_qps);
-				return ERR_PTR(-ENOSYS);
-			}
-			if (!(init_attr->cap.max_send_sge <= 5
-			    && init_attr->cap.max_send_sge >= 1
-			    && init_attr->cap.max_recv_sge <= 5
-			    && init_attr->cap.max_recv_sge >= 1)) {
-				ehca_err(pd->device,
-					 "Invalid Number of max_send_sge=%x "
-					 "or max_recv_sge=%x for UD LLQP",
-					 init_attr->cap.max_send_sge,
-					 init_attr->cap.max_recv_sge);
-				atomic_dec(&shca->num_qps);
-				return ERR_PTR(-EINVAL);
-			} else if (init_attr->cap.max_send_wr > 255) {
-				ehca_err(pd->device,
-					 "Invalid Number of "
-					 "max_send_wr=%x for UD QP_TYPE=%x",
-					 init_attr->cap.max_send_wr, qp_type);
-				atomic_dec(&shca->num_qps);
-				return ERR_PTR(-EINVAL);
-			}
-			break;
-		default:
-			ehca_err(pd->device, "unsupported LL QP Type=%x",
-				 qp_type);
-			atomic_dec(&shca->num_qps);
-			return ERR_PTR(-EINVAL);
-		}
-	} else {
-		int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
-			       || qp_type == IB_QPT_GSI) ? 250 : 252;
-
-		if (init_attr->cap.max_send_sge > max_sge
-		    || init_attr->cap.max_recv_sge > max_sge) {
-			ehca_err(pd->device, "Invalid number of SGEs requested "
-				 "send_sge=%x recv_sge=%x max_sge=%x",
-				 init_attr->cap.max_send_sge,
-				 init_attr->cap.max_recv_sge, max_sge);
-			atomic_dec(&shca->num_qps);
-			return ERR_PTR(-EINVAL);
-		}
-	}
-
-	my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
-	if (!my_qp) {
-		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
-		atomic_dec(&shca->num_qps);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	if (pd->uobject && udata) {
-		is_user = 1;
-		context = pd->uobject->context;
-	}
-
-	atomic_set(&my_qp->nr_events, 0);
-	init_waitqueue_head(&my_qp->wait_completion);
-	spin_lock_init(&my_qp->spinlock_s);
-	spin_lock_init(&my_qp->spinlock_r);
-	my_qp->qp_type = qp_type;
-	my_qp->ext_type = parms.ext_type;
-	my_qp->state = IB_QPS_RESET;
-
-	if (init_attr->recv_cq)
-		my_qp->recv_cq =
-			container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
-	if (init_attr->send_cq)
-		my_qp->send_cq =
-			container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
-
-	idr_preload(GFP_KERNEL);
-	write_lock_irqsave(&ehca_qp_idr_lock, flags);
-
-	ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT);
-	if (ret >= 0)
-		my_qp->token = ret;
-
-	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-	idr_preload_end();
-	if (ret < 0) {
-		if (ret == -ENOSPC) {
-			ret = -EINVAL;
-			ehca_err(pd->device, "Invalid number of qp");
-		} else {
-			ret = -ENOMEM;
-			ehca_err(pd->device, "Can't allocate new idr entry.");
-		}
-		goto create_qp_exit0;
-	}
-
-	if (has_srq)
-		parms.srq_token = my_qp->token;
-
-	parms.servicetype = ibqptype2servicetype(qp_type);
-	if (parms.servicetype < 0) {
-		ret = -EINVAL;
-		ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
-		goto create_qp_exit1;
-	}
-
-	/* Always signal by WQE so we can hide circ. WQEs */
-	parms.sigtype = HCALL_SIGT_BY_WQE;
-
-	/* UD_AV CIRCUMVENTION */
-	max_send_sge = init_attr->cap.max_send_sge;
-	max_recv_sge = init_attr->cap.max_recv_sge;
-	if (parms.servicetype == ST_UD && !is_llqp) {
-		max_send_sge += 2;
-		max_recv_sge += 2;
-	}
-
-	parms.token = my_qp->token;
-	parms.eq_handle = shca->eq.ipz_eq_handle;
-	parms.pd = my_pd->fw_pd;
-	if (my_qp->send_cq)
-		parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
-	if (my_qp->recv_cq)
-		parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
-
-	parms.squeue.max_wr = init_attr->cap.max_send_wr;
-	parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
-	parms.squeue.max_sge = max_send_sge;
-	parms.rqueue.max_sge = max_recv_sge;
-
-	/* RC QPs need one more SWQE for unsolicited ack circumvention */
-	if (qp_type == IB_QPT_RC)
-		parms.squeue.max_wr++;
-
-	if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
-		if (HAS_SQ(my_qp))
-			ehca_determine_small_queue(
-				&parms.squeue, max_send_sge, is_llqp);
-		if (HAS_RQ(my_qp))
-			ehca_determine_small_queue(
-				&parms.rqueue, max_recv_sge, is_llqp);
-		parms.qp_storage =
-			(parms.squeue.is_small || parms.rqueue.is_small);
-	}
-
-	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli",
-			 h_ret);
-		ret = ehca2ib_return_code(h_ret);
-		goto create_qp_exit1;
-	}
-
-	ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
-	my_qp->ipz_qp_handle = parms.qp_handle;
-	my_qp->galpas = parms.galpas;
-
-	swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
-	rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
-
-	switch (qp_type) {
-	case IB_QPT_RC:
-		if (is_llqp) {
-			parms.squeue.act_nr_sges = 1;
-			parms.rqueue.act_nr_sges = 1;
-		}
-		/* hide the extra WQE */
-		parms.squeue.act_nr_wqes--;
-		break;
-	case IB_QPT_UD:
-	case IB_QPT_GSI:
-	case IB_QPT_SMI:
-		/* UD circumvention */
-		if (is_llqp) {
-			parms.squeue.act_nr_sges = 1;
-			parms.rqueue.act_nr_sges = 1;
-		} else {
-			parms.squeue.act_nr_sges -= 2;
-			parms.rqueue.act_nr_sges -= 2;
-		}
-
-		if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
-			parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr;
-			parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr;
-			parms.squeue.act_nr_sges = init_attr->cap.max_send_sge;
-			parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge;
-			ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
-		}
-
-		break;
-
-	default:
-		break;
-	}
-
-	/* initialize r/squeue and register queue pages */
-	if (HAS_SQ(my_qp)) {
-		ret = init_qp_queue(
-			shca, my_pd, my_qp, &my_qp->ipz_squeue, 0,
-			HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
-			&parms.squeue, swqe_size);
-		if (ret) {
-			ehca_err(pd->device, "Couldn't initialize squeue "
-				 "and pages ret=%i", ret);
-			goto create_qp_exit2;
-		}
-
-		if (!is_user) {
-			my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
-				my_qp->ipz_squeue.qe_size;
-			my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
-						    sizeof(struct ehca_qmap_entry));
-			if (!my_qp->sq_map.map) {
-				ehca_err(pd->device, "Couldn't allocate squeue "
-					 "map ret=%i", ret);
-				goto create_qp_exit3;
-			}
-			INIT_LIST_HEAD(&my_qp->sq_err_node);
-			/* to avoid the generation of bogus flush CQEs */
-			reset_queue_map(&my_qp->sq_map);
-		}
-	}
-
-	if (HAS_RQ(my_qp)) {
-		ret = init_qp_queue(
-			shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1,
-			H_SUCCESS, &parms.rqueue, rwqe_size);
-		if (ret) {
-			ehca_err(pd->device, "Couldn't initialize rqueue "
-				 "and pages ret=%i", ret);
-			goto create_qp_exit4;
-		}
-		if (!is_user) {
-			my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
-				my_qp->ipz_rqueue.qe_size;
-			my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
-						    sizeof(struct ehca_qmap_entry));
-			if (!my_qp->rq_map.map) {
-				ehca_err(pd->device, "Couldn't allocate squeue "
-					 "map ret=%i", ret);
-				goto create_qp_exit5;
-			}
-			INIT_LIST_HEAD(&my_qp->rq_err_node);
-			/* to avoid the generation of bogus flush CQEs */
-			reset_queue_map(&my_qp->rq_map);
-		}
-	} else if (init_attr->srq && !is_user) {
-		/* this is a base QP, use the queue map of the SRQ */
-		my_qp->rq_map = my_srq->rq_map;
-		INIT_LIST_HEAD(&my_qp->rq_err_node);
-
-		my_qp->ipz_rqueue = my_srq->ipz_rqueue;
-	}
-
-	if (is_srq) {
-		my_qp->ib_srq.pd = &my_pd->ib_pd;
-		my_qp->ib_srq.device = my_pd->ib_pd.device;
-
-		my_qp->ib_srq.srq_context = init_attr->qp_context;
-		my_qp->ib_srq.event_handler = init_attr->event_handler;
-	} else {
-		my_qp->ib_qp.qp_num = ib_qp_num;
-		my_qp->ib_qp.pd = &my_pd->ib_pd;
-		my_qp->ib_qp.device = my_pd->ib_pd.device;
-
-		my_qp->ib_qp.recv_cq = init_attr->recv_cq;
-		my_qp->ib_qp.send_cq = init_attr->send_cq;
-
-		my_qp->ib_qp.qp_type = qp_type;
-		my_qp->ib_qp.srq = init_attr->srq;
-
-		my_qp->ib_qp.qp_context = init_attr->qp_context;
-		my_qp->ib_qp.event_handler = init_attr->event_handler;
-	}
-
-	init_attr->cap.max_inline_data = 0; /* not supported yet */
-	init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges;
-	init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes;
-	init_attr->cap.max_send_sge = parms.squeue.act_nr_sges;
-	init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
-	my_qp->init_attr = *init_attr;
-
-	if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
-		shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
-			&my_qp->ib_qp;
-		if (ehca_nr_ports < 0) {
-			/* alloc array to cache subsequent modify qp parms
-			 * for autodetect mode
-			 */
-			my_qp->mod_qp_parm =
-				kzalloc(EHCA_MOD_QP_PARM_MAX *
-					sizeof(*my_qp->mod_qp_parm),
-					GFP_KERNEL);
-			if (!my_qp->mod_qp_parm) {
-				ehca_err(pd->device,
-					 "Could not alloc mod_qp_parm");
-				goto create_qp_exit5;
-			}
-		}
-	}
-
-	/* NOTE: define_apq0() not supported yet */
-	if (qp_type == IB_QPT_GSI) {
-		h_ret = ehca_define_sqp(shca, my_qp, init_attr);
-		if (h_ret != H_SUCCESS) {
-			kfree(my_qp->mod_qp_parm);
-			my_qp->mod_qp_parm = NULL;
-			/* the QP pointer is no longer valid */
-			shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
-				NULL;
-			ret = ehca2ib_return_code(h_ret);
-			goto create_qp_exit6;
-		}
-	}
-
-	if (my_qp->send_cq) {
-		ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
-		if (ret) {
-			ehca_err(pd->device,
-				 "Couldn't assign qp to send_cq ret=%i", ret);
-			goto create_qp_exit7;
-		}
-	}
-
-	/* copy queues, galpa data to user space */
-	if (context && udata) {
-		struct ehca_create_qp_resp resp;
-		memset(&resp, 0, sizeof(resp));
-
-		resp.qp_num = my_qp->real_qp_num;
-		resp.token = my_qp->token;
-		resp.qp_type = my_qp->qp_type;
-		resp.ext_type = my_qp->ext_type;
-		resp.qkey = my_qp->qkey;
-		resp.real_qp_num = my_qp->real_qp_num;
-
-		if (HAS_SQ(my_qp))
-			queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
-		if (HAS_RQ(my_qp))
-			queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
-		resp.fw_handle_ofs = (u32)
-			(my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
-
-		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
-			ehca_err(pd->device, "Copy to udata failed");
-			ret = -EINVAL;
-			goto create_qp_exit8;
-		}
-	}
-
-	return my_qp;
-
-create_qp_exit8:
-	ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
-
-create_qp_exit7:
-	kfree(my_qp->mod_qp_parm);
-
-create_qp_exit6:
-	if (HAS_RQ(my_qp) && !is_user)
-		vfree(my_qp->rq_map.map);
-
-create_qp_exit5:
-	if (HAS_RQ(my_qp))
-		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-
-create_qp_exit4:
-	if (HAS_SQ(my_qp) && !is_user)
-		vfree(my_qp->sq_map.map);
-
-create_qp_exit3:
-	if (HAS_SQ(my_qp))
-		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-
-create_qp_exit2:
-	hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
-
-create_qp_exit1:
-	write_lock_irqsave(&ehca_qp_idr_lock, flags);
-	idr_remove(&ehca_qp_idr, my_qp->token);
-	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
-create_qp_exit0:
-	kmem_cache_free(qp_cache, my_qp);
-	atomic_dec(&shca->num_qps);
-	return ERR_PTR(ret);
-}
-
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-			     struct ib_qp_init_attr *qp_init_attr,
-			     struct ib_udata *udata)
-{
-	struct ehca_qp *ret;
-
-	ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
-	return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp;
-}
-
-static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-			       struct ib_uobject *uobject);
-
-struct ib_srq *ehca_create_srq(struct ib_pd *pd,
-			       struct ib_srq_init_attr *srq_init_attr,
-			       struct ib_udata *udata)
-{
-	struct ib_qp_init_attr qp_init_attr;
-	struct ehca_qp *my_qp;
-	struct ib_srq *ret;
-	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
-					      ib_device);
-	struct hcp_modify_qp_control_block *mqpcb;
-	u64 hret, update_mask;
-
-	if (srq_init_attr->srq_type != IB_SRQT_BASIC)
-		return ERR_PTR(-ENOSYS);
-
-	/* For common attributes, internal_create_qp() takes its info
-	 * out of qp_init_attr, so copy all common attrs there.
-	 */
-	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
-	qp_init_attr.event_handler = srq_init_attr->event_handler;
-	qp_init_attr.qp_context = srq_init_attr->srq_context;
-	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-	qp_init_attr.qp_type = IB_QPT_RC;
-	qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
-	qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
-
-	my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1);
-	if (IS_ERR(my_qp))
-		return (struct ib_srq *)my_qp;
-
-	/* copy back return values */
-	srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr;
-	srq_init_attr->attr.max_sge = 3;
-
-	/* drive SRQ into RTR state */
-	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!mqpcb) {
-		ehca_err(pd->device, "Could not get zeroed page for mqpcb "
-			 "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
-		ret = ERR_PTR(-ENOMEM);
-		goto create_srq1;
-	}
-
-	mqpcb->qp_state = EHCA_QPS_INIT;
-	mqpcb->prim_phys_port = 1;
-	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle,
-				&my_qp->pf,
-				update_mask,
-				mqpcb, my_qp->galpas.kernel);
-	if (hret != H_SUCCESS) {
-		ehca_err(pd->device, "Could not modify SRQ to INIT "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, my_qp->real_qp_num, hret);
-		goto create_srq2;
-	}
-
-	mqpcb->qp_enable = 1;
-	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
-	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle,
-				&my_qp->pf,
-				update_mask,
-				mqpcb, my_qp->galpas.kernel);
-	if (hret != H_SUCCESS) {
-		ehca_err(pd->device, "Could not enable SRQ "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, my_qp->real_qp_num, hret);
-		goto create_srq2;
-	}
-
-	mqpcb->qp_state  = EHCA_QPS_RTR;
-	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle,
-				&my_qp->pf,
-				update_mask,
-				mqpcb, my_qp->galpas.kernel);
-	if (hret != H_SUCCESS) {
-		ehca_err(pd->device, "Could not modify SRQ to RTR "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, my_qp->real_qp_num, hret);
-		goto create_srq2;
-	}
-
-	ehca_free_fw_ctrlblock(mqpcb);
-
-	return &my_qp->ib_srq;
-
-create_srq2:
-	ret = ERR_PTR(ehca2ib_return_code(hret));
-	ehca_free_fw_ctrlblock(mqpcb);
-
-create_srq1:
-	internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject);
-
-	return ret;
-}
-
-/*
- * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts
- * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe
- * returns total number of bad wqes in bad_wqe_cnt
- */
-static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
-			   int *bad_wqe_cnt)
-{
-	u64 h_ret;
-	struct ipz_queue *squeue;
-	void *bad_send_wqe_p, *bad_send_wqe_v;
-	u64 q_ofs;
-	struct ehca_wqe *wqe;
-	int qp_num = my_qp->ib_qp.qp_num;
-
-	/* get send wqe pointer */
-	h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
-					   my_qp->ipz_qp_handle, &my_qp->pf,
-					   &bad_send_wqe_p, NULL, 2);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
-			 " ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, qp_num, h_ret);
-		return ehca2ib_return_code(h_ret);
-	}
-	bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63)));
-	ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
-		 qp_num, bad_send_wqe_p);
-	/* convert wqe pointer to vadr */
-	bad_send_wqe_v = __va((u64)bad_send_wqe_p);
-	if (ehca_debug_level >= 2)
-		ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
-	squeue = &my_qp->ipz_squeue;
-	if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
-		ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x"
-			 " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p);
-		return -EFAULT;
-	}
-
-	/* loop sets wqe's purge bit */
-	wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-	*bad_wqe_cnt = 0;
-	while (wqe->optype != 0xff && wqe->wqef != 0xff) {
-		if (ehca_debug_level >= 2)
-			ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
-		wqe->nr_of_data_seg = 0; /* suppress data access */
-		wqe->wqef = WQEF_PURGE; /* WQE to be purged */
-		q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
-		wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
-		*bad_wqe_cnt = (*bad_wqe_cnt)+1;
-	}
-	/*
-	 * bad wqe will be reprocessed and ignored when pol_cq() is called,
-	 *  i.e. nr of wqes with flush error status is one less
-	 */
-	ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
-		 qp_num, (*bad_wqe_cnt)-1);
-	wqe->wqef = 0;
-
-	return 0;
-}
-
-static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
-			  struct ehca_queue_map *qmap)
-{
-	void *wqe_v;
-	u64 q_ofs;
-	u32 wqe_idx;
-	unsigned int tail_idx;
-
-	/* convert real to abs address */
-	wqe_p = wqe_p & (~(1UL << 63));
-
-	wqe_v = __va(wqe_p);
-
-	if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
-		ehca_gen_err("Invalid offset for calculating left cqes "
-				"wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v);
-		return -EFAULT;
-	}
-
-	tail_idx = next_index(qmap->tail, qmap->entries);
-	wqe_idx = q_ofs / ipz_queue->qe_size;
-
-	/* check all processed wqes, whether a cqe is requested or not */
-	while (tail_idx != wqe_idx) {
-		if (qmap->map[tail_idx].cqe_req)
-			qmap->left_to_poll++;
-		tail_idx = next_index(tail_idx, qmap->entries);
-	}
-	/* save index in queue, where we have to start flushing */
-	qmap->next_wqe_idx = wqe_idx;
-	return 0;
-}
-
-static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
-{
-	u64 h_ret;
-	void *send_wqe_p, *recv_wqe_p;
-	int ret;
-	unsigned long flags;
-	int qp_num = my_qp->ib_qp.qp_num;
-
-	/* this hcall is not supported on base QPs */
-	if (my_qp->ext_type != EQPT_SRQBASE) {
-		/* get send and receive wqe pointer */
-		h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle, &my_qp->pf,
-				&send_wqe_p, &recv_wqe_p, 4);
-		if (h_ret != H_SUCCESS) {
-			ehca_err(&shca->ib_device, "disable_and_get_wqe() "
-				 "failed ehca_qp=%p qp_num=%x h_ret=%lli",
-				 my_qp, qp_num, h_ret);
-			return ehca2ib_return_code(h_ret);
-		}
-
-		/*
-		 * acquire lock to ensure that nobody is polling the cq which
-		 * could mean that the qmap->tail pointer is in an
-		 * inconsistent state.
-		 */
-		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-		ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
-				&my_qp->sq_map);
-		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-		if (ret)
-			return ret;
-
-
-		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-		ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
-				&my_qp->rq_map);
-		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-		if (ret)
-			return ret;
-	} else {
-		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-		my_qp->sq_map.left_to_poll = 0;
-		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
-							my_qp->sq_map.entries);
-		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-		my_qp->rq_map.left_to_poll = 0;
-		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
-							my_qp->rq_map.entries);
-		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
-	}
-
-	/* this assures flush cqes being generated only for pending wqes */
-	if ((my_qp->sq_map.left_to_poll == 0) &&
-				(my_qp->rq_map.left_to_poll == 0)) {
-		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
-		ehca_add_to_err_list(my_qp, 1);
-		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
-
-		if (HAS_RQ(my_qp)) {
-			spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
-			ehca_add_to_err_list(my_qp, 0);
-			spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
-					flags);
-		}
-	}
-
-	return 0;
-}
-
-/*
- * internal_modify_qp with circumvention to handle aqp0 properly
- * smi_reset2init indicates if this is an internal reset-to-init-call for
- * smi. This flag must always be zero if called from ehca_modify_qp()!
- * This internal func was intorduced to avoid recursion of ehca_modify_qp()!
- */
-static int internal_modify_qp(struct ib_qp *ibqp,
-			      struct ib_qp_attr *attr,
-			      int attr_mask, int smi_reset2init)
-{
-	enum ib_qp_state qp_cur_state, qp_new_state;
-	int cnt, qp_attr_idx, ret = 0;
-	enum ib_qp_statetrans statetrans;
-	struct hcp_modify_qp_control_block *mqpcb;
-	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-	struct ehca_shca *shca =
-		container_of(ibqp->pd->device, struct ehca_shca, ib_device);
-	u64 update_mask;
-	u64 h_ret;
-	int bad_wqe_cnt = 0;
-	int is_user = 0;
-	int squeue_locked = 0;
-	unsigned long flags = 0;
-
-	/* do query_qp to obtain current attr values */
-	mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
-	if (!mqpcb) {
-		ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
-			 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
-				my_qp->ipz_qp_handle,
-				&my_qp->pf,
-				mqpcb, my_qp->galpas.kernel);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(ibqp->device, "hipz_h_query_qp() failed "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, ibqp->qp_num, h_ret);
-		ret = ehca2ib_return_code(h_ret);
-		goto modify_qp_exit1;
-	}
-	if (ibqp->uobject)
-		is_user = 1;
-
-	qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
-
-	if (qp_cur_state == -EINVAL) {	/* invalid qp state */
-		ret = -EINVAL;
-		ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
-			 "ehca_qp=%p qp_num=%x",
-			 mqpcb->qp_state, my_qp, ibqp->qp_num);
-		goto modify_qp_exit1;
-	}
-	/*
-	 * circumvention to set aqp0 initial state to init
-	 * as expected by IB spec
-	 */
-	if (smi_reset2init == 0 &&
-	    ibqp->qp_type == IB_QPT_SMI &&
-	    qp_cur_state == IB_QPS_RESET &&
-	    (attr_mask & IB_QP_STATE) &&
-	    attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
-		struct ib_qp_attr smiqp_attr = {
-			.qp_state = IB_QPS_INIT,
-			.port_num = my_qp->init_attr.port_num,
-			.pkey_index = 0,
-			.qkey = 0
-		};
-		int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT |
-			IB_QP_PKEY_INDEX | IB_QP_QKEY;
-		int smirc = internal_modify_qp(
-			ibqp, &smiqp_attr, smiqp_attr_mask, 1);
-		if (smirc) {
-			ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
-				 "ehca_modify_qp() rc=%i", smirc);
-			ret = H_PARAMETER;
-			goto modify_qp_exit1;
-		}
-		qp_cur_state = IB_QPS_INIT;
-		ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded");
-	}
-	/* is transmitted current state  equal to "real" current state */
-	if ((attr_mask & IB_QP_CUR_STATE) &&
-	    qp_cur_state != attr->cur_qp_state) {
-		ret = -EINVAL;
-		ehca_err(ibqp->device,
-			 "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>"
-			 " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x",
-			 attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num);
-		goto modify_qp_exit1;
-	}
-
-	ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x "
-		 "new qp_state=%x attribute_mask=%x",
-		 my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
-
-	qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
-	if (!smi_reset2init &&
-	    !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
-				attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
-		ret = -EINVAL;
-		ehca_err(ibqp->device,
-			 "Invalid qp transition new_state=%x cur_state=%x "
-			 "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state,
-			 qp_cur_state, my_qp, ibqp->qp_num, attr_mask);
-		goto modify_qp_exit1;
-	}
-
-	mqpcb->qp_state = ib2ehca_qp_state(qp_new_state);
-	if (mqpcb->qp_state)
-		update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
-	else {
-		ret = -EINVAL;
-		ehca_err(ibqp->device, "Invalid new qp state=%x "
-			 "ehca_qp=%p qp_num=%x",
-			 qp_new_state, my_qp, ibqp->qp_num);
-		goto modify_qp_exit1;
-	}
-
-	/* retrieve state transition struct to get req and opt attrs */
-	statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state);
-	if (statetrans < 0) {
-		ret = -EINVAL;
-		ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x "
-			 "new_qp_state=%x State_xsition=%x ehca_qp=%p "
-			 "qp_num=%x", qp_cur_state, qp_new_state,
-			 statetrans, my_qp, ibqp->qp_num);
-		goto modify_qp_exit1;
-	}
-
-	qp_attr_idx = ib2ehcaqptype(ibqp->qp_type);
-
-	if (qp_attr_idx < 0) {
-		ret = qp_attr_idx;
-		ehca_err(ibqp->device,
-			 "Invalid QP type=%x ehca_qp=%p qp_num=%x",
-			 ibqp->qp_type, my_qp, ibqp->qp_num);
-		goto modify_qp_exit1;
-	}
-
-	ehca_dbg(ibqp->device,
-		 "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
-		 my_qp, ibqp->qp_num, statetrans);
-
-	/* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set
-	 * in non-LL UD QPs.
-	 */
-	if ((my_qp->qp_type == IB_QPT_UD) &&
-	    (my_qp->ext_type != EQPT_LLQP) &&
-	    (statetrans == IB_QPST_INIT2RTR) &&
-	    (shca->hw_level >= 0x22)) {
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
-		mqpcb->send_grh_flag = 1;
-	}
-
-	/* sqe -> rts: set purge bit of bad wqe before actual trans */
-	if ((my_qp->qp_type == IB_QPT_UD ||
-	     my_qp->qp_type == IB_QPT_GSI ||
-	     my_qp->qp_type == IB_QPT_SMI) &&
-	    statetrans == IB_QPST_SQE2RTS) {
-		/* mark next free wqe if kernel */
-		if (!ibqp->uobject) {
-			struct ehca_wqe *wqe;
-			/* lock send queue */
-			spin_lock_irqsave(&my_qp->spinlock_s, flags);
-			squeue_locked = 1;
-			/* mark next free wqe */
-			wqe = (struct ehca_wqe *)
-				ipz_qeit_get(&my_qp->ipz_squeue);
-			wqe->optype = wqe->wqef = 0xff;
-			ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
-				 ibqp->qp_num, wqe);
-		}
-		ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
-		if (ret) {
-			ehca_err(ibqp->device, "prepare_sqe_rts() failed "
-				 "ehca_qp=%p qp_num=%x ret=%i",
-				 my_qp, ibqp->qp_num, ret);
-			goto modify_qp_exit2;
-		}
-	}
-
-	/*
-	 * enable RDMA_Atomic_Control if reset->init und reliable con
-	 * this is necessary since gen2 does not provide that flag,
-	 * but pHyp requires it
-	 */
-	if (statetrans == IB_QPST_RESET2INIT &&
-	    (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
-		mqpcb->rdma_atomic_ctrl = 3;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
-	}
-	/* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
-	if (statetrans == IB_QPST_INIT2RTR &&
-	    (ibqp->qp_type == IB_QPT_UC) &&
-	    !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
-		mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX) {
-		if (attr->pkey_index >= 16) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid pkey_index=%x. "
-				 "ehca_qp=%p qp_num=%x max_pkey_index=f",
-				 attr->pkey_index, my_qp, ibqp->qp_num);
-			goto modify_qp_exit2;
-		}
-		mqpcb->prim_p_key_idx = attr->pkey_index;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
-	}
-	if (attr_mask & IB_QP_PORT) {
-		struct ehca_sport *sport;
-		struct ehca_qp *aqp1;
-		if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid port=%x. "
-				 "ehca_qp=%p qp_num=%x num_ports=%x",
-				 attr->port_num, my_qp, ibqp->qp_num,
-				 shca->num_ports);
-			goto modify_qp_exit2;
-		}
-		sport = &shca->sport[attr->port_num - 1];
-		if (!sport->ibqp_sqp[IB_QPT_GSI]) {
-			/* should not occur */
-			ret = -EFAULT;
-			ehca_err(ibqp->device, "AQP1 was not created for "
-				 "port=%x", attr->port_num);
-			goto modify_qp_exit2;
-		}
-		aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI],
-				    struct ehca_qp, ib_qp);
-		if (ibqp->qp_type != IB_QPT_GSI &&
-		    ibqp->qp_type != IB_QPT_SMI &&
-		    aqp1->mod_qp_parm) {
-			/*
-			 * firmware will reject this modify_qp() because
-			 * port is not activated/initialized fully
-			 */
-			ret = -EFAULT;
-			ehca_warn(ibqp->device, "Couldn't modify qp port=%x: "
-				  "either port is being activated (try again) "
-				  "or cabling issue", attr->port_num);
-			goto modify_qp_exit2;
-		}
-		mqpcb->prim_phys_port = attr->port_num;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
-	}
-	if (attr_mask & IB_QP_QKEY) {
-		mqpcb->qkey = attr->qkey;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
-	}
-	if (attr_mask & IB_QP_AV) {
-		mqpcb->dlid = attr->ah_attr.dlid;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
-		mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1);
-		mqpcb->service_level = attr->ah_attr.sl;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
-
-		if (ehca_calc_ipd(shca, mqpcb->prim_phys_port,
-				  attr->ah_attr.static_rate,
-				  &mqpcb->max_static_rate)) {
-			ret = -EINVAL;
-			goto modify_qp_exit2;
-		}
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
-
-		/*
-		 * Always supply the GRH flag, even if it's zero, to give the
-		 * hypervisor a clear "yes" or "no" instead of a "perhaps"
-		 */
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
-
-		/*
-		 * only if GRH is TRUE we might consider SOURCE_GID_IDX
-		 * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
-		 */
-		if (attr->ah_attr.ah_flags == IB_AH_GRH) {
-			mqpcb->send_grh_flag = 1;
-
-			mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
-			update_mask |=
-				EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
-
-			for (cnt = 0; cnt < 16; cnt++)
-				mqpcb->dest_gid.byte[cnt] =
-					attr->ah_attr.grh.dgid.raw[cnt];
-
-			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1);
-			mqpcb->flow_label = attr->ah_attr.grh.flow_label;
-			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1);
-			mqpcb->hop_limit = attr->ah_attr.grh.hop_limit;
-			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1);
-			mqpcb->traffic_class = attr->ah_attr.grh.traffic_class;
-			update_mask |=
-				EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1);
-		}
-	}
-
-	if (attr_mask & IB_QP_PATH_MTU) {
-		/* store ld(MTU) */
-		my_qp->mtu_shift = attr->path_mtu + 7;
-		mqpcb->path_mtu = attr->path_mtu;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
-	}
-	if (attr_mask & IB_QP_TIMEOUT) {
-		mqpcb->timeout = attr->timeout;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1);
-	}
-	if (attr_mask & IB_QP_RETRY_CNT) {
-		mqpcb->retry_count = attr->retry_cnt;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1);
-	}
-	if (attr_mask & IB_QP_RNR_RETRY) {
-		mqpcb->rnr_retry_count = attr->rnr_retry;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1);
-	}
-	if (attr_mask & IB_QP_RQ_PSN) {
-		mqpcb->receive_psn = attr->rq_psn;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1);
-	}
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
-		mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ?
-			attr->max_dest_rd_atomic : 2;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
-	}
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
-		mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
-			attr->max_rd_atomic : 2;
-		update_mask |=
-			EHCA_BMASK_SET
-			(MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
-	}
-	if (attr_mask & IB_QP_ALT_PATH) {
-		if (attr->alt_port_num < 1
-		    || attr->alt_port_num > shca->num_ports) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid alt_port=%x. "
-				 "ehca_qp=%p qp_num=%x num_ports=%x",
-				 attr->alt_port_num, my_qp, ibqp->qp_num,
-				 shca->num_ports);
-			goto modify_qp_exit2;
-		}
-		mqpcb->alt_phys_port = attr->alt_port_num;
-
-		if (attr->alt_pkey_index >= 16) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. "
-				 "ehca_qp=%p qp_num=%x max_pkey_index=f",
-				 attr->pkey_index, my_qp, ibqp->qp_num);
-			goto modify_qp_exit2;
-		}
-		mqpcb->alt_p_key_idx = attr->alt_pkey_index;
-
-		mqpcb->timeout_al = attr->alt_timeout;
-		mqpcb->dlid_al = attr->alt_ah_attr.dlid;
-		mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
-		mqpcb->service_level_al = attr->alt_ah_attr.sl;
-
-		if (ehca_calc_ipd(shca, mqpcb->alt_phys_port,
-				  attr->alt_ah_attr.static_rate,
-				  &mqpcb->max_static_rate_al)) {
-			ret = -EINVAL;
-			goto modify_qp_exit2;
-		}
-
-		/* OpenIB doesn't support alternate retry counts - copy them */
-		mqpcb->retry_count_al = mqpcb->retry_count;
-		mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count;
-
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1);
-
-		/*
-		 * Always supply the GRH flag, even if it's zero, to give the
-		 * hypervisor a clear "yes" or "no" instead of a "perhaps"
-		 */
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
-
-		/*
-		 * only if GRH is TRUE we might consider SOURCE_GID_IDX
-		 * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
-		 */
-		if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
-			mqpcb->send_grh_flag_al = 1;
-
-			for (cnt = 0; cnt < 16; cnt++)
-				mqpcb->dest_gid_al.byte[cnt] =
-					attr->alt_ah_attr.grh.dgid.raw[cnt];
-			mqpcb->source_gid_idx_al =
-				attr->alt_ah_attr.grh.sgid_index;
-			mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
-			mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
-			mqpcb->traffic_class_al =
-				attr->alt_ah_attr.grh.traffic_class;
-
-			update_mask |=
-				EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1)
-				| EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1)
-				| EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1)
-				| EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) |
-				EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
-		}
-	}
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
-		mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1);
-	}
-
-	if (attr_mask & IB_QP_SQ_PSN) {
-		mqpcb->send_psn = attr->sq_psn;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1);
-	}
-
-	if (attr_mask & IB_QP_DEST_QPN) {
-		mqpcb->dest_qp_nr = attr->dest_qp_num;
-		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1);
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		if (attr->path_mig_state != IB_MIG_REARM
-		    && attr->path_mig_state != IB_MIG_MIGRATED) {
-			ret = -EINVAL;
-			ehca_err(ibqp->device, "Invalid mig_state=%x",
-				 attr->path_mig_state);
-			goto modify_qp_exit2;
-		}
-		mqpcb->path_migration_state = attr->path_mig_state + 1;
-		if (attr->path_mig_state == IB_MIG_REARM)
-			my_qp->mig_armed = 1;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
-	}
-
-	if (attr_mask & IB_QP_CAP) {
-		mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1);
-		mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1);
-		/* no support for max_send/recv_sge yet */
-	}
-
-	if (ehca_debug_level >= 2)
-		ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
-
-	h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
-				 my_qp->ipz_qp_handle,
-				 &my_qp->pf,
-				 update_mask,
-				 mqpcb, my_qp->galpas.kernel);
-
-	if (h_ret != H_SUCCESS) {
-		ret = ehca2ib_return_code(h_ret);
-		ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli "
-			 "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
-		goto modify_qp_exit2;
-	}
-
-	if ((my_qp->qp_type == IB_QPT_UD ||
-	     my_qp->qp_type == IB_QPT_GSI ||
-	     my_qp->qp_type == IB_QPT_SMI) &&
-	    statetrans == IB_QPST_SQE2RTS) {
-		/* doorbell to reprocessing wqes */
-		iosync(); /* serialize GAL register access */
-		hipz_update_sqa(my_qp, bad_wqe_cnt-1);
-		ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt);
-	}
-
-	if (statetrans == IB_QPST_RESET2INIT ||
-	    statetrans == IB_QPST_INIT2INIT) {
-		mqpcb->qp_enable = 1;
-		mqpcb->qp_state = EHCA_QPS_INIT;
-		update_mask = 0;
-		update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
-
-		h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
-					 my_qp->ipz_qp_handle,
-					 &my_qp->pf,
-					 update_mask,
-					 mqpcb,
-					 my_qp->galpas.kernel);
-
-		if (h_ret != H_SUCCESS) {
-			ret = ehca2ib_return_code(h_ret);
-			ehca_err(ibqp->device, "ENABLE in context of "
-				 "RESET_2_INIT failed! Maybe you didn't get "
-				 "a LID h_ret=%lli ehca_qp=%p qp_num=%x",
-				 h_ret, my_qp, ibqp->qp_num);
-			goto modify_qp_exit2;
-		}
-	}
-	if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
-	    && !is_user) {
-		ret = check_for_left_cqes(my_qp, shca);
-		if (ret)
-			goto modify_qp_exit2;
-	}
-
-	if (statetrans == IB_QPST_ANY2RESET) {
-		ipz_qeit_reset(&my_qp->ipz_rqueue);
-		ipz_qeit_reset(&my_qp->ipz_squeue);
-
-		if (qp_cur_state == IB_QPS_ERR && !is_user) {
-			del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
-
-			if (HAS_RQ(my_qp))
-				del_from_err_list(my_qp->recv_cq,
-						  &my_qp->rq_err_node);
-		}
-		if (!is_user)
-			reset_queue_map(&my_qp->sq_map);
-
-		if (HAS_RQ(my_qp) && !is_user)
-			reset_queue_map(&my_qp->rq_map);
-	}
-
-	if (attr_mask & IB_QP_QKEY)
-		my_qp->qkey = attr->qkey;
-
-modify_qp_exit2:
-	if (squeue_locked) { /* this means: sqe -> rts */
-		spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
-		my_qp->sqerr_purgeflag = 1;
-	}
-
-modify_qp_exit1:
-	ehca_free_fw_ctrlblock(mqpcb);
-
-	return ret;
-}
-
-int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-		   struct ib_udata *udata)
-{
-	int ret = 0;
-
-	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
-					      ib_device);
-	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-
-	/* The if-block below caches qp_attr to be modified for GSI and SMI
-	 * qps during the initialization by ib_mad. When the respective port
-	 * is activated, ie we got an event PORT_ACTIVE, we'll replay the
-	 * cached modify calls sequence, see ehca_recover_sqs() below.
-	 * Why that is required:
-	 * 1) If one port is connected, older code requires that port one
-	 *    to be connected and module option nr_ports=1 to be given by
-	 *    user, which is very inconvenient for end user.
-	 * 2) Firmware accepts modify_qp() only if respective port has become
-	 *    active. Older code had a wait loop of 30sec create_qp()/
-	 *    define_aqp1(), which is not appropriate in practice. This
-	 *    code now removes that wait loop, see define_aqp1(), and always
-	 *    reports all ports to ib_mad resp. users. Only activated ports
-	 *    will then usable for the users.
-	 */
-	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
-		int port = my_qp->init_attr.port_num;
-		struct ehca_sport *sport = &shca->sport[port - 1];
-		unsigned long flags;
-		spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-		/* cache qp_attr only during init */
-		if (my_qp->mod_qp_parm) {
-			struct ehca_mod_qp_parm *p;
-			if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) {
-				ehca_err(&shca->ib_device,
-					 "mod_qp_parm overflow state=%x port=%x"
-					 " type=%x", attr->qp_state,
-					 my_qp->init_attr.port_num,
-					 ibqp->qp_type);
-				spin_unlock_irqrestore(&sport->mod_sqp_lock,
-						       flags);
-				return -EINVAL;
-			}
-			p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx];
-			p->mask = attr_mask;
-			p->attr = *attr;
-			my_qp->mod_qp_parm_idx++;
-			ehca_dbg(&shca->ib_device,
-				 "Saved qp_attr for state=%x port=%x type=%x",
-				 attr->qp_state, my_qp->init_attr.port_num,
-				 ibqp->qp_type);
-			spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-			goto out;
-		}
-		spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-	}
-
-	ret = internal_modify_qp(ibqp, attr, attr_mask, 0);
-
-out:
-	if ((ret == 0) && (attr_mask & IB_QP_STATE))
-		my_qp->state = attr->qp_state;
-
-	return ret;
-}
-
-void ehca_recover_sqp(struct ib_qp *sqp)
-{
-	struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp);
-	int port = my_sqp->init_attr.port_num;
-	struct ib_qp_attr attr;
-	struct ehca_mod_qp_parm *qp_parm;
-	int i, qp_parm_idx, ret;
-	unsigned long flags, wr_cnt;
-
-	if (!my_sqp->mod_qp_parm)
-		return;
-	ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num);
-
-	qp_parm = my_sqp->mod_qp_parm;
-	qp_parm_idx = my_sqp->mod_qp_parm_idx;
-	for (i = 0; i < qp_parm_idx; i++) {
-		attr = qp_parm[i].attr;
-		ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0);
-		if (ret) {
-			ehca_err(sqp->device, "Could not modify SQP port=%x "
-				 "qp_num=%x ret=%x", port, sqp->qp_num, ret);
-			goto free_qp_parm;
-		}
-		ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x",
-			 port, sqp->qp_num, attr.qp_state);
-	}
-
-	/* re-trigger posted recv wrs */
-	wr_cnt =  my_sqp->ipz_rqueue.current_q_offset /
-		my_sqp->ipz_rqueue.qe_size;
-	if (wr_cnt) {
-		spin_lock_irqsave(&my_sqp->spinlock_r, flags);
-		hipz_update_rqa(my_sqp, wr_cnt);
-		spin_unlock_irqrestore(&my_sqp->spinlock_r, flags);
-		ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx",
-			 port, sqp->qp_num, wr_cnt);
-	}
-
-free_qp_parm:
-	kfree(qp_parm);
-	/* this prevents subsequent calls to modify_qp() to cache qp_attr */
-	my_sqp->mod_qp_parm = NULL;
-}
-
-int ehca_query_qp(struct ib_qp *qp,
-		  struct ib_qp_attr *qp_attr,
-		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
-{
-	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-	struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
-					      ib_device);
-	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-	struct hcp_modify_qp_control_block *qpcb;
-	int cnt, ret = 0;
-	u64 h_ret;
-
-	if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
-		ehca_err(qp->device, "Invalid attribute mask "
-			 "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
-			 my_qp, qp->qp_num, qp_attr_mask);
-		return -EINVAL;
-	}
-
-	qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!qpcb) {
-		ehca_err(qp->device, "Out of memory for qpcb "
-			 "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_qp(adapter_handle,
-				my_qp->ipz_qp_handle,
-				&my_qp->pf,
-				qpcb, my_qp->galpas.kernel);
-
-	if (h_ret != H_SUCCESS) {
-		ret = ehca2ib_return_code(h_ret);
-		ehca_err(qp->device, "hipz_h_query_qp() failed "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, qp->qp_num, h_ret);
-		goto query_qp_exit1;
-	}
-
-	qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state);
-	qp_attr->qp_state = qp_attr->cur_qp_state;
-
-	if (qp_attr->cur_qp_state == -EINVAL) {
-		ret = -EINVAL;
-		ehca_err(qp->device, "Got invalid ehca_qp_state=%x "
-			 "ehca_qp=%p qp_num=%x",
-			 qpcb->qp_state, my_qp, qp->qp_num);
-		goto query_qp_exit1;
-	}
-
-	if (qp_attr->qp_state == IB_QPS_SQD)
-		qp_attr->sq_draining = 1;
-
-	qp_attr->qkey = qpcb->qkey;
-	qp_attr->path_mtu = qpcb->path_mtu;
-	qp_attr->path_mig_state = qpcb->path_migration_state - 1;
-	qp_attr->rq_psn = qpcb->receive_psn;
-	qp_attr->sq_psn = qpcb->send_psn;
-	qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
-	qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1;
-	qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1;
-	/* UD_AV CIRCUMVENTION */
-	if (my_qp->qp_type == IB_QPT_UD) {
-		qp_attr->cap.max_send_sge =
-			qpcb->actual_nr_sges_in_sq_wqe - 2;
-		qp_attr->cap.max_recv_sge =
-			qpcb->actual_nr_sges_in_rq_wqe - 2;
-	} else {
-		qp_attr->cap.max_send_sge =
-			qpcb->actual_nr_sges_in_sq_wqe;
-		qp_attr->cap.max_recv_sge =
-			qpcb->actual_nr_sges_in_rq_wqe;
-	}
-
-	qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
-	qp_attr->dest_qp_num = qpcb->dest_qp_nr;
-
-	qp_attr->pkey_index = qpcb->prim_p_key_idx;
-	qp_attr->port_num = qpcb->prim_phys_port;
-	qp_attr->timeout = qpcb->timeout;
-	qp_attr->retry_cnt = qpcb->retry_count;
-	qp_attr->rnr_retry = qpcb->rnr_retry_count;
-
-	qp_attr->alt_pkey_index = qpcb->alt_p_key_idx;
-	qp_attr->alt_port_num = qpcb->alt_phys_port;
-	qp_attr->alt_timeout = qpcb->timeout_al;
-
-	qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res;
-	qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp;
-
-	/* primary av */
-	qp_attr->ah_attr.sl = qpcb->service_level;
-
-	if (qpcb->send_grh_flag) {
-		qp_attr->ah_attr.ah_flags = IB_AH_GRH;
-	}
-
-	qp_attr->ah_attr.static_rate = qpcb->max_static_rate;
-	qp_attr->ah_attr.dlid = qpcb->dlid;
-	qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits;
-	qp_attr->ah_attr.port_num = qp_attr->port_num;
-
-	/* primary GRH */
-	qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class;
-	qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit;
-	qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx;
-	qp_attr->ah_attr.grh.flow_label = qpcb->flow_label;
-
-	for (cnt = 0; cnt < 16; cnt++)
-		qp_attr->ah_attr.grh.dgid.raw[cnt] =
-			qpcb->dest_gid.byte[cnt];
-
-	/* alternate AV */
-	qp_attr->alt_ah_attr.sl = qpcb->service_level_al;
-	if (qpcb->send_grh_flag_al) {
-		qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH;
-	}
-
-	qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al;
-	qp_attr->alt_ah_attr.dlid = qpcb->dlid_al;
-	qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al;
-
-	/* alternate GRH */
-	qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al;
-	qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al;
-	qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al;
-	qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al;
-
-	for (cnt = 0; cnt < 16; cnt++)
-		qp_attr->alt_ah_attr.grh.dgid.raw[cnt] =
-			qpcb->dest_gid_al.byte[cnt];
-
-	/* return init attributes given in ehca_create_qp */
-	if (qp_init_attr)
-		*qp_init_attr = my_qp->init_attr;
-
-	if (ehca_debug_level >= 2)
-		ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
-
-query_qp_exit1:
-	ehca_free_fw_ctrlblock(qpcb);
-
-	return ret;
-}
-
-int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
-{
-	struct ehca_qp *my_qp =
-		container_of(ibsrq, struct ehca_qp, ib_srq);
-	struct ehca_shca *shca =
-		container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
-	struct hcp_modify_qp_control_block *mqpcb;
-	u64 update_mask;
-	u64 h_ret;
-	int ret = 0;
-
-	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!mqpcb) {
-		ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
-			 "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
-		return -ENOMEM;
-	}
-
-	update_mask = 0;
-	if (attr_mask & IB_SRQ_LIMIT) {
-		attr_mask &= ~IB_SRQ_LIMIT;
-		update_mask |=
-			EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
-			| EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
-		mqpcb->curr_srq_limit = attr->srq_limit;
-		mqpcb->qp_aff_asyn_ev_log_reg =
-			EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
-	}
-
-	/* by now, all bits in attr_mask should have been cleared */
-	if (attr_mask) {
-		ehca_err(ibsrq->device, "invalid attribute mask bits set  "
-			 "attr_mask=%x", attr_mask);
-		ret = -EINVAL;
-		goto modify_srq_exit0;
-	}
-
-	if (ehca_debug_level >= 2)
-		ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
-
-	h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
-				 NULL, update_mask, mqpcb,
-				 my_qp->galpas.kernel);
-
-	if (h_ret != H_SUCCESS) {
-		ret = ehca2ib_return_code(h_ret);
-		ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli "
-			 "ehca_qp=%p qp_num=%x",
-			 h_ret, my_qp, my_qp->real_qp_num);
-	}
-
-modify_srq_exit0:
-	ehca_free_fw_ctrlblock(mqpcb);
-
-	return ret;
-}
-
-int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
-{
-	struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
-	struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
-					      ib_device);
-	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-	struct hcp_modify_qp_control_block *qpcb;
-	int ret = 0;
-	u64 h_ret;
-
-	qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
-	if (!qpcb) {
-		ehca_err(srq->device, "Out of memory for qpcb "
-			 "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num);
-		return -ENOMEM;
-	}
-
-	h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle,
-				NULL, qpcb, my_qp->galpas.kernel);
-
-	if (h_ret != H_SUCCESS) {
-		ret = ehca2ib_return_code(h_ret);
-		ehca_err(srq->device, "hipz_h_query_qp() failed "
-			 "ehca_qp=%p qp_num=%x h_ret=%lli",
-			 my_qp, my_qp->real_qp_num, h_ret);
-		goto query_srq_exit1;
-	}
-
-	srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
-	srq_attr->max_sge = 3;
-	srq_attr->srq_limit = qpcb->curr_srq_limit;
-
-	if (ehca_debug_level >= 2)
-		ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
-
-query_srq_exit1:
-	ehca_free_fw_ctrlblock(qpcb);
-
-	return ret;
-}
-
-static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
-			       struct ib_uobject *uobject)
-{
-	struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
-	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
-					     ib_pd);
-	struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
-	u32 qp_num = my_qp->real_qp_num;
-	int ret;
-	u64 h_ret;
-	u8 port_num;
-	int is_user = 0;
-	enum ib_qp_type	qp_type;
-	unsigned long flags;
-
-	if (uobject) {
-		is_user = 1;
-		if (my_qp->mm_count_galpa ||
-		    my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
-			ehca_err(dev, "Resources still referenced in "
-				 "user space qp_num=%x", qp_num);
-			return -EINVAL;
-		}
-	}
-
-	if (my_qp->send_cq) {
-		ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
-		if (ret) {
-			ehca_err(dev, "Couldn't unassign qp from "
-				 "send_cq ret=%i qp_num=%x cq_num=%x", ret,
-				 qp_num, my_qp->send_cq->cq_number);
-			return ret;
-		}
-	}
-
-	write_lock_irqsave(&ehca_qp_idr_lock, flags);
-	idr_remove(&ehca_qp_idr, my_qp->token);
-	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
-	/*
-	 * SRQs will never get into an error list and do not have a recv_cq,
-	 * so we need to skip them here.
-	 */
-	if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
-		del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
-
-	if (HAS_SQ(my_qp) && !is_user)
-		del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
-
-	/* now wait until all pending events have completed */
-	wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
-
-	h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
-	if (h_ret != H_SUCCESS) {
-		ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli "
-			 "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
-		return ehca2ib_return_code(h_ret);
-	}
-
-	port_num = my_qp->init_attr.port_num;
-	qp_type  = my_qp->init_attr.qp_type;
-
-	if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
-		spin_lock_irqsave(&sport->mod_sqp_lock, flags);
-		kfree(my_qp->mod_qp_parm);
-		my_qp->mod_qp_parm = NULL;
-		shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL;
-		spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-	}
-
-	/* no support for IB_QPT_SMI yet */
-	if (qp_type == IB_QPT_GSI) {
-		struct ib_event event;
-		ehca_info(dev, "device %s: port %x is inactive.",
-				shca->ib_device.name, port_num);
-		event.device = &shca->ib_device;
-		event.event = IB_EVENT_PORT_ERR;
-		event.element.port_num = port_num;
-		shca->sport[port_num - 1].port_state = IB_PORT_DOWN;
-		ib_dispatch_event(&event);
-	}
-
-	if (HAS_RQ(my_qp)) {
-		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
-		if (!is_user)
-			vfree(my_qp->rq_map.map);
-	}
-	if (HAS_SQ(my_qp)) {
-		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
-		if (!is_user)
-			vfree(my_qp->sq_map.map);
-	}
-	kmem_cache_free(qp_cache, my_qp);
-	atomic_dec(&shca->num_qps);
-	return 0;
-}
-
-int ehca_destroy_qp(struct ib_qp *qp)
-{
-	return internal_destroy_qp(qp->device,
-				   container_of(qp, struct ehca_qp, ib_qp),
-				   qp->uobject);
-}
-
-int ehca_destroy_srq(struct ib_srq *srq)
-{
-	return internal_destroy_qp(srq->device,
-				   container_of(srq, struct ehca_qp, ib_srq),
-				   srq->uobject);
-}
-
-int ehca_init_qp_cache(void)
-{
-	qp_cache = kmem_cache_create("ehca_cache_qp",
-				     sizeof(struct ehca_qp), 0,
-				     SLAB_HWCACHE_ALIGN,
-				     NULL);
-	if (!qp_cache)
-		return -ENOMEM;
-	return 0;
-}
-
-void ehca_cleanup_qp_cache(void)
-{
-	kmem_cache_destroy(qp_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c
deleted file mode 100644
index 11813b880e16..000000000000
--- a/drivers/staging/rdma/ehca/ehca_reqs.c
+++ /dev/null
@@ -1,953 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  post_send/recv, poll_cq, req_notify
- *
- *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-#include "hipz_fns.h"
-
-/* in RC traffic, insert an empty RDMA READ every this many packets */
-#define ACK_CIRC_THRESHOLD 2000000
-
-static u64 replace_wr_id(u64 wr_id, u16 idx)
-{
-	u64 ret;
-
-	ret = wr_id & ~QMAP_IDX_MASK;
-	ret |= idx & QMAP_IDX_MASK;
-
-	return ret;
-}
-
-static u16 get_app_wr_id(u64 wr_id)
-{
-	return wr_id & QMAP_IDX_MASK;
-}
-
-static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
-				  struct ehca_wqe *wqe_p,
-				  struct ib_recv_wr *recv_wr,
-				  u32 rq_map_idx)
-{
-	u8 cnt_ds;
-	if (unlikely((recv_wr->num_sge < 0) ||
-		     (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
-		ehca_gen_err("Invalid number of WQE SGE. "
-			 "num_sqe=%x max_nr_of_sg=%x",
-			 recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
-		return -EINVAL; /* invalid SG list length */
-	}
-
-	/* clear wqe header until sglist */
-	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
-
-	wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
-	wqe_p->nr_of_data_seg = recv_wr->num_sge;
-
-	for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
-		wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
-			recv_wr->sg_list[cnt_ds].addr;
-		wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
-			recv_wr->sg_list[cnt_ds].lkey;
-		wqe_p->u.all_rcv.sg_list[cnt_ds].length =
-			recv_wr->sg_list[cnt_ds].length;
-	}
-
-	if (ehca_debug_level >= 3) {
-		ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
-			     ipz_rqueue);
-		ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
-	}
-
-	return 0;
-}
-
-#if defined(DEBUG_GSI_SEND_WR)
-
-/* need ib_mad struct */
-#include <rdma/ib_mad.h>
-
-static void trace_ud_wr(const struct ib_ud_wr *ud_wr)
-{
-	int idx;
-	int j;
-	while (ud_wr) {
-		struct ib_mad_hdr *mad_hdr = ud_wrmad_hdr;
-		struct ib_sge *sge = ud_wr->wr.sg_list;
-		ehca_gen_dbg("ud_wr#%x wr_id=%lx num_sge=%x "
-			     "send_flags=%x opcode=%x", idx, ud_wr->wr.wr_id,
-			     ud_wr->wr.num_sge, ud_wr->wr.send_flags,
-			     ud_wr->.wr.opcode);
-		if (mad_hdr) {
-			ehca_gen_dbg("ud_wr#%x mad_hdr base_version=%x "
-				     "mgmt_class=%x class_version=%x method=%x "
-				     "status=%x class_specific=%x tid=%lx "
-				     "attr_id=%x resv=%x attr_mod=%x",
-				     idx, mad_hdr->base_version,
-				     mad_hdr->mgmt_class,
-				     mad_hdr->class_version, mad_hdr->method,
-				     mad_hdr->status, mad_hdr->class_specific,
-				     mad_hdr->tid, mad_hdr->attr_id,
-				     mad_hdr->resv,
-				     mad_hdr->attr_mod);
-		}
-		for (j = 0; j < ud_wr->wr.num_sge; j++) {
-			u8 *data = __va(sge->addr);
-			ehca_gen_dbg("ud_wr#%x sge#%x addr=%p length=%x "
-				     "lkey=%x",
-				     idx, j, data, sge->length, sge->lkey);
-			/* assume length is n*16 */
-			ehca_dmp(data, sge->length, "ud_wr#%x sge#%x",
-				 idx, j);
-			sge++;
-		} /* eof for j */
-		idx++;
-		ud_wr = ud_wr(ud_wr->wr.next);
-	} /* eof while ud_wr */
-}
-
-#endif /* DEBUG_GSI_SEND_WR */
-
-static inline int ehca_write_swqe(struct ehca_qp *qp,
-				  struct ehca_wqe *wqe_p,
-				  struct ib_send_wr *send_wr,
-				  u32 sq_map_idx,
-				  int hidden)
-{
-	u32 idx;
-	u64 dma_length;
-	struct ehca_av *my_av;
-	u32 remote_qkey;
-	struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
-
-	if (unlikely((send_wr->num_sge < 0) ||
-		     (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
-		ehca_gen_err("Invalid number of WQE SGE. "
-			 "num_sqe=%x max_nr_of_sg=%x",
-			 send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
-		return -EINVAL; /* invalid SG list length */
-	}
-
-	/* clear wqe header until sglist */
-	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
-
-	wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
-
-	qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
-	qmap_entry->reported = 0;
-	qmap_entry->cqe_req = 0;
-
-	switch (send_wr->opcode) {
-	case IB_WR_SEND:
-	case IB_WR_SEND_WITH_IMM:
-		wqe_p->optype = WQE_OPTYPE_SEND;
-		break;
-	case IB_WR_RDMA_WRITE:
-	case IB_WR_RDMA_WRITE_WITH_IMM:
-		wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
-		break;
-	case IB_WR_RDMA_READ:
-		wqe_p->optype = WQE_OPTYPE_RDMAREAD;
-		break;
-	default:
-		ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
-		return -EINVAL; /* invalid opcode */
-	}
-
-	wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
-
-	wqe_p->wr_flag = 0;
-
-	if ((send_wr->send_flags & IB_SEND_SIGNALED ||
-	    qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
-	    && !hidden) {
-		wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
-		qmap_entry->cqe_req = 1;
-	}
-
-	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
-	    send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
-		/* this might not work as long as HW does not support it */
-		wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
-		wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
-	}
-
-	wqe_p->nr_of_data_seg = send_wr->num_sge;
-
-	switch (qp->qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		/* no break is intential here */
-	case IB_QPT_UD:
-		/* IB 1.2 spec C10-15 compliance */
-		remote_qkey = ud_wr(send_wr)->remote_qkey;
-		if (remote_qkey & 0x80000000)
-			remote_qkey = qp->qkey;
-
-		wqe_p->destination_qp_number = ud_wr(send_wr)->remote_qpn << 8;
-		wqe_p->local_ee_context_qkey = remote_qkey;
-		if (unlikely(!ud_wr(send_wr)->ah)) {
-			ehca_gen_err("ud_wr(send_wr) is NULL. qp=%p", qp);
-			return -EINVAL;
-		}
-		if (unlikely(ud_wr(send_wr)->remote_qpn == 0)) {
-			ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
-			return -EINVAL;
-		}
-		my_av = container_of(ud_wr(send_wr)->ah, struct ehca_av, ib_ah);
-		wqe_p->u.ud_av.ud_av = my_av->av;
-
-		/*
-		 * omitted check of IB_SEND_INLINE
-		 * since HW does not support it
-		 */
-		for (idx = 0; idx < send_wr->num_sge; idx++) {
-			wqe_p->u.ud_av.sg_list[idx].vaddr =
-				send_wr->sg_list[idx].addr;
-			wqe_p->u.ud_av.sg_list[idx].lkey =
-				send_wr->sg_list[idx].lkey;
-			wqe_p->u.ud_av.sg_list[idx].length =
-				send_wr->sg_list[idx].length;
-		} /* eof for idx */
-		if (qp->qp_type == IB_QPT_SMI ||
-		    qp->qp_type == IB_QPT_GSI)
-			wqe_p->u.ud_av.ud_av.pmtu = 1;
-		if (qp->qp_type == IB_QPT_GSI) {
-			wqe_p->pkeyi = ud_wr(send_wr)->pkey_index;
-#ifdef DEBUG_GSI_SEND_WR
-			trace_ud_wr(ud_wr(send_wr));
-#endif /* DEBUG_GSI_SEND_WR */
-		}
-		break;
-
-	case IB_QPT_UC:
-		if (send_wr->send_flags & IB_SEND_FENCE)
-			wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
-		/* no break is intentional here */
-	case IB_QPT_RC:
-		/* TODO: atomic not implemented */
-		wqe_p->u.nud.remote_virtual_address =
-			rdma_wr(send_wr)->remote_addr;
-		wqe_p->u.nud.rkey = rdma_wr(send_wr)->rkey;
-
-		/*
-		 * omitted checking of IB_SEND_INLINE
-		 * since HW does not support it
-		 */
-		dma_length = 0;
-		for (idx = 0; idx < send_wr->num_sge; idx++) {
-			wqe_p->u.nud.sg_list[idx].vaddr =
-				send_wr->sg_list[idx].addr;
-			wqe_p->u.nud.sg_list[idx].lkey =
-				send_wr->sg_list[idx].lkey;
-			wqe_p->u.nud.sg_list[idx].length =
-				send_wr->sg_list[idx].length;
-			dma_length += send_wr->sg_list[idx].length;
-		} /* eof idx */
-		wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
-
-		/* unsolicited ack circumvention */
-		if (send_wr->opcode == IB_WR_RDMA_READ) {
-			/* on RDMA read, switch on and reset counters */
-			qp->message_count = qp->packet_count = 0;
-			qp->unsol_ack_circ = 1;
-		} else
-			/* else estimate #packets */
-			qp->packet_count += (dma_length >> qp->mtu_shift) + 1;
-
-		break;
-
-	default:
-		ehca_gen_err("Invalid qptype=%x", qp->qp_type);
-		return -EINVAL;
-	}
-
-	if (ehca_debug_level >= 3) {
-		ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
-		ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
-	}
-	return 0;
-}
-
-/* map_ib_wc_status converts raw cqe_status to ib_wc_status */
-static inline void map_ib_wc_status(u32 cqe_status,
-				    enum ib_wc_status *wc_status)
-{
-	if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
-		switch (cqe_status & 0x3F) {
-		case 0x01:
-		case 0x21:
-			*wc_status = IB_WC_LOC_LEN_ERR;
-			break;
-		case 0x02:
-		case 0x22:
-			*wc_status = IB_WC_LOC_QP_OP_ERR;
-			break;
-		case 0x03:
-		case 0x23:
-			*wc_status = IB_WC_LOC_EEC_OP_ERR;
-			break;
-		case 0x04:
-		case 0x24:
-			*wc_status = IB_WC_LOC_PROT_ERR;
-			break;
-		case 0x05:
-		case 0x25:
-			*wc_status = IB_WC_WR_FLUSH_ERR;
-			break;
-		case 0x06:
-			*wc_status = IB_WC_MW_BIND_ERR;
-			break;
-		case 0x07: /* remote error - look into bits 20:24 */
-			switch ((cqe_status
-				 & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
-			case 0x0:
-				/*
-				 * PSN Sequence Error!
-				 * couldn't find a matching status!
-				 */
-				*wc_status = IB_WC_GENERAL_ERR;
-				break;
-			case 0x1:
-				*wc_status = IB_WC_REM_INV_REQ_ERR;
-				break;
-			case 0x2:
-				*wc_status = IB_WC_REM_ACCESS_ERR;
-				break;
-			case 0x3:
-				*wc_status = IB_WC_REM_OP_ERR;
-				break;
-			case 0x4:
-				*wc_status = IB_WC_REM_INV_RD_REQ_ERR;
-				break;
-			}
-			break;
-		case 0x08:
-			*wc_status = IB_WC_RETRY_EXC_ERR;
-			break;
-		case 0x09:
-			*wc_status = IB_WC_RNR_RETRY_EXC_ERR;
-			break;
-		case 0x0A:
-		case 0x2D:
-			*wc_status = IB_WC_REM_ABORT_ERR;
-			break;
-		case 0x0B:
-		case 0x2E:
-			*wc_status = IB_WC_INV_EECN_ERR;
-			break;
-		case 0x0C:
-		case 0x2F:
-			*wc_status = IB_WC_INV_EEC_STATE_ERR;
-			break;
-		case 0x0D:
-			*wc_status = IB_WC_BAD_RESP_ERR;
-			break;
-		case 0x10:
-			/* WQE purged */
-			*wc_status = IB_WC_WR_FLUSH_ERR;
-			break;
-		default:
-			*wc_status = IB_WC_FATAL_ERR;
-
-		}
-	} else
-		*wc_status = IB_WC_SUCCESS;
-}
-
-static inline int post_one_send(struct ehca_qp *my_qp,
-			 struct ib_send_wr *cur_send_wr,
-			 int hidden)
-{
-	struct ehca_wqe *wqe_p;
-	int ret;
-	u32 sq_map_idx;
-	u64 start_offset = my_qp->ipz_squeue.current_q_offset;
-
-	/* get pointer next to free WQE */
-	wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
-	if (unlikely(!wqe_p)) {
-		/* too many posted work requests: queue overflow */
-		ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
-			 "qp_num=%x", my_qp->ib_qp.qp_num);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Get the index of the WQE in the send queue. The same index is used
-	 * for writing into the sq_map.
-	 */
-	sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size;
-
-	/* write a SEND WQE into the QUEUE */
-	ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden);
-	/*
-	 * if something failed,
-	 * reset the free entry pointer to the start value
-	 */
-	if (unlikely(ret)) {
-		my_qp->ipz_squeue.current_q_offset = start_offset;
-		ehca_err(my_qp->ib_qp.device, "Could not write WQE "
-			 "qp_num=%x", my_qp->ib_qp.qp_num);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-int ehca_post_send(struct ib_qp *qp,
-		   struct ib_send_wr *send_wr,
-		   struct ib_send_wr **bad_send_wr)
-{
-	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-	int wqe_cnt = 0;
-	int ret = 0;
-	unsigned long flags;
-
-	/* Reject WR if QP is in RESET, INIT or RTR state */
-	if (unlikely(my_qp->state < IB_QPS_RTS)) {
-		ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
-			 my_qp->state, qp->qp_num);
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/* LOCK the QUEUE */
-	spin_lock_irqsave(&my_qp->spinlock_s, flags);
-
-	/* Send an empty extra RDMA read if:
-	 *  1) there has been an RDMA read on this connection before
-	 *  2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
-	 *  3) we can be sure that any previous extra RDMA read has been
-	 *     processed so we don't overflow the SQ
-	 */
-	if (unlikely(my_qp->unsol_ack_circ &&
-		     my_qp->packet_count > ACK_CIRC_THRESHOLD &&
-		     my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
-		/* insert an empty RDMA READ to fix up the remote QP state */
-		struct ib_send_wr circ_wr;
-		memset(&circ_wr, 0, sizeof(circ_wr));
-		circ_wr.opcode = IB_WR_RDMA_READ;
-		post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
-		wqe_cnt++;
-		ehca_dbg(qp->device, "posted circ wr  qp_num=%x", qp->qp_num);
-		my_qp->message_count = my_qp->packet_count = 0;
-	}
-
-	/* loop processes list of send reqs */
-	while (send_wr) {
-		ret = post_one_send(my_qp, send_wr, 0);
-		if (unlikely(ret)) {
-			goto post_send_exit0;
-		}
-		wqe_cnt++;
-		send_wr = send_wr->next;
-	}
-
-post_send_exit0:
-	iosync(); /* serialize GAL register access */
-	hipz_update_sqa(my_qp, wqe_cnt);
-	if (unlikely(ret || ehca_debug_level >= 2))
-		ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
-			 my_qp, qp->qp_num, wqe_cnt, ret);
-	my_qp->message_count += wqe_cnt;
-	spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
-
-out:
-	if (ret)
-		*bad_send_wr = send_wr;
-	return ret;
-}
-
-static int internal_post_recv(struct ehca_qp *my_qp,
-			      struct ib_device *dev,
-			      struct ib_recv_wr *recv_wr,
-			      struct ib_recv_wr **bad_recv_wr)
-{
-	struct ehca_wqe *wqe_p;
-	int wqe_cnt = 0;
-	int ret = 0;
-	u32 rq_map_idx;
-	unsigned long flags;
-	struct ehca_qmap_entry *qmap_entry;
-
-	if (unlikely(!HAS_RQ(my_qp))) {
-		ehca_err(dev, "QP has no RQ  ehca_qp=%p qp_num=%x ext_type=%d",
-			 my_qp, my_qp->real_qp_num, my_qp->ext_type);
-		ret = -ENODEV;
-		goto out;
-	}
-
-	/* LOCK the QUEUE */
-	spin_lock_irqsave(&my_qp->spinlock_r, flags);
-
-	/* loop processes list of recv reqs */
-	while (recv_wr) {
-		u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
-		/* get pointer next to free WQE */
-		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
-		if (unlikely(!wqe_p)) {
-			/* too many posted work requests: queue overflow */
-			ret = -ENOMEM;
-			ehca_err(dev, "Too many posted WQEs "
-				"qp_num=%x", my_qp->real_qp_num);
-			goto post_recv_exit0;
-		}
-		/*
-		 * Get the index of the WQE in the recv queue. The same index
-		 * is used for writing into the rq_map.
-		 */
-		rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
-
-		/* write a RECV WQE into the QUEUE */
-		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
-				rq_map_idx);
-		/*
-		 * if something failed,
-		 * reset the free entry pointer to the start value
-		 */
-		if (unlikely(ret)) {
-			my_qp->ipz_rqueue.current_q_offset = start_offset;
-			ret = -EINVAL;
-			ehca_err(dev, "Could not write WQE "
-				"qp_num=%x", my_qp->real_qp_num);
-			goto post_recv_exit0;
-		}
-
-		qmap_entry = &my_qp->rq_map.map[rq_map_idx];
-		qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
-		qmap_entry->reported = 0;
-		qmap_entry->cqe_req = 1;
-
-		wqe_cnt++;
-		recv_wr = recv_wr->next;
-	} /* eof for recv_wr */
-
-post_recv_exit0:
-	iosync(); /* serialize GAL register access */
-	hipz_update_rqa(my_qp, wqe_cnt);
-	if (unlikely(ret || ehca_debug_level >= 2))
-	    ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
-		     my_qp, my_qp->real_qp_num, wqe_cnt, ret);
-	spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
-
-out:
-	if (ret)
-		*bad_recv_wr = recv_wr;
-
-	return ret;
-}
-
-int ehca_post_recv(struct ib_qp *qp,
-		   struct ib_recv_wr *recv_wr,
-		   struct ib_recv_wr **bad_recv_wr)
-{
-	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-
-	/* Reject WR if QP is in RESET state */
-	if (unlikely(my_qp->state == IB_QPS_RESET)) {
-		ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
-			 my_qp->state, qp->qp_num);
-		*bad_recv_wr = recv_wr;
-		return -EINVAL;
-	}
-
-	return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);
-}
-
-int ehca_post_srq_recv(struct ib_srq *srq,
-		       struct ib_recv_wr *recv_wr,
-		       struct ib_recv_wr **bad_recv_wr)
-{
-	return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
-				  srq->device, recv_wr, bad_recv_wr);
-}
-
-/*
- * ib_wc_opcode table converts ehca wc opcode to ib
- * Since we use zero to indicate invalid opcode, the actual ib opcode must
- * be decremented!!!
- */
-static const u8 ib_wc_opcode[255] = {
-	[0x01] = IB_WC_RECV+1,
-	[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
-	[0x08] = IB_WC_FETCH_ADD+1,
-	[0x10] = IB_WC_COMP_SWAP+1,
-	[0x20] = IB_WC_RDMA_WRITE+1,
-	[0x40] = IB_WC_RDMA_READ+1,
-	[0x80] = IB_WC_SEND+1
-};
-
-/* internal function to poll one entry of cq */
-static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
-{
-	int ret = 0, qmap_tail_idx;
-	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-	struct ehca_cqe *cqe;
-	struct ehca_qp *my_qp;
-	struct ehca_qmap_entry *qmap_entry;
-	struct ehca_queue_map *qmap;
-	int cqe_count = 0, is_error;
-
-repoll:
-	cqe = (struct ehca_cqe *)
-		ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
-	if (!cqe) {
-		ret = -EAGAIN;
-		if (ehca_debug_level >= 3)
-			ehca_dbg(cq->device, "Completion queue is empty  "
-				 "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number);
-		goto poll_cq_one_exit0;
-	}
-
-	/* prevents loads being reordered across this point */
-	rmb();
-
-	cqe_count++;
-	if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
-		struct ehca_qp *qp;
-		int purgeflag;
-		unsigned long flags;
-
-		qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);
-		if (!qp) {
-			ehca_err(cq->device, "cq_num=%x qp_num=%x "
-				 "could not find qp -> ignore cqe",
-				 my_cq->cq_number, cqe->local_qp_number);
-			ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
-				 my_cq->cq_number, cqe->local_qp_number);
-			/* ignore this purged cqe */
-			goto repoll;
-		}
-		spin_lock_irqsave(&qp->spinlock_s, flags);
-		purgeflag = qp->sqerr_purgeflag;
-		spin_unlock_irqrestore(&qp->spinlock_s, flags);
-
-		if (purgeflag) {
-			ehca_dbg(cq->device,
-				 "Got CQE with purged bit qp_num=%x src_qp=%x",
-				 cqe->local_qp_number, cqe->remote_qp_number);
-			if (ehca_debug_level >= 2)
-				ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
-					 cqe->local_qp_number,
-					 cqe->remote_qp_number);
-			/*
-			 * ignore this to avoid double cqes of bad wqe
-			 * that caused sqe and turn off purge flag
-			 */
-			qp->sqerr_purgeflag = 0;
-			goto repoll;
-		}
-	}
-
-	is_error = cqe->status & WC_STATUS_ERROR_BIT;
-
-	/* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */
-	if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) {
-		ehca_dbg(cq->device,
-			 "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----",
-			 is_error ? "ERROR " : "", my_cq, my_cq->cq_number);
-		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
-			 my_cq, my_cq->cq_number);
-		ehca_dbg(cq->device,
-			 "ehca_cq=%p cq_num=%x -------------------------",
-			 my_cq, my_cq->cq_number);
-	}
-
-	read_lock(&ehca_qp_idr_lock);
-	my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
-	read_unlock(&ehca_qp_idr_lock);
-	if (!my_qp)
-		goto repoll;
-	wc->qp = &my_qp->ib_qp;
-
-	qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
-	if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
-		/* We got a send completion. */
-		qmap = &my_qp->sq_map;
-	else
-		/* We got a receive completion. */
-		qmap = &my_qp->rq_map;
-
-	/* advance the tail pointer */
-	qmap->tail = qmap_tail_idx;
-
-	if (is_error) {
-		/*
-		 * set left_to_poll to 0 because in error state, we will not
-		 * get any additional CQEs
-		 */
-		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
-							my_qp->sq_map.entries);
-		my_qp->sq_map.left_to_poll = 0;
-		ehca_add_to_err_list(my_qp, 1);
-
-		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
-							my_qp->rq_map.entries);
-		my_qp->rq_map.left_to_poll = 0;
-		if (HAS_RQ(my_qp))
-			ehca_add_to_err_list(my_qp, 0);
-	}
-
-	qmap_entry = &qmap->map[qmap_tail_idx];
-	if (qmap_entry->reported) {
-		ehca_warn(cq->device, "Double cqe on qp_num=%#x",
-				my_qp->real_qp_num);
-		/* found a double cqe, discard it and read next one */
-		goto repoll;
-	}
-
-	wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
-	qmap_entry->reported = 1;
-
-	/* if left_to_poll is decremented to 0, add the QP to the error list */
-	if (qmap->left_to_poll > 0) {
-		qmap->left_to_poll--;
-		if ((my_qp->sq_map.left_to_poll == 0) &&
-				(my_qp->rq_map.left_to_poll == 0)) {
-			ehca_add_to_err_list(my_qp, 1);
-			if (HAS_RQ(my_qp))
-				ehca_add_to_err_list(my_qp, 0);
-		}
-	}
-
-	/* eval ib_wc_opcode */
-	wc->opcode = ib_wc_opcode[cqe->optype]-1;
-	if (unlikely(wc->opcode == -1)) {
-		ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
-			 "ehca_cq=%p cq_num=%x",
-			 cqe->optype, cqe->status, my_cq, my_cq->cq_number);
-		/* dump cqe for other infos */
-		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
-			 my_cq, my_cq->cq_number);
-		/* update also queue adder to throw away this entry!!! */
-		goto repoll;
-	}
-
-	/* eval ib_wc_status */
-	if (unlikely(is_error)) {
-		/* complete with errors */
-		map_ib_wc_status(cqe->status, &wc->status);
-		wc->vendor_err = wc->status;
-	} else
-		wc->status = IB_WC_SUCCESS;
-
-	wc->byte_len = cqe->nr_bytes_transferred;
-	wc->pkey_index = cqe->pkey_index;
-	wc->slid = cqe->rlid;
-	wc->dlid_path_bits = cqe->dlid;
-	wc->src_qp = cqe->remote_qp_number;
-	/*
-	 * HW has "Immed data present" and "GRH present" in bits 6 and 5.
-	 * SW defines those in bits 1 and 0, so we can just shift and mask.
-	 */
-	wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
-	wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
-	wc->sl = cqe->service_level;
-
-poll_cq_one_exit0:
-	if (cqe_count > 0)
-		hipz_update_feca(my_cq, cqe_count);
-
-	return ret;
-}
-
-static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
-			       struct ib_wc *wc, int num_entries,
-			       struct ipz_queue *ipz_queue, int on_sq)
-{
-	int nr = 0;
-	struct ehca_wqe *wqe;
-	u64 offset;
-	struct ehca_queue_map *qmap;
-	struct ehca_qmap_entry *qmap_entry;
-
-	if (on_sq)
-		qmap = &my_qp->sq_map;
-	else
-		qmap = &my_qp->rq_map;
-
-	qmap_entry = &qmap->map[qmap->next_wqe_idx];
-
-	while ((nr < num_entries) && (qmap_entry->reported == 0)) {
-		/* generate flush CQE */
-
-		memset(wc, 0, sizeof(*wc));
-
-		offset = qmap->next_wqe_idx * ipz_queue->qe_size;
-		wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
-		if (!wqe) {
-			ehca_err(cq->device, "Invalid wqe offset=%#llx on "
-				 "qp_num=%#x", offset, my_qp->real_qp_num);
-			return nr;
-		}
-
-		wc->wr_id = replace_wr_id(wqe->work_request_id,
-					  qmap_entry->app_wr_id);
-
-		if (on_sq) {
-			switch (wqe->optype) {
-			case WQE_OPTYPE_SEND:
-				wc->opcode = IB_WC_SEND;
-				break;
-			case WQE_OPTYPE_RDMAWRITE:
-				wc->opcode = IB_WC_RDMA_WRITE;
-				break;
-			case WQE_OPTYPE_RDMAREAD:
-				wc->opcode = IB_WC_RDMA_READ;
-				break;
-			default:
-				ehca_err(cq->device, "Invalid optype=%x",
-						wqe->optype);
-				return nr;
-			}
-		} else
-			wc->opcode = IB_WC_RECV;
-
-		if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
-			wc->ex.imm_data = wqe->immediate_data;
-			wc->wc_flags |= IB_WC_WITH_IMM;
-		}
-
-		wc->status = IB_WC_WR_FLUSH_ERR;
-
-		wc->qp = &my_qp->ib_qp;
-
-		/* mark as reported and advance next_wqe pointer */
-		qmap_entry->reported = 1;
-		qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
-						qmap->entries);
-		qmap_entry = &qmap->map[qmap->next_wqe_idx];
-
-		wc++; nr++;
-	}
-
-	return nr;
-
-}
-
-int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
-{
-	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-	int nr;
-	struct ehca_qp *err_qp;
-	struct ib_wc *current_wc = wc;
-	int ret = 0;
-	unsigned long flags;
-	int entries_left = num_entries;
-
-	if (num_entries < 1) {
-		ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
-			 "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
-		ret = -EINVAL;
-		goto poll_cq_exit0;
-	}
-
-	spin_lock_irqsave(&my_cq->spinlock, flags);
-
-	/* generate flush cqes for send queues */
-	list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
-		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
-				&err_qp->ipz_squeue, 1);
-		entries_left -= nr;
-		current_wc += nr;
-
-		if (entries_left == 0)
-			break;
-	}
-
-	/* generate flush cqes for receive queues */
-	list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
-		nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
-				&err_qp->ipz_rqueue, 0);
-		entries_left -= nr;
-		current_wc += nr;
-
-		if (entries_left == 0)
-			break;
-	}
-
-	for (nr = 0; nr < entries_left; nr++) {
-		ret = ehca_poll_cq_one(cq, current_wc);
-		if (ret)
-			break;
-		current_wc++;
-	} /* eof for nr */
-	entries_left -= nr;
-
-	spin_unlock_irqrestore(&my_cq->spinlock, flags);
-	if (ret == -EAGAIN  || !ret)
-		ret = num_entries - entries_left;
-
-poll_cq_exit0:
-	return ret;
-}
-
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
-{
-	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-	int ret = 0;
-
-	switch (notify_flags & IB_CQ_SOLICITED_MASK) {
-	case IB_CQ_SOLICITED:
-		hipz_set_cqx_n0(my_cq, 1);
-		break;
-	case IB_CQ_NEXT_COMP:
-		hipz_set_cqx_n1(my_cq, 1);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
-		unsigned long spl_flags;
-		spin_lock_irqsave(&my_cq->spinlock, spl_flags);
-		ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
-		spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
-	}
-
-	return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_sqp.c b/drivers/staging/rdma/ehca/ehca_sqp.c
deleted file mode 100644
index 376b031c2c7f..000000000000
--- a/drivers/staging/rdma/ehca/ehca_sqp.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  SQP functions
- *
- *  Authors: Khadija Souissi <souissi@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <rdma/ib_mad.h>
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "ehca_iverbs.h"
-#include "hcp_if.h"
-
-#define IB_MAD_STATUS_REDIRECT		cpu_to_be16(0x0002)
-#define IB_MAD_STATUS_UNSUP_VERSION	cpu_to_be16(0x0004)
-#define IB_MAD_STATUS_UNSUP_METHOD	cpu_to_be16(0x0008)
-
-#define IB_PMA_CLASS_PORT_INFO		cpu_to_be16(0x0001)
-
-/**
- * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue
- * pair is created successfully, the corresponding port gets active.
- *
- * Define Special Queue pair 0 (SMI QP) is still not supported.
- *
- * @qp_init_attr: Queue pair init attributes with port and queue pair type
- */
-
-u64 ehca_define_sqp(struct ehca_shca *shca,
-		    struct ehca_qp *ehca_qp,
-		    struct ib_qp_init_attr *qp_init_attr)
-{
-	u32 pma_qp_nr, bma_qp_nr;
-	u64 ret;
-	u8 port = qp_init_attr->port_num;
-	int counter;
-
-	shca->sport[port - 1].port_state = IB_PORT_DOWN;
-
-	switch (qp_init_attr->qp_type) {
-	case IB_QPT_SMI:
-		/* function not supported yet */
-		break;
-	case IB_QPT_GSI:
-		ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
-					 ehca_qp->ipz_qp_handle,
-					 ehca_qp->galpas.kernel,
-					 (u32) qp_init_attr->port_num,
-					 &pma_qp_nr, &bma_qp_nr);
-
-		if (ret != H_SUCCESS) {
-			ehca_err(&shca->ib_device,
-				 "Can't define AQP1 for port %x. h_ret=%lli",
-				 port, ret);
-			return ret;
-		}
-		shca->sport[port - 1].pma_qp_nr = pma_qp_nr;
-		ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x",
-			 port, pma_qp_nr);
-		break;
-	default:
-		ehca_err(&shca->ib_device, "invalid qp_type=%x",
-			 qp_init_attr->qp_type);
-		return H_PARAMETER;
-	}
-
-	if (ehca_nr_ports < 0) /* autodetect mode */
-		return H_SUCCESS;
-
-	for (counter = 0;
-	     shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
-		     counter < ehca_port_act_time;
-	     counter++) {
-		ehca_dbg(&shca->ib_device, "... wait until port %x is active",
-			 port);
-		msleep_interruptible(1000);
-	}
-
-	if (counter == ehca_port_act_time) {
-		ehca_err(&shca->ib_device, "Port %x is not active.", port);
-		return H_HARDWARE;
-	}
-
-	return H_SUCCESS;
-}
-
-struct ib_perf {
-	struct ib_mad_hdr mad_hdr;
-	u8 reserved[40];
-	u8 data[192];
-} __attribute__ ((packed));
-
-/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */
-struct tcslfl {
-	u32 tc:8;
-	u32 sl:4;
-	u32 fl:20;
-} __attribute__ ((packed));
-
-/* IP Version/TC/FL packed into 32 bits, as in GRH */
-struct vertcfl {
-	u32 ver:4;
-	u32 tc:8;
-	u32 fl:20;
-} __attribute__ ((packed));
-
-static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
-			     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-			     const struct ib_mad *in_mad, struct ib_mad *out_mad)
-{
-	const struct ib_perf *in_perf = (const struct ib_perf *)in_mad;
-	struct ib_perf *out_perf = (struct ib_perf *)out_mad;
-	struct ib_class_port_info *poi =
-		(struct ib_class_port_info *)out_perf->data;
-	struct tcslfl *tcslfl =
-		(struct tcslfl *)&poi->redirect_tcslfl;
-	struct ehca_shca *shca =
-		container_of(ibdev, struct ehca_shca, ib_device);
-	struct ehca_sport *sport = &shca->sport[port_num - 1];
-
-	ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method);
-
-	*out_mad = *in_mad;
-
-	if (in_perf->mad_hdr.class_version != 1) {
-		ehca_warn(ibdev, "Unsupported class_version=%x",
-			  in_perf->mad_hdr.class_version);
-		out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION;
-		goto perf_reply;
-	}
-
-	switch (in_perf->mad_hdr.method) {
-	case IB_MGMT_METHOD_GET:
-	case IB_MGMT_METHOD_SET:
-		/* set class port info for redirection */
-		out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO;
-		out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT;
-		memset(poi, 0, sizeof(*poi));
-		poi->base_version = 1;
-		poi->class_version = 1;
-		poi->resp_time_value = 18;
-
-		/* copy local routing information from WC where applicable */
-		tcslfl->sl         = in_wc->sl;
-		poi->redirect_lid  =
-			sport->saved_attr.lid | in_wc->dlid_path_bits;
-		poi->redirect_qp   = sport->pma_qp_nr;
-		poi->redirect_qkey = IB_QP1_QKEY;
-
-		ehca_query_pkey(ibdev, port_num, in_wc->pkey_index,
-				&poi->redirect_pkey);
-
-		/* if request was globally routed, copy route info */
-		if (in_grh) {
-			const struct vertcfl *vertcfl =
-				(const struct vertcfl *)&in_grh->version_tclass_flow;
-			memcpy(poi->redirect_gid, in_grh->dgid.raw,
-			       sizeof(poi->redirect_gid));
-			tcslfl->tc        = vertcfl->tc;
-			tcslfl->fl        = vertcfl->fl;
-		} else
-			/* else only fill in default GID */
-			ehca_query_gid(ibdev, port_num, 0,
-				       (union ib_gid *)&poi->redirect_gid);
-
-		ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
-			 sport->saved_attr.lid, sport->pma_qp_nr);
-		break;
-
-	case IB_MGMT_METHOD_GET_RESP:
-		return IB_MAD_RESULT_FAILURE;
-
-	default:
-		out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD;
-		break;
-	}
-
-perf_reply:
-	out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
-
-	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
-int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-		     const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		     const struct ib_mad_hdr *in, size_t in_mad_size,
-		     struct ib_mad_hdr *out, size_t *out_mad_size,
-		     u16 *out_mad_pkey_index)
-{
-	int ret;
-	const struct ib_mad *in_mad = (const struct ib_mad *)in;
-	struct ib_mad *out_mad = (struct ib_mad *)out;
-
-	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-			 *out_mad_size != sizeof(*out_mad)))
-		return IB_MAD_RESULT_FAILURE;
-
-	if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
-		return IB_MAD_RESULT_FAILURE;
-
-	/* accept only pma request */
-	if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
-		return IB_MAD_RESULT_SUCCESS;
-
-	ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
-	ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh,
-				in_mad, out_mad);
-
-	return ret;
-}
diff --git a/drivers/staging/rdma/ehca/ehca_tools.h b/drivers/staging/rdma/ehca/ehca_tools.h
deleted file mode 100644
index d280b12aae64..000000000000
--- a/drivers/staging/rdma/ehca/ehca_tools.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  auxiliary functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Khadija Souissi <souissik@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#ifndef EHCA_TOOLS_H
-#define EHCA_TOOLS_H
-
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/idr.h>
-#include <linux/kthread.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/vmalloc.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/device.h>
-
-#include <linux/atomic.h>
-#include <asm/ibmebus.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/hvcall.h>
-
-extern int ehca_debug_level;
-
-#define ehca_dbg(ib_dev, format, arg...) \
-	do { \
-		if (unlikely(ehca_debug_level)) \
-			dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
-				   "PU%04x EHCA_DBG:%s " format "\n", \
-				   raw_smp_processor_id(), __func__, \
-				   ## arg); \
-	} while (0)
-
-#define ehca_info(ib_dev, format, arg...) \
-	dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
-		 raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_warn(ib_dev, format, arg...) \
-	dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
-		 raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_err(ib_dev, format, arg...) \
-	dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
-		raw_smp_processor_id(), __func__, ## arg)
-
-/* use this one only if no ib_dev available */
-#define ehca_gen_dbg(format, arg...) \
-	do { \
-		if (unlikely(ehca_debug_level)) \
-			printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
-			       raw_smp_processor_id(), __func__, ## arg); \
-	} while (0)
-
-#define ehca_gen_warn(format, arg...) \
-	printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
-	       raw_smp_processor_id(), __func__, ## arg)
-
-#define ehca_gen_err(format, arg...) \
-	printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
-	       raw_smp_processor_id(), __func__, ## arg)
-
-/**
- * ehca_dmp - printk a memory block, whose length is n*8 bytes.
- * Each line has the following layout:
- * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
- */
-#define ehca_dmp(adr, len, format, args...) \
-	do { \
-		unsigned int x; \
-		unsigned int l = (unsigned int)(len); \
-		unsigned char *deb = (unsigned char *)(adr); \
-		for (x = 0; x < l; x += 16) { \
-			printk(KERN_INFO "EHCA_DMP:%s " format \
-			       " adr=%p ofs=%04x %016llx %016llx\n", \
-			       __func__, ##args, deb, x, \
-			       *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
-			deb += 16; \
-		} \
-	} while (0)
-
-/* define a bitmask, little endian version */
-#define EHCA_BMASK(pos, length) (((pos) << 16) + (length))
-
-/* define a bitmask, the ibm way... */
-#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1))
-
-/* internal function, don't use */
-#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
-
-/* internal function, don't use */
-#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff))
-
-/**
- * EHCA_BMASK_SET - return value shifted and masked by mask
- * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
- * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
- * in variable
- */
-#define EHCA_BMASK_SET(mask, value) \
-	((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask))
-
-/**
- * EHCA_BMASK_GET - extract a parameter from value by mask
- */
-#define EHCA_BMASK_GET(mask, value) \
-	(EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))
-
-/* Converts ehca to ib return code */
-int ehca2ib_return_code(u64 ehca_rc);
-
-#endif /* EHCA_TOOLS_H */
diff --git a/drivers/staging/rdma/ehca/ehca_uverbs.c b/drivers/staging/rdma/ehca/ehca_uverbs.c
deleted file mode 100644
index 1a1d5d99fcf9..000000000000
--- a/drivers/staging/rdma/ehca/ehca_uverbs.c
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  userspace support verbs
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_classes.h"
-#include "ehca_iverbs.h"
-#include "ehca_mrmw.h"
-#include "ehca_tools.h"
-#include "hcp_if.h"
-
-struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
-					struct ib_udata *udata)
-{
-	struct ehca_ucontext *my_context;
-
-	my_context = kzalloc(sizeof *my_context, GFP_KERNEL);
-	if (!my_context) {
-		ehca_err(device, "Out of memory device=%p", device);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	return &my_context->ib_ucontext;
-}
-
-int ehca_dealloc_ucontext(struct ib_ucontext *context)
-{
-	kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
-	return 0;
-}
-
-static void ehca_mm_open(struct vm_area_struct *vma)
-{
-	u32 *count = (u32 *)vma->vm_private_data;
-	if (!count) {
-		ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
-			     vma->vm_start, vma->vm_end);
-		return;
-	}
-	(*count)++;
-	if (!(*count))
-		ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx",
-			     vma->vm_start, vma->vm_end);
-	ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
-		     vma->vm_start, vma->vm_end, *count);
-}
-
-static void ehca_mm_close(struct vm_area_struct *vma)
-{
-	u32 *count = (u32 *)vma->vm_private_data;
-	if (!count) {
-		ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
-			     vma->vm_start, vma->vm_end);
-		return;
-	}
-	(*count)--;
-	ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
-		     vma->vm_start, vma->vm_end, *count);
-}
-
-static const struct vm_operations_struct vm_ops = {
-	.open =	ehca_mm_open,
-	.close = ehca_mm_close,
-};
-
-static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
-			u32 *mm_count)
-{
-	int ret;
-	u64 vsize, physical;
-
-	vsize = vma->vm_end - vma->vm_start;
-	if (vsize < EHCA_PAGESIZE) {
-		ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
-		return -EINVAL;
-	}
-
-	physical = galpas->user.fw_handle;
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical);
-	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
-	ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
-			   vma->vm_page_prot);
-	if (unlikely(ret)) {
-		ehca_gen_err("remap_pfn_range() failed ret=%i", ret);
-		return -ENOMEM;
-	}
-
-	vma->vm_private_data = mm_count;
-	(*mm_count)++;
-	vma->vm_ops = &vm_ops;
-
-	return 0;
-}
-
-static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
-			   u32 *mm_count)
-{
-	int ret;
-	u64 start, ofs;
-	struct page *page;
-
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-	start = vma->vm_start;
-	for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
-		u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
-		page = virt_to_page(virt_addr);
-		ret = vm_insert_page(vma, start, page);
-		if (unlikely(ret)) {
-			ehca_gen_err("vm_insert_page() failed rc=%i", ret);
-			return ret;
-		}
-		start += PAGE_SIZE;
-	}
-	vma->vm_private_data = mm_count;
-	(*mm_count)++;
-	vma->vm_ops = &vm_ops;
-
-	return 0;
-}
-
-static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
-			u32 rsrc_type)
-{
-	int ret;
-
-	switch (rsrc_type) {
-	case 0: /* galpa fw handle */
-		ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
-		ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
-		if (unlikely(ret)) {
-			ehca_err(cq->ib_cq.device,
-				 "ehca_mmap_fw() failed rc=%i cq_num=%x",
-				 ret, cq->cq_number);
-			return ret;
-		}
-		break;
-
-	case 1: /* cq queue_addr */
-		ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
-		ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
-		if (unlikely(ret)) {
-			ehca_err(cq->ib_cq.device,
-				 "ehca_mmap_queue() failed rc=%i cq_num=%x",
-				 ret, cq->cq_number);
-			return ret;
-		}
-		break;
-
-	default:
-		ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x",
-			 rsrc_type, cq->cq_number);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
-			u32 rsrc_type)
-{
-	int ret;
-
-	switch (rsrc_type) {
-	case 0: /* galpa fw handle */
-		ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
-		ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
-		if (unlikely(ret)) {
-			ehca_err(qp->ib_qp.device,
-				 "remap_pfn_range() failed ret=%i qp_num=%x",
-				 ret, qp->ib_qp.qp_num);
-			return -ENOMEM;
-		}
-		break;
-
-	case 1: /* qp rqueue_addr */
-		ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num);
-		ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
-				      &qp->mm_count_rqueue);
-		if (unlikely(ret)) {
-			ehca_err(qp->ib_qp.device,
-				 "ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
-				 ret, qp->ib_qp.qp_num);
-			return ret;
-		}
-		break;
-
-	case 2: /* qp squeue_addr */
-		ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num);
-		ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
-				      &qp->mm_count_squeue);
-		if (unlikely(ret)) {
-			ehca_err(qp->ib_qp.device,
-				 "ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
-				 ret, qp->ib_qp.qp_num);
-			return ret;
-		}
-		break;
-
-	default:
-		ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x",
-			 rsrc_type, qp->ib_qp.qp_num);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	u64 fileoffset = vma->vm_pgoff;
-	u32 idr_handle = fileoffset & 0x1FFFFFF;
-	u32 q_type = (fileoffset >> 27) & 0x1;	  /* CQ, QP,...        */
-	u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
-	u32 ret;
-	struct ehca_cq *cq;
-	struct ehca_qp *qp;
-	struct ib_uobject *uobject;
-
-	switch (q_type) {
-	case  0: /* CQ */
-		read_lock(&ehca_cq_idr_lock);
-		cq = idr_find(&ehca_cq_idr, idr_handle);
-		read_unlock(&ehca_cq_idr_lock);
-
-		/* make sure this mmap really belongs to the authorized user */
-		if (!cq)
-			return -EINVAL;
-
-		if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
-			return -EINVAL;
-
-		ret = ehca_mmap_cq(vma, cq, rsrc_type);
-		if (unlikely(ret)) {
-			ehca_err(cq->ib_cq.device,
-				 "ehca_mmap_cq() failed rc=%i cq_num=%x",
-				 ret, cq->cq_number);
-			return ret;
-		}
-		break;
-
-	case 1: /* QP */
-		read_lock(&ehca_qp_idr_lock);
-		qp = idr_find(&ehca_qp_idr, idr_handle);
-		read_unlock(&ehca_qp_idr_lock);
-
-		/* make sure this mmap really belongs to the authorized user */
-		if (!qp)
-			return -EINVAL;
-
-		uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
-		if (!uobject || uobject->context != context)
-			return -EINVAL;
-
-		ret = ehca_mmap_qp(vma, qp, rsrc_type);
-		if (unlikely(ret)) {
-			ehca_err(qp->ib_qp.device,
-				 "ehca_mmap_qp() failed rc=%i qp_num=%x",
-				 ret, qp->ib_qp.qp_num);
-			return ret;
-		}
-		break;
-
-	default:
-		ehca_gen_err("bad queue type %x", q_type);
-		return -EINVAL;
-	}
-
-	return 0;
-}
diff --git a/drivers/staging/rdma/ehca/hcp_if.c b/drivers/staging/rdma/ehca/hcp_if.c
deleted file mode 100644
index 89517ffb4389..000000000000
--- a/drivers/staging/rdma/ehca/hcp_if.c
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware Infiniband Interface code for POWER
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Joachim Fenkes <fenkes@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <asm/hvcall.h>
-#include "ehca_tools.h"
-#include "hcp_if.h"
-#include "hcp_phyp.h"
-#include "hipz_fns.h"
-#include "ipz_pt_fn.h"
-
-#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
-#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
-#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
-#define H_ALL_RES_QP_STORAGE            EHCA_BMASK_IBM(16, 17)
-#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
-#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
-#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
-#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
-#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35)
-#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39)
-#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
-
-#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
-#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
-#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
-#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
-
-#define H_ALL_RES_QP_UD_AV_LKEY         EHCA_BMASK_IBM(32, 63)
-#define H_ALL_RES_QP_SRQ_QP_TOKEN       EHCA_BMASK_IBM(0, 31)
-#define H_ALL_RES_QP_SRQ_QP_HANDLE      EHCA_BMASK_IBM(0, 64)
-#define H_ALL_RES_QP_SRQ_LIMIT          EHCA_BMASK_IBM(48, 63)
-#define H_ALL_RES_QP_SRQ_QPN            EHCA_BMASK_IBM(40, 63)
-
-#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
-#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
-#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
-#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
-
-#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
-#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
-
-#define H_MP_INIT_TYPE                  EHCA_BMASK_IBM(44, 47)
-#define H_MP_SHUTDOWN                   EHCA_BMASK_IBM(48, 48)
-#define H_MP_RESET_QKEY_CTR             EHCA_BMASK_IBM(49, 49)
-
-#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx"
-#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx"
-#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
-
-static DEFINE_SPINLOCK(hcall_lock);
-
-static long ehca_plpar_hcall_norets(unsigned long opcode,
-				    unsigned long arg1,
-				    unsigned long arg2,
-				    unsigned long arg3,
-				    unsigned long arg4,
-				    unsigned long arg5,
-				    unsigned long arg6,
-				    unsigned long arg7)
-{
-	long ret;
-	int i, sleep_msecs;
-	unsigned long flags = 0;
-
-	if (unlikely(ehca_debug_level >= 2))
-		ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
-			     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
-
-	for (i = 0; i < 5; i++) {
-		/* serialize hCalls to work around firmware issue */
-		if (ehca_lock_hcalls)
-			spin_lock_irqsave(&hcall_lock, flags);
-
-		ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
-					 arg5, arg6, arg7);
-
-		if (ehca_lock_hcalls)
-			spin_unlock_irqrestore(&hcall_lock, flags);
-
-		if (H_IS_LONG_BUSY(ret)) {
-			sleep_msecs = get_longbusy_msecs(ret);
-			msleep_interruptible(sleep_msecs);
-			continue;
-		}
-
-		if (ret < H_SUCCESS)
-			ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT,
-				     opcode, ret, arg1, arg2, arg3,
-				     arg4, arg5, arg6, arg7);
-		else
-			if (unlikely(ehca_debug_level >= 2))
-				ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
-
-		return ret;
-	}
-
-	return H_BUSY;
-}
-
-static long ehca_plpar_hcall9(unsigned long opcode,
-			      unsigned long *outs, /* array of 9 outputs */
-			      unsigned long arg1,
-			      unsigned long arg2,
-			      unsigned long arg3,
-			      unsigned long arg4,
-			      unsigned long arg5,
-			      unsigned long arg6,
-			      unsigned long arg7,
-			      unsigned long arg8,
-			      unsigned long arg9)
-{
-	long ret;
-	int i, sleep_msecs;
-	unsigned long flags = 0;
-
-	if (unlikely(ehca_debug_level >= 2))
-		ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
-			     arg1, arg2, arg3, arg4, arg5,
-			     arg6, arg7, arg8, arg9);
-
-	for (i = 0; i < 5; i++) {
-		/* serialize hCalls to work around firmware issue */
-		if (ehca_lock_hcalls)
-			spin_lock_irqsave(&hcall_lock, flags);
-
-		ret = plpar_hcall9(opcode, outs,
-				   arg1, arg2, arg3, arg4, arg5,
-				   arg6, arg7, arg8, arg9);
-
-		if (ehca_lock_hcalls)
-			spin_unlock_irqrestore(&hcall_lock, flags);
-
-		if (H_IS_LONG_BUSY(ret)) {
-			sleep_msecs = get_longbusy_msecs(ret);
-			msleep_interruptible(sleep_msecs);
-			continue;
-		}
-
-		if (ret < H_SUCCESS) {
-			ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT,
-				     opcode, arg1, arg2, arg3, arg4, arg5,
-				     arg6, arg7, arg8, arg9);
-			ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
-				     ret, outs[0], outs[1], outs[2], outs[3],
-				     outs[4], outs[5], outs[6], outs[7],
-				     outs[8]);
-		} else if (unlikely(ehca_debug_level >= 2))
-			ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
-				     ret, outs[0], outs[1], outs[2], outs[3],
-				     outs[4], outs[5], outs[6], outs[7],
-				     outs[8]);
-		return ret;
-	}
-
-	return H_BUSY;
-}
-
-u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_pfeq *pfeq,
-			     const u32 neq_control,
-			     const u32 number_of_entries,
-			     struct ipz_eq_handle *eq_handle,
-			     u32 *act_nr_of_entries,
-			     u32 *act_pages,
-			     u32 *eq_ist)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-	u64 allocate_controls;
-
-	/* resource type */
-	allocate_controls = 3ULL;
-
-	/* ISN is associated */
-	if (neq_control != 1)
-		allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
-	else /* notification event queue */
-		allocate_controls = (1ULL << 63) | allocate_controls;
-
-	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-				adapter_handle.handle,  /* r4 */
-				allocate_controls,      /* r5 */
-				number_of_entries,      /* r6 */
-				0, 0, 0, 0, 0, 0);
-	eq_handle->handle = outs[0];
-	*act_nr_of_entries = (u32)outs[3];
-	*act_pages = (u32)outs[4];
-	*eq_ist = (u32)outs[5];
-
-	if (ret == H_NOT_ENOUGH_RESOURCES)
-		ehca_gen_err("Not enough resource - ret=%lli ", ret);
-
-	return ret;
-}
-
-u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
-		       struct ipz_eq_handle eq_handle,
-		       const u64 event_mask)
-{
-	return ehca_plpar_hcall_norets(H_RESET_EVENTS,
-				       adapter_handle.handle, /* r4 */
-				       eq_handle.handle,      /* r5 */
-				       event_mask,	      /* r6 */
-				       0, 0, 0, 0);
-}
-
-u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_cq *cq,
-			     struct ehca_alloc_cq_parms *param)
-{
-	int rc;
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-				adapter_handle.handle,   /* r4  */
-				2,	                 /* r5  */
-				param->eq_handle.handle, /* r6  */
-				cq->token,	         /* r7  */
-				param->nr_cqe,           /* r8  */
-				0, 0, 0, 0);
-	cq->ipz_cq_handle.handle = outs[0];
-	param->act_nr_of_entries = (u32)outs[3];
-	param->act_pages = (u32)outs[4];
-
-	if (ret == H_SUCCESS) {
-		rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
-		if (rc) {
-			ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
-				     rc, outs[5]);
-
-			ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-						adapter_handle.handle,     /* r4 */
-						cq->ipz_cq_handle.handle,  /* r5 */
-						0, 0, 0, 0, 0);
-			ret = H_NO_MEM;
-		}
-	}
-
-	if (ret == H_NOT_ENOUGH_RESOURCES)
-		ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms, int is_user)
-{
-	int rc;
-	u64 ret;
-	u64 allocate_controls, max_r10_reg, r11, r12;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	allocate_controls =
-		EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE,
-				 parms->squeue.page_size)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE,
-				 parms->rqueue.page_size)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
-				 !!(parms->ll_comp_flags & LLQP_RECV_COMP))
-		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
-				 !!(parms->ll_comp_flags & LLQP_SEND_COMP))
-		| EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
-				 parms->ud_av_l_key_ctl)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
-
-	max_r10_reg =
-		EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
-			       parms->squeue.max_wr + 1)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
-				 parms->rqueue.max_wr + 1)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
-				 parms->squeue.max_sge)
-		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
-				 parms->rqueue.max_sge);
-
-	r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
-
-	if (parms->ext_type == EQPT_SRQ)
-		r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
-	else
-		r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
-
-	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-				adapter_handle.handle,	           /* r4  */
-				allocate_controls,	           /* r5  */
-				parms->send_cq_handle.handle,
-				parms->recv_cq_handle.handle,
-				parms->eq_handle.handle,
-				((u64)parms->token << 32) | parms->pd.value,
-				max_r10_reg, r11, r12);
-
-	parms->qp_handle.handle = outs[0];
-	parms->real_qp_num = (u32)outs[1];
-	parms->squeue.act_nr_wqes =
-		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
-	parms->rqueue.act_nr_wqes =
-		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
-	parms->squeue.act_nr_sges =
-		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
-	parms->rqueue.act_nr_sges =
-		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
-	parms->squeue.queue_size =
-		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
-	parms->rqueue.queue_size =
-		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
-
-	if (ret == H_SUCCESS) {
-		rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
-		if (rc) {
-			ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
-				     rc, outs[6]);
-
-			ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-						adapter_handle.handle,     /* r4 */
-						parms->qp_handle.handle,  /* r5 */
-						0, 0, 0, 0, 0);
-			ret = H_NO_MEM;
-		}
-	}
-
-	if (ret == H_NOT_ENOUGH_RESOURCES)
-		ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
-		      const u8 port_id,
-		      struct hipz_query_port *query_port_response_block)
-{
-	u64 ret;
-	u64 r_cb = __pa(query_port_response_block);
-
-	if (r_cb & (EHCA_PAGESIZE-1)) {
-		ehca_gen_err("response block not page aligned");
-		return H_PARAMETER;
-	}
-
-	ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
-				      adapter_handle.handle, /* r4 */
-				      port_id,	             /* r5 */
-				      r_cb,	             /* r6 */
-				      0, 0, 0, 0);
-
-	if (ehca_debug_level >= 2)
-		ehca_dmp(query_port_response_block, 64, "response_block");
-
-	return ret;
-}
-
-u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
-		       const u8 port_id, const u32 port_cap,
-		       const u8 init_type, const int modify_mask)
-{
-	u64 port_attributes = port_cap;
-
-	if (modify_mask & IB_PORT_SHUTDOWN)
-		port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
-	if (modify_mask & IB_PORT_INIT_TYPE)
-		port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
-	if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
-		port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
-
-	return ehca_plpar_hcall_norets(H_MODIFY_PORT,
-				       adapter_handle.handle, /* r4 */
-				       port_id,               /* r5 */
-				       port_attributes,       /* r6 */
-				       0, 0, 0, 0);
-}
-
-u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
-		     struct hipz_query_hca *query_hca_rblock)
-{
-	u64 r_cb = __pa(query_hca_rblock);
-
-	if (r_cb & (EHCA_PAGESIZE-1)) {
-		ehca_gen_err("response_block=%p not page aligned",
-			     query_hca_rblock);
-		return H_PARAMETER;
-	}
-
-	return ehca_plpar_hcall_norets(H_QUERY_HCA,
-				       adapter_handle.handle, /* r4 */
-				       r_cb,                  /* r5 */
-				       0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
-			  const u8 pagesize,
-			  const u8 queue_type,
-			  const u64 resource_handle,
-			  const u64 logical_address_of_page,
-			  u64 count)
-{
-	return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
-				       adapter_handle.handle,      /* r4  */
-				       (u64)queue_type | ((u64)pagesize) << 8,
-				       /* r5  */
-				       resource_handle,	           /* r6  */
-				       logical_address_of_page,    /* r7  */
-				       count,	                   /* r8  */
-				       0, 0);
-}
-
-u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_eq_handle eq_handle,
-			     struct ehca_pfeq *pfeq,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count)
-{
-	if (count != 1) {
-		ehca_gen_err("Ppage counter=%llx", count);
-		return H_PARAMETER;
-	}
-	return hipz_h_register_rpage(adapter_handle,
-				     pagesize,
-				     queue_type,
-				     eq_handle.handle,
-				     logical_address_of_page, count);
-}
-
-u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
-			   u32 ist)
-{
-	u64 ret;
-	ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
-				      adapter_handle.handle, /* r4 */
-				      ist,                   /* r5 */
-				      0, 0, 0, 0, 0);
-
-	if (ret != H_SUCCESS && ret != H_BUSY)
-		ehca_gen_err("Could not query interrupt state.");
-
-	return ret;
-}
-
-u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_cq_handle cq_handle,
-			     struct ehca_pfcq *pfcq,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count,
-			     const struct h_galpa gal)
-{
-	if (count != 1) {
-		ehca_gen_err("Page counter=%llx", count);
-		return H_PARAMETER;
-	}
-
-	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
-				     cq_handle.handle, logical_address_of_page,
-				     count);
-}
-
-u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_qp_handle qp_handle,
-			     struct ehca_pfqp *pfqp,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count,
-			     const struct h_galpa galpa)
-{
-	if (count > 1) {
-		ehca_gen_err("Page counter=%llx", count);
-		return H_PARAMETER;
-	}
-
-	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
-				     qp_handle.handle, logical_address_of_page,
-				     count);
-}
-
-u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
-			       const struct ipz_qp_handle qp_handle,
-			       struct ehca_pfqp *pfqp,
-			       void **log_addr_next_sq_wqe2processed,
-			       void **log_addr_next_rq_wqe2processed,
-			       int dis_and_get_function_code)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
-				adapter_handle.handle,     /* r4 */
-				dis_and_get_function_code, /* r5 */
-				qp_handle.handle,	   /* r6 */
-				0, 0, 0, 0, 0, 0);
-	if (log_addr_next_sq_wqe2processed)
-		*log_addr_next_sq_wqe2processed = (void *)outs[0];
-	if (log_addr_next_rq_wqe2processed)
-		*log_addr_next_rq_wqe2processed = (void *)outs[1];
-
-	return ret;
-}
-
-u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
-		     const struct ipz_qp_handle qp_handle,
-		     struct ehca_pfqp *pfqp,
-		     const u64 update_mask,
-		     struct hcp_modify_qp_control_block *mqpcb,
-		     struct h_galpa gal)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-	ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
-				adapter_handle.handle, /* r4 */
-				qp_handle.handle,      /* r5 */
-				update_mask,	       /* r6 */
-				__pa(mqpcb),	       /* r7 */
-				0, 0, 0, 0, 0);
-
-	if (ret == H_NOT_ENOUGH_RESOURCES)
-		ehca_gen_err("Insufficient resources ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
-		    const struct ipz_qp_handle qp_handle,
-		    struct ehca_pfqp *pfqp,
-		    struct hcp_modify_qp_control_block *qqpcb,
-		    struct h_galpa gal)
-{
-	return ehca_plpar_hcall_norets(H_QUERY_QP,
-				       adapter_handle.handle, /* r4 */
-				       qp_handle.handle,      /* r5 */
-				       __pa(qqpcb),	      /* r6 */
-				       0, 0, 0, 0);
-}
-
-u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_qp *qp)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = hcp_galpas_dtor(&qp->galpas);
-	if (ret) {
-		ehca_gen_err("Could not destruct qp->galpas");
-		return H_RESOURCE;
-	}
-	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
-				adapter_handle.handle,     /* r4 */
-				/* function code */
-				1,	                   /* r5 */
-				qp->ipz_qp_handle.handle,  /* r6 */
-				0, 0, 0, 0, 0, 0);
-	if (ret == H_HARDWARE)
-		ehca_gen_err("HCA not operational. ret=%lli", ret);
-
-	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-				      adapter_handle.handle,     /* r4 */
-				      qp->ipz_qp_handle.handle,  /* r5 */
-				      0, 0, 0, 0, 0);
-
-	if (ret == H_RESOURCE)
-		ehca_gen_err("Resource still in use. ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u32 port)
-{
-	return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
-				       adapter_handle.handle, /* r4 */
-				       qp_handle.handle,      /* r5 */
-				       port,                  /* r6 */
-				       0, 0, 0, 0);
-}
-
-u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u32 port, u32 * pma_qp_nr,
-		       u32 * bma_qp_nr)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
-				adapter_handle.handle, /* r4 */
-				qp_handle.handle,      /* r5 */
-				port,	               /* r6 */
-				0, 0, 0, 0, 0, 0);
-	*pma_qp_nr = (u32)outs[0];
-	*bma_qp_nr = (u32)outs[1];
-
-	if (ret == H_ALIAS_EXIST)
-		ehca_gen_err("AQP1 already exists. ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u16 mcg_dlid,
-		       u64 subnet_prefix, u64 interface_id)
-{
-	u64 ret;
-
-	ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
-				      adapter_handle.handle,  /* r4 */
-				      qp_handle.handle,       /* r5 */
-				      mcg_dlid,               /* r6 */
-				      interface_id,           /* r7 */
-				      subnet_prefix,          /* r8 */
-				      0, 0);
-
-	if (ret == H_NOT_ENOUGH_RESOURCES)
-		ehca_gen_err("Not enough resources. ret=%lli", ret);
-
-	return ret;
-}
-
-u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u16 mcg_dlid,
-		       u64 subnet_prefix, u64 interface_id)
-{
-	return ehca_plpar_hcall_norets(H_DETACH_MCQP,
-				       adapter_handle.handle, /* r4 */
-				       qp_handle.handle,      /* r5 */
-				       mcg_dlid,              /* r6 */
-				       interface_id,          /* r7 */
-				       subnet_prefix,         /* r8 */
-				       0, 0);
-}
-
-u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_cq *cq,
-		      u8 force_flag)
-{
-	u64 ret;
-
-	ret = hcp_galpas_dtor(&cq->galpas);
-	if (ret) {
-		ehca_gen_err("Could not destruct cp->galpas");
-		return H_RESOURCE;
-	}
-
-	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-				      adapter_handle.handle,     /* r4 */
-				      cq->ipz_cq_handle.handle,  /* r5 */
-				      force_flag != 0 ? 1L : 0L, /* r6 */
-				      0, 0, 0, 0);
-
-	if (ret == H_RESOURCE)
-		ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret);
-
-	return ret;
-}
-
-u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_eq *eq)
-{
-	u64 ret;
-
-	ret = hcp_galpas_dtor(&eq->galpas);
-	if (ret) {
-		ehca_gen_err("Could not destruct eq->galpas");
-		return H_RESOURCE;
-	}
-
-	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-				      adapter_handle.handle,     /* r4 */
-				      eq->ipz_eq_handle.handle,  /* r5 */
-				      0, 0, 0, 0, 0);
-
-	if (ret == H_RESOURCE)
-		ehca_gen_err("Resource in use. ret=%lli ", ret);
-
-	return ret;
-}
-
-u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mr *mr,
-			     const u64 vaddr,
-			     const u64 length,
-			     const u32 access_ctrl,
-			     const struct ipz_pd pd,
-			     struct ehca_mr_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-				adapter_handle.handle,            /* r4 */
-				5,                                /* r5 */
-				vaddr,                            /* r6 */
-				length,                           /* r7 */
-				(((u64)access_ctrl) << 32ULL),    /* r8 */
-				pd.value,                         /* r9 */
-				0, 0, 0);
-	outparms->handle.handle = outs[0];
-	outparms->lkey = (u32)outs[2];
-	outparms->rkey = (u32)outs[3];
-
-	return ret;
-}
-
-u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mr *mr,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count)
-{
-	u64 ret;
-
-	if (unlikely(ehca_debug_level >= 3)) {
-		if (count > 1) {
-			u64 *kpage;
-			int i;
-			kpage = __va(logical_address_of_page);
-			for (i = 0; i < count; i++)
-				ehca_gen_dbg("kpage[%d]=%p",
-					     i, (void *)kpage[i]);
-		} else
-			ehca_gen_dbg("kpage=%p",
-				     (void *)logical_address_of_page);
-	}
-
-	if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
-		ehca_gen_err("logical_address_of_page not on a 4k boundary "
-			     "adapter_handle=%llx mr=%p mr_handle=%llx "
-			     "pagesize=%x queue_type=%x "
-			     "logical_address_of_page=%llx count=%llx",
-			     adapter_handle.handle, mr,
-			     mr->ipz_mr_handle.handle, pagesize, queue_type,
-			     logical_address_of_page, count);
-		ret = H_PARAMETER;
-	} else
-		ret = hipz_h_register_rpage(adapter_handle, pagesize,
-					    queue_type,
-					    mr->ipz_mr_handle.handle,
-					    logical_address_of_page, count);
-	return ret;
-}
-
-u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
-		    const struct ehca_mr *mr,
-		    struct ehca_mr_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
-				adapter_handle.handle,     /* r4 */
-				mr->ipz_mr_handle.handle,  /* r5 */
-				0, 0, 0, 0, 0, 0, 0);
-	outparms->len = outs[0];
-	outparms->vaddr = outs[1];
-	outparms->acl  = outs[4] >> 32;
-	outparms->lkey = (u32)(outs[5] >> 32);
-	outparms->rkey = (u32)(outs[5] & (0xffffffff));
-
-	return ret;
-}
-
-u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
-			    const struct ehca_mr *mr)
-{
-	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-				       adapter_handle.handle,    /* r4 */
-				       mr->ipz_mr_handle.handle, /* r5 */
-				       0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-			  const struct ehca_mr *mr,
-			  const u64 vaddr_in,
-			  const u64 length,
-			  const u32 access_ctrl,
-			  const struct ipz_pd pd,
-			  const u64 mr_addr_cb,
-			  struct ehca_mr_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
-				adapter_handle.handle,    /* r4 */
-				mr->ipz_mr_handle.handle, /* r5 */
-				vaddr_in,	          /* r6 */
-				length,                   /* r7 */
-				/* r8 */
-				((((u64)access_ctrl) << 32ULL) | pd.value),
-				mr_addr_cb,               /* r9 */
-				0, 0, 0);
-	outparms->vaddr = outs[1];
-	outparms->lkey = (u32)outs[2];
-	outparms->rkey = (u32)outs[3];
-
-	return ret;
-}
-
-u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
-			const struct ehca_mr *mr,
-			const struct ehca_mr *orig_mr,
-			const u64 vaddr_in,
-			const u32 access_ctrl,
-			const struct ipz_pd pd,
-			struct ehca_mr_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
-				adapter_handle.handle,            /* r4 */
-				orig_mr->ipz_mr_handle.handle,    /* r5 */
-				vaddr_in,                         /* r6 */
-				(((u64)access_ctrl) << 32ULL),    /* r7 */
-				pd.value,                         /* r8 */
-				0, 0, 0, 0);
-	outparms->handle.handle = outs[0];
-	outparms->lkey = (u32)outs[2];
-	outparms->rkey = (u32)outs[3];
-
-	return ret;
-}
-
-u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mw *mw,
-			     const struct ipz_pd pd,
-			     struct ehca_mw_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
-				adapter_handle.handle,      /* r4 */
-				6,                          /* r5 */
-				pd.value,                   /* r6 */
-				0, 0, 0, 0, 0, 0);
-	outparms->handle.handle = outs[0];
-	outparms->rkey = (u32)outs[3];
-
-	return ret;
-}
-
-u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
-		    const struct ehca_mw *mw,
-		    struct ehca_mw_hipzout_parms *outparms)
-{
-	u64 ret;
-	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
-
-	ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
-				adapter_handle.handle,    /* r4 */
-				mw->ipz_mw_handle.handle, /* r5 */
-				0, 0, 0, 0, 0, 0, 0);
-	outparms->rkey = (u32)outs[3];
-
-	return ret;
-}
-
-u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
-			    const struct ehca_mw *mw)
-{
-	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
-				       adapter_handle.handle,    /* r4 */
-				       mw->ipz_mw_handle.handle, /* r5 */
-				       0, 0, 0, 0, 0);
-}
-
-u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
-		      const u64 ressource_handle,
-		      void *rblock,
-		      unsigned long *byte_count)
-{
-	u64 r_cb = __pa(rblock);
-
-	if (r_cb & (EHCA_PAGESIZE-1)) {
-		ehca_gen_err("rblock not page aligned.");
-		return H_PARAMETER;
-	}
-
-	return ehca_plpar_hcall_norets(H_ERROR_DATA,
-				       adapter_handle.handle,
-				       ressource_handle,
-				       r_cb,
-				       0, 0, 0, 0);
-}
-
-u64 hipz_h_eoi(int irq)
-{
-	unsigned long xirr;
-
-	iosync();
-	xirr = (0xffULL << 24) | irq;
-
-	return plpar_hcall_norets(H_EOI, xirr);
-}
diff --git a/drivers/staging/rdma/ehca/hcp_if.h b/drivers/staging/rdma/ehca/hcp_if.h
deleted file mode 100644
index a46e514c367b..000000000000
--- a/drivers/staging/rdma/ehca/hcp_if.h
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware Infiniband Interface code for POWER
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HCP_IF_H__
-#define __HCP_IF_H__
-
-#include "ehca_classes.h"
-#include "ehca_tools.h"
-#include "hipz_hw.h"
-
-/*
- * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize
- * resources, create the empty EQPT (ring).
- */
-u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_pfeq *pfeq,
-			     const u32 neq_control,
-			     const u32 number_of_entries,
-			     struct ipz_eq_handle *eq_handle,
-			     u32 * act_nr_of_entries,
-			     u32 * act_pages,
-			     u32 * eq_ist);
-
-u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
-		       struct ipz_eq_handle eq_handle,
-		       const u64 event_mask);
-/*
- * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize
- * resources, create the empty CQPT (ring).
- */
-u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_cq *cq,
-			     struct ehca_alloc_cq_parms *param);
-
-
-/*
- * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
- * initialize resources, create empty QPPTs (2 rings).
- */
-u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_alloc_qp_parms *parms, int is_user);
-
-u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
-		      const u8 port_id,
-		      struct hipz_query_port *query_port_response_block);
-
-u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
-		       const u8 port_id, const u32 port_cap,
-		       const u8 init_type, const int modify_mask);
-
-u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
-		     struct hipz_query_hca *query_hca_rblock);
-
-/*
- * hipz_h_register_rpage internal function in hcp_if.h for all
- * hcp_H_REGISTER_RPAGE calls.
- */
-u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
-			  const u8 pagesize,
-			  const u8 queue_type,
-			  const u64 resource_handle,
-			  const u64 logical_address_of_page,
-			  u64 count);
-
-u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_eq_handle eq_handle,
-			     struct ehca_pfeq *pfeq,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count);
-
-u64 hipz_h_query_int_state(const struct ipz_adapter_handle
-			   hcp_adapter_handle,
-			   u32 ist);
-
-u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_cq_handle cq_handle,
-			     struct ehca_pfcq *pfcq,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count,
-			     const struct h_galpa gal);
-
-u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
-			     const struct ipz_qp_handle qp_handle,
-			     struct ehca_pfqp *pfqp,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count,
-			     const struct h_galpa galpa);
-
-u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
-			       const struct ipz_qp_handle qp_handle,
-			       struct ehca_pfqp *pfqp,
-			       void **log_addr_next_sq_wqe_tb_processed,
-			       void **log_addr_next_rq_wqe_tb_processed,
-			       int dis_and_get_function_code);
-enum hcall_sigt {
-	HCALL_SIGT_NO_CQE = 0,
-	HCALL_SIGT_BY_WQE = 1,
-	HCALL_SIGT_EVERY = 2
-};
-
-u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
-		     const struct ipz_qp_handle qp_handle,
-		     struct ehca_pfqp *pfqp,
-		     const u64 update_mask,
-		     struct hcp_modify_qp_control_block *mqpcb,
-		     struct h_galpa gal);
-
-u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
-		    const struct ipz_qp_handle qp_handle,
-		    struct ehca_pfqp *pfqp,
-		    struct hcp_modify_qp_control_block *qqpcb,
-		    struct h_galpa gal);
-
-u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_qp *qp);
-
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u32 port);
-
-u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u32 port, u32 * pma_qp_nr,
-		       u32 * bma_qp_nr);
-
-u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u16 mcg_dlid,
-		       u64 subnet_prefix, u64 interface_id);
-
-u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
-		       const struct ipz_qp_handle qp_handle,
-		       struct h_galpa gal,
-		       u16 mcg_dlid,
-		       u64 subnet_prefix, u64 interface_id);
-
-u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_cq *cq,
-		      u8 force_flag);
-
-u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
-		      struct ehca_eq *eq);
-
-/*
- * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize
- * resources.
- */
-u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mr *mr,
-			     const u64 vaddr,
-			     const u64 length,
-			     const u32 access_ctrl,
-			     const struct ipz_pd pd,
-			     struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
-u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mr *mr,
-			     const u8 pagesize,
-			     const u8 queue_type,
-			     const u64 logical_address_of_page,
-			     const u64 count);
-
-/* hipz_h_query_mr queries MR in HW and FW */
-u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
-		    const struct ehca_mr *mr,
-		    struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_free_resource_mr frees MR resources in HW and FW */
-u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
-			    const struct ehca_mr *mr);
-
-/* hipz_h_reregister_pmr reregisters MR in HW and FW */
-u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-			  const struct ehca_mr *mr,
-			  const u64 vaddr_in,
-			  const u64 length,
-			  const u32 access_ctrl,
-			  const struct ipz_pd pd,
-			  const u64 mr_addr_cb,
-			  struct ehca_mr_hipzout_parms *outparms);
-
-/* hipz_h_register_smr register shared MR in HW and FW */
-u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
-			const struct ehca_mr *mr,
-			const struct ehca_mr *orig_mr,
-			const u64 vaddr_in,
-			const u32 access_ctrl,
-			const struct ipz_pd pd,
-			struct ehca_mr_hipzout_parms *outparms);
-
-/*
- * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize
- * resources.
- */
-u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
-			     const struct ehca_mw *mw,
-			     const struct ipz_pd pd,
-			     struct ehca_mw_hipzout_parms *outparms);
-
-/* hipz_h_query_mw queries MW in HW and FW */
-u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
-		    const struct ehca_mw *mw,
-		    struct ehca_mw_hipzout_parms *outparms);
-
-/* hipz_h_free_resource_mw frees MW resources in HW and FW */
-u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
-			    const struct ehca_mw *mw);
-
-u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
-		      const u64 ressource_handle,
-		      void *rblock,
-		      unsigned long *byte_count);
-u64 hipz_h_eoi(int irq);
-
-#endif /* __HCP_IF_H__ */
diff --git a/drivers/staging/rdma/ehca/hcp_phyp.c b/drivers/staging/rdma/ehca/hcp_phyp.c
deleted file mode 100644
index 077376ff3d28..000000000000
--- a/drivers/staging/rdma/ehca/hcp_phyp.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *   load store abstraction for ehca register access with tracing
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ehca_classes.h"
-#include "hipz_hw.h"
-
-u64 hcall_map_page(u64 physaddr)
-{
-	return (u64)ioremap(physaddr, EHCA_PAGESIZE);
-}
-
-int hcall_unmap_page(u64 mapaddr)
-{
-	iounmap((volatile void __iomem *) mapaddr);
-	return 0;
-}
-
-int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
-		    u64 paddr_kernel, u64 paddr_user)
-{
-	if (!is_user) {
-		galpas->kernel.fw_handle = hcall_map_page(paddr_kernel);
-		if (!galpas->kernel.fw_handle)
-			return -ENOMEM;
-	} else
-		galpas->kernel.fw_handle = 0;
-
-	galpas->user.fw_handle = paddr_user;
-
-	return 0;
-}
-
-int hcp_galpas_dtor(struct h_galpas *galpas)
-{
-	if (galpas->kernel.fw_handle) {
-		int ret = hcall_unmap_page(galpas->kernel.fw_handle);
-		if (ret)
-			return ret;
-	}
-
-	galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
-
-	return 0;
-}
diff --git a/drivers/staging/rdma/ehca/hcp_phyp.h b/drivers/staging/rdma/ehca/hcp_phyp.h
deleted file mode 100644
index d1b029910249..000000000000
--- a/drivers/staging/rdma/ehca/hcp_phyp.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  Firmware calls
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Waleri Fomin <fomin@de.ibm.com>
- *           Gerd Bayer <gerd.bayer@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HCP_PHYP_H__
-#define __HCP_PHYP_H__
-
-
-/*
- * eHCA page (mapped into memory)
- * resource to access eHCA register pages in CPU address space
-*/
-struct h_galpa {
-	u64 fw_handle;
-	/* for pSeries this is a 64bit memory address where
-	   I/O memory is mapped into CPU address space (kv) */
-};
-
-/*
- * resource to access eHCA address space registers, all types
- */
-struct h_galpas {
-	u32 pid;		/*PID of userspace galpa checking */
-	struct h_galpa user;	/* user space accessible resource,
-				   set to 0 if unused */
-	struct h_galpa kernel;	/* kernel space accessible resource,
-				   set to 0 if unused */
-};
-
-static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
-{
-	u64 addr = galpa.fw_handle + offset;
-	return *(volatile u64 __force *)addr;
-}
-
-static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
-{
-	u64 addr = galpa.fw_handle + offset;
-	*(volatile u64 __force *)addr = value;
-}
-
-int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
-		    u64 paddr_kernel, u64 paddr_user);
-
-int hcp_galpas_dtor(struct h_galpas *galpas);
-
-u64 hcall_map_page(u64 physaddr);
-
-int hcall_unmap_page(u64 mapaddr);
-
-#endif
diff --git a/drivers/staging/rdma/ehca/hipz_fns.h b/drivers/staging/rdma/ehca/hipz_fns.h
deleted file mode 100644
index 9dac93d02140..000000000000
--- a/drivers/staging/rdma/ehca/hipz_fns.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HW abstraction register functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_FNS_H__
-#define __HIPZ_FNS_H__
-
-#include "ehca_classes.h"
-#include "hipz_hw.h"
-
-#include "hipz_fns_core.h"
-
-#define hipz_galpa_store_eq(gal, offset, value) \
-	hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_eq(gal, offset) \
-	hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
-
-#define hipz_galpa_store_qped(gal, offset, value) \
-	hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_qped(gal, offset) \
-	hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
-
-#define hipz_galpa_store_mrmw(gal, offset, value) \
-	hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_mrmw(gal, offset) \
-	hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
-
-#endif
diff --git a/drivers/staging/rdma/ehca/hipz_fns_core.h b/drivers/staging/rdma/ehca/hipz_fns_core.h
deleted file mode 100644
index 868735fd3187..000000000000
--- a/drivers/staging/rdma/ehca/hipz_fns_core.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  HW abstraction register functions
- *
- *  Authors: Christoph Raisch <raisch@de.ibm.com>
- *           Heiko J Schick <schickhj@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_FNS_CORE_H__
-#define __HIPZ_FNS_CORE_H__
-
-#include "hcp_phyp.h"
-#include "hipz_hw.h"
-
-#define hipz_galpa_store_cq(gal, offset, value) \
-	hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value)
-
-#define hipz_galpa_load_cq(gal, offset) \
-	hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
-
-#define hipz_galpa_store_qp(gal, offset, value) \
-	hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
-#define hipz_galpa_load_qp(gal, offset) \
-	hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
-
-static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
-{
-	/*  ringing doorbell :-) */
-	hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa,
-			    EHCA_BMASK_SET(QPX_SQADDER, nr_wqes));
-}
-
-static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
-{
-	/*  ringing doorbell :-) */
-	hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa,
-			    EHCA_BMASK_SET(QPX_RQADDER, nr_wqes));
-}
-
-static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
-{
-	hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca,
-			    EHCA_BMASK_SET(CQX_FECADDER, nr_cqes));
-}
-
-static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
-{
-	u64 cqx_n0_reg;
-
-	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0,
-			    EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT,
-					   value));
-	cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
-}
-
-static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
-{
-	u64 cqx_n1_reg;
-
-	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1,
-			    EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value));
-	cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
-}
-
-#endif /* __HIPZ_FNC_CORE_H__ */
diff --git a/drivers/staging/rdma/ehca/hipz_hw.h b/drivers/staging/rdma/ehca/hipz_hw.h
deleted file mode 100644
index bf996c7acc42..000000000000
--- a/drivers/staging/rdma/ehca/hipz_hw.h
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  eHCA register definitions
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __HIPZ_HW_H__
-#define __HIPZ_HW_H__
-
-#include "ehca_tools.h"
-
-#define EHCA_MAX_MTU 4
-
-/* QP Table Entry Memory Map */
-struct hipz_qptemm {
-	u64 qpx_hcr;
-	u64 qpx_c;
-	u64 qpx_herr;
-	u64 qpx_aer;
-/* 0x20*/
-	u64 qpx_sqa;
-	u64 qpx_sqc;
-	u64 qpx_rqa;
-	u64 qpx_rqc;
-/* 0x40*/
-	u64 qpx_st;
-	u64 qpx_pmstate;
-	u64 qpx_pmfa;
-	u64 qpx_pkey;
-/* 0x60*/
-	u64 qpx_pkeya;
-	u64 qpx_pkeyb;
-	u64 qpx_pkeyc;
-	u64 qpx_pkeyd;
-/* 0x80*/
-	u64 qpx_qkey;
-	u64 qpx_dqp;
-	u64 qpx_dlidp;
-	u64 qpx_portp;
-/* 0xa0*/
-	u64 qpx_slidp;
-	u64 qpx_slidpp;
-	u64 qpx_dlida;
-	u64 qpx_porta;
-/* 0xc0*/
-	u64 qpx_slida;
-	u64 qpx_slidpa;
-	u64 qpx_slvl;
-	u64 qpx_ipd;
-/* 0xe0*/
-	u64 qpx_mtu;
-	u64 qpx_lato;
-	u64 qpx_rlimit;
-	u64 qpx_rnrlimit;
-/* 0x100*/
-	u64 qpx_t;
-	u64 qpx_sqhp;
-	u64 qpx_sqptp;
-	u64 qpx_nspsn;
-/* 0x120*/
-	u64 qpx_nspsnhwm;
-	u64 reserved1;
-	u64 qpx_sdsi;
-	u64 qpx_sdsbc;
-/* 0x140*/
-	u64 qpx_sqwsize;
-	u64 qpx_sqwts;
-	u64 qpx_lsn;
-	u64 qpx_nssn;
-/* 0x160 */
-	u64 qpx_mor;
-	u64 qpx_cor;
-	u64 qpx_sqsize;
-	u64 qpx_erc;
-/* 0x180*/
-	u64 qpx_rnrrc;
-	u64 qpx_ernrwt;
-	u64 qpx_rnrresp;
-	u64 qpx_lmsna;
-/* 0x1a0 */
-	u64 qpx_sqhpc;
-	u64 qpx_sqcptp;
-	u64 qpx_sigt;
-	u64 qpx_wqecnt;
-/* 0x1c0*/
-	u64 qpx_rqhp;
-	u64 qpx_rqptp;
-	u64 qpx_rqsize;
-	u64 qpx_nrr;
-/* 0x1e0*/
-	u64 qpx_rdmac;
-	u64 qpx_nrpsn;
-	u64 qpx_lapsn;
-	u64 qpx_lcr;
-/* 0x200*/
-	u64 qpx_rwc;
-	u64 qpx_rwva;
-	u64 qpx_rdsi;
-	u64 qpx_rdsbc;
-/* 0x220*/
-	u64 qpx_rqwsize;
-	u64 qpx_crmsn;
-	u64 qpx_rdd;
-	u64 qpx_larpsn;
-/* 0x240*/
-	u64 qpx_pd;
-	u64 qpx_scqn;
-	u64 qpx_rcqn;
-	u64 qpx_aeqn;
-/* 0x260*/
-	u64 qpx_aaelog;
-	u64 qpx_ram;
-	u64 qpx_rdmaqe0;
-	u64 qpx_rdmaqe1;
-/* 0x280*/
-	u64 qpx_rdmaqe2;
-	u64 qpx_rdmaqe3;
-	u64 qpx_nrpsnhwm;
-/* 0x298*/
-	u64 reserved[(0x400 - 0x298) / 8];
-/* 0x400 extended data */
-	u64 reserved_ext[(0x500 - 0x400) / 8];
-/* 0x500 */
-	u64 reserved2[(0x1000 - 0x500) / 8];
-/* 0x1000      */
-};
-
-#define QPX_SQADDER EHCA_BMASK_IBM(48, 63)
-#define QPX_RQADDER EHCA_BMASK_IBM(48, 63)
-#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3)
-
-#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x)
-
-/* MRMWPT Entry Memory Map */
-struct hipz_mrmwmm {
-	/* 0x00 */
-	u64 mrx_hcr;
-
-	u64 mrx_c;
-	u64 mrx_herr;
-	u64 mrx_aer;
-	/* 0x20 */
-	u64 mrx_pp;
-	u64 reserved1;
-	u64 reserved2;
-	u64 reserved3;
-	/* 0x40 */
-	u64 reserved4[(0x200 - 0x40) / 8];
-	/* 0x200 */
-	u64 mrx_ctl[64];
-
-};
-
-#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x)
-
-struct hipz_qpedmm {
-	/* 0x00 */
-	u64 reserved0[(0x400) / 8];
-	/* 0x400 */
-	u64 qpedx_phh;
-	u64 qpedx_ppsgp;
-	/* 0x410 */
-	u64 qpedx_ppsgu;
-	u64 qpedx_ppdgp;
-	/* 0x420 */
-	u64 qpedx_ppdgu;
-	u64 qpedx_aph;
-	/* 0x430 */
-	u64 qpedx_apsgp;
-	u64 qpedx_apsgu;
-	/* 0x440 */
-	u64 qpedx_apdgp;
-	u64 qpedx_apdgu;
-	/* 0x450 */
-	u64 qpedx_apav;
-	u64 qpedx_apsav;
-	/* 0x460  */
-	u64 qpedx_hcr;
-	u64 reserved1[4];
-	/* 0x488 */
-	u64 qpedx_rrl0;
-	/* 0x490 */
-	u64 qpedx_rrrkey0;
-	u64 qpedx_rrva0;
-	/* 0x4a0 */
-	u64 reserved2;
-	u64 qpedx_rrl1;
-	/* 0x4b0 */
-	u64 qpedx_rrrkey1;
-	u64 qpedx_rrva1;
-	/* 0x4c0 */
-	u64 reserved3;
-	u64 qpedx_rrl2;
-	/* 0x4d0 */
-	u64 qpedx_rrrkey2;
-	u64 qpedx_rrva2;
-	/* 0x4e0 */
-	u64 reserved4;
-	u64 qpedx_rrl3;
-	/* 0x4f0 */
-	u64 qpedx_rrrkey3;
-	u64 qpedx_rrva3;
-};
-
-#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x)
-
-/* CQ Table Entry Memory Map */
-struct hipz_cqtemm {
-	u64 cqx_hcr;
-	u64 cqx_c;
-	u64 cqx_herr;
-	u64 cqx_aer;
-/* 0x20  */
-	u64 cqx_ptp;
-	u64 cqx_tp;
-	u64 cqx_fec;
-	u64 cqx_feca;
-/* 0x40  */
-	u64 cqx_ep;
-	u64 cqx_eq;
-/* 0x50  */
-	u64 reserved1;
-	u64 cqx_n0;
-/* 0x60  */
-	u64 cqx_n1;
-	u64 reserved2[(0x1000 - 0x60) / 8];
-/* 0x1000 */
-};
-
-#define CQX_FEC_CQE_CNT           EHCA_BMASK_IBM(32, 63)
-#define CQX_FECADDER              EHCA_BMASK_IBM(32, 63)
-#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-
-#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x)
-
-/* EQ Table Entry Memory Map */
-struct hipz_eqtemm {
-	u64 eqx_hcr;
-	u64 eqx_c;
-
-	u64 eqx_herr;
-	u64 eqx_aer;
-/* 0x20 */
-	u64 eqx_ptp;
-	u64 eqx_tp;
-	u64 eqx_ssba;
-	u64 eqx_psba;
-
-/* 0x40 */
-	u64 eqx_cec;
-	u64 eqx_meql;
-	u64 eqx_xisbi;
-	u64 eqx_xisc;
-/* 0x60 */
-	u64 eqx_it;
-
-};
-
-#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x)
-
-/* access control defines for MR/MW */
-#define HIPZ_ACCESSCTRL_L_WRITE  0x00800000
-#define HIPZ_ACCESSCTRL_R_WRITE  0x00400000
-#define HIPZ_ACCESSCTRL_R_READ   0x00200000
-#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
-#define HIPZ_ACCESSCTRL_MW_BIND  0x00080000
-
-/* query hca response block */
-struct hipz_query_hca {
-	u32 cur_reliable_dg;
-	u32 cur_qp;
-	u32 cur_cq;
-	u32 cur_eq;
-	u32 cur_mr;
-	u32 cur_mw;
-	u32 cur_ee_context;
-	u32 cur_mcast_grp;
-	u32 cur_qp_attached_mcast_grp;
-	u32 reserved1;
-	u32 cur_ipv6_qp;
-	u32 cur_eth_qp;
-	u32 cur_hp_mr;
-	u32 reserved2[3];
-	u32 max_rd_domain;
-	u32 max_qp;
-	u32 max_cq;
-	u32 max_eq;
-	u32 max_mr;
-	u32 max_hp_mr;
-	u32 max_mw;
-	u32 max_mrwpte;
-	u32 max_special_mrwpte;
-	u32 max_rd_ee_context;
-	u32 max_mcast_grp;
-	u32 max_total_mcast_qp_attach;
-	u32 max_mcast_qp_attach;
-	u32 max_raw_ipv6_qp;
-	u32 max_raw_ethy_qp;
-	u32 internal_clock_frequency;
-	u32 max_pd;
-	u32 max_ah;
-	u32 max_cqe;
-	u32 max_wqes_wq;
-	u32 max_partitions;
-	u32 max_rr_ee_context;
-	u32 max_rr_qp;
-	u32 max_rr_hca;
-	u32 max_act_wqs_ee_context;
-	u32 max_act_wqs_qp;
-	u32 max_sge;
-	u32 max_sge_rd;
-	u32 memory_page_size_supported;
-	u64 max_mr_size;
-	u32 local_ca_ack_delay;
-	u32 num_ports;
-	u32 vendor_id;
-	u32 vendor_part_id;
-	u32 hw_ver;
-	u64 node_guid;
-	u64 hca_cap_indicators;
-	u32 data_counter_register_size;
-	u32 max_shared_rq;
-	u32 max_isns_eq;
-	u32 max_neq;
-} __attribute__ ((packed));
-
-#define HCA_CAP_AH_PORT_NR_CHECK      EHCA_BMASK_IBM( 0,  0)
-#define HCA_CAP_ATOMIC                EHCA_BMASK_IBM( 1,  1)
-#define HCA_CAP_AUTO_PATH_MIG         EHCA_BMASK_IBM( 2,  2)
-#define HCA_CAP_BAD_P_KEY_CTR         EHCA_BMASK_IBM( 3,  3)
-#define HCA_CAP_SQD_RTS_PORT_CHANGE   EHCA_BMASK_IBM( 4,  4)
-#define HCA_CAP_CUR_QP_STATE_MOD      EHCA_BMASK_IBM( 5,  5)
-#define HCA_CAP_INIT_TYPE             EHCA_BMASK_IBM( 6,  6)
-#define HCA_CAP_PORT_ACTIVE_EVENT     EHCA_BMASK_IBM( 7,  7)
-#define HCA_CAP_Q_KEY_VIOL_CTR        EHCA_BMASK_IBM( 8,  8)
-#define HCA_CAP_WQE_RESIZE            EHCA_BMASK_IBM( 9,  9)
-#define HCA_CAP_RAW_PACKET_MCAST      EHCA_BMASK_IBM(10, 10)
-#define HCA_CAP_SHUTDOWN_PORT         EHCA_BMASK_IBM(11, 11)
-#define HCA_CAP_RC_LL_QP              EHCA_BMASK_IBM(12, 12)
-#define HCA_CAP_SRQ                   EHCA_BMASK_IBM(13, 13)
-#define HCA_CAP_UD_LL_QP              EHCA_BMASK_IBM(16, 16)
-#define HCA_CAP_RESIZE_MR             EHCA_BMASK_IBM(17, 17)
-#define HCA_CAP_MINI_QP               EHCA_BMASK_IBM(18, 18)
-#define HCA_CAP_H_ALLOC_RES_SYNC      EHCA_BMASK_IBM(19, 19)
-
-/* query port response block */
-struct hipz_query_port {
-	u32 state;
-	u32 bad_pkey_cntr;
-	u32 lmc;
-	u32 lid;
-	u32 subnet_timeout;
-	u32 qkey_viol_cntr;
-	u32 sm_sl;
-	u32 sm_lid;
-	u32 capability_mask;
-	u32 init_type_reply;
-	u32 pkey_tbl_len;
-	u32 gid_tbl_len;
-	u64 gid_prefix;
-	u32 port_nr;
-	u16 pkey_entries[16];
-	u8  reserved1[32];
-	u32 trent_size;
-	u32 trbuf_size;
-	u64 max_msg_sz;
-	u32 max_mtu;
-	u32 vl_cap;
-	u32 phys_pstate;
-	u32 phys_state;
-	u32 phys_speed;
-	u32 phys_width;
-	u8  reserved2[1884];
-	u64 guid_entries[255];
-} __attribute__ ((packed));
-
-#endif
diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.c b/drivers/staging/rdma/ehca/ipz_pt_fn.c
deleted file mode 100644
index 7ffc748cb973..000000000000
--- a/drivers/staging/rdma/ehca/ipz_pt_fn.c
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  internal queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-
-#include "ehca_tools.h"
-#include "ipz_pt_fn.h"
-#include "ehca_classes.h"
-
-#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT)
-
-struct kmem_cache *small_qp_cache;
-
-void *ipz_qpageit_get_inc(struct ipz_queue *queue)
-{
-	void *ret = ipz_qeit_get(queue);
-	queue->current_q_offset += queue->pagesize;
-	if (queue->current_q_offset > queue->queue_length) {
-		queue->current_q_offset -= queue->pagesize;
-		ret = NULL;
-	}
-	if (((u64)ret) % queue->pagesize) {
-		ehca_gen_err("ERROR!! not at PAGE-Boundary");
-		return NULL;
-	}
-	return ret;
-}
-
-void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
-{
-	void *ret = ipz_qeit_get(queue);
-	u64 last_entry_in_q = queue->queue_length - queue->qe_size;
-
-	queue->current_q_offset += queue->qe_size;
-	if (queue->current_q_offset > last_entry_in_q) {
-		queue->current_q_offset = 0;
-		queue->toggle_state = (~queue->toggle_state) & 1;
-	}
-
-	return ret;
-}
-
-int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
-{
-	int i;
-	for (i = 0; i < queue->queue_length / queue->pagesize; i++) {
-		u64 page = __pa(queue->queue_pages[i]);
-		if (addr >= page && addr < page + queue->pagesize) {
-			*q_offset = addr - page + i * queue->pagesize;
-			return 0;
-		}
-	}
-	return -EINVAL;
-}
-
-#if PAGE_SHIFT < EHCA_PAGESHIFT
-#error Kernel pages must be at least as large than eHCA pages (4K) !
-#endif
-
-/*
- * allocate pages for queue:
- * outer loop allocates whole kernel pages (page aligned) and
- * inner loop divides a kernel page into smaller hca queue pages
- */
-static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages)
-{
-	int k, f = 0;
-	u8 *kpage;
-
-	while (f < nr_of_pages) {
-		kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
-		if (!kpage)
-			goto out;
-
-		for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) {
-			queue->queue_pages[f] = (struct ipz_page *)kpage;
-			kpage += EHCA_PAGESIZE;
-			f++;
-		}
-	}
-	return 1;
-
-out:
-	for (f = 0; f < nr_of_pages && queue->queue_pages[f];
-	     f += PAGES_PER_KPAGE)
-		free_page((unsigned long)(queue->queue_pages)[f]);
-	return 0;
-}
-
-static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
-{
-	int order = ilog2(queue->pagesize) - 9;
-	struct ipz_small_queue_page *page;
-	unsigned long bit;
-
-	mutex_lock(&pd->lock);
-
-	if (!list_empty(&pd->free[order]))
-		page = list_entry(pd->free[order].next,
-				  struct ipz_small_queue_page, list);
-	else {
-		page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL);
-		if (!page)
-			goto out;
-
-		page->page = get_zeroed_page(GFP_KERNEL);
-		if (!page->page) {
-			kmem_cache_free(small_qp_cache, page);
-			goto out;
-		}
-
-		list_add(&page->list, &pd->free[order]);
-	}
-
-	bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order);
-	__set_bit(bit, page->bitmap);
-	page->fill++;
-
-	if (page->fill == IPZ_SPAGE_PER_KPAGE >> order)
-		list_move(&page->list, &pd->full[order]);
-
-	mutex_unlock(&pd->lock);
-
-	queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
-	queue->small_page = page;
-	queue->offset = bit << (order + 9);
-	return 1;
-
-out:
-	ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
-	mutex_unlock(&pd->lock);
-	return 0;
-}
-
-static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
-{
-	int order = ilog2(queue->pagesize) - 9;
-	struct ipz_small_queue_page *page = queue->small_page;
-	unsigned long bit;
-	int free_page = 0;
-
-	bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK)
-		>> (order + 9);
-
-	mutex_lock(&pd->lock);
-
-	__clear_bit(bit, page->bitmap);
-	page->fill--;
-
-	if (page->fill == 0) {
-		list_del(&page->list);
-		free_page = 1;
-	}
-
-	if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1)
-		/* the page was full until we freed the chunk */
-		list_move_tail(&page->list, &pd->free[order]);
-
-	mutex_unlock(&pd->lock);
-
-	if (free_page) {
-		free_page(page->page);
-		kmem_cache_free(small_qp_cache, page);
-	}
-}
-
-int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
-		   const u32 nr_of_pages, const u32 pagesize,
-		   const u32 qe_size, const u32 nr_of_sg,
-		   int is_small)
-{
-	if (pagesize > PAGE_SIZE) {
-		ehca_gen_err("FATAL ERROR: pagesize=%x "
-			     "is greater than kernel page size", pagesize);
-		return 0;
-	}
-
-	/* init queue fields */
-	queue->queue_length = nr_of_pages * pagesize;
-	queue->pagesize = pagesize;
-	queue->qe_size = qe_size;
-	queue->act_nr_of_sg = nr_of_sg;
-	queue->current_q_offset = 0;
-	queue->toggle_state = 1;
-	queue->small_page = NULL;
-
-	/* allocate queue page pointers */
-	queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *),
-				     GFP_KERNEL | __GFP_NOWARN);
-	if (!queue->queue_pages) {
-		queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *));
-		if (!queue->queue_pages) {
-			ehca_gen_err("Couldn't allocate queue page list");
-			return 0;
-		}
-	}
-
-	/* allocate actual queue pages */
-	if (is_small) {
-		if (!alloc_small_queue_page(queue, pd))
-			goto ipz_queue_ctor_exit0;
-	} else
-		if (!alloc_queue_pages(queue, nr_of_pages))
-			goto ipz_queue_ctor_exit0;
-
-	return 1;
-
-ipz_queue_ctor_exit0:
-	ehca_gen_err("Couldn't alloc pages queue=%p "
-		 "nr_of_pages=%x",  queue, nr_of_pages);
-	kvfree(queue->queue_pages);
-
-	return 0;
-}
-
-int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
-{
-	int i, nr_pages;
-
-	if (!queue || !queue->queue_pages) {
-		ehca_gen_dbg("queue or queue_pages is NULL");
-		return 0;
-	}
-
-	if (queue->small_page)
-		free_small_queue_page(queue, pd);
-	else {
-		nr_pages = queue->queue_length / queue->pagesize;
-		for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE)
-			free_page((unsigned long)queue->queue_pages[i]);
-	}
-
-	kvfree(queue->queue_pages);
-
-	return 1;
-}
-
-int ehca_init_small_qp_cache(void)
-{
-	small_qp_cache = kmem_cache_create("ehca_cache_small_qp",
-					   sizeof(struct ipz_small_queue_page),
-					   0, SLAB_HWCACHE_ALIGN, NULL);
-	if (!small_qp_cache)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void ehca_cleanup_small_qp_cache(void)
-{
-	kmem_cache_destroy(small_qp_cache);
-}
diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.h b/drivers/staging/rdma/ehca/ipz_pt_fn.h
deleted file mode 100644
index a801274ea337..000000000000
--- a/drivers/staging/rdma/ehca/ipz_pt_fn.h
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- *  IBM eServer eHCA Infiniband device driver for Linux on POWER
- *
- *  internal queue handling
- *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Reinhard Ernst <rernst@de.ibm.com>
- *           Christoph Raisch <raisch@de.ibm.com>
- *
- *  Copyright (c) 2005 IBM Corporation
- *
- *  All rights reserved.
- *
- *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
- *  BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __IPZ_PT_FN_H__
-#define __IPZ_PT_FN_H__
-
-#define EHCA_PAGESHIFT   12
-#define EHCA_PAGESIZE   4096UL
-#define EHCA_PAGEMASK   (~(EHCA_PAGESIZE-1))
-#define EHCA_PT_ENTRIES 512UL
-
-#include "ehca_tools.h"
-#include "ehca_qes.h"
-
-struct ehca_pd;
-struct ipz_small_queue_page;
-
-extern struct kmem_cache *small_qp_cache;
-
-/* struct generic ehca page */
-struct ipz_page {
-	u8 entries[EHCA_PAGESIZE];
-};
-
-#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512)
-
-struct ipz_small_queue_page {
-	unsigned long page;
-	unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG];
-	int fill;
-	void *mapped_addr;
-	u32 mmap_count;
-	struct list_head list;
-};
-
-/* struct generic queue in linux kernel virtual memory (kv) */
-struct ipz_queue {
-	u64 current_q_offset;	/* current queue entry */
-
-	struct ipz_page **queue_pages;	/* array of pages belonging to queue */
-	u32 qe_size;		/* queue entry size */
-	u32 act_nr_of_sg;
-	u32 queue_length;	/* queue length allocated in bytes */
-	u32 pagesize;
-	u32 toggle_state;	/* toggle flag - per page */
-	u32 offset; /* save offset within page for small_qp */
-	struct ipz_small_queue_page *small_page;
-};
-
-/*
- * return current Queue Entry for a certain q_offset
- * returns address (kv) of Queue Entry
- */
-static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
-{
-	struct ipz_page *current_page;
-	if (q_offset >= queue->queue_length)
-		return NULL;
-	current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
-	return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
-}
-
-/*
- * return current Queue Entry
- * returns address (kv) of Queue Entry
- */
-static inline void *ipz_qeit_get(struct ipz_queue *queue)
-{
-	return ipz_qeit_calc(queue, queue->current_q_offset);
-}
-
-/*
- * return current Queue Page , increment Queue Page iterator from
- * page to page in struct ipz_queue, last increment will return 0! and
- * NOT wrap
- * returns address (kv) of Queue Page
- * warning don't use in parallel with ipz_QE_get_inc()
- */
-void *ipz_qpageit_get_inc(struct ipz_queue *queue);
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * warning don't use in parallel with ipz_qpageit_get_inc()
- */
-static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
-{
-	void *ret = ipz_qeit_get(queue);
-	queue->current_q_offset += queue->qe_size;
-	if (queue->current_q_offset >= queue->queue_length) {
-		queue->current_q_offset = 0;
-		/* toggle the valid flag */
-		queue->toggle_state = (~queue->toggle_state) & 1;
-	}
-
-	return ret;
-}
-
-/*
- * return a bool indicating whether current Queue Entry is valid
- */
-static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
-{
-	struct ehca_cqe *cqe = ipz_qeit_get(queue);
-	return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1));
-}
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * returns 0 and does not increment, if wrong valid state
- * warning don't use in parallel with ipz_qpageit_get_inc()
- */
-static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
-{
-	return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL;
-}
-
-/*
- * returns and resets Queue Entry iterator
- * returns address (kv) of first Queue Entry
- */
-static inline void *ipz_qeit_reset(struct ipz_queue *queue)
-{
-	queue->current_q_offset = 0;
-	return ipz_qeit_get(queue);
-}
-
-/*
- * return the q_offset corresponding to an absolute address
- */
-int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset);
-
-/*
- * return the next queue offset. don't modify the queue.
- */
-static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset)
-{
-	offset += queue->qe_size;
-	if (offset >= queue->queue_length) offset = 0;
-	return offset;
-}
-
-/* struct generic page table */
-struct ipz_pt {
-	u64 entries[EHCA_PT_ENTRIES];
-};
-
-/* struct page table for a queue, only to be used in pf */
-struct ipz_qpt {
-	/* queue page tables (kv), use u64 because we know the element length */
-	u64 *qpts;
-	u32 n_qpts;
-	u32 n_ptes;       /*  number of page table entries */
-	u64 *current_pte_addr;
-};
-
-/*
- * constructor for a ipz_queue_t, placement new for ipz_queue_t,
- * new for all dependent datastructors
- * all QP Tables are the same
- * flow:
- *    allocate+pin queue
- * see ipz_qpt_ctor()
- * returns true if ok, false if out of memory
- */
-int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
-		   const u32 nr_of_pages, const u32 pagesize,
-		   const u32 qe_size, const u32 nr_of_sg,
-		   int is_small);
-
-/*
- * destructor for a ipz_queue_t
- *  -# free queue
- *  see ipz_queue_ctor()
- *  returns true if ok, false if queue was NULL-ptr of free failed
- */
-int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue);
-
-/*
- * constructor for a ipz_qpt_t,
- * placement new for struct ipz_queue, new for all dependent datastructors
- * all QP Tables are the same,
- * flow:
- * -# allocate+pin queue
- * -# initialise ptcb
- * -# allocate+pin PTs
- * -# link PTs to a ring, according to HCA Arch, set bit62 id needed
- * -# the ring must have room for exactly nr_of_PTEs
- * see ipz_qpt_ctor()
- */
-void ipz_qpt_ctor(struct ipz_qpt *qpt,
-		  const u32 nr_of_qes,
-		  const u32 pagesize,
-		  const u32 qe_size,
-		  const u8 lowbyte, const u8 toggle,
-		  u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
-
-/*
- * return current Queue Entry, increment Queue Entry iterator by one
- * step in struct ipz_queue, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
- * fix EQ page problems
- */
-void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
-
-/*
- * return current Event Queue Entry, increment Queue Entry iterator
- * by one step in struct ipz_queue if valid, will wrap in ringbuffer
- * returns address (kv) of Queue Entry BEFORE increment
- * returns 0 and does not increment, if wrong valid state
- * warning don't use in parallel with ipz_queue_QPageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
- */
-static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
-{
-	void *ret = ipz_qeit_get(queue);
-	u32 qe = *(u8 *)ret;
-	if ((qe >> 7) != (queue->toggle_state & 1))
-		return NULL;
-	ipz_qeit_eq_get_inc(queue); /* this is a good one */
-	return ret;
-}
-
-static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
-{
-	void *ret = ipz_qeit_get(queue);
-	u32 qe = *(u8 *)ret;
-	if ((qe >> 7) != (queue->toggle_state & 1))
-		return NULL;
-	return ret;
-}
-
-/* returns address (GX) of first queue entry */
-static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
-{
-	return be64_to_cpu(qpt->qpts[0]);
-}
-
-/* returns address (kv) of first page of queue page table */
-static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
-{
-	return qpt->qpts;
-}
-
-#endif				/* __IPZ_PT_FN_H__ */
diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/staging/rdma/hfi1/Kconfig
index fd25078ee923..3e668d852f03 100644
--- a/drivers/staging/rdma/hfi1/Kconfig
+++ b/drivers/staging/rdma/hfi1/Kconfig
@@ -1,6 +1,7 @@
 config INFINIBAND_HFI1
 	tristate "Intel OPA Gen1 support"
-	depends on X86_64
+	depends on X86_64 && INFINIBAND_RDMAVT
+	select MMU_NOTIFIER
 	default m
 	---help---
 	This is a low-level driver for Intel OPA Gen1 adapter.
@@ -25,13 +26,3 @@ config SDMA_VERBOSITY
 	---help---
 	This is a configuration flag to enable verbose
 	SDMA debug
-config PRESCAN_RXQ
-	bool "Enable prescanning of the RX queue for ECNs"
-	depends on INFINIBAND_HFI1
-	default n
-	---help---
-	This option toggles the prescanning of the receive queue for
-	Explicit Congestion Notifications. If an ECN is detected, it
-	is processed as quickly as possible, the ECN is toggled off.
-	After the prescanning step, the receive queue is processed as
-	usual.
diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile
index 68c5a315e557..8dc59382ee96 100644
--- a/drivers/staging/rdma/hfi1/Makefile
+++ b/drivers/staging/rdma/hfi1/Makefile
@@ -7,10 +7,12 @@
 #
 obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
 
-hfi1-y := chip.o cq.o device.o diag.o dma.o driver.o efivar.o eprom.o file_ops.o firmware.o \
-	init.o intr.o keys.o mad.o mmap.o mr.o pcie.o pio.o pio_copy.o \
-	qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \
-	uc.o ud.o user_pages.o user_sdma.o verbs_mcast.o verbs.o
+hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \
+	eprom.o file_ops.o firmware.o \
+	init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
+	qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \
+	uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
+	verbs_txreq.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/staging/rdma/hfi1/affinity.c
new file mode 100644
index 000000000000..2cb8ca77f876
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/affinity.c
@@ -0,0 +1,430 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/topology.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+
+#include "hfi.h"
+#include "affinity.h"
+#include "sdma.h"
+#include "trace.h"
+
+struct cpu_mask_set {
+	struct cpumask mask;
+	struct cpumask used;
+	uint gen;
+};
+
+struct hfi1_affinity {
+	struct cpu_mask_set def_intr;
+	struct cpu_mask_set rcv_intr;
+	struct cpu_mask_set proc;
+	/* spin lock to protect affinity struct */
+	spinlock_t lock;
+};
+
+/* Name of IRQ types, indexed by enum irq_type */
+static const char * const irq_type_names[] = {
+	"SDMA",
+	"RCVCTXT",
+	"GENERAL",
+	"OTHER",
+};
+
+static inline void init_cpu_mask_set(struct cpu_mask_set *set)
+{
+	cpumask_clear(&set->mask);
+	cpumask_clear(&set->used);
+	set->gen = 0;
+}
+
+/*
+ * Interrupt affinity.
+ *
+ * non-rcv avail gets a default mask that
+ * starts as possible cpus with threads reset
+ * and each rcv avail reset.
+ *
+ * rcv avail gets node relative 1 wrapping back
+ * to the node relative 1 as necessary.
+ *
+ */
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
+{
+	int node = pcibus_to_node(dd->pcidev->bus);
+	struct hfi1_affinity *info;
+	const struct cpumask *local_mask;
+	int curr_cpu, possible, i, ht;
+
+	if (node < 0)
+		node = numa_node_id();
+	dd->node = node;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+	spin_lock_init(&info->lock);
+
+	init_cpu_mask_set(&info->def_intr);
+	init_cpu_mask_set(&info->rcv_intr);
+	init_cpu_mask_set(&info->proc);
+
+	local_mask = cpumask_of_node(dd->node);
+	if (cpumask_first(local_mask) >= nr_cpu_ids)
+		local_mask = topology_core_cpumask(0);
+	/* use local mask as default */
+	cpumask_copy(&info->def_intr.mask, local_mask);
+	/*
+	 * Remove HT cores from the default mask.  Do this in two steps below.
+	 */
+	possible = cpumask_weight(&info->def_intr.mask);
+	ht = cpumask_weight(topology_sibling_cpumask(
+					cpumask_first(&info->def_intr.mask)));
+	/*
+	 * Step 1.  Skip over the first N HT siblings and use them as the
+	 * "real" cores.  Assumes that HT cores are not enumerated in
+	 * succession (except in the single core case).
+	 */
+	curr_cpu = cpumask_first(&info->def_intr.mask);
+	for (i = 0; i < possible / ht; i++)
+		curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+	/*
+	 * Step 2.  Remove the remaining HT siblings.  Use cpumask_next() to
+	 * skip any gaps.
+	 */
+	for (; i < possible; i++) {
+		cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
+		curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+	}
+
+	/*  fill in the receive list */
+	possible = cpumask_weight(&info->def_intr.mask);
+	curr_cpu = cpumask_first(&info->def_intr.mask);
+	if (possible == 1) {
+		/*  only one CPU, everyone will use it */
+		cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
+	} else {
+		/*
+		 * Retain the first CPU in the default list for the control
+		 * context.
+		 */
+		curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+		/*
+		 * Remove the remaining kernel receive queues from
+		 * the default list and add them to the receive list.
+		 */
+		for (i = 0; i < dd->n_krcv_queues - 1; i++) {
+			cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
+			cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
+			curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+			if (curr_cpu >= nr_cpu_ids)
+				break;
+		}
+	}
+
+	cpumask_copy(&info->proc.mask, cpu_online_mask);
+	dd->affinity = info;
+	return 0;
+}
+
+void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
+{
+	kfree(dd->affinity);
+}
+
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+{
+	int ret;
+	cpumask_var_t diff;
+	struct cpu_mask_set *set;
+	struct sdma_engine *sde = NULL;
+	struct hfi1_ctxtdata *rcd = NULL;
+	char extra[64];
+	int cpu = -1;
+
+	extra[0] = '\0';
+	cpumask_clear(&msix->mask);
+
+	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
+	if (!ret)
+		return -ENOMEM;
+
+	switch (msix->type) {
+	case IRQ_SDMA:
+		sde = (struct sdma_engine *)msix->arg;
+		scnprintf(extra, 64, "engine %u", sde->this_idx);
+		/* fall through */
+	case IRQ_GENERAL:
+		set = &dd->affinity->def_intr;
+		break;
+	case IRQ_RCVCTXT:
+		rcd = (struct hfi1_ctxtdata *)msix->arg;
+		if (rcd->ctxt == HFI1_CTRL_CTXT) {
+			set = &dd->affinity->def_intr;
+			cpu = cpumask_first(&set->mask);
+		} else {
+			set = &dd->affinity->rcv_intr;
+		}
+		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
+		break;
+	default:
+		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
+		return -EINVAL;
+	}
+
+	/*
+	 * The control receive context is placed on a particular CPU, which
+	 * is set above.  Skip accounting for it.  Everything else finds its
+	 * CPU here.
+	 */
+	if (cpu == -1) {
+		spin_lock(&dd->affinity->lock);
+		if (cpumask_equal(&set->mask, &set->used)) {
+			/*
+			 * We've used up all the CPUs, bump up the generation
+			 * and reset the 'used' map
+			 */
+			set->gen++;
+			cpumask_clear(&set->used);
+		}
+		cpumask_andnot(diff, &set->mask, &set->used);
+		cpu = cpumask_first(diff);
+		cpumask_set_cpu(cpu, &set->used);
+		spin_unlock(&dd->affinity->lock);
+	}
+
+	switch (msix->type) {
+	case IRQ_SDMA:
+		sde->cpu = cpu;
+		break;
+	case IRQ_GENERAL:
+	case IRQ_RCVCTXT:
+	case IRQ_OTHER:
+		break;
+	}
+
+	cpumask_set_cpu(cpu, &msix->mask);
+	dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
+		    msix->msix.vector, irq_type_names[msix->type],
+		    extra, cpu);
+	irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+
+	free_cpumask_var(diff);
+	return 0;
+}
+
+void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
+			   struct hfi1_msix_entry *msix)
+{
+	struct cpu_mask_set *set = NULL;
+	struct hfi1_ctxtdata *rcd;
+
+	switch (msix->type) {
+	case IRQ_SDMA:
+	case IRQ_GENERAL:
+		set = &dd->affinity->def_intr;
+		break;
+	case IRQ_RCVCTXT:
+		rcd = (struct hfi1_ctxtdata *)msix->arg;
+		/* only do accounting for non control contexts */
+		if (rcd->ctxt != HFI1_CTRL_CTXT)
+			set = &dd->affinity->rcv_intr;
+		break;
+	default:
+		return;
+	}
+
+	if (set) {
+		spin_lock(&dd->affinity->lock);
+		cpumask_andnot(&set->used, &set->used, &msix->mask);
+		if (cpumask_empty(&set->used) && set->gen) {
+			set->gen--;
+			cpumask_copy(&set->used, &set->mask);
+		}
+		spin_unlock(&dd->affinity->lock);
+	}
+
+	irq_set_affinity_hint(msix->msix.vector, NULL);
+	cpumask_clear(&msix->mask);
+}
+
+int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
+{
+	int cpu = -1, ret;
+	cpumask_var_t diff, mask, intrs;
+	const struct cpumask *node_mask,
+		*proc_mask = tsk_cpus_allowed(current);
+	struct cpu_mask_set *set = &dd->affinity->proc;
+	char buf[1024];
+
+	/*
+	 * check whether process/context affinity has already
+	 * been set
+	 */
+	if (cpumask_weight(proc_mask) == 1) {
+		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
+		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %s",
+			  current->pid, current->comm, buf);
+		/*
+		 * Mark the pre-set CPU as used. This is atomic so we don't
+		 * need the lock
+		 */
+		cpu = cpumask_first(proc_mask);
+		cpumask_set_cpu(cpu, &set->used);
+		goto done;
+	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
+		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
+		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %s",
+			  current->pid, current->comm, buf);
+		goto done;
+	}
+
+	/*
+	 * The process does not have a preset CPU affinity so find one to
+	 * recommend. We prefer CPUs on the same NUMA as the device.
+	 */
+
+	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
+	if (!ret)
+		goto done;
+	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+	if (!ret)
+		goto free_diff;
+	ret = zalloc_cpumask_var(&intrs, GFP_KERNEL);
+	if (!ret)
+		goto free_mask;
+
+	spin_lock(&dd->affinity->lock);
+	/*
+	 * If we've used all available CPUs, clear the mask and start
+	 * overloading.
+	 */
+	if (cpumask_equal(&set->mask, &set->used)) {
+		set->gen++;
+		cpumask_clear(&set->used);
+	}
+
+	/* CPUs used by interrupt handlers */
+	cpumask_copy(intrs, (dd->affinity->def_intr.gen ?
+			     &dd->affinity->def_intr.mask :
+			     &dd->affinity->def_intr.used));
+	cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
+				  &dd->affinity->rcv_intr.mask :
+				  &dd->affinity->rcv_intr.used));
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(intrs));
+	hfi1_cdbg(PROC, "CPUs used by interrupts: %s", buf);
+
+	/*
+	 * If we don't have a NUMA node requested, preference is towards
+	 * device NUMA node
+	 */
+	if (node == -1)
+		node = dd->node;
+	node_mask = cpumask_of_node(node);
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(node_mask));
+	hfi1_cdbg(PROC, "device on NUMA %u, CPUs %s", node, buf);
+
+	/* diff will hold all unused cpus */
+	cpumask_andnot(diff, &set->mask, &set->used);
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(diff));
+	hfi1_cdbg(PROC, "unused CPUs (all) %s", buf);
+
+	/* get cpumask of available CPUs on preferred NUMA */
+	cpumask_and(mask, diff, node_mask);
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
+	hfi1_cdbg(PROC, "available cpus on NUMA %s", buf);
+
+	/*
+	 * At first, we don't want to place processes on the same
+	 * CPUs as interrupt handlers.
+	 */
+	cpumask_andnot(diff, mask, intrs);
+	if (!cpumask_empty(diff))
+		cpumask_copy(mask, diff);
+
+	/*
+	 * if we don't have a cpu on the preferred NUMA, get
+	 * the list of the remaining available CPUs
+	 */
+	if (cpumask_empty(mask)) {
+		cpumask_andnot(diff, &set->mask, &set->used);
+		cpumask_andnot(mask, diff, node_mask);
+	}
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
+	hfi1_cdbg(PROC, "possible CPUs for process %s", buf);
+
+	cpu = cpumask_first(mask);
+	if (cpu >= nr_cpu_ids) /* empty */
+		cpu = -1;
+	else
+		cpumask_set_cpu(cpu, &set->used);
+	spin_unlock(&dd->affinity->lock);
+
+	free_cpumask_var(intrs);
+free_mask:
+	free_cpumask_var(mask);
+free_diff:
+	free_cpumask_var(diff);
+done:
+	return cpu;
+}
+
+void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu)
+{
+	struct cpu_mask_set *set = &dd->affinity->proc;
+
+	if (cpu < 0)
+		return;
+	spin_lock(&dd->affinity->lock);
+	cpumask_clear_cpu(cpu, &set->used);
+	if (cpumask_empty(&set->used) && set->gen) {
+		set->gen--;
+		cpumask_copy(&set->used, &set->mask);
+	}
+	spin_unlock(&dd->affinity->lock);
+}
+
diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/staging/rdma/hfi1/affinity.h
new file mode 100644
index 000000000000..b287e4963024
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/affinity.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_AFFINITY_H
+#define _HFI1_AFFINITY_H
+
+#include "hfi.h"
+
+enum irq_type {
+	IRQ_SDMA,
+	IRQ_RCVCTXT,
+	IRQ_GENERAL,
+	IRQ_OTHER
+};
+
+/* Can be used for both memory and cpu */
+enum affinity_flags {
+	AFF_AUTO,
+	AFF_NUMA_LOCAL,
+	AFF_DEV_LOCAL,
+	AFF_IRQ_LOCAL
+};
+
+struct hfi1_msix_entry;
+
+/* Initialize driver affinity data */
+int hfi1_dev_affinity_init(struct hfi1_devdata *);
+/* Free driver affinity data */
+void hfi1_dev_affinity_free(struct hfi1_devdata *);
+/*
+ * Set IRQ affinity to a CPU. The function will determine the
+ * CPU and set the affinity to it.
+ */
+int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+/*
+ * Remove the IRQ's CPU affinity. This function also updates
+ * any internal CPU tracking data
+ */
+void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+/*
+ * Determine a CPU affinity for a user process, if the process does not
+ * have an affinity set yet.
+ */
+int hfi1_get_proc_affinity(struct hfi1_devdata *, int);
+/* Release a CPU used by a user process. */
+void hfi1_put_proc_affinity(struct hfi1_devdata *, int);
+
+#endif /* _HFI1_AFFINITY_H */
diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/staging/rdma/hfi1/aspm.h
new file mode 100644
index 000000000000..0d58fe3b49b5
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/aspm.h
@@ -0,0 +1,309 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _ASPM_H
+#define _ASPM_H
+
+#include "hfi.h"
+
+extern uint aspm_mode;
+
+enum aspm_mode {
+	ASPM_MODE_DISABLED = 0,	/* ASPM always disabled, performance mode */
+	ASPM_MODE_ENABLED = 1,	/* ASPM always enabled, power saving mode */
+	ASPM_MODE_DYNAMIC = 2,	/* ASPM enabled/disabled dynamically */
+};
+
+/* Time after which the timer interrupt will re-enable ASPM */
+#define ASPM_TIMER_MS 1000
+/* Time for which interrupts are ignored after a timer has been scheduled */
+#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2)
+/* Two interrupts within this time trigger ASPM disable */
+#define ASPM_TRIGGER_MS 1
+#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull)
+#define ASPM_L1_SUPPORTED(reg) \
+	(((reg & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2)
+
+static inline bool aspm_hw_l1_supported(struct hfi1_devdata *dd)
+{
+	struct pci_dev *parent = dd->pcidev->bus->self;
+	u32 up, dn;
+
+	/*
+	 * If the driver does not have access to the upstream component,
+	 * it cannot support ASPM L1 at all.
+	 */
+	if (!parent)
+		return false;
+
+	pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn);
+	dn = ASPM_L1_SUPPORTED(dn);
+
+	pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up);
+	up = ASPM_L1_SUPPORTED(up);
+
+	/* ASPM works on A-step but is reported as not supported */
+	return (!!dn || is_ax(dd)) && !!up;
+}
+
+/* Set L1 entrance latency for slower entry to L1 */
+static inline void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd)
+{
+	u32 l1_ent_lat = 0x4u;
+	u32 reg32;
+
+	pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32);
+	reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK;
+	reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT;
+	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32);
+}
+
+static inline void aspm_hw_enable_l1(struct hfi1_devdata *dd)
+{
+	struct pci_dev *parent = dd->pcidev->bus->self;
+
+	/*
+	 * If the driver does not have access to the upstream component,
+	 * it cannot support ASPM L1 at all.
+	 */
+	if (!parent)
+		return;
+
+	/* Enable ASPM L1 first in upstream component and then downstream */
+	pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_ASPMC,
+					   PCI_EXP_LNKCTL_ASPM_L1);
+	pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_ASPMC,
+					   PCI_EXP_LNKCTL_ASPM_L1);
+}
+
+static inline void aspm_hw_disable_l1(struct hfi1_devdata *dd)
+{
+	struct pci_dev *parent = dd->pcidev->bus->self;
+
+	/* Disable ASPM L1 first in downstream component and then upstream */
+	pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_ASPMC, 0x0);
+	if (parent)
+		pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+						   PCI_EXP_LNKCTL_ASPMC, 0x0);
+}
+
+static inline void aspm_enable(struct hfi1_devdata *dd)
+{
+	if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED ||
+	    !dd->aspm_supported)
+		return;
+
+	aspm_hw_enable_l1(dd);
+	dd->aspm_enabled = true;
+}
+
+static inline void aspm_disable(struct hfi1_devdata *dd)
+{
+	if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED)
+		return;
+
+	aspm_hw_disable_l1(dd);
+	dd->aspm_enabled = false;
+}
+
+static inline void aspm_disable_inc(struct hfi1_devdata *dd)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->aspm_lock, flags);
+	aspm_disable(dd);
+	atomic_inc(&dd->aspm_disabled_cnt);
+	spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+static inline void aspm_enable_dec(struct hfi1_devdata *dd)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->aspm_lock, flags);
+	if (atomic_dec_and_test(&dd->aspm_disabled_cnt))
+		aspm_enable(dd);
+	spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+/* ASPM processing for each receive context interrupt */
+static inline void aspm_ctx_disable(struct hfi1_ctxtdata *rcd)
+{
+	bool restart_timer;
+	bool close_interrupts;
+	unsigned long flags;
+	ktime_t now, prev;
+
+	/* Quickest exit for minimum impact */
+	if (!rcd->aspm_intr_supported)
+		return;
+
+	spin_lock_irqsave(&rcd->aspm_lock, flags);
+	/* PSM contexts are open */
+	if (!rcd->aspm_intr_enable)
+		goto unlock;
+
+	prev = rcd->aspm_ts_last_intr;
+	now = ktime_get();
+	rcd->aspm_ts_last_intr = now;
+
+	/* An interrupt pair close together in time */
+	close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS;
+
+	/* Don't push out our timer till this much time has elapsed */
+	restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) >
+				    ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC;
+	restart_timer = restart_timer && close_interrupts;
+
+	/* Disable ASPM and schedule timer */
+	if (rcd->aspm_enabled && close_interrupts) {
+		aspm_disable_inc(rcd->dd);
+		rcd->aspm_enabled = false;
+		restart_timer = true;
+	}
+
+	if (restart_timer) {
+		mod_timer(&rcd->aspm_timer,
+			  jiffies + msecs_to_jiffies(ASPM_TIMER_MS));
+		rcd->aspm_ts_timer_sched = now;
+	}
+unlock:
+	spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/* Timer function for re-enabling ASPM in the absence of interrupt activity */
+static inline void aspm_ctx_timer_function(unsigned long data)
+{
+	struct hfi1_ctxtdata *rcd = (struct hfi1_ctxtdata *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rcd->aspm_lock, flags);
+	aspm_enable_dec(rcd->dd);
+	rcd->aspm_enabled = true;
+	spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/* Disable interrupt processing for verbs contexts when PSM contexts are open */
+static inline void aspm_disable_all(struct hfi1_devdata *dd)
+{
+	struct hfi1_ctxtdata *rcd;
+	unsigned long flags;
+	unsigned i;
+
+	for (i = 0; i < dd->first_user_ctxt; i++) {
+		rcd = dd->rcd[i];
+		del_timer_sync(&rcd->aspm_timer);
+		spin_lock_irqsave(&rcd->aspm_lock, flags);
+		rcd->aspm_intr_enable = false;
+		spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+	}
+
+	aspm_disable(dd);
+	atomic_set(&dd->aspm_disabled_cnt, 0);
+}
+
+/* Re-enable interrupt processing for verbs contexts */
+static inline void aspm_enable_all(struct hfi1_devdata *dd)
+{
+	struct hfi1_ctxtdata *rcd;
+	unsigned long flags;
+	unsigned i;
+
+	aspm_enable(dd);
+
+	if (aspm_mode != ASPM_MODE_DYNAMIC)
+		return;
+
+	for (i = 0; i < dd->first_user_ctxt; i++) {
+		rcd = dd->rcd[i];
+		spin_lock_irqsave(&rcd->aspm_lock, flags);
+		rcd->aspm_intr_enable = true;
+		rcd->aspm_enabled = true;
+		spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+	}
+}
+
+static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
+{
+	spin_lock_init(&rcd->aspm_lock);
+	setup_timer(&rcd->aspm_timer, aspm_ctx_timer_function,
+		    (unsigned long)rcd);
+	rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
+		aspm_mode == ASPM_MODE_DYNAMIC &&
+		rcd->ctxt < rcd->dd->first_user_ctxt;
+}
+
+static inline void aspm_init(struct hfi1_devdata *dd)
+{
+	unsigned i;
+
+	spin_lock_init(&dd->aspm_lock);
+	dd->aspm_supported = aspm_hw_l1_supported(dd);
+
+	for (i = 0; i < dd->first_user_ctxt; i++)
+		aspm_ctx_init(dd->rcd[i]);
+
+	/* Start with ASPM disabled */
+	aspm_hw_set_l1_ent_latency(dd);
+	dd->aspm_enabled = false;
+	aspm_hw_disable_l1(dd);
+
+	/* Now turn on ASPM if configured */
+	aspm_enable_all(dd);
+}
+
+static inline void aspm_exit(struct hfi1_devdata *dd)
+{
+	aspm_disable_all(dd);
+
+	/* Turn on ASPM on exit to conserve power */
+	aspm_enable(dd);
+}
+
+#endif /* _ASPM_H */
diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c
index bbe5ad85cec0..16eb653903e0 100644
--- a/drivers/staging/rdma/hfi1/chip.c
+++ b/drivers/staging/rdma/hfi1/chip.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -64,6 +61,8 @@
 #include "sdma.h"
 #include "eprom.h"
 #include "efivar.h"
+#include "platform.h"
+#include "aspm.h"
 
 #define NUM_IB_PORTS 1
 
@@ -420,10 +419,10 @@ static struct flag_table pio_err_status_flags[] = {
 	SEC_SPC_FREEZE,
 	SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
 /*23*/	FLAG_ENTRY("PioWriteQwValidParity",
-	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
+	SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
 	SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
 /*24*/	FLAG_ENTRY("PioBlockQwCountParity",
-	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
+	SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
 	SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
 /*25*/	FLAG_ENTRY("PioVlfVlLenParity",
 	SEC_SPC_FREEZE,
@@ -509,6 +508,12 @@ static struct flag_table sdma_err_status_flags[] = {
 		| SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
 		| SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
 
+/* SendEgressErrInfo bits that correspond to a PortXmitDiscard counter */
+#define PORT_DISCARD_EGRESS_ERRS \
+	(SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK \
+	| SEND_EGRESS_ERR_INFO_VL_MAPPING_ERR_SMASK \
+	| SEND_EGRESS_ERR_INFO_VL_ERR_SMASK)
+
 /*
  * TXE Egress Error flags
  */
@@ -936,7 +941,7 @@ static struct flag_table dc8051_err_flags[] = {
 	FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
 	FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
 	FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
-		D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
+		    D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
 	FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
 };
 
@@ -950,7 +955,7 @@ static struct flag_table dc8051_info_err_flags[] = {
 	FLAG_ENTRY0("Unknown frame received",  UNKNOWN_FRAME),
 	FLAG_ENTRY0("Target BER not met",      TARGET_BER_NOT_MET),
 	FLAG_ENTRY0("Serdes internal loopback failure",
-					FAILED_SERDES_INTERNAL_LOOPBACK),
+		    FAILED_SERDES_INTERNAL_LOOPBACK),
 	FLAG_ENTRY0("Failed SerDes init",      FAILED_SERDES_INIT),
 	FLAG_ENTRY0("Failed LNI(Polling)",     FAILED_LNI_POLLING),
 	FLAG_ENTRY0("Failed LNI(Debounce)",    FAILED_LNI_DEBOUNCE),
@@ -958,7 +963,8 @@ static struct flag_table dc8051_info_err_flags[] = {
 	FLAG_ENTRY0("Failed LNI(OptEq)",       FAILED_LNI_OPTEQ),
 	FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
 	FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
-	FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT)
+	FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT),
+	FLAG_ENTRY0("Host Handshake Timeout",  HOST_HANDSHAKE_TIMEOUT)
 };
 
 /*
@@ -978,7 +984,6 @@ static struct flag_table dc8051_info_host_msg_flags[] = {
 	FLAG_ENTRY0("Link going down", 0x0100),
 };
 
-
 static u32 encoded_size(u32 size);
 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
@@ -1140,11 +1145,8 @@ struct cntr_entry {
 	/*
 	 * accessor for stat element, context either dd or ppd
 	 */
-	u64 (*rw_cntr)(const struct cntr_entry *,
-			       void *context,
-			       int vl,
-			       int mode,
-			       u64 data);
+	u64 (*rw_cntr)(const struct cntr_entry *, void *context, int vl,
+		       int mode, u64 data);
 };
 
 #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
@@ -1188,7 +1190,7 @@ CNTR_ELEM(#name, \
 #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
 #define OVR_ELM(ctx) \
 CNTR_ELEM("RcvHdrOvr" #ctx, \
-	  (RCV_HDR_OVFL_CNT + ctx*0x100), \
+	  (RCV_HDR_OVFL_CNT + ctx * 0x100), \
 	  0, CNTR_NORMAL, port_access_u64_csr)
 
 /* 32bit TXE */
@@ -1250,11 +1252,8 @@ CNTR_ELEM(#name, \
 
 u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
 {
-	u64 val;
-
 	if (dd->flags & HFI1_PRESENT) {
-		val = readq((void __iomem *)dd->kregbase + offset);
-		return val;
+		return readq((void __iomem *)dd->kregbase + offset);
 	}
 	return -1;
 }
@@ -1277,7 +1276,6 @@ static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
 {
 	u64 ret;
 
-
 	if (mode == CNTR_MODE_R) {
 		ret = read_csr(dd, csr);
 	} else if (mode == CNTR_MODE_W) {
@@ -1294,17 +1292,65 @@ static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
 
 /* Dev Access */
 static u64 dev_access_u32_csr(const struct cntr_entry *entry,
-			    void *context, int vl, int mode, u64 data)
+			      void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
+	u64 csr = entry->csr;
 
-	if (vl != CNTR_INVALID_VL)
-		return 0;
-	return read_write_csr(dd, entry->csr, mode, data);
+	if (entry->flags & CNTR_SDMA) {
+		if (vl == CNTR_INVALID_VL)
+			return 0;
+		csr += 0x100 * vl;
+	} else {
+		if (vl != CNTR_INVALID_VL)
+			return 0;
+	}
+	return read_write_csr(dd, csr, mode, data);
+}
+
+static u64 access_sde_err_cnt(const struct cntr_entry *entry,
+			      void *context, int idx, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].err_cnt;
+	return 0;
+}
+
+static u64 access_sde_int_cnt(const struct cntr_entry *entry,
+			      void *context, int idx, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].sdma_int_cnt;
+	return 0;
+}
+
+static u64 access_sde_idle_int_cnt(const struct cntr_entry *entry,
+				   void *context, int idx, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].idle_int_cnt;
+	return 0;
+}
+
+static u64 access_sde_progress_int_cnt(const struct cntr_entry *entry,
+				       void *context, int idx, int mode,
+				       u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].progress_int_cnt;
+	return 0;
 }
 
 static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
-			    int vl, int mode, u64 data)
+			      int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
 
@@ -1325,7 +1371,7 @@ static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
 }
 
 static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
-			    int vl, int mode, u64 data)
+			      int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
 	u32 csr = entry->csr;
@@ -1349,7 +1395,7 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
 
 /* Port Access */
 static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
-			     int vl, int mode, u64 data)
+			       int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1359,7 +1405,7 @@ static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
 }
 
 static u64 port_access_u64_csr(const struct cntr_entry *entry,
-			     void *context, int vl, int mode, u64 data)
+			       void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 	u64 val;
@@ -1399,7 +1445,7 @@ static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
 }
 
 static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
-			       int vl, int mode, u64 data)
+				 int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1409,7 +1455,7 @@ static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
 }
 
 static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
-			       int vl, int mode, u64 data)
+				 int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1430,18 +1476,25 @@ static u64 access_sw_unknown_frame_cnt(const struct cntr_entry *entry,
 }
 
 static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
-				    void *context, int vl, int mode, u64 data)
+				   void *context, int vl, int mode, u64 data)
 {
-	struct hfi1_pportdata *ppd = context;
+	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
+	u64 zero = 0;
+	u64 *counter;
 
-	if (vl != CNTR_INVALID_VL)
-		return 0;
+	if (vl == CNTR_INVALID_VL)
+		counter = &ppd->port_xmit_discards;
+	else if (vl >= 0 && vl < C_VL_COUNT)
+		counter = &ppd->port_xmit_discards_vl[vl];
+	else
+		counter = &zero;
 
-	return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
+	return read_write_sw(ppd->dd, counter, mode, data);
 }
 
 static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
-				     void *context, int vl, int mode, u64 data)
+				       void *context, int vl, int mode,
+				       u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1453,7 +1506,7 @@ static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
 }
 
 static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
-				     void *context, int vl, int mode, u64 data)
+				      void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1478,7 +1531,6 @@ static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
 			  u64 __percpu *cntr,
 			  int vl, int mode, u64 data)
 {
-
 	u64 ret = 0;
 
 	if (vl != CNTR_INVALID_VL)
@@ -1510,7 +1562,7 @@ static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
 }
 
 static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
-			      void *context, int vl, int mode, u64 data)
+				   void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
 
@@ -1526,6 +1578,14 @@ static u64 access_sw_pio_wait(const struct cntr_entry *entry,
 	return dd->verbs_dev.n_piowait;
 }
 
+static u64 access_sw_pio_drain(const struct cntr_entry *entry,
+			       void *context, int vl, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	return dd->verbs_dev.n_piodrain;
+}
+
 static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
 			      void *context, int vl, int mode, u64 data)
 {
@@ -1543,11 +1603,12 @@ static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
 }
 
 static u64 access_sw_send_schedule(const struct cntr_entry *entry,
-			       void *context, int vl, int mode, u64 data)
+				   void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
 
-	return dd->verbs_dev.n_send_schedule;
+	return read_write_cpu(dd, &dd->z_send_schedule, dd->send_schedule, vl,
+			      mode, data);
 }
 
 /* Software counters for the error status bits within MISC_ERR_STATUS */
@@ -3885,8 +3946,8 @@ static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry,		      \
 			      void *context, int vl, int mode, u64 data)      \
 {									      \
 	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;	      \
-	return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr,	      \
-			      ppd->ibport_data.cntr, vl,		      \
+	return read_write_cpu(ppd->dd, &ppd->ibport_data.rvp.z_ ##cntr,	      \
+			      ppd->ibport_data.rvp.cntr, vl,		      \
 			      mode, data);				      \
 }
 
@@ -3903,7 +3964,7 @@ static u64 access_ibp_##cntr(const struct cntr_entry *entry,		      \
 	if (vl != CNTR_INVALID_VL)					      \
 		return 0;						      \
 									      \
-	return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr,	      \
+	return read_write_sw(ppd->dd, &ppd->ibport_data.rvp.n_ ##cntr,	      \
 			     mode, data);				      \
 }
 
@@ -4066,10 +4127,28 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
 			    access_sw_vtx_wait),
 [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
 			    access_sw_pio_wait),
+[C_SW_PIO_DRAIN] = CNTR_ELEM("PioDrain", 0, 0, CNTR_NORMAL,
+			    access_sw_pio_drain),
 [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
 			    access_sw_kmem_wait),
 [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
 			    access_sw_send_schedule),
+[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
+				      SEND_DMA_DESC_FETCHED_CNT, 0,
+				      CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+				      dev_access_u32_csr),
+[C_SDMA_INT_CNT] = CNTR_ELEM("SDMAInt", 0, 0,
+			     CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+			     access_sde_int_cnt),
+[C_SDMA_ERR_CNT] = CNTR_ELEM("SDMAErrCt", 0, 0,
+			     CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+			     access_sde_err_cnt),
+[C_SDMA_IDLE_INT_CNT] = CNTR_ELEM("SDMAIdInt", 0, 0,
+				  CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+				  access_sde_idle_int_cnt),
+[C_SDMA_PROGRESS_INT_CNT] = CNTR_ELEM("SDMAPrIntCn", 0, 0,
+				      CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+				      access_sde_progress_int_cnt),
 /* MISC_ERR_STATUS */
 [C_MISC_PLL_LOCK_FAIL_ERR] = CNTR_ELEM("MISC_PLL_LOCK_FAIL_ERR", 0, 0,
 				CNTR_NORMAL,
@@ -4879,28 +4958,28 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
 [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
 [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
 [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
-			CNTR_SYNTH | CNTR_VL),
+				      CNTR_SYNTH | CNTR_VL),
 [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
-			CNTR_SYNTH | CNTR_VL),
+				     CNTR_SYNTH | CNTR_VL),
 [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
-			CNTR_SYNTH | CNTR_VL),
+				      CNTR_SYNTH | CNTR_VL),
 [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
 [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
 [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-			access_sw_link_dn_cnt),
+			     access_sw_link_dn_cnt),
 [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-			access_sw_link_up_cnt),
+			   access_sw_link_up_cnt),
 [C_SW_UNKNOWN_FRAME] = CNTR_ELEM("UnknownFrame", 0, 0, CNTR_NORMAL,
 				 access_sw_unknown_frame_cnt),
 [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-			access_sw_xmit_discards),
+			     access_sw_xmit_discards),
 [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
-			CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
-			access_sw_xmit_discards),
+				CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
+				access_sw_xmit_discards),
 [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
-			access_xmit_constraint_errs),
+				 access_xmit_constraint_errs),
 [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
-			access_rcv_constraint_errs),
+				access_rcv_constraint_errs),
 [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
 [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
 [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
@@ -4916,9 +4995,9 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
 			       access_sw_cpu_rc_acks),
 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
-			       access_sw_cpu_rc_qacks),
+				access_sw_cpu_rc_qacks),
 [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
-			       access_sw_cpu_rc_delayed_comp),
+				       access_sw_cpu_rc_delayed_comp),
 [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
 [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
 [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
@@ -5067,7 +5146,7 @@ done:
  * the buffer.  End in '*' if the buffer is too short.
  */
 static char *flag_string(char *buf, int buf_len, u64 flags,
-				struct flag_table *table, int table_size)
+			 struct flag_table *table, int table_size)
 {
 	char extra[32];
 	char *p = buf;
@@ -5128,10 +5207,8 @@ static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
 	if (source < ARRAY_SIZE(cce_misc_names))
 		strncpy(buf, cce_misc_names[source], bsize);
 	else
-		snprintf(buf,
-			bsize,
-			"Reserved%u",
-			source + IS_GENERAL_ERR_START);
+		snprintf(buf, bsize, "Reserved%u",
+			 source + IS_GENERAL_ERR_START);
 
 	return buf;
 }
@@ -5170,7 +5247,7 @@ static char *is_various_name(char *buf, size_t bsize, unsigned int source)
 	if (source < ARRAY_SIZE(various_names))
 		strncpy(buf, various_names[source], bsize);
 	else
-		snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START);
+		snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START);
 	return buf;
 }
 
@@ -5255,51 +5332,56 @@ static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
 static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
+			   cce_err_status_flags,
+			   ARRAY_SIZE(cce_err_status_flags));
 }
 
 static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
+			   rxe_err_status_flags,
+			   ARRAY_SIZE(rxe_err_status_flags));
 }
 
 static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags, misc_err_status_flags,
-			ARRAY_SIZE(misc_err_status_flags));
+			   ARRAY_SIZE(misc_err_status_flags));
 }
 
 static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
+			   pio_err_status_flags,
+			   ARRAY_SIZE(pio_err_status_flags));
 }
 
 static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			sdma_err_status_flags,
-			ARRAY_SIZE(sdma_err_status_flags));
+			   sdma_err_status_flags,
+			   ARRAY_SIZE(sdma_err_status_flags));
 }
 
 static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-		egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
+			   egress_err_status_flags,
+			   ARRAY_SIZE(egress_err_status_flags));
 }
 
 static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-		egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
+			   egress_err_info_flags,
+			   ARRAY_SIZE(egress_err_info_flags));
 }
 
 static char *send_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			send_err_status_flags,
-			ARRAY_SIZE(send_err_status_flags));
+			   send_err_status_flags,
+			   ARRAY_SIZE(send_err_status_flags));
 }
 
 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
@@ -5312,7 +5394,7 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	 * report or record it.
 	 */
 	dd_dev_info(dd, "CCE Error: %s\n",
-		cce_err_status_string(buf, sizeof(buf), reg));
+		    cce_err_status_string(buf, sizeof(buf), reg));
 
 	if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK) &&
 	    is_ax(dd) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
@@ -5342,14 +5424,14 @@ static void update_rcverr_timer(unsigned long opaque)
 	u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
 
 	if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
-		ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
+	    ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
 		dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
-		set_link_down_reason(ppd,
-		  OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
-			OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
+		set_link_down_reason(
+		ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
+		OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
 		queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
 	}
-	dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt;
+	dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt;
 
 	mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
 }
@@ -5375,7 +5457,7 @@ static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	int i = 0;
 
 	dd_dev_info(dd, "Receive Error: %s\n",
-		rxe_err_status_string(buf, sizeof(buf), reg));
+		    rxe_err_status_string(buf, sizeof(buf), reg));
 
 	if (reg & ALL_RXE_FREEZE_ERR) {
 		int flags = 0;
@@ -5402,7 +5484,7 @@ static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	int i = 0;
 
 	dd_dev_info(dd, "Misc Error: %s",
-		misc_err_status_string(buf, sizeof(buf), reg));
+		    misc_err_status_string(buf, sizeof(buf), reg));
 	for (i = 0; i < NUM_MISC_ERR_STATUS_COUNTERS; i++) {
 		if (reg & (1ull << i))
 			incr_cntr64(&dd->misc_err_status_cnt[i]);
@@ -5415,7 +5497,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	int i = 0;
 
 	dd_dev_info(dd, "PIO Error: %s\n",
-		pio_err_status_string(buf, sizeof(buf), reg));
+		    pio_err_status_string(buf, sizeof(buf), reg));
 
 	if (reg & ALL_PIO_FREEZE_ERR)
 		start_freeze_handling(dd->pport, 0);
@@ -5432,7 +5514,7 @@ static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	int i = 0;
 
 	dd_dev_info(dd, "SDMA Error: %s\n",
-		sdma_err_status_string(buf, sizeof(buf), reg));
+		    sdma_err_status_string(buf, sizeof(buf), reg));
 
 	if (reg & ALL_SDMA_FREEZE_ERR)
 		start_freeze_handling(dd->pport, 0);
@@ -5443,12 +5525,14 @@ static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	}
 }
 
-static void count_port_inactive(struct hfi1_devdata *dd)
+static inline void __count_port_discards(struct hfi1_pportdata *ppd)
 {
-	struct hfi1_pportdata *ppd = dd->pport;
+	incr_cntr64(&ppd->port_xmit_discards);
+}
 
-	if (ppd->port_xmit_discards < ~(u64)0)
-		ppd->port_xmit_discards++;
+static void count_port_inactive(struct hfi1_devdata *dd)
+{
+	__count_port_discards(dd->pport);
 }
 
 /*
@@ -5460,7 +5544,8 @@ static void count_port_inactive(struct hfi1_devdata *dd)
  * egress error if more than one packet fails the same integrity check
  * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
  */
-static void handle_send_egress_err_info(struct hfi1_devdata *dd)
+static void handle_send_egress_err_info(struct hfi1_devdata *dd,
+					int vl)
 {
 	struct hfi1_pportdata *ppd = dd->pport;
 	u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
@@ -5471,14 +5556,44 @@ static void handle_send_egress_err_info(struct hfi1_devdata *dd)
 	write_csr(dd, SEND_EGRESS_ERR_INFO, info);
 
 	dd_dev_info(dd,
-		"Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
-		info, egress_err_info_string(buf, sizeof(buf), info), src);
+		    "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
+		    info, egress_err_info_string(buf, sizeof(buf), info), src);
 
 	/* Eventually add other counters for each bit */
+	if (info & PORT_DISCARD_EGRESS_ERRS) {
+		int weight, i;
 
-	if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
-		if (ppd->port_xmit_discards < ~(u64)0)
-			ppd->port_xmit_discards++;
+		/*
+		 * Count all applicable bits as individual errors and
+		 * attribute them to the packet that triggered this handler.
+		 * This may not be completely accurate due to limitations
+		 * on the available hardware error information.  There is
+		 * a single information register and any number of error
+		 * packets may have occurred and contributed to it before
+		 * this routine is called.  This means that:
+		 * a) If multiple packets with the same error occur before
+		 *    this routine is called, earlier packets are missed.
+		 *    There is only a single bit for each error type.
+		 * b) Errors may not be attributed to the correct VL.
+		 *    The driver is attributing all bits in the info register
+		 *    to the packet that triggered this call, but bits
+		 *    could be an accumulation of different packets with
+		 *    different VLs.
+		 * c) A single error packet may have multiple counts attached
+		 *    to it.  There is no way for the driver to know if
+		 *    multiple bits set in the info register are due to a
+		 *    single packet or multiple packets.  The driver assumes
+		 *    multiple packets.
+		 */
+		weight = hweight64(info & PORT_DISCARD_EGRESS_ERRS);
+		for (i = 0; i < weight; i++) {
+			__count_port_discards(ppd);
+			if (vl >= 0 && vl < TXE_NUM_DATA_VL)
+				incr_cntr64(&ppd->port_xmit_discards_vl[vl]);
+			else if (vl == 15)
+				incr_cntr64(&ppd->port_xmit_discards_vl
+					    [C_VL_15]);
+		}
 	}
 }
 
@@ -5496,12 +5611,71 @@ static inline int port_inactive_err(u64 posn)
  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
  * register. Does it represent a 'disallowed packet' error?
  */
-static inline int disallowed_pkt_err(u64 posn)
+static inline int disallowed_pkt_err(int posn)
 {
 	return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
 		posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
 }
 
+/*
+ * Input value is a bit position of one of the SDMA engine disallowed
+ * packet errors.  Return which engine.  Use of this must be guarded by
+ * disallowed_pkt_err().
+ */
+static inline int disallowed_pkt_engine(int posn)
+{
+	return posn - SEES(TX_SDMA0_DISALLOWED_PACKET);
+}
+
+/*
+ * Translate an SDMA engine to a VL.  Return -1 if the tranlation cannot
+ * be done.
+ */
+static int engine_to_vl(struct hfi1_devdata *dd, int engine)
+{
+	struct sdma_vl_map *m;
+	int vl;
+
+	/* range check */
+	if (engine < 0 || engine >= TXE_NUM_SDMA_ENGINES)
+		return -1;
+
+	rcu_read_lock();
+	m = rcu_dereference(dd->sdma_map);
+	vl = m->engine_to_vl[engine];
+	rcu_read_unlock();
+
+	return vl;
+}
+
+/*
+ * Translate the send context (sofware index) into a VL.  Return -1 if the
+ * translation cannot be done.
+ */
+static int sc_to_vl(struct hfi1_devdata *dd, int sw_index)
+{
+	struct send_context_info *sci;
+	struct send_context *sc;
+	int i;
+
+	sci = &dd->send_contexts[sw_index];
+
+	/* there is no information for user (PSM) and ack contexts */
+	if (sci->type != SC_KERNEL)
+		return -1;
+
+	sc = sci->sc;
+	if (!sc)
+		return -1;
+	if (dd->vld[15].sc == sc)
+		return 15;
+	for (i = 0; i < num_vls; i++)
+		if (dd->vld[i].sc == sc)
+			return i;
+
+	return -1;
+}
+
 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 {
 	u64 reg_copy = reg, handled = 0;
@@ -5510,34 +5684,34 @@ static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 
 	if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
 		start_freeze_handling(dd->pport, 0);
-	if (is_ax(dd) && (reg &
-		    SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
-		    && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
+	else if (is_ax(dd) &&
+		 (reg & SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK) &&
+		 (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
 		start_freeze_handling(dd->pport, 0);
 
 	while (reg_copy) {
 		int posn = fls64(reg_copy);
-		/*
-		 * fls64() returns a 1-based offset, but we generally
-		 * want 0-based offsets.
-		 */
+		/* fls64() returns a 1-based offset, we want it zero based */
 		int shift = posn - 1;
+		u64 mask = 1ULL << shift;
 
 		if (port_inactive_err(shift)) {
 			count_port_inactive(dd);
-			handled |= (1ULL << shift);
+			handled |= mask;
 		} else if (disallowed_pkt_err(shift)) {
-			handle_send_egress_err_info(dd);
-			handled |= (1ULL << shift);
+			int vl = engine_to_vl(dd, disallowed_pkt_engine(shift));
+
+			handle_send_egress_err_info(dd, vl);
+			handled |= mask;
 		}
-		clear_bit(shift, (unsigned long *)&reg_copy);
+		reg_copy &= ~mask;
 	}
 
 	reg &= ~handled;
 
 	if (reg)
 		dd_dev_info(dd, "Egress Error: %s\n",
-			egress_err_status_string(buf, sizeof(buf), reg));
+			    egress_err_status_string(buf, sizeof(buf), reg));
 
 	for (i = 0; i < NUM_SEND_EGRESS_ERR_STATUS_COUNTERS; i++) {
 		if (reg & (1ull << i))
@@ -5551,7 +5725,7 @@ static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	int i = 0;
 
 	dd_dev_info(dd, "Send Error: %s\n",
-		send_err_status_string(buf, sizeof(buf), reg));
+		    send_err_status_string(buf, sizeof(buf), reg));
 
 	for (i = 0; i < NUM_SEND_ERR_STATUS_COUNTERS; i++) {
 		if (reg & (1ull << i))
@@ -5597,7 +5771,7 @@ static void interrupt_clear_down(struct hfi1_devdata *dd,
 			u64 mask;
 
 			dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
-				eri->desc, reg);
+				   eri->desc, reg);
 			/*
 			 * Read-modify-write so any other masked bits
 			 * remain masked.
@@ -5621,14 +5795,15 @@ static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
 		interrupt_clear_down(dd, 0, eri);
 	} else {
 		dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
-			source);
+			   source);
 	}
 }
 
 static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
+			   sc_err_status_flags,
+			   ARRAY_SIZE(sc_err_status_flags));
 }
 
 /*
@@ -5653,15 +5828,15 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
 	sw_index = dd->hw_to_sw[hw_context];
 	if (sw_index >= dd->num_send_contexts) {
 		dd_dev_err(dd,
-			"out of range sw index %u for send context %u\n",
-			sw_index, hw_context);
+			   "out of range sw index %u for send context %u\n",
+			   sw_index, hw_context);
 		return;
 	}
 	sci = &dd->send_contexts[sw_index];
 	sc = sci->sc;
 	if (!sc) {
 		dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
-			sw_index, hw_context);
+			   sw_index, hw_context);
 		return;
 	}
 
@@ -5671,10 +5846,11 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
 	status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
 
 	dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
-		send_context_err_status_string(flags, sizeof(flags), status));
+		    send_context_err_status_string(flags, sizeof(flags),
+						   status));
 
 	if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
-		handle_send_egress_err_info(dd);
+		handle_send_egress_err_info(dd, sc_to_vl(dd, sw_index));
 
 	/*
 	 * Automatically restart halted kernel contexts out of interrupt
@@ -5707,6 +5883,7 @@ static void handle_sdma_eng_err(struct hfi1_devdata *dd,
 	dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
 		   sde->this_idx, source, (unsigned long long)status);
 #endif
+	sde->err_cnt++;
 	sdma_engine_error(sde, status);
 
 	/*
@@ -5755,23 +5932,22 @@ static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
 		interrupt_clear_down(dd, 0, eri);
 	else
 		dd_dev_info(dd,
-			"%s: Unimplemented/reserved interrupt %d\n",
-			__func__, source);
+			    "%s: Unimplemented/reserved interrupt %d\n",
+			    __func__, source);
 }
 
 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
 {
-	/* source is always zero */
+	/* src_ctx is always zero */
 	struct hfi1_pportdata *ppd = dd->pport;
 	unsigned long flags;
 	u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
 
 	if (reg & QSFP_HFI0_MODPRST_N) {
-
-		dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
-				__func__);
-
 		if (!qsfp_mod_present(ppd)) {
+			dd_dev_info(dd, "%s: QSFP module removed\n",
+				    __func__);
+
 			ppd->driver_link_ready = 0;
 			/*
 			 * Cable removed, reset all our information about the
@@ -5784,14 +5960,23 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
 			 * an interrupt when a cable is inserted
 			 */
 			ppd->qsfp_info.cache_valid = 0;
-			ppd->qsfp_info.qsfp_interrupt_functional = 0;
+			ppd->qsfp_info.reset_needed = 0;
+			ppd->qsfp_info.limiting_active = 0;
 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-						flags);
-			write_csr(dd,
-					dd->hfi1_id ?
-						ASIC_QSFP2_INVERT :
-						ASIC_QSFP1_INVERT,
-				qsfp_int_mgmt);
+					       flags);
+			/* Invert the ModPresent pin now to detect plug-in */
+			write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT :
+				  ASIC_QSFP1_INVERT, qsfp_int_mgmt);
+
+			if ((ppd->offline_disabled_reason >
+			  HFI1_ODR_MASK(
+			  OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED)) ||
+			  (ppd->offline_disabled_reason ==
+			  HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
+				ppd->offline_disabled_reason =
+				HFI1_ODR_MASK(
+				OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
+
 			if (ppd->host_link_state == HLS_DN_POLL) {
 				/*
 				 * The link is still in POLL. This means
@@ -5802,28 +5987,33 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
 				queue_work(ppd->hfi1_wq, &ppd->link_down_work);
 			}
 		} else {
+			dd_dev_info(dd, "%s: QSFP module inserted\n",
+				    __func__);
+
 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 			ppd->qsfp_info.cache_valid = 0;
 			ppd->qsfp_info.cache_refresh_required = 1;
 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-						flags);
+					       flags);
 
+			/*
+			 * Stop inversion of ModPresent pin to detect
+			 * removal of the cable
+			 */
 			qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
-			write_csr(dd,
-					dd->hfi1_id ?
-						ASIC_QSFP2_INVERT :
-						ASIC_QSFP1_INVERT,
-				qsfp_int_mgmt);
+			write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT :
+				  ASIC_QSFP1_INVERT, qsfp_int_mgmt);
+
+			ppd->offline_disabled_reason =
+				HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
 		}
 	}
 
 	if (reg & QSFP_HFI0_INT_N) {
-
-		dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
-				__func__);
+		dd_dev_info(dd, "%s: Interrupt received from QSFP module\n",
+			    __func__);
 		spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 		ppd->qsfp_info.check_interrupt_flags = 1;
-		ppd->qsfp_info.qsfp_interrupt_functional = 1;
 		spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
 	}
 
@@ -5837,11 +6027,11 @@ static int request_host_lcb_access(struct hfi1_devdata *dd)
 	int ret;
 
 	ret = do_8051_command(dd, HCMD_MISC,
-		(u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
-		NULL);
+			      (u64)HCMD_MISC_REQUEST_LCB_ACCESS <<
+			      LOAD_DATA_FIELD_ID_SHIFT, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "%s: command failed with error %d\n",
-			__func__, ret);
+			   __func__, ret);
 	}
 	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
 }
@@ -5851,11 +6041,11 @@ static int request_8051_lcb_access(struct hfi1_devdata *dd)
 	int ret;
 
 	ret = do_8051_command(dd, HCMD_MISC,
-		(u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
-		NULL);
+			      (u64)HCMD_MISC_GRANT_LCB_ACCESS <<
+			      LOAD_DATA_FIELD_ID_SHIFT, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "%s: command failed with error %d\n",
-			__func__, ret);
+			   __func__, ret);
 	}
 	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
 }
@@ -5867,8 +6057,8 @@ static int request_8051_lcb_access(struct hfi1_devdata *dd)
 static inline void set_host_lcb_access(struct hfi1_devdata *dd)
 {
 	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
-				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
-				| DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
+		  DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK |
+		  DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
 }
 
 /*
@@ -5878,7 +6068,7 @@ static inline void set_host_lcb_access(struct hfi1_devdata *dd)
 static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
 {
 	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
-				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
+		  DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
 }
 
 /*
@@ -5912,7 +6102,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
 	/* this access is valid only when the link is up */
 	if ((ppd->host_link_state & HLS_UP) == 0) {
 		dd_dev_info(dd, "%s: link state %s not up\n",
-			__func__, link_state_name(ppd->host_link_state));
+			    __func__, link_state_name(ppd->host_link_state));
 		ret = -EBUSY;
 		goto done;
 	}
@@ -5921,8 +6111,8 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
 		ret = request_host_lcb_access(dd);
 		if (ret) {
 			dd_dev_err(dd,
-				"%s: unable to acquire LCB access, err %d\n",
-				__func__, ret);
+				   "%s: unable to acquire LCB access, err %d\n",
+				   __func__, ret);
 			goto done;
 		}
 		set_host_lcb_access(dd);
@@ -5959,7 +6149,7 @@ int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
 
 	if (dd->lcb_access_count == 0) {
 		dd_dev_err(dd, "%s: LCB access count is zero.  Skipping.\n",
-			__func__);
+			   __func__);
 		goto done;
 	}
 
@@ -5968,8 +6158,8 @@ int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
 		ret = request_8051_lcb_access(dd);
 		if (ret) {
 			dd_dev_err(dd,
-				"%s: unable to release LCB access, err %d\n",
-				__func__, ret);
+				   "%s: unable to release LCB access, err %d\n",
+				   __func__, ret);
 			/* restore host access if the grant didn't work */
 			set_host_lcb_access(dd);
 			goto done;
@@ -6001,19 +6191,26 @@ static void init_lcb_access(struct hfi1_devdata *dd)
 static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
 {
 	write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
-		DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
-		| (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
-		| (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
+		  DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK |
+		  (u64)return_code <<
+		  DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT |
+		  (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
 }
 
 /*
- * Handle requests from the 8051.
+ * Handle host requests from the 8051.
+ *
+ * This is a work-queue function outside of the interrupt.
  */
-static void handle_8051_request(struct hfi1_devdata *dd)
+void handle_8051_request(struct work_struct *work)
 {
+	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+							dc_host_req_work);
+	struct hfi1_devdata *dd = ppd->dd;
 	u64 reg;
-	u16 data;
-	u8 type;
+	u16 data = 0;
+	u8 type, i, lanes, *cache = ppd->qsfp_info.cache;
+	u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS];
 
 	reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
 	if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
@@ -6034,12 +6231,46 @@ static void handle_8051_request(struct hfi1_devdata *dd)
 	case HREQ_READ_CONFIG:
 	case HREQ_SET_TX_EQ_ABS:
 	case HREQ_SET_TX_EQ_REL:
-	case HREQ_ENABLE:
 		dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
-			type);
+			    type);
 		hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
 		break;
 
+	case HREQ_ENABLE:
+		lanes = data & 0xF;
+		for (i = 0; lanes; lanes >>= 1, i++) {
+			if (!(lanes & 1))
+				continue;
+			if (data & 0x200) {
+				/* enable TX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x80)
+					cdr_ctrl_byte |= (1 << (i + 4));
+			} else {
+				/* disable TX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x80)
+					cdr_ctrl_byte &= ~(1 << (i + 4));
+			}
+
+			if (data & 0x800) {
+				/* enable RX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x40)
+					cdr_ctrl_byte |= (1 << i);
+			} else {
+				/* disable RX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x40)
+					cdr_ctrl_byte &= ~(1 << i);
+			}
+		}
+		one_qsfp_write(ppd, dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS,
+			       &cdr_ctrl_byte, 1);
+		hreq_response(dd, HREQ_SUCCESS, data);
+		refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+		break;
+
 	case HREQ_CONFIG_DONE:
 		hreq_response(dd, HREQ_SUCCESS, 0);
 		break;
@@ -6059,11 +6290,11 @@ static void write_global_credit(struct hfi1_devdata *dd,
 				u8 vau, u16 total, u16 shared)
 {
 	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
-		((u64)total
-			<< SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
-		| ((u64)shared
-			<< SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
-		| ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
+		  ((u64)total <<
+		   SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) |
+		  ((u64)shared <<
+		   SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) |
+		  ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
 }
 
 /*
@@ -6100,7 +6331,7 @@ void reset_link_credits(struct hfi1_devdata *dd)
 
 	/* remove all previous VL credit limits */
 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
-		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
+		write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
 	write_global_credit(dd, 0, 0, 0);
 	/* reset the CM block */
@@ -6142,15 +6373,14 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
 	write_csr(dd, DC_LCB_CFG_RUN, 0);
 	/* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
-		1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
+		  1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
 	/* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
 	dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
 	reg = read_csr(dd, DCC_CFG_RESET);
-	write_csr(dd, DCC_CFG_RESET,
-		reg
-		| (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
-		| (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
-	(void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
+	write_csr(dd, DCC_CFG_RESET, reg |
+		  (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) |
+		  (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
+	(void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
 	if (!abort) {
 		udelay(1);    /* must hold for the longer of 16cclks or 20ns */
 		write_csr(dd, DCC_CFG_RESET, reg);
@@ -6179,14 +6409,18 @@ static void dc_shutdown(struct hfi1_devdata *dd)
 	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
 	/* Shutdown the LCB */
 	lcb_shutdown(dd, 1);
-	/* Going to OFFLINE would have causes the 8051 to put the
+	/*
+	 * Going to OFFLINE would have causes the 8051 to put the
 	 * SerDes into reset already. Just need to shut down the 8051,
-	 * itself. */
+	 * itself.
+	 */
 	write_csr(dd, DC_DC8051_CFG_RST, 0x1);
 }
 
-/* Calling this after the DC has been brought out of reset should not
- * do any damage. */
+/*
+ * Calling this after the DC has been brought out of reset should not
+ * do any damage.
+ */
 static void dc_start(struct hfi1_devdata *dd)
 {
 	unsigned long flags;
@@ -6202,7 +6436,7 @@ static void dc_start(struct hfi1_devdata *dd)
 	ret = wait_fm_ready(dd, TIMEOUT_8051_START);
 	if (ret) {
 		dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
-			__func__);
+			   __func__);
 	}
 	/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
 	write_csr(dd, DCC_CFG_RESET, 0x10);
@@ -6295,7 +6529,7 @@ static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
 	write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
 	/* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
 	write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
-		DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
+		  DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
 }
 
@@ -6312,8 +6546,10 @@ void handle_sma_message(struct work_struct *work)
 	u64 msg;
 	int ret;
 
-	/* msg is bytes 1-4 of the 40-bit idle message - the command code
-	   is stripped off */
+	/*
+	 * msg is bytes 1-4 of the 40-bit idle message - the command code
+	 * is stripped off
+	 */
 	ret = read_idle_sma(dd, &msg);
 	if (ret)
 		return;
@@ -6339,8 +6575,8 @@ void handle_sma_message(struct work_struct *work)
 		 *
 		 * Can activate the node.  Discard otherwise.
 		 */
-		if (ppd->host_link_state == HLS_UP_ARMED
-					&& ppd->is_active_optimize_enabled) {
+		if (ppd->host_link_state == HLS_UP_ARMED &&
+		    ppd->is_active_optimize_enabled) {
 			ppd->neighbor_normal = 1;
 			ret = set_link_state(ppd, HLS_UP_ACTIVE);
 			if (ret)
@@ -6352,8 +6588,8 @@ void handle_sma_message(struct work_struct *work)
 		break;
 	default:
 		dd_dev_err(dd,
-			"%s: received unexpected SMA idle message 0x%llx\n",
-			__func__, msg);
+			   "%s: received unexpected SMA idle message 0x%llx\n",
+			   __func__, msg);
 		break;
 	}
 }
@@ -6445,10 +6681,9 @@ static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
 
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(dd,
-				"Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
-				freeze ? "" : "un",
-				reg & ALL_FROZE,
-				freeze ? ALL_FROZE : 0ull);
+				   "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
+				   freeze ? "" : "un", reg & ALL_FROZE,
+				   freeze ? ALL_FROZE : 0ull);
 			return;
 		}
 		usleep_range(80, 120);
@@ -6478,11 +6713,17 @@ static void rxe_freeze(struct hfi1_devdata *dd)
  */
 static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
 {
+	u32 rcvmask;
 	int i;
 
 	/* enable all kernel contexts */
-	for (i = 0; i < dd->n_krcv_queues; i++)
-		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
+	for (i = 0; i < dd->n_krcv_queues; i++) {
+		rcvmask = HFI1_RCVCTRL_CTXT_ENB;
+		/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
+		rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
+			HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
+		hfi1_rcvctrl(dd, rcvmask, i);
+	}
 
 	/* enable port */
 	add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@ -6567,7 +6808,7 @@ void handle_freeze(struct work_struct *work)
 void handle_link_up(struct work_struct *work)
 {
 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
-								link_up_work);
+						  link_up_work);
 	set_link_state(ppd, HLS_UP_INIT);
 
 	/* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
@@ -6586,17 +6827,20 @@ void handle_link_up(struct work_struct *work)
 	if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
 		/* oops - current speed is not enabled, bounce */
 		dd_dev_err(ppd->dd,
-			"Link speed active 0x%x is outside enabled 0x%x, downing link\n",
-			ppd->link_speed_active, ppd->link_speed_enabled);
+			   "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
+			   ppd->link_speed_active, ppd->link_speed_enabled);
 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
-			OPA_LINKDOWN_REASON_SPEED_POLICY);
+				     OPA_LINKDOWN_REASON_SPEED_POLICY);
 		set_link_state(ppd, HLS_DN_OFFLINE);
+		tune_serdes(ppd);
 		start_link(ppd);
 	}
 }
 
-/* Several pieces of LNI information were cached for SMA in ppd.
- * Reset these on link down */
+/*
+ * Several pieces of LNI information were cached for SMA in ppd.
+ * Reset these on link down
+ */
 static void reset_neighbor_info(struct hfi1_pportdata *ppd)
 {
 	ppd->neighbor_guid = 0;
@@ -6616,7 +6860,13 @@ void handle_link_down(struct work_struct *work)
 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
 								link_down_work);
 
-	/* go offline first, then deal with reasons */
+	if ((ppd->host_link_state &
+	     (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) &&
+	     ppd->port_type == PORT_TYPE_FIXED)
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NOT_INSTALLED);
+
+	/* Go offline first, then deal with reading/writing through 8051 */
 	set_link_state(ppd, HLS_DN_OFFLINE);
 
 	lcl_reason = 0;
@@ -6636,12 +6886,16 @@ void handle_link_down(struct work_struct *work)
 	/* disable the port */
 	clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
 
-	/* If there is no cable attached, turn the DC off. Otherwise,
-	 * start the link bring up. */
-	if (!qsfp_mod_present(ppd))
+	/*
+	 * If there is no cable attached, turn the DC off. Otherwise,
+	 * start the link bring up.
+	 */
+	if (!qsfp_mod_present(ppd)) {
 		dc_shutdown(ppd->dd);
-	else
+	} else {
+		tune_serdes(ppd);
 		start_link(ppd);
+	}
 }
 
 void handle_link_bounce(struct work_struct *work)
@@ -6654,10 +6908,11 @@ void handle_link_bounce(struct work_struct *work)
 	 */
 	if (ppd->host_link_state & HLS_UP) {
 		set_link_state(ppd, HLS_DN_OFFLINE);
+		tune_serdes(ppd);
 		start_link(ppd);
 	} else {
 		dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
-			__func__, link_state_name(ppd->host_link_state));
+			    __func__, link_state_name(ppd->host_link_state));
 	}
 }
 
@@ -6754,7 +7009,7 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
 	case 3: return OPA_LINK_WIDTH_3X;
 	default:
 		dd_dev_info(dd, "%s: invalid width %d, using 4\n",
-			__func__, width);
+			    __func__, width);
 		/* fall through */
 	case 4: return OPA_LINK_WIDTH_4X;
 	}
@@ -6766,6 +7021,7 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
 static const u8 bit_counts[16] = {
 	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
 };
+
 static inline u8 nibble_to_count(u8 nibble)
 {
 	return bit_counts[nibble & 0xf];
@@ -6791,7 +7047,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
 
 	/* read the active lanes */
 	read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
-				&rx_polarity_inversion, &max_rate);
+			 &rx_polarity_inversion, &max_rate);
 	read_local_lni(dd, &enable_lane_rx);
 
 	/* convert to counts */
@@ -6803,8 +7059,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
 	 * handle_verify_cap().  The ASIC 8051 firmware does not correctly
 	 * set the max_rate field in handle_verify_cap until v0.19.
 	 */
-	if ((dd->icode == ICODE_RTL_SILICON)
-				&& (dd->dc8051_ver < dc8051_ver(0, 19))) {
+	if ((dd->icode == ICODE_RTL_SILICON) &&
+	    (dd->dc8051_ver < dc8051_ver(0, 19))) {
 		/* max_rate: 0 = 12.5G, 1 = 25G */
 		switch (max_rate) {
 		case 0:
@@ -6812,8 +7068,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
 			break;
 		default:
 			dd_dev_err(dd,
-				"%s: unexpected max rate %d, using 25Gb\n",
-				__func__, (int)max_rate);
+				   "%s: unexpected max rate %d, using 25Gb\n",
+				   __func__, (int)max_rate);
 			/* fall through */
 		case 1:
 			dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
@@ -6822,8 +7078,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
 	}
 
 	dd_dev_info(dd,
-		"Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
-		enable_lane_tx, tx, enable_lane_rx, rx);
+		    "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
+		    enable_lane_tx, tx, enable_lane_rx, rx);
 	*tx_width = link_width_to_bits(dd, tx);
 	*rx_width = link_width_to_bits(dd, rx);
 }
@@ -6926,13 +7182,8 @@ void handle_verify_cap(struct work_struct *work)
 	 */
 
 	read_vc_remote_phy(dd, &power_management, &continious);
-	read_vc_remote_fabric(
-		dd,
-		&vau,
-		&z,
-		&vcu,
-		&vl15buf,
-		&partner_supported_crc);
+	read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
+			      &partner_supported_crc);
 	read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
 	read_remote_device_id(dd, &device_id, &device_rev);
 	/*
@@ -6943,19 +7194,16 @@ void handle_verify_cap(struct work_struct *work)
 	/* print the active widths */
 	get_link_widths(dd, &active_tx, &active_rx);
 	dd_dev_info(dd,
-		"Peer PHY: power management 0x%x, continuous updates 0x%x\n",
-		(int)power_management, (int)continious);
+		    "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
+		    (int)power_management, (int)continious);
 	dd_dev_info(dd,
-		"Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
-		(int)vau,
-		(int)z,
-		(int)vcu,
-		(int)vl15buf,
-		(int)partner_supported_crc);
+		    "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
+		    (int)vau, (int)z, (int)vcu, (int)vl15buf,
+		    (int)partner_supported_crc);
 	dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
-		(u32)remote_tx_rate, (u32)link_widths);
+		    (u32)remote_tx_rate, (u32)link_widths);
 	dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
-		(u32)device_id, (u32)device_rev);
+		    (u32)device_id, (u32)device_rev);
 	/*
 	 * The peer vAU value just read is the peer receiver value.  HFI does
 	 * not support a transmit vAU of 0 (AU == 8).  We advertised that
@@ -6990,10 +7238,10 @@ void handle_verify_cap(struct work_struct *work)
 	reg = read_csr(dd, SEND_CM_CTRL);
 	if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
 		write_csr(dd, SEND_CM_CTRL,
-			reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
+			  reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
 	} else {
 		write_csr(dd, SEND_CM_CTRL,
-			reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
+			  reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
 	}
 
 	ppd->link_speed_active = 0;	/* invalid value */
@@ -7018,7 +7266,7 @@ void handle_verify_cap(struct work_struct *work)
 	}
 	if (ppd->link_speed_active == 0) {
 		dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
-			__func__, (int)remote_tx_rate);
+			   __func__, (int)remote_tx_rate);
 		ppd->link_speed_active = OPA_LINK_SPEED_25G;
 	}
 
@@ -7074,9 +7322,9 @@ void handle_verify_cap(struct work_struct *work)
 		read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
 		DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
 	dd_dev_info(dd,
-		"Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
-		ppd->neighbor_guid, ppd->neighbor_type,
-		ppd->mgmt_allowed, ppd->neighbor_fm_security);
+		    "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
+		    ppd->neighbor_guid, ppd->neighbor_type,
+		    ppd->mgmt_allowed, ppd->neighbor_fm_security);
 	if (ppd->mgmt_allowed)
 		add_full_mgmt_pkey(ppd);
 
@@ -7130,28 +7378,27 @@ retry:
 
 		/* bounce if not at starting active width */
 		if ((ppd->link_width_active !=
-					ppd->link_width_downgrade_tx_active)
-				|| (ppd->link_width_active !=
-					ppd->link_width_downgrade_rx_active)) {
+		     ppd->link_width_downgrade_tx_active) ||
+		    (ppd->link_width_active !=
+		     ppd->link_width_downgrade_rx_active)) {
 			dd_dev_err(ppd->dd,
-				"Link downgrade is disabled and link has downgraded, downing link\n");
+				   "Link downgrade is disabled and link has downgraded, downing link\n");
 			dd_dev_err(ppd->dd,
-				"  original 0x%x, tx active 0x%x, rx active 0x%x\n",
-				ppd->link_width_active,
-				ppd->link_width_downgrade_tx_active,
-				ppd->link_width_downgrade_rx_active);
+				   "  original 0x%x, tx active 0x%x, rx active 0x%x\n",
+				   ppd->link_width_active,
+				   ppd->link_width_downgrade_tx_active,
+				   ppd->link_width_downgrade_rx_active);
 			do_bounce = 1;
 		}
-	} else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
-		|| (lwde & ppd->link_width_downgrade_rx_active) == 0) {
+	} else if ((lwde & ppd->link_width_downgrade_tx_active) == 0 ||
+		   (lwde & ppd->link_width_downgrade_rx_active) == 0) {
 		/* Tx or Rx is outside the enabled policy */
 		dd_dev_err(ppd->dd,
-			"Link is outside of downgrade allowed, downing link\n");
+			   "Link is outside of downgrade allowed, downing link\n");
 		dd_dev_err(ppd->dd,
-			"  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
-			lwde,
-			ppd->link_width_downgrade_tx_active,
-			ppd->link_width_downgrade_rx_active);
+			   "  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
+			   lwde, ppd->link_width_downgrade_tx_active,
+			   ppd->link_width_downgrade_rx_active);
 		do_bounce = 1;
 	}
 
@@ -7160,8 +7407,9 @@ done:
 
 	if (do_bounce) {
 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
-		  OPA_LINKDOWN_REASON_WIDTH_POLICY);
+				     OPA_LINKDOWN_REASON_WIDTH_POLICY);
 		set_link_state(ppd, HLS_DN_OFFLINE);
+		tune_serdes(ppd);
 		start_link(ppd);
 	}
 }
@@ -7242,9 +7490,10 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
 			    & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
 				queue_link_down = 1;
 				dd_dev_info(dd, "Link error: %s\n",
-					dc8051_info_err_string(buf,
-						sizeof(buf),
-						err & FAILED_LNI));
+					    dc8051_info_err_string(buf,
+								   sizeof(buf),
+								   err &
+								   FAILED_LNI));
 			}
 			err &= ~(u64)FAILED_LNI;
 		}
@@ -7256,7 +7505,8 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
 		if (err) {
 			/* report remaining errors, but do not do anything */
 			dd_dev_err(dd, "8051 info error: %s\n",
-				dc8051_info_err_string(buf, sizeof(buf), err));
+				   dc8051_info_err_string(buf, sizeof(buf),
+							  err));
 		}
 
 		/*
@@ -7284,7 +7534,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
 			host_msg &= ~(u64)LINKUP_ACHIEVED;
 		}
 		if (host_msg & EXT_DEVICE_CFG_REQ) {
-			handle_8051_request(dd);
+			queue_work(ppd->hfi1_wq, &ppd->dc_host_req_work);
 			host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
 		}
 		if (host_msg & VERIFY_CAP_FRAME) {
@@ -7309,8 +7559,9 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
 		if (host_msg) {
 			/* report remaining messages, but do not do anything */
 			dd_dev_info(dd, "8051 info host message: %s\n",
-				dc8051_info_host_msg_string(buf, sizeof(buf),
-					host_msg));
+				    dc8051_info_host_msg_string(buf,
+								sizeof(buf),
+								host_msg));
 		}
 
 		reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
@@ -7323,25 +7574,27 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
 		 */
 		dd_dev_err(dd, "Lost 8051 heartbeat\n");
 		write_csr(dd, DC_DC8051_ERR_EN,
-			read_csr(dd, DC_DC8051_ERR_EN)
-			  & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
+			  read_csr(dd, DC_DC8051_ERR_EN) &
+			  ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
 
 		reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
 	}
 	if (reg) {
 		/* report the error, but do not do anything */
 		dd_dev_err(dd, "8051 error: %s\n",
-			dc8051_err_string(buf, sizeof(buf), reg));
+			   dc8051_err_string(buf, sizeof(buf), reg));
 	}
 
 	if (queue_link_down) {
-		/* if the link is already going down or disabled, do not
-		 * queue another */
-		if ((ppd->host_link_state
-				    & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
-				|| ppd->link_enabled == 0) {
+		/*
+		 * if the link is already going down or disabled, do not
+		 * queue another
+		 */
+		if ((ppd->host_link_state &
+		    (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) ||
+		    ppd->link_enabled == 0) {
 			dd_dev_info(dd, "%s: not queuing link down\n",
-				__func__);
+				    __func__);
 		} else {
 			queue_work(ppd->hfi1_wq, &ppd->link_down_work);
 		}
@@ -7483,8 +7736,10 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 			/* set status bit */
 			dd->err_info_rcvport.status_and_code |=
 				OPA_EI_STATUS_SMASK;
-			/* save first 2 flits in the packet that caused
-			 * the error */
+			/*
+			 * save first 2 flits in the packet that caused
+			 * the error
+			 */
 			 dd->err_info_rcvport.packet_flit1 = hdr0;
 			 dd->err_info_rcvport.packet_flit2 = hdr1;
 		}
@@ -7517,7 +7772,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 		/* just report this */
 		dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
 		dd_dev_info(dd, "           hdr0 0x%llx, hdr1 0x%llx\n",
-			hdr0, hdr1);
+			    hdr0, hdr1);
 
 		reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
 	}
@@ -7536,7 +7791,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	/* report any remaining errors */
 	if (reg)
 		dd_dev_info(dd, "DCC Error: %s\n",
-			dcc_err_string(buf, sizeof(buf), reg));
+			    dcc_err_string(buf, sizeof(buf), reg));
 
 	if (lcl_reason == 0)
 		lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
@@ -7553,7 +7808,7 @@ static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	char buf[96];
 
 	dd_dev_info(dd, "LCB Error: %s\n",
-		lcb_err_string(buf, sizeof(buf), reg));
+		    lcb_err_string(buf, sizeof(buf), reg));
 }
 
 /*
@@ -7643,7 +7898,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
 		err_detail = "out of range";
 	}
 	dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
-		err_detail, source);
+		   err_detail, source);
 }
 
 /*
@@ -7669,7 +7924,7 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
 		err_detail = "out of range";
 	}
 	dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
-		err_detail, source);
+		   err_detail, source);
 }
 
 /*
@@ -7680,12 +7935,14 @@ static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
 	char name[64];
 
 	dd_dev_err(dd, "unexpected %s interrupt\n",
-				is_reserved_name(name, sizeof(name), source));
+		   is_reserved_name(name, sizeof(name), source));
 }
 
 static const struct is_table is_table[] = {
-/* start		     end
-				name func		interrupt func */
+/*
+ * start		 end
+ *				name func		interrupt func
+ */
 { IS_GENERAL_ERR_START,  IS_GENERAL_ERR_END,
 				is_misc_err_name,	is_misc_err_int },
 { IS_SDMAENG_ERR_START,  IS_SDMAENG_ERR_END,
@@ -7756,7 +8013,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
 
 	/* phase 2: call the appropriate handler */
 	for_each_set_bit(bit, (unsigned long *)&regs[0],
-						CCE_NUM_INT_CSRS*64) {
+			 CCE_NUM_INT_CSRS * 64) {
 		is_interrupt(dd, bit);
 	}
 
@@ -7779,27 +8036,27 @@ static irqreturn_t sdma_interrupt(int irq, void *data)
 
 	/* This read_csr is really bad in the hot path */
 	status = read_csr(dd,
-			CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
-			& sde->imask;
+			  CCE_INT_STATUS + (8 * (IS_SDMA_START / 64)))
+			  & sde->imask;
 	if (likely(status)) {
 		/* clear the interrupt(s) */
 		write_csr(dd,
-			CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
-			status);
+			  CCE_INT_CLEAR + (8 * (IS_SDMA_START / 64)),
+			  status);
 
 		/* handle the interrupt(s) */
 		sdma_engine_interrupt(sde, status);
 	} else
 		dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
-			sde->this_idx);
+			   sde->this_idx);
 
 	return IRQ_HANDLED;
 }
 
 /*
- * Clear the receive interrupt, forcing the write and making sure
- * we have data from the chip, pushing everything in front of it
- * back to the host.
+ * Clear the receive interrupt.  Use a read of the interrupt clear CSR
+ * to insure that the write completed.  This does NOT guarantee that
+ * queued DMA writes to memory from the chip are pushed.
  */
 static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
 {
@@ -7813,27 +8070,45 @@ static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
 }
 
 /* force the receive interrupt */
-static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
+void force_recv_intr(struct hfi1_ctxtdata *rcd)
 {
 	write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
 }
 
-/* return non-zero if a packet is present */
+/*
+ * Return non-zero if a packet is present.
+ *
+ * This routine is called when rechecking for packets after the RcvAvail
+ * interrupt has been cleared down.  First, do a quick check of memory for
+ * a packet present.  If not found, use an expensive CSR read of the context
+ * tail to determine the actual tail.  The CSR read is necessary because there
+ * is no method to push pending DMAs to memory other than an interrupt and we
+ * are trying to determine if we need to force an interrupt.
+ */
 static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
 {
+	u32 tail;
+	int present;
+
 	if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
-		return (rcd->seq_cnt ==
+		present = (rcd->seq_cnt ==
 				rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
+	else /* is RDMA rtail */
+		present = (rcd->head != get_rcvhdrtail(rcd));
 
-	/* else is RDMA rtail */
-	return (rcd->head != get_rcvhdrtail(rcd));
+	if (present)
+		return 1;
+
+	/* fall back to a CSR read, correct indpendent of DMA_RTAIL */
+	tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
+	return rcd->head != tail;
 }
 
 /*
  * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
  * This routine will try to handle packets immediately (latency), but if
  * it finds too many, it will invoke the thread handler (bandwitdh).  The
- * chip receive interupt is *not* cleared down until this or the thread (if
+ * chip receive interrupt is *not* cleared down until this or the thread (if
  * invoked) is finished.  The intent is to avoid extra interrupts while we
  * are processing packets anyway.
  */
@@ -7846,6 +8121,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
 
 	trace_hfi1_receive_interrupt(dd, rcd->ctxt);
 	this_cpu_inc(*dd->int_counter);
+	aspm_ctx_disable(rcd);
 
 	/* receive interrupt remains blocked while processing packets */
 	disposition = rcd->do_interrupt(rcd, 0);
@@ -7912,7 +8188,7 @@ u32 read_physical_state(struct hfi1_devdata *dd)
 				& DC_DC8051_STS_CUR_STATE_PORT_MASK;
 }
 
-static u32 read_logical_state(struct hfi1_devdata *dd)
+u32 read_logical_state(struct hfi1_devdata *dd)
 {
 	u64 reg;
 
@@ -8160,8 +8436,8 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
 	return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
 }
 
-static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
-			    u8 lane_id, u32 config_data)
+int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
+		     u8 lane_id, u32 config_data)
 {
 	u64 data;
 	int ret;
@@ -8172,8 +8448,8 @@ static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
 	ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd,
-			"load 8051 config: field id %d, lane %d, err %d\n",
-			(int)field_id, (int)lane_id, ret);
+			   "load 8051 config: field id %d, lane %d, err %d\n",
+			   (int)field_id, (int)lane_id, ret);
 	}
 	return ret;
 }
@@ -8183,8 +8459,8 @@ static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
  * set the result, even on error.
  * Return 0 on success, -errno on failure
  */
-static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
-			    u32 *result)
+int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
+		     u32 *result)
 {
 	u64 big_data;
 	u32 addr;
@@ -8210,7 +8486,7 @@ static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
 	} else {
 		*result = 0;
 		dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
-			__func__, lane_id, field_id);
+			   __func__, lane_id, field_id);
 	}
 
 	return ret;
@@ -8247,7 +8523,7 @@ static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
 	u32 frame;
 
 	read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
-				&frame);
+			 &frame);
 	*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
 	*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
 	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
@@ -8329,7 +8605,7 @@ static void read_vc_remote_link_width(struct hfi1_devdata *dd,
 	u32 frame;
 
 	read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
-				&frame);
+			 &frame);
 	*remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
 				& REMOTE_TX_RATE_MASK;
 	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
@@ -8369,7 +8645,7 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
 	*link_quality = 0;
 	if (dd->pport->host_link_state & HLS_UP) {
 		ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
-					&frame);
+				       &frame);
 		if (ret == 0)
 			*link_quality = (frame >> LINK_QUALITY_SHIFT)
 						& LINK_QUALITY_MASK;
@@ -8429,10 +8705,9 @@ static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
 	for (lane = 0; lane < 4; lane++) {
 		ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
 		if (ret) {
-			dd_dev_err(
-				dd,
-				"Unable to read lane %d firmware details\n",
-				lane);
+			dd_dev_err(dd,
+				   "Unable to read lane %d firmware details\n",
+				   lane);
 			continue;
 		}
 		version = (frame >> SPICO_ROM_VERSION_SHIFT)
@@ -8440,8 +8715,8 @@ static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
 		prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
 					& SPICO_ROM_PROD_ID_MASK;
 		dd_dev_info(dd,
-			"Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
-			lane, version, prod_id);
+			    "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
+			    lane, version, prod_id);
 	}
 }
 
@@ -8454,11 +8729,10 @@ static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
 {
 	int ret;
 
-	ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
-		type, data_out);
+	ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG, type, data_out);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "read idle message: type %d, err %d\n",
-			(u32)type, ret);
+			   (u32)type, ret);
 		return -EINVAL;
 	}
 	dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
@@ -8475,8 +8749,8 @@ static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
  */
 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
 {
-	return read_idle_message(dd,
-			(u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
+	return read_idle_message(dd, (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT,
+				 data);
 }
 
 /*
@@ -8492,7 +8766,7 @@ static int send_idle_message(struct hfi1_devdata *dd, u64 data)
 	ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
-			data, ret);
+			   data, ret);
 		return -EINVAL;
 	}
 	return 0;
@@ -8507,8 +8781,8 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message)
 {
 	u64 data;
 
-	data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
-		| ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
+	data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT) |
+		((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
 	return send_idle_message(dd, data);
 }
 
@@ -8530,7 +8804,7 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
 		/* LCB_CFG_LOOPBACK.VAL = 2 */
 		/* LCB_CFG_LANE_WIDTH.VAL = 0 */
 		write_csr(dd, DC_LCB_CFG_LOOPBACK,
-			IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
+			  IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
 		write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
 	}
 
@@ -8542,25 +8816,24 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
 	if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
 		/* LCB_CFG_RUN.EN = 1 */
 		write_csr(dd, DC_LCB_CFG_RUN,
-			1ull << DC_LCB_CFG_RUN_EN_SHIFT);
+			  1ull << DC_LCB_CFG_RUN_EN_SHIFT);
 
 		/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
 		timeout = jiffies + msecs_to_jiffies(10);
 		while (1) {
-			reg = read_csr(dd,
-				DC_LCB_STS_LINK_TRANSFER_ACTIVE);
+			reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
 			if (reg)
 				break;
 			if (time_after(jiffies, timeout)) {
 				dd_dev_err(dd,
-					"timeout waiting for LINK_TRANSFER_ACTIVE\n");
+					   "timeout waiting for LINK_TRANSFER_ACTIVE\n");
 				return -ETIMEDOUT;
 			}
 			udelay(2);
 		}
 
 		write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
-			1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
+			  1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
 	}
 
 	if (!loopback) {
@@ -8572,10 +8845,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
 		 * done with LCB set up before resuming.
 		 */
 		dd_dev_err(dd,
-			"Pausing for peer to be finished with LCB set up\n");
+			   "Pausing for peer to be finished with LCB set up\n");
 		msleep(5000);
-		dd_dev_err(dd,
-			"Continuing with quick linkup\n");
+		dd_dev_err(dd, "Continuing with quick linkup\n");
 	}
 
 	write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
@@ -8589,8 +8861,8 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
 	ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd,
-			"%s: set physical link state to quick LinkUp failed with return %d\n",
-			__func__, ret);
+			   "%s: set physical link state to quick LinkUp failed with return %d\n",
+			   __func__, ret);
 
 		set_host_lcb_access(dd);
 		write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
@@ -8615,8 +8887,8 @@ static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
 	if (ret == HCMD_SUCCESS)
 		return 0;
 	dd_dev_err(dd,
-		"Set physical link state to SerDes Loopback failed with return %d\n",
-		ret);
+		   "Set physical link state to SerDes Loopback failed with return %d\n",
+		   ret);
 	if (ret >= 0)
 		ret = -EINVAL;
 	return ret;
@@ -8631,7 +8903,7 @@ static int init_loopback(struct hfi1_devdata *dd)
 
 	/* all loopbacks should disable self GUID check */
 	write_csr(dd, DC_DC8051_CFG_MODE,
-		(read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
+		  (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
 
 	/*
 	 * The simulator has only one loopback option - LCB.  Switch
@@ -8639,10 +8911,9 @@ static int init_loopback(struct hfi1_devdata *dd)
 	 *
 	 * Accept all valid loopback values.
 	 */
-	if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
-		&& (loopback == LOOPBACK_SERDES
-			|| loopback == LOOPBACK_LCB
-			|| loopback == LOOPBACK_CABLE)) {
+	if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR) &&
+	    (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
+	     loopback == LOOPBACK_CABLE)) {
 		loopback = LOOPBACK_LCB;
 		quick_linkup = 1;
 		return 0;
@@ -8663,7 +8934,7 @@ static int init_loopback(struct hfi1_devdata *dd)
 		/* not supported in emulation due to emulation RTL changes */
 		if (dd->icode == ICODE_FPGA_EMULATION) {
 			dd_dev_err(dd,
-				"LCB loopback not supported in emulation\n");
+				   "LCB loopback not supported in emulation\n");
 			return -EINVAL;
 		}
 		return 0;
@@ -8690,10 +8961,10 @@ static u16 opa_to_vc_link_widths(u16 opa_widths)
 		u16 from;
 		u16 to;
 	} opa_link_xlate[] = {
-		{ OPA_LINK_WIDTH_1X, 1 << (1-1)  },
-		{ OPA_LINK_WIDTH_2X, 1 << (2-1)  },
-		{ OPA_LINK_WIDTH_3X, 1 << (3-1)  },
-		{ OPA_LINK_WIDTH_4X, 1 << (4-1)  },
+		{ OPA_LINK_WIDTH_1X, 1 << (1 - 1)  },
+		{ OPA_LINK_WIDTH_2X, 1 << (2 - 1)  },
+		{ OPA_LINK_WIDTH_3X, 1 << (3 - 1)  },
+		{ OPA_LINK_WIDTH_4X, 1 << (4 - 1)  },
 	};
 
 	for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
@@ -8719,7 +8990,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
 
 	/* set the local tx rate - need to read-modify-write */
 	ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
-		&rx_polarity_inversion, &ppd->local_tx_rate);
+			       &rx_polarity_inversion, &ppd->local_tx_rate);
 	if (ret)
 		goto set_local_link_attributes_fail;
 
@@ -8740,15 +9011,16 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
 
 	enable_lane_tx = 0xF; /* enable all four lanes */
 	ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
-		     rx_polarity_inversion, ppd->local_tx_rate);
+				rx_polarity_inversion, ppd->local_tx_rate);
 	if (ret != HCMD_SUCCESS)
 		goto set_local_link_attributes_fail;
 
 	/*
 	 * DC supports continuous updates.
 	 */
-	ret = write_vc_local_phy(dd, 0 /* no power management */,
-				     1 /* continuous updates */);
+	ret = write_vc_local_phy(dd,
+				 0 /* no power management */,
+				 1 /* continuous updates */);
 	if (ret != HCMD_SUCCESS)
 		goto set_local_link_attributes_fail;
 
@@ -8759,7 +9031,8 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
 		goto set_local_link_attributes_fail;
 
 	ret = write_vc_local_link_width(dd, 0, 0,
-		     opa_to_vc_link_widths(ppd->link_width_enabled));
+					opa_to_vc_link_widths(
+						ppd->link_width_enabled));
 	if (ret != HCMD_SUCCESS)
 		goto set_local_link_attributes_fail;
 
@@ -8770,8 +9043,8 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
 
 set_local_link_attributes_fail:
 	dd_dev_err(dd,
-		"Failed to set local link attributes, return 0x%x\n",
-		ret);
+		   "Failed to set local link attributes, return 0x%x\n",
+		   ret);
 	return ret;
 }
 
@@ -8784,54 +9057,101 @@ int start_link(struct hfi1_pportdata *ppd)
 {
 	if (!ppd->link_enabled) {
 		dd_dev_info(ppd->dd,
-			"%s: stopping link start because link is disabled\n",
-			__func__);
+			    "%s: stopping link start because link is disabled\n",
+			    __func__);
 		return 0;
 	}
 	if (!ppd->driver_link_ready) {
 		dd_dev_info(ppd->dd,
-			"%s: stopping link start because driver is not ready\n",
-			__func__);
+			    "%s: stopping link start because driver is not ready\n",
+			    __func__);
 		return 0;
 	}
 
 	if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
-			loopback == LOOPBACK_LCB ||
-			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
+	    loopback == LOOPBACK_LCB ||
+	    ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
 		return set_link_state(ppd, HLS_DN_POLL);
 
 	dd_dev_info(ppd->dd,
-		"%s: stopping link start because no cable is present\n",
-		__func__);
+		    "%s: stopping link start because no cable is present\n",
+		    __func__);
 	return -EAGAIN;
 }
 
-static void reset_qsfp(struct hfi1_pportdata *ppd)
+static void wait_for_qsfp_init(struct hfi1_pportdata *ppd)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u64 mask;
+	unsigned long timeout;
+
+	/*
+	 * Check for QSFP interrupt for t_init (SFF 8679)
+	 */
+	timeout = jiffies + msecs_to_jiffies(2000);
+	while (1) {
+		mask = read_csr(dd, dd->hfi1_id ?
+				ASIC_QSFP2_IN : ASIC_QSFP1_IN);
+		if (!(mask & QSFP_HFI0_INT_N)) {
+			write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR :
+				  ASIC_QSFP1_CLEAR, QSFP_HFI0_INT_N);
+			break;
+		}
+		if (time_after(jiffies, timeout)) {
+			dd_dev_info(dd, "%s: No IntN detected, reset complete\n",
+				    __func__);
+			break;
+		}
+		udelay(2);
+	}
+}
+
+static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u64 mask;
+
+	mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK);
+	if (enable)
+		mask |= (u64)QSFP_HFI0_INT_N;
+	else
+		mask &= ~(u64)QSFP_HFI0_INT_N;
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
+}
+
+void reset_qsfp(struct hfi1_pportdata *ppd)
 {
 	struct hfi1_devdata *dd = ppd->dd;
 	u64 mask, qsfp_mask;
 
+	/* Disable INT_N from triggering QSFP interrupts */
+	set_qsfp_int_n(ppd, 0);
+
+	/* Reset the QSFP */
 	mask = (u64)QSFP_HFI0_RESET_N;
-	qsfp_mask = read_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
+	qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
 	qsfp_mask |= mask;
-	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
-		qsfp_mask);
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask);
 
 	qsfp_mask = read_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
+			     dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
 	qsfp_mask &= ~mask;
 	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
-		qsfp_mask);
+		  dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask);
 
 	udelay(10);
 
 	qsfp_mask |= mask;
 	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
-		qsfp_mask);
+		  dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask);
+
+	wait_for_qsfp_init(ppd);
+
+	/*
+	 * Allow INT_N to trigger the QSFP interrupt to watch
+	 * for alarms and warnings
+	 */
+	set_qsfp_int_n(ppd, 1);
 }
 
 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
@@ -8840,102 +9160,86 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
 	struct hfi1_devdata *dd = ppd->dd;
 
 	if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
-		(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP cable on fire\n",
-			__func__);
+	    (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
+		dd_dev_info(dd, "%s: QSFP cable on fire\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
-		(qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP cable temperature too low\n",
-			__func__);
+	    (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
+		dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
-		(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP supply voltage too high\n",
-			__func__);
+	    (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
+		dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
-		(qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP supply voltage too low\n",
-			__func__);
+	    (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
+		dd_dev_info(dd, "%s: QSFP supply voltage too low\n",
+			    __func__);
 
 	/* Byte 2 is vendor specific */
 
 	if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 1/2 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 1/2 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 3/4 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 3/4 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
-		(qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 bias too high\n",
-			__func__);
+	    (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
-		(qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 bias too low\n",
-			__func__);
+	    (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
-		(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 bias too high\n",
-			__func__);
+	    (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
-		(qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 bias too low\n",
-			__func__);
+	    (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n",
+			    __func__);
 
 	/* Bytes 9-10 and 11-12 are reserved */
 	/* Bytes 13-15 are vendor specific */
@@ -8943,35 +9247,8 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
 	return 0;
 }
 
-static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
-{
-	refresh_qsfp_cache(ppd, &ppd->qsfp_info);
-
-	return 0;
-}
-
-static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
-{
-	struct hfi1_devdata *dd = ppd->dd;
-	u8 qsfp_interrupt_status = 0;
-
-	if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
-		!= 1) {
-		dd_dev_info(dd,
-			"%s: Failed to read status of QSFP module\n",
-			__func__);
-		return -EIO;
-	}
-
-	/* We don't care about alarms & warnings with a non-functional INT_N */
-	if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
-		do_pre_lni_host_behaviors(ppd);
-
-	return 0;
-}
-
 /* This routine will only be scheduled if the QSFP module is present */
-static void qsfp_event(struct work_struct *work)
+void qsfp_event(struct work_struct *work)
 {
 	struct qsfp_data *qd;
 	struct hfi1_pportdata *ppd;
@@ -8993,76 +9270,75 @@ static void qsfp_event(struct work_struct *work)
 	dc_start(dd);
 
 	if (qd->cache_refresh_required) {
-		msleep(3000);
-		reset_qsfp(ppd);
+		set_qsfp_int_n(ppd, 0);
+
+		wait_for_qsfp_init(ppd);
 
-		/* Check for QSFP interrupt after t_init (SFF 8679)
-		 * + extra
+		/*
+		 * Allow INT_N to trigger the QSFP interrupt to watch
+		 * for alarms and warnings
 		 */
-		msleep(3000);
-		if (!qd->qsfp_interrupt_functional) {
-			if (do_qsfp_intr_fallback(ppd) < 0)
-				dd_dev_info(dd, "%s: QSFP fallback failed\n",
-					__func__);
-			ppd->driver_link_ready = 1;
-			start_link(ppd);
-		}
+		set_qsfp_int_n(ppd, 1);
+
+		tune_serdes(ppd);
+
+		start_link(ppd);
 	}
 
 	if (qd->check_interrupt_flags) {
 		u8 qsfp_interrupt_status[16] = {0,};
 
-		if (qsfp_read(ppd, dd->hfi1_id, 6,
-			      &qsfp_interrupt_status[0], 16) != 16) {
+		if (one_qsfp_read(ppd, dd->hfi1_id, 6,
+				  &qsfp_interrupt_status[0], 16) != 16) {
 			dd_dev_info(dd,
-				"%s: Failed to read status of QSFP module\n",
-				__func__);
+				    "%s: Failed to read status of QSFP module\n",
+				    __func__);
 		} else {
 			unsigned long flags;
-			u8 data_status;
 
+			handle_qsfp_error_conditions(
+					ppd, qsfp_interrupt_status);
 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 			ppd->qsfp_info.check_interrupt_flags = 0;
 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-								flags);
-
-			if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
-				 != 1) {
-				dd_dev_info(dd,
-				"%s: Failed to read status of QSFP module\n",
-					__func__);
-			}
-			if (!(data_status & QSFP_DATA_NOT_READY)) {
-				do_pre_lni_host_behaviors(ppd);
-				start_link(ppd);
-			} else
-				handle_qsfp_error_conditions(ppd,
-						qsfp_interrupt_status);
+					       flags);
 		}
 	}
 }
 
-void init_qsfp(struct hfi1_pportdata *ppd)
+static void init_qsfp_int(struct hfi1_devdata *dd)
 {
-	struct hfi1_devdata *dd = ppd->dd;
-	u64 qsfp_mask;
+	struct hfi1_pportdata *ppd = dd->pport;
+	u64 qsfp_mask, cce_int_mask;
+	const int qsfp1_int_smask = QSFP1_INT % 64;
+	const int qsfp2_int_smask = QSFP2_INT % 64;
 
-	if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
-			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
-		ppd->driver_link_ready = 1;
-		return;
+	/*
+	 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
+	 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
+	 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
+	 * the index of the appropriate CSR in the CCEIntMask CSR array
+	 */
+	cce_int_mask = read_csr(dd, CCE_INT_MASK +
+				(8 * (QSFP1_INT / 64)));
+	if (dd->hfi1_id) {
+		cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
+		write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
+			  cce_int_mask);
+	} else {
+		cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
+		write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
+			  cce_int_mask);
 	}
 
-	ppd->qsfp_info.ppd = ppd;
-	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
-
 	qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
 	/* Clear current status to avoid spurious interrupts */
-	write_csr(dd,
-			dd->hfi1_id ?
-				ASIC_QSFP2_CLEAR :
-				ASIC_QSFP1_CLEAR,
-		qsfp_mask);
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR,
+		  qsfp_mask);
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
+		  qsfp_mask);
+
+	set_qsfp_int_n(ppd, 0);
 
 	/* Handle active low nature of INT_N and MODPRST_N pins */
 	if (qsfp_mod_present(ppd))
@@ -9070,29 +9346,6 @@ void init_qsfp(struct hfi1_pportdata *ppd)
 	write_csr(dd,
 		  dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
 		  qsfp_mask);
-
-	/* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
-	qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
-	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
-		qsfp_mask);
-
-	if (qsfp_mod_present(ppd)) {
-		msleep(3000);
-		reset_qsfp(ppd);
-
-		/* Check for QSFP interrupt after t_init (SFF 8679)
-		 * + extra
-		 */
-		msleep(3000);
-		if (!ppd->qsfp_info.qsfp_interrupt_functional) {
-			if (do_qsfp_intr_fallback(ppd) < 0)
-				dd_dev_info(dd,
-					"%s: QSFP fallback failed\n",
-					__func__);
-			ppd->driver_link_ready = 1;
-		}
-	}
 }
 
 /*
@@ -9100,6 +9353,10 @@ void init_qsfp(struct hfi1_pportdata *ppd)
  */
 static void init_lcb(struct hfi1_devdata *dd)
 {
+	/* simulator does not correctly handle LCB cclk loopback, skip */
+	if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
+		return;
+
 	/* the DC has been reset earlier in the driver load */
 
 	/* set LCB for cclk loopback on the port */
@@ -9128,8 +9385,6 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
 		ppd->guid = guid;
 	}
 
-	/* the link defaults to enabled */
-	ppd->link_enabled = 1;
 	/* Set linkinit_reason on power up per OPA spec */
 	ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
 
@@ -9142,6 +9397,12 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
 			return ret;
 	}
 
+	/* tune the SERDES to a ballpark setting for
+	 * optimal signal and bit error rate
+	 * Needs to be done before starting the link
+	 */
+	tune_serdes(ppd);
+
 	return start_link(ppd);
 }
 
@@ -9159,8 +9420,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
 	ppd->driver_link_ready = 0;
 	ppd->link_enabled = 0;
 
+	ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
 	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
-	  OPA_LINKDOWN_REASON_SMA_DISABLED);
+			     OPA_LINKDOWN_REASON_SMA_DISABLED);
 	set_link_state(ppd, HLS_DN_OFFLINE);
 
 	/* disable the port */
@@ -9174,14 +9437,14 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd)
 
 	ppd = (struct hfi1_pportdata *)(dd + 1);
 	for (i = 0; i < dd->num_pports; i++, ppd++) {
-		ppd->ibport_data.rc_acks = NULL;
-		ppd->ibport_data.rc_qacks = NULL;
-		ppd->ibport_data.rc_acks = alloc_percpu(u64);
-		ppd->ibport_data.rc_qacks = alloc_percpu(u64);
-		ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
-		if ((ppd->ibport_data.rc_acks == NULL) ||
-		    (ppd->ibport_data.rc_delayed_comp == NULL) ||
-		    (ppd->ibport_data.rc_qacks == NULL))
+		ppd->ibport_data.rvp.rc_acks = NULL;
+		ppd->ibport_data.rvp.rc_qacks = NULL;
+		ppd->ibport_data.rvp.rc_acks = alloc_percpu(u64);
+		ppd->ibport_data.rvp.rc_qacks = alloc_percpu(u64);
+		ppd->ibport_data.rvp.rc_delayed_comp = alloc_percpu(u64);
+		if (!ppd->ibport_data.rvp.rc_acks ||
+		    !ppd->ibport_data.rvp.rc_delayed_comp ||
+		    !ppd->ibport_data.rvp.rc_qacks)
 			return -ENOMEM;
 	}
 
@@ -9216,8 +9479,8 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
 		pa = 0;
 	} else if (type > PT_INVALID) {
 		dd_dev_err(dd,
-			"unexpected receive array type %u for index %u, not handled\n",
-			type, index);
+			   "unexpected receive array type %u for index %u, not handled\n",
+			   type, index);
 		goto done;
 	}
 
@@ -9432,12 +9695,15 @@ static void set_send_length(struct hfi1_pportdata *ppd)
 	/* all kernel receive contexts have the same hdrqentsize */
 	for (i = 0; i < ppd->vls_supported; i++) {
 		sc_set_cr_threshold(dd->vld[i].sc,
-			sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
-				dd->rcd[0]->rcvhdrqentsize));
+				    sc_mtu_to_threshold(dd->vld[i].sc,
+							dd->vld[i].mtu,
+							dd->rcd[0]->
+							rcvhdrqentsize));
 	}
 	sc_set_cr_threshold(dd->vld[15].sc,
-		sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
-			dd->rcd[0]->rcvhdrqentsize));
+			    sc_mtu_to_threshold(dd->vld[15].sc,
+						dd->vld[15].mtu,
+						dd->rcd[0]->rcvhdrqentsize));
 
 	/* Adjust maximum MTU for the port in DC */
 	dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
@@ -9463,7 +9729,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
 	c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
 		| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
 	c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
-			<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
+			<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) |
 	      ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
 			<< DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
 	write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
@@ -9498,8 +9764,8 @@ static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
 			break;
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(dd,
-				"timeout waiting for phy link state 0x%x, current state is 0x%x\n",
-				state, curr_state);
+				   "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
+				   state, curr_state);
 			return -ETIMEDOUT;
 		}
 		usleep_range(1950, 2050); /* sleep 2ms-ish */
@@ -9542,17 +9808,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 
 	if (do_transition) {
 		ret = set_physical_link_state(dd,
-			PLS_OFFLINE | (rem_reason << 8));
+					      (rem_reason << 8) | PLS_OFFLINE);
 
 		if (ret != HCMD_SUCCESS) {
 			dd_dev_err(dd,
-				"Failed to transition to Offline link state, return %d\n",
-				ret);
+				   "Failed to transition to Offline link state, return %d\n",
+				   ret);
 			return -EINVAL;
 		}
-		if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
+		if (ppd->offline_disabled_reason ==
+				HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
 			ppd->offline_disabled_reason =
-			OPA_LINKDOWN_REASON_TRANSIENT;
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
 	}
 
 	if (do_wait) {
@@ -9573,6 +9840,22 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 	write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
 	ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
 
+	if (ppd->port_type == PORT_TYPE_QSFP &&
+	    ppd->qsfp_info.limiting_active &&
+	    qsfp_mod_present(ppd)) {
+		int ret;
+
+		ret = acquire_chip_resource(dd, qsfp_resource(dd), QSFP_WAIT);
+		if (ret == 0) {
+			set_qsfp_tx(ppd, 0);
+			release_chip_resource(dd, qsfp_resource(dd));
+		} else {
+			/* not fatal, but should warn */
+			dd_dev_err(dd,
+				   "Unable to acquire lock to turn off QSFP TX\n");
+		}
+	}
+
 	/*
 	 * The LNI has a mandatory wait time after the physical state
 	 * moves to Offline.Quiet.  The wait time may be different
@@ -9585,7 +9868,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 	ret = wait_fm_ready(dd, 7000);
 	if (ret) {
 		dd_dev_err(dd,
-			"After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
+			   "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
 		/* state is really offline, so make it so */
 		ppd->host_link_state = HLS_DN_OFFLINE;
 		return ret;
@@ -9608,8 +9891,8 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 		read_last_local_state(dd, &last_local_state);
 		read_last_remote_state(dd, &last_remote_state);
 		dd_dev_err(dd,
-			"LNI failure last states: local 0x%08x, remote 0x%08x\n",
-			last_local_state, last_remote_state);
+			   "LNI failure last states: local 0x%08x, remote 0x%08x\n",
+			   last_local_state, last_remote_state);
 	}
 
 	/* the active link width (downgrade) is 0 on link down */
@@ -9757,14 +10040,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 		state = dd->link_default;
 
 	/* interpret poll -> poll as a link bounce */
-	poll_bounce = ppd->host_link_state == HLS_DN_POLL
-				&& state == HLS_DN_POLL;
+	poll_bounce = ppd->host_link_state == HLS_DN_POLL &&
+		      state == HLS_DN_POLL;
 
 	dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
-		link_state_name(ppd->host_link_state),
-		link_state_name(orig_new_state),
-		poll_bounce ? "(bounce) " : "",
-		link_state_reason_name(ppd, state));
+		    link_state_name(ppd->host_link_state),
+		    link_state_name(orig_new_state),
+		    poll_bounce ? "(bounce) " : "",
+		    link_state_reason_name(ppd, state));
 
 	was_up = !!(ppd->host_link_state & HLS_UP);
 
@@ -9785,8 +10068,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 
 	switch (state) {
 	case HLS_UP_INIT:
-		if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
-			    || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
+		if (ppd->host_link_state == HLS_DN_POLL &&
+		    (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
 			/*
 			 * Quick link up jumps from polling to here.
 			 *
@@ -9794,7 +10077,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 			 * simulator jumps from polling to link up.
 			 * Accept that here.
 			 */
-			/* OK */;
+			/* OK */
 		} else if (ppd->host_link_state != HLS_GOING_UP) {
 			goto unexpected;
 		}
@@ -9805,8 +10088,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 			/* logical state didn't change, stay at going_up */
 			ppd->host_link_state = HLS_GOING_UP;
 			dd_dev_err(dd,
-				"%s: logical state did not change to INIT\n",
-				__func__);
+				   "%s: logical state did not change to INIT\n",
+				   __func__);
 		} else {
 			/* clear old transient LINKINIT_REASON code */
 			if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
@@ -9830,8 +10113,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 			/* logical state didn't change, stay at init */
 			ppd->host_link_state = HLS_UP_INIT;
 			dd_dev_err(dd,
-				"%s: logical state did not change to ARMED\n",
-				__func__);
+				   "%s: logical state did not change to ARMED\n",
+				   __func__);
 		}
 		/*
 		 * The simulator does not currently implement SMA messages,
@@ -9852,15 +10135,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 			/* logical state didn't change, stay at armed */
 			ppd->host_link_state = HLS_UP_ARMED;
 			dd_dev_err(dd,
-				"%s: logical state did not change to ACTIVE\n",
-				__func__);
+				   "%s: logical state did not change to ACTIVE\n",
+				   __func__);
 		} else {
-
 			/* tell all engines to go running */
 			sdma_all_running(dd);
 
 			/* Signal the IB layer that the port has went active */
-			event.device = &dd->verbs_dev.ibdev;
+			event.device = &dd->verbs_dev.rdi.ibdev;
 			event.element.port_num = ppd->port;
 			event.event = IB_EVENT_PORT_ACTIVE;
 		}
@@ -9887,6 +10169,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 				ppd->link_enabled = 1;
 		}
 
+		set_all_slowpath(ppd->dd);
 		ret = set_local_link_attributes(ppd);
 		if (ret)
 			break;
@@ -9901,12 +10184,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 			ret1 = set_physical_link_state(dd, PLS_POLLING);
 			if (ret1 != HCMD_SUCCESS) {
 				dd_dev_err(dd,
-					"Failed to transition to Polling link state, return 0x%x\n",
-					ret1);
+					   "Failed to transition to Polling link state, return 0x%x\n",
+					   ret1);
 				ret = -EINVAL;
 			}
 		}
-		ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
 		/*
 		 * If an error occurred above, go back to offline.  The
 		 * caller may reschedule another attempt.
@@ -9931,8 +10215,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 		ret1 = set_physical_link_state(dd, PLS_DISABLED);
 		if (ret1 != HCMD_SUCCESS) {
 			dd_dev_err(dd,
-				"Failed to transition to Disabled link state, return 0x%x\n",
-				ret1);
+				   "Failed to transition to Disabled link state, return 0x%x\n",
+				   ret1);
 			ret = -EINVAL;
 			break;
 		}
@@ -9960,8 +10244,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 		ret1 = set_physical_link_state(dd, PLS_LINKUP);
 		if (ret1 != HCMD_SUCCESS) {
 			dd_dev_err(dd,
-				"Failed to transition to link up state, return 0x%x\n",
-				ret1);
+				   "Failed to transition to link up state, return 0x%x\n",
+				   ret1);
 			ret = -EINVAL;
 			break;
 		}
@@ -9972,7 +10256,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 	case HLS_LINK_COOLDOWN:		/* transient within goto_offline() */
 	default:
 		dd_dev_info(dd, "%s: state 0x%x: not supported\n",
-			__func__, state);
+			    __func__, state);
 		ret = -EINVAL;
 		break;
 	}
@@ -9992,8 +10276,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 
 unexpected:
 	dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
-		__func__, link_state_name(ppd->host_link_state),
-		link_state_name(state));
+		   __func__, link_state_name(ppd->host_link_state),
+		   link_state_name(state));
 	ret = -EINVAL;
 
 done:
@@ -10019,7 +10303,7 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
 		 * The VL Arbitrator high limit is sent in units of 4k
 		 * bytes, while HFI stores it in units of 64 bytes.
 		 */
-		val *= 4096/64;
+		val *= 4096 / 64;
 		reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
 			<< SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
 		write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
@@ -10034,12 +10318,6 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
 			ppd->vls_operational = val;
 			if (!ppd->port)
 				ret = -EINVAL;
-			else
-				ret = sdma_map_init(
-					ppd->dd,
-					ppd->port - 1,
-					val,
-					NULL);
 		}
 		break;
 	/*
@@ -10087,8 +10365,8 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
 	default:
 		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
 			dd_dev_info(ppd->dd,
-			  "%s: which %s, val 0x%x: not implemented\n",
-			  __func__, ib_cfg_name(which), val);
+				    "%s: which %s, val 0x%x: not implemented\n",
+				    __func__, ib_cfg_name(which), val);
 		break;
 	}
 	return ret;
@@ -10155,6 +10433,7 @@ static int vl_arb_match_cache(struct vl_arb_cache *cache,
 {
 	return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
 }
+
 /* end functions related to vl arbitration table caching */
 
 static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
@@ -10242,7 +10521,7 @@ static int get_buffer_control(struct hfi1_devdata *dd,
 
 	/* OPA and HFI have a 1-1 mapping */
 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
-		read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
+		read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8 * i), &bc->vl[i]);
 
 	/* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
 	read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
@@ -10296,41 +10575,41 @@ static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
 static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
 {
 	write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
-		DC_SC_VL_VAL(15_0,
-		0, dp->vlnt[0] & 0xf,
-		1, dp->vlnt[1] & 0xf,
-		2, dp->vlnt[2] & 0xf,
-		3, dp->vlnt[3] & 0xf,
-		4, dp->vlnt[4] & 0xf,
-		5, dp->vlnt[5] & 0xf,
-		6, dp->vlnt[6] & 0xf,
-		7, dp->vlnt[7] & 0xf,
-		8, dp->vlnt[8] & 0xf,
-		9, dp->vlnt[9] & 0xf,
-		10, dp->vlnt[10] & 0xf,
-		11, dp->vlnt[11] & 0xf,
-		12, dp->vlnt[12] & 0xf,
-		13, dp->vlnt[13] & 0xf,
-		14, dp->vlnt[14] & 0xf,
-		15, dp->vlnt[15] & 0xf));
+		  DC_SC_VL_VAL(15_0,
+			       0, dp->vlnt[0] & 0xf,
+			       1, dp->vlnt[1] & 0xf,
+			       2, dp->vlnt[2] & 0xf,
+			       3, dp->vlnt[3] & 0xf,
+			       4, dp->vlnt[4] & 0xf,
+			       5, dp->vlnt[5] & 0xf,
+			       6, dp->vlnt[6] & 0xf,
+			       7, dp->vlnt[7] & 0xf,
+			       8, dp->vlnt[8] & 0xf,
+			       9, dp->vlnt[9] & 0xf,
+			       10, dp->vlnt[10] & 0xf,
+			       11, dp->vlnt[11] & 0xf,
+			       12, dp->vlnt[12] & 0xf,
+			       13, dp->vlnt[13] & 0xf,
+			       14, dp->vlnt[14] & 0xf,
+			       15, dp->vlnt[15] & 0xf));
 	write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
-		DC_SC_VL_VAL(31_16,
-		16, dp->vlnt[16] & 0xf,
-		17, dp->vlnt[17] & 0xf,
-		18, dp->vlnt[18] & 0xf,
-		19, dp->vlnt[19] & 0xf,
-		20, dp->vlnt[20] & 0xf,
-		21, dp->vlnt[21] & 0xf,
-		22, dp->vlnt[22] & 0xf,
-		23, dp->vlnt[23] & 0xf,
-		24, dp->vlnt[24] & 0xf,
-		25, dp->vlnt[25] & 0xf,
-		26, dp->vlnt[26] & 0xf,
-		27, dp->vlnt[27] & 0xf,
-		28, dp->vlnt[28] & 0xf,
-		29, dp->vlnt[29] & 0xf,
-		30, dp->vlnt[30] & 0xf,
-		31, dp->vlnt[31] & 0xf));
+		  DC_SC_VL_VAL(31_16,
+			       16, dp->vlnt[16] & 0xf,
+			       17, dp->vlnt[17] & 0xf,
+			       18, dp->vlnt[18] & 0xf,
+			       19, dp->vlnt[19] & 0xf,
+			       20, dp->vlnt[20] & 0xf,
+			       21, dp->vlnt[21] & 0xf,
+			       22, dp->vlnt[22] & 0xf,
+			       23, dp->vlnt[23] & 0xf,
+			       24, dp->vlnt[24] & 0xf,
+			       25, dp->vlnt[25] & 0xf,
+			       26, dp->vlnt[26] & 0xf,
+			       27, dp->vlnt[27] & 0xf,
+			       28, dp->vlnt[28] & 0xf,
+			       29, dp->vlnt[29] & 0xf,
+			       30, dp->vlnt[30] & 0xf,
+			       31, dp->vlnt[31] & 0xf));
 }
 
 static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
@@ -10338,7 +10617,7 @@ static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
 {
 	if (limit != 0)
 		dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
-			what, (int)limit, idx);
+			    what, (int)limit, idx);
 }
 
 /* change only the shared limit portion of SendCmGLobalCredit */
@@ -10416,14 +10695,14 @@ static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
 	}
 
 	dd_dev_err(dd,
-		"%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
-		which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
+		   "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
+		   which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
 	/*
 	 * If this occurs, it is likely there was a credit loss on the link.
 	 * The only recovery from that is a link bounce.
 	 */
 	dd_dev_err(dd,
-		"Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
+		   "Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
 }
 
 /*
@@ -10450,13 +10729,15 @@ static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
  * raise = if the new limit is higher than the current value (may be changed
  *	earlier in the algorithm), set the new limit to the new value
  */
-static int set_buffer_control(struct hfi1_devdata *dd,
-			      struct buffer_control *new_bc)
+int set_buffer_control(struct hfi1_pportdata *ppd,
+		       struct buffer_control *new_bc)
 {
+	struct hfi1_devdata *dd = ppd->dd;
 	u64 changing_mask, ld_mask, stat_mask;
 	int change_count;
 	int i, use_all_mask;
 	int this_shared_changing;
+	int vl_count = 0, ret;
 	/*
 	 * A0: add the variable any_shared_limit_changing below and in the
 	 * algorithm above.  If removing A0 support, it can be removed.
@@ -10481,7 +10762,6 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 #define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
 #define NUM_USABLE_VLS 16	/* look at VL15 and less */
 
-
 	/* find the new total credits, do sanity check on unused VLs */
 	for (i = 0; i < OPA_MAX_VLS; i++) {
 		if (valid_vl(i)) {
@@ -10489,9 +10769,9 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 			continue;
 		}
 		nonzero_msg(dd, i, "dedicated",
-			be16_to_cpu(new_bc->vl[i].dedicated));
+			    be16_to_cpu(new_bc->vl[i].dedicated));
 		nonzero_msg(dd, i, "shared",
-			be16_to_cpu(new_bc->vl[i].shared));
+			    be16_to_cpu(new_bc->vl[i].shared));
 		new_bc->vl[i].dedicated = 0;
 		new_bc->vl[i].shared = 0;
 	}
@@ -10505,8 +10785,10 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 	 */
 	memset(changing, 0, sizeof(changing));
 	memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
-	/* NOTE: Assumes that the individual VL bits are adjacent and in
-	   increasing order */
+	/*
+	 * NOTE: Assumes that the individual VL bits are adjacent and in
+	 * increasing order
+	 */
 	stat_mask =
 		SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
 	changing_mask = 0;
@@ -10520,8 +10802,8 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 						!= cur_bc.vl[i].shared;
 		if (this_shared_changing)
 			any_shared_limit_changing = 1;
-		if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
-				|| this_shared_changing) {
+		if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated ||
+		    this_shared_changing) {
 			changing[i] = 1;
 			changing_mask |= stat_mask;
 			change_count++;
@@ -10560,7 +10842,7 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 	}
 
 	wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
-		"shared");
+				 "shared");
 
 	if (change_count > 0) {
 		for (i = 0; i < NUM_USABLE_VLS; i++) {
@@ -10569,7 +10851,8 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 
 			if (lowering_dedicated[i]) {
 				set_vl_dedicated(dd, i,
-					be16_to_cpu(new_bc->vl[i].dedicated));
+						 be16_to_cpu(new_bc->
+							     vl[i].dedicated));
 				cur_bc.vl[i].dedicated =
 						new_bc->vl[i].dedicated;
 			}
@@ -10585,7 +10868,8 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 			if (be16_to_cpu(new_bc->vl[i].dedicated) >
 					be16_to_cpu(cur_bc.vl[i].dedicated))
 				set_vl_dedicated(dd, i,
-					be16_to_cpu(new_bc->vl[i].dedicated));
+						 be16_to_cpu(new_bc->
+							     vl[i].dedicated));
 		}
 	}
 
@@ -10601,13 +10885,35 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 
 	/* finally raise the global shared */
 	if (be16_to_cpu(new_bc->overall_shared_limit) >
-			be16_to_cpu(cur_bc.overall_shared_limit))
+	    be16_to_cpu(cur_bc.overall_shared_limit))
 		set_global_shared(dd,
-			be16_to_cpu(new_bc->overall_shared_limit));
+				  be16_to_cpu(new_bc->overall_shared_limit));
 
 	/* bracket the credit change with a total adjustment */
 	if (new_total < cur_total)
 		set_global_limit(dd, new_total);
+
+	/*
+	 * Determine the actual number of operational VLS using the number of
+	 * dedicated and shared credits for each VL.
+	 */
+	if (change_count > 0) {
+		for (i = 0; i < TXE_NUM_DATA_VL; i++)
+			if (be16_to_cpu(new_bc->vl[i].dedicated) > 0 ||
+			    be16_to_cpu(new_bc->vl[i].shared) > 0)
+				vl_count++;
+		ppd->actual_vls_operational = vl_count;
+		ret = sdma_map_init(dd, ppd->port - 1, vl_count ?
+				    ppd->actual_vls_operational :
+				    ppd->vls_operational,
+				    NULL);
+		if (ret == 0)
+			ret = pio_map_init(dd, ppd->port - 1, vl_count ?
+					   ppd->actual_vls_operational :
+					   ppd->vls_operational, NULL);
+		if (ret)
+			return ret;
+	}
 	return 0;
 }
 
@@ -10699,7 +11005,7 @@ int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
 				     VL_ARB_LOW_PRIO_TABLE_SIZE, t);
 		break;
 	case FM_TBL_BUFFER_CONTROL:
-		ret = set_buffer_control(ppd->dd, t);
+		ret = set_buffer_control(ppd, t);
 		break;
 	case FM_TBL_SC2VLNT:
 		set_sc2vlnt(ppd->dd, t);
@@ -10849,10 +11155,13 @@ static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
 	}
 
 	rcd->rcvavail_timeout = timeout;
-	/* timeout cannot be larger than rcv_intr_timeout_csr which has already
-	   been verified to be in range */
+	/*
+	 * timeout cannot be larger than rcv_intr_timeout_csr which has already
+	 * been verified to be in range
+	 */
 	write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
-		(u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
+			(u64)timeout <<
+			RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
 }
 
 void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
@@ -10918,16 +11227,16 @@ u32 hdrqempty(struct hfi1_ctxtdata *rcd)
 static u32 encoded_size(u32 size)
 {
 	switch (size) {
-	case   4*1024: return 0x1;
-	case   8*1024: return 0x2;
-	case  16*1024: return 0x3;
-	case  32*1024: return 0x4;
-	case  64*1024: return 0x5;
-	case 128*1024: return 0x6;
-	case 256*1024: return 0x7;
-	case 512*1024: return 0x8;
-	case   1*1024*1024: return 0x9;
-	case   2*1024*1024: return 0xa;
+	case   4 * 1024: return 0x1;
+	case   8 * 1024: return 0x2;
+	case  16 * 1024: return 0x3;
+	case  32 * 1024: return 0x4;
+	case  64 * 1024: return 0x5;
+	case 128 * 1024: return 0x6;
+	case 256 * 1024: return 0x7;
+	case 512 * 1024: return 0x8;
+	case   1 * 1024 * 1024: return 0x9;
+	case   2 * 1024 * 1024: return 0xa;
 	}
 	return 0x1;	/* if invalid, go with the minimum size */
 }
@@ -10946,8 +11255,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 
 	rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
 	/* if the context already enabled, don't do the extra steps */
-	if ((op & HFI1_RCVCTRL_CTXT_ENB)
-			&& !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
+	if ((op & HFI1_RCVCTRL_CTXT_ENB) &&
+	    !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
 		/* reset the tail and hdr addresses, and sequence count */
 		write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
 				rcd->rcvhdrq_phys);
@@ -11021,6 +11330,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 		if (dd->rcvhdrtail_dummy_physaddr) {
 			write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
 					dd->rcvhdrtail_dummy_physaddr);
+			/* Enabling RcvCtxtCtrl.TailUpd is intentional. */
 			rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
 		}
 
@@ -11032,15 +11342,20 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 		rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
 	if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
 		rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
-	if (op & HFI1_RCVCTRL_TAILUPD_DIS)
-		rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
+	if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
+		/* See comment on RcvCtxtCtrl.TailUpd above */
+		if (!(op & HFI1_RCVCTRL_CTXT_DIS))
+			rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
+	}
 	if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
 		rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
 	if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
 		rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
 	if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
-		/* In one-packet-per-eager mode, the size comes from
-		   the RcvArray entry. */
+		/*
+		 * In one-packet-per-eager mode, the size comes from
+		 * the RcvArray entry.
+		 */
 		rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
 		rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
 	}
@@ -11059,19 +11374,19 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 	write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
 
 	/* work around sticky RcvCtxtStatus.BlockedRHQFull */
-	if (did_enable
-	    && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
+	if (did_enable &&
+	    (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
 		reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
 		if (reg != 0) {
 			dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
-				ctxt, reg);
+				    ctxt, reg);
 			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
 			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
 			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
 			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
 			reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
 			dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
-				ctxt, reg, reg == 0 ? "not" : "still");
+				    ctxt, reg, reg == 0 ? "not" : "still");
 		}
 	}
 
@@ -11082,7 +11397,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 		 */
 		/* set interrupt timeout */
 		write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
-			(u64)rcd->rcvavail_timeout <<
+				(u64)rcd->rcvavail_timeout <<
 				RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
 
 		/* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
@@ -11100,28 +11415,19 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 				dd->rcvhdrtail_dummy_physaddr);
 }
 
-u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
-		    u64 **cntrp)
+u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
 {
 	int ret;
 	u64 val = 0;
 
 	if (namep) {
 		ret = dd->cntrnameslen;
-		if (pos != 0) {
-			dd_dev_err(dd, "read_cntrs does not support indexing");
-			return 0;
-		}
 		*namep = dd->cntrnames;
 	} else {
 		const struct cntr_entry *entry;
 		int i, j;
 
 		ret = (dd->ndevcntrs) * sizeof(u64);
-		if (pos != 0) {
-			dd_dev_err(dd, "read_cntrs does not support indexing");
-			return 0;
-		}
 
 		/* Get the start of the block of counters */
 		*cntrp = dd->cntrs;
@@ -11150,6 +11456,20 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
 						dd->cntrs[entry->offset + j] =
 									    val;
 					}
+				} else if (entry->flags & CNTR_SDMA) {
+					hfi1_cdbg(CNTR,
+						  "\t Per SDMA Engine\n");
+					for (j = 0; j < dd->chip_sdma_engines;
+					     j++) {
+						val =
+						entry->rw_cntr(entry, dd, j,
+							       CNTR_MODE_R, 0);
+						hfi1_cdbg(CNTR,
+							  "\t\tRead 0x%llx for %d\n",
+							  val, j);
+						dd->cntrs[entry->offset + j] =
+									val;
+					}
 				} else {
 					val = entry->rw_cntr(entry, dd,
 							CNTR_INVALID_VL,
@@ -11166,30 +11486,19 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
 /*
  * Used by sysfs to create files for hfi stats to read
  */
-u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
-			char **namep, u64 **cntrp)
+u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp)
 {
 	int ret;
 	u64 val = 0;
 
 	if (namep) {
-		ret = dd->portcntrnameslen;
-		if (pos != 0) {
-			dd_dev_err(dd, "index not supported");
-			return 0;
-		}
-		*namep = dd->portcntrnames;
+		ret = ppd->dd->portcntrnameslen;
+		*namep = ppd->dd->portcntrnames;
 	} else {
 		const struct cntr_entry *entry;
-		struct hfi1_pportdata *ppd;
 		int i, j;
 
-		ret = (dd->nportcntrs) * sizeof(u64);
-		if (pos != 0) {
-			dd_dev_err(dd, "indexing not supported");
-			return 0;
-		}
-		ppd = (struct hfi1_pportdata *)(dd + 1 + port);
+		ret = ppd->dd->nportcntrs * sizeof(u64);
 		*cntrp = ppd->cntrs;
 
 		for (i = 0; i < PORT_CNTR_LAST; i++) {
@@ -11238,14 +11547,14 @@ static void free_cntrs(struct hfi1_devdata *dd)
 	for (i = 0; i < dd->num_pports; i++, ppd++) {
 		kfree(ppd->cntrs);
 		kfree(ppd->scntrs);
-		free_percpu(ppd->ibport_data.rc_acks);
-		free_percpu(ppd->ibport_data.rc_qacks);
-		free_percpu(ppd->ibport_data.rc_delayed_comp);
+		free_percpu(ppd->ibport_data.rvp.rc_acks);
+		free_percpu(ppd->ibport_data.rvp.rc_qacks);
+		free_percpu(ppd->ibport_data.rvp.rc_delayed_comp);
 		ppd->cntrs = NULL;
 		ppd->scntrs = NULL;
-		ppd->ibport_data.rc_acks = NULL;
-		ppd->ibport_data.rc_qacks = NULL;
-		ppd->ibport_data.rc_delayed_comp = NULL;
+		ppd->ibport_data.rvp.rc_acks = NULL;
+		ppd->ibport_data.rvp.rc_qacks = NULL;
+		ppd->ibport_data.rvp.rc_delayed_comp = NULL;
 	}
 	kfree(dd->portcntrnames);
 	dd->portcntrnames = NULL;
@@ -11513,11 +11822,13 @@ mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
 #define C_MAX_NAME 13 /* 12 chars + one for /0 */
 static int init_cntrs(struct hfi1_devdata *dd)
 {
-	int i, rcv_ctxts, index, j;
+	int i, rcv_ctxts, j;
 	size_t sz;
 	char *p;
 	char name[C_MAX_NAME];
 	struct hfi1_pportdata *ppd;
+	const char *bit_type_32 = ",32";
+	const int bit_type_32_sz = strlen(bit_type_32);
 
 	/* set up the stats timer; the add_timer is done at the end */
 	setup_timer(&dd->synth_stats_timer, update_synth_timer,
@@ -11530,49 +11841,57 @@ static int init_cntrs(struct hfi1_devdata *dd)
 	/* size names and determine how many we have*/
 	dd->ndevcntrs = 0;
 	sz = 0;
-	index = 0;
 
 	for (i = 0; i < DEV_CNTR_LAST; i++) {
-		hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
 		if (dev_cntrs[i].flags & CNTR_DISABLED) {
 			hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
 			continue;
 		}
 
 		if (dev_cntrs[i].flags & CNTR_VL) {
-			hfi1_dbg_early("\tProcessing VL cntr\n");
-			dev_cntrs[i].offset = index;
+			dev_cntrs[i].offset = dd->ndevcntrs;
 			for (j = 0; j < C_VL_COUNT; j++) {
-				memset(name, '\0', C_MAX_NAME);
 				snprintf(name, C_MAX_NAME, "%s%d",
-					dev_cntrs[i].name,
-					vl_from_idx(j));
+					 dev_cntrs[i].name, vl_from_idx(j));
+				sz += strlen(name);
+				/* Add ",32" for 32-bit counters */
+				if (dev_cntrs[i].flags & CNTR_32BIT)
+					sz += bit_type_32_sz;
+				sz++;
+				dd->ndevcntrs++;
+			}
+		} else if (dev_cntrs[i].flags & CNTR_SDMA) {
+			dev_cntrs[i].offset = dd->ndevcntrs;
+			for (j = 0; j < dd->chip_sdma_engines; j++) {
+				snprintf(name, C_MAX_NAME, "%s%d",
+					 dev_cntrs[i].name, j);
 				sz += strlen(name);
+				/* Add ",32" for 32-bit counters */
+				if (dev_cntrs[i].flags & CNTR_32BIT)
+					sz += bit_type_32_sz;
 				sz++;
-				hfi1_dbg_early("\t\t%s\n", name);
 				dd->ndevcntrs++;
-				index++;
 			}
 		} else {
-			/* +1 for newline  */
+			/* +1 for newline. */
 			sz += strlen(dev_cntrs[i].name) + 1;
+			/* Add ",32" for 32-bit counters */
+			if (dev_cntrs[i].flags & CNTR_32BIT)
+				sz += bit_type_32_sz;
+			dev_cntrs[i].offset = dd->ndevcntrs;
 			dd->ndevcntrs++;
-			dev_cntrs[i].offset = index;
-			index++;
-			hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
 		}
 	}
 
 	/* allocate space for the counter values */
-	dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
+	dd->cntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
 	if (!dd->cntrs)
 		goto bail;
 
-	dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
+	dd->scntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
 	if (!dd->scntrs)
 		goto bail;
 
-
 	/* allocate space for the counter names */
 	dd->cntrnameslen = sz;
 	dd->cntrnames = kmalloc(sz, GFP_KERNEL);
@@ -11580,27 +11899,51 @@ static int init_cntrs(struct hfi1_devdata *dd)
 		goto bail;
 
 	/* fill in the names */
-	for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
+	for (p = dd->cntrnames, i = 0; i < DEV_CNTR_LAST; i++) {
 		if (dev_cntrs[i].flags & CNTR_DISABLED) {
 			/* Nothing */
-		} else {
-			if (dev_cntrs[i].flags & CNTR_VL) {
-				for (j = 0; j < C_VL_COUNT; j++) {
-					memset(name, '\0', C_MAX_NAME);
-					snprintf(name, C_MAX_NAME, "%s%d",
-						dev_cntrs[i].name,
-						vl_from_idx(j));
-					memcpy(p, name, strlen(name));
-					p += strlen(name);
-					*p++ = '\n';
+		} else if (dev_cntrs[i].flags & CNTR_VL) {
+			for (j = 0; j < C_VL_COUNT; j++) {
+				snprintf(name, C_MAX_NAME, "%s%d",
+					 dev_cntrs[i].name,
+					 vl_from_idx(j));
+				memcpy(p, name, strlen(name));
+				p += strlen(name);
+
+				/* Counter is 32 bits */
+				if (dev_cntrs[i].flags & CNTR_32BIT) {
+					memcpy(p, bit_type_32, bit_type_32_sz);
+					p += bit_type_32_sz;
 				}
-			} else {
-				memcpy(p, dev_cntrs[i].name,
-				       strlen(dev_cntrs[i].name));
-				p += strlen(dev_cntrs[i].name);
+
 				*p++ = '\n';
 			}
-			index++;
+		} else if (dev_cntrs[i].flags & CNTR_SDMA) {
+			for (j = 0; j < dd->chip_sdma_engines; j++) {
+				snprintf(name, C_MAX_NAME, "%s%d",
+					 dev_cntrs[i].name, j);
+				memcpy(p, name, strlen(name));
+				p += strlen(name);
+
+				/* Counter is 32 bits */
+				if (dev_cntrs[i].flags & CNTR_32BIT) {
+					memcpy(p, bit_type_32, bit_type_32_sz);
+					p += bit_type_32_sz;
+				}
+
+				*p++ = '\n';
+			}
+		} else {
+			memcpy(p, dev_cntrs[i].name, strlen(dev_cntrs[i].name));
+			p += strlen(dev_cntrs[i].name);
+
+			/* Counter is 32 bits */
+			if (dev_cntrs[i].flags & CNTR_32BIT) {
+				memcpy(p, bit_type_32, bit_type_32_sz);
+				p += bit_type_32_sz;
+			}
+
+			*p++ = '\n';
 		}
 	}
 
@@ -11623,31 +11966,31 @@ static int init_cntrs(struct hfi1_devdata *dd)
 	sz = 0;
 	dd->nportcntrs = 0;
 	for (i = 0; i < PORT_CNTR_LAST; i++) {
-		hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
 		if (port_cntrs[i].flags & CNTR_DISABLED) {
 			hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
 			continue;
 		}
 
 		if (port_cntrs[i].flags & CNTR_VL) {
-			hfi1_dbg_early("\tProcessing VL cntr\n");
 			port_cntrs[i].offset = dd->nportcntrs;
 			for (j = 0; j < C_VL_COUNT; j++) {
-				memset(name, '\0', C_MAX_NAME);
 				snprintf(name, C_MAX_NAME, "%s%d",
-					port_cntrs[i].name,
-					vl_from_idx(j));
+					 port_cntrs[i].name, vl_from_idx(j));
 				sz += strlen(name);
+				/* Add ",32" for 32-bit counters */
+				if (port_cntrs[i].flags & CNTR_32BIT)
+					sz += bit_type_32_sz;
 				sz++;
-				hfi1_dbg_early("\t\t%s\n", name);
 				dd->nportcntrs++;
 			}
 		} else {
-			/* +1 for newline  */
+			/* +1 for newline */
 			sz += strlen(port_cntrs[i].name) + 1;
+			/* Add ",32" for 32-bit counters */
+			if (port_cntrs[i].flags & CNTR_32BIT)
+				sz += bit_type_32_sz;
 			port_cntrs[i].offset = dd->nportcntrs;
 			dd->nportcntrs++;
-			hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
 		}
 	}
 
@@ -11664,18 +12007,30 @@ static int init_cntrs(struct hfi1_devdata *dd)
 
 		if (port_cntrs[i].flags & CNTR_VL) {
 			for (j = 0; j < C_VL_COUNT; j++) {
-				memset(name, '\0', C_MAX_NAME);
 				snprintf(name, C_MAX_NAME, "%s%d",
-					port_cntrs[i].name,
-					vl_from_idx(j));
+					 port_cntrs[i].name, vl_from_idx(j));
 				memcpy(p, name, strlen(name));
 				p += strlen(name);
+
+				/* Counter is 32 bits */
+				if (port_cntrs[i].flags & CNTR_32BIT) {
+					memcpy(p, bit_type_32, bit_type_32_sz);
+					p += bit_type_32_sz;
+				}
+
 				*p++ = '\n';
 			}
 		} else {
 			memcpy(p, port_cntrs[i].name,
 			       strlen(port_cntrs[i].name));
 			p += strlen(port_cntrs[i].name);
+
+			/* Counter is 32 bits */
+			if (port_cntrs[i].flags & CNTR_32BIT) {
+				memcpy(p, bit_type_32, bit_type_32_sz);
+				p += bit_type_32_sz;
+			}
+
 			*p++ = '\n';
 		}
 	}
@@ -11703,14 +12058,13 @@ bail:
 	return -ENOMEM;
 }
 
-
 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
 {
 	switch (chip_lstate) {
 	default:
 		dd_dev_err(dd,
-			 "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
-			 chip_lstate);
+			   "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
+			   chip_lstate);
 		/* fall through */
 	case LSTATE_DOWN:
 		return IB_PORT_DOWN;
@@ -11729,7 +12083,7 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
 	switch (chip_pstate & 0xf0) {
 	default:
 		dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
-			chip_pstate);
+			   chip_pstate);
 		/* fall through */
 	case PLS_DISABLED:
 		return IB_PORTPHYSSTATE_DISABLED;
@@ -11795,7 +12149,7 @@ u32 get_logical_state(struct hfi1_pportdata *ppd)
 	new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
 	if (new_state != ppd->lstate) {
 		dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
-			opa_lstate_name(new_state), new_state);
+			    opa_lstate_name(new_state), new_state);
 		ppd->lstate = new_state;
 	}
 	/*
@@ -11854,18 +12208,17 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
 
 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
 {
-	static u32 remembered_state = 0xff;
 	u32 pstate;
 	u32 ib_pstate;
 
 	pstate = read_physical_state(ppd->dd);
 	ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
-	if (remembered_state != ib_pstate) {
+	if (ppd->last_pstate != ib_pstate) {
 		dd_dev_info(ppd->dd,
-			"%s: physical state changed to %s (0x%x), phy 0x%x\n",
-			__func__, opa_pstate_name(ib_pstate), ib_pstate,
-			pstate);
-		remembered_state = ib_pstate;
+			    "%s: physical state changed to %s (0x%x), phy 0x%x\n",
+			    __func__, opa_pstate_name(ib_pstate), ib_pstate,
+			    pstate);
+		ppd->last_pstate = ib_pstate;
 	}
 	return ib_pstate;
 }
@@ -11909,7 +12262,7 @@ u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
 
 int hfi1_init_ctxt(struct send_context *sc)
 {
-	if (sc != NULL) {
+	if (sc) {
 		struct hfi1_devdata *dd = sc->dd;
 		u64 reg;
 		u8 set = (sc->type == SC_USER ?
@@ -11966,34 +12319,14 @@ void set_intr_state(struct hfi1_devdata *dd, u32 enable)
 	 * In HFI, the mask needs to be 1 to allow interrupts.
 	 */
 	if (enable) {
-		u64 cce_int_mask;
-		const int qsfp1_int_smask = QSFP1_INT % 64;
-		const int qsfp2_int_smask = QSFP2_INT % 64;
-
 		/* enable all interrupts */
 		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-			write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
+			write_csr(dd, CCE_INT_MASK + (8 * i), ~(u64)0);
 
-		/*
-		 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
-		 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
-		 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
-		 * the index of the appropriate CSR in the CCEIntMask CSR array
-		 */
-		cce_int_mask = read_csr(dd, CCE_INT_MASK +
-						(8*(QSFP1_INT/64)));
-		if (dd->hfi1_id) {
-			cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
-			write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
-					cce_int_mask);
-		} else {
-			cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
-			write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
-					cce_int_mask);
-		}
+		init_qsfp_int(dd);
 	} else {
 		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-			write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
+			write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
 	}
 }
 
@@ -12005,7 +12338,7 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
 	int i;
 
 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-		write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
+		write_csr(dd, CCE_INT_CLEAR + (8 * i), ~(u64)0);
 
 	write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
 	write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
@@ -12040,10 +12373,9 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
 		struct hfi1_msix_entry *me = dd->msix_entries;
 
 		for (i = 0; i < dd->num_msix_entries; i++, me++) {
-			if (me->arg == NULL) /* => no irq, no affinity */
-				break;
-			irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
-					NULL);
+			if (!me->arg) /* => no irq, no affinity */
+				continue;
+			hfi1_put_irq_affinity(dd, &dd->msix_entries[i]);
 			free_irq(me->msix.vector, me->arg);
 		}
 	} else {
@@ -12064,8 +12396,6 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
 	}
 
 	/* clean structures */
-	for (i = 0; i < dd->num_msix_entries; i++)
-		free_cpumask_var(dd->msix_entries[i].mask);
 	kfree(dd->msix_entries);
 	dd->msix_entries = NULL;
 	dd->num_msix_entries = 0;
@@ -12088,10 +12418,10 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
 	/* direct the chip source to the given MSI-X interrupt */
 	m = isrc / 8;
 	n = isrc % 8;
-	reg = read_csr(dd, CCE_INT_MAP + (8*m));
-	reg &= ~((u64)0xff << (8*n));
-	reg |= ((u64)msix_intr & 0xff) << (8*n);
-	write_csr(dd, CCE_INT_MAP + (8*m), reg);
+	reg = read_csr(dd, CCE_INT_MAP + (8 * m));
+	reg &= ~((u64)0xff << (8 * n));
+	reg |= ((u64)msix_intr & 0xff) << (8 * n);
+	write_csr(dd, CCE_INT_MAP + (8 * m), reg);
 }
 
 static void remap_sdma_interrupts(struct hfi1_devdata *dd,
@@ -12104,12 +12434,12 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
 	 *	SDMAProgress
 	 *	SDMAIdle
 	 */
-	remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
-		msix_intr);
-	remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
-		msix_intr);
-	remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
-		msix_intr);
+	remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
+		   msix_intr);
+	remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
+		   msix_intr);
+	remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
+		   msix_intr);
 }
 
 static int request_intx_irq(struct hfi1_devdata *dd)
@@ -12119,10 +12449,10 @@ static int request_intx_irq(struct hfi1_devdata *dd)
 	snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME "_%d",
 		 dd->unit);
 	ret = request_irq(dd->pcidev->irq, general_interrupt,
-				  IRQF_SHARED, dd->intx_name, dd);
+			  IRQF_SHARED, dd->intx_name, dd);
 	if (ret)
 		dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
-				ret);
+			   ret);
 	else
 		dd->requested_intx_irq = 1;
 	return ret;
@@ -12130,70 +12460,20 @@ static int request_intx_irq(struct hfi1_devdata *dd)
 
 static int request_msix_irqs(struct hfi1_devdata *dd)
 {
-	const struct cpumask *local_mask;
-	cpumask_var_t def, rcv;
-	bool def_ret, rcv_ret;
 	int first_general, last_general;
 	int first_sdma, last_sdma;
 	int first_rx, last_rx;
-	int first_cpu, curr_cpu;
-	int rcv_cpu, sdma_cpu;
-	int i, ret = 0, possible;
-	int ht;
+	int i, ret = 0;
 
 	/* calculate the ranges we are going to use */
 	first_general = 0;
-	first_sdma = last_general = first_general + 1;
-	first_rx = last_sdma = first_sdma + dd->num_sdma;
+	last_general = first_general + 1;
+	first_sdma = last_general;
+	last_sdma = first_sdma + dd->num_sdma;
+	first_rx = last_sdma;
 	last_rx = first_rx + dd->n_krcv_queues;
 
 	/*
-	 * Interrupt affinity.
-	 *
-	 * non-rcv avail gets a default mask that
-	 * starts as possible cpus with threads reset
-	 * and each rcv avail reset.
-	 *
-	 * rcv avail gets node relative 1 wrapping back
-	 * to the node relative 1 as necessary.
-	 *
-	 */
-	local_mask = cpumask_of_pcibus(dd->pcidev->bus);
-	/* if first cpu is invalid, use NUMA 0 */
-	if (cpumask_first(local_mask) >= nr_cpu_ids)
-		local_mask = topology_core_cpumask(0);
-
-	def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
-	rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
-	if (!def_ret || !rcv_ret)
-		goto bail;
-	/* use local mask as default */
-	cpumask_copy(def, local_mask);
-	possible = cpumask_weight(def);
-	/* disarm threads from default */
-	ht = cpumask_weight(
-			topology_sibling_cpumask(cpumask_first(local_mask)));
-	for (i = possible/ht; i < possible; i++)
-		cpumask_clear_cpu(i, def);
-	/* def now has full cores on chosen node*/
-	first_cpu = cpumask_first(def);
-	if (nr_cpu_ids >= first_cpu)
-		first_cpu++;
-	curr_cpu = first_cpu;
-
-	/*  One context is reserved as control context */
-	for (i = first_cpu; i < dd->n_krcv_queues + first_cpu - 1; i++) {
-		cpumask_clear_cpu(curr_cpu, def);
-		cpumask_set_cpu(curr_cpu, rcv);
-		curr_cpu = cpumask_next(curr_cpu, def);
-		if (curr_cpu >= nr_cpu_ids)
-			break;
-	}
-	/* def mask has non-rcv, rcv has recv mask */
-	rcv_cpu = cpumask_first(rcv);
-	sdma_cpu = cpumask_first(def);
-
-	/*
 	 * Sanity check - the code expects all SDMA chip source
 	 * interrupts to be in the same CSR, starting at bit 0.  Verify
 	 * that this is true by checking the bit location of the start.
@@ -12218,6 +12498,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
 			snprintf(me->name, sizeof(me->name),
 				 DRIVER_NAME "_%d", dd->unit);
 			err_info = "general";
+			me->type = IRQ_GENERAL;
 		} else if (first_sdma <= i && i < last_sdma) {
 			idx = i - first_sdma;
 			sde = &dd->per_sdma[idx];
@@ -12227,6 +12508,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
 				 DRIVER_NAME "_%d sdma%d", dd->unit, idx);
 			err_info = "sdma";
 			remap_sdma_interrupts(dd, idx, i);
+			me->type = IRQ_SDMA;
 		} else if (first_rx <= i && i < last_rx) {
 			idx = i - first_rx;
 			rcd = dd->rcd[idx];
@@ -12237,9 +12519,9 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
 			 * Set the interrupt register and mask for this
 			 * context's interrupt.
 			 */
-			rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
+			rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
 			rcd->imask = ((u64)1) <<
-					((IS_RCVAVAIL_START+idx) % 64);
+					((IS_RCVAVAIL_START + idx) % 64);
 			handler = receive_context_interrupt;
 			thread = receive_context_thread;
 			arg = rcd;
@@ -12247,25 +12529,27 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
 				 DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
 			err_info = "receive context";
 			remap_intr(dd, IS_RCVAVAIL_START + idx, i);
+			me->type = IRQ_RCVCTXT;
 		} else {
 			/* not in our expected range - complain, then
-			   ignore it */
+			 * ignore it
+			 */
 			dd_dev_err(dd,
-				"Unexpected extra MSI-X interrupt %d\n", i);
+				   "Unexpected extra MSI-X interrupt %d\n", i);
 			continue;
 		}
 		/* no argument, no interrupt */
-		if (arg == NULL)
+		if (!arg)
 			continue;
 		/* make sure the name is terminated */
-		me->name[sizeof(me->name)-1] = 0;
+		me->name[sizeof(me->name) - 1] = 0;
 
 		ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
-						me->name, arg);
+					   me->name, arg);
 		if (ret) {
 			dd_dev_err(dd,
-				"unable to allocate %s interrupt, vector %d, index %d, err %d\n",
-				 err_info, me->msix.vector, idx, ret);
+				   "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
+				   err_info, me->msix.vector, idx, ret);
 			return ret;
 		}
 		/*
@@ -12274,52 +12558,13 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
 		 */
 		me->arg = arg;
 
-		if (!zalloc_cpumask_var(
-			&dd->msix_entries[i].mask,
-			GFP_KERNEL))
-			goto bail;
-		if (handler == sdma_interrupt) {
-			dd_dev_info(dd, "sdma engine %d cpu %d\n",
-				sde->this_idx, sdma_cpu);
-			sde->cpu = sdma_cpu;
-			cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
-			sdma_cpu = cpumask_next(sdma_cpu, def);
-			if (sdma_cpu >= nr_cpu_ids)
-				sdma_cpu = cpumask_first(def);
-		} else if (handler == receive_context_interrupt) {
-			dd_dev_info(dd, "rcv ctxt %d cpu %d\n", rcd->ctxt,
-				    (rcd->ctxt == HFI1_CTRL_CTXT) ?
-					    cpumask_first(def) : rcv_cpu);
-			if (rcd->ctxt == HFI1_CTRL_CTXT) {
-				/* map to first default */
-				cpumask_set_cpu(cpumask_first(def),
-						dd->msix_entries[i].mask);
-			} else {
-				cpumask_set_cpu(rcv_cpu,
-						dd->msix_entries[i].mask);
-				rcv_cpu = cpumask_next(rcv_cpu, rcv);
-				if (rcv_cpu >= nr_cpu_ids)
-					rcv_cpu = cpumask_first(rcv);
-			}
-		} else {
-			/* otherwise first def */
-			dd_dev_info(dd, "%s cpu %d\n",
-				err_info, cpumask_first(def));
-			cpumask_set_cpu(
-				cpumask_first(def), dd->msix_entries[i].mask);
-		}
-		irq_set_affinity_hint(
-			dd->msix_entries[i].msix.vector,
-			dd->msix_entries[i].mask);
+		ret = hfi1_get_irq_affinity(dd, me);
+		if (ret)
+			dd_dev_err(dd,
+				   "unable to pin IRQ %d\n", ret);
 	}
 
-out:
-	free_cpumask_var(def);
-	free_cpumask_var(rcv);
 	return ret;
-bail:
-	ret = -ENOMEM;
-	goto  out;
 }
 
 /*
@@ -12336,7 +12581,7 @@ static void reset_interrupts(struct hfi1_devdata *dd)
 
 	/* all chip interrupts map to MSI-X 0 */
 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
-		write_csr(dd, CCE_INT_MAP + (8*i), 0);
+		write_csr(dd, CCE_INT_MAP + (8 * i), 0);
 }
 
 static int set_up_interrupts(struct hfi1_devdata *dd)
@@ -12445,7 +12690,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 		 */
 		num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS - 1;
 	else
-		num_kernel_contexts = num_online_nodes();
+		num_kernel_contexts = num_online_nodes() + 1;
 	num_kernel_contexts =
 		max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
 	/*
@@ -12486,13 +12731,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 	dd->num_rcv_contexts = total_contexts;
 	dd->n_krcv_queues = num_kernel_contexts;
 	dd->first_user_ctxt = num_kernel_contexts;
+	dd->num_user_contexts = num_user_contexts;
 	dd->freectxts = num_user_contexts;
 	dd_dev_info(dd,
-		"rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
-		(int)dd->chip_rcv_contexts,
-		(int)dd->num_rcv_contexts,
-		(int)dd->n_krcv_queues,
-		(int)dd->num_rcv_contexts - dd->n_krcv_queues);
+		    "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
+		    (int)dd->chip_rcv_contexts,
+		    (int)dd->num_rcv_contexts,
+		    (int)dd->n_krcv_queues,
+		    (int)dd->num_rcv_contexts - dd->n_krcv_queues);
 
 	/*
 	 * Receive array allocation:
@@ -12518,8 +12764,8 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 		dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
 			dd->rcv_entries.group_size;
 		dd_dev_info(dd,
-		   "RcvArray group count too high, change to %u\n",
-		   dd->rcv_entries.ngroups);
+			    "RcvArray group count too high, change to %u\n",
+			    dd->rcv_entries.ngroups);
 		dd->rcv_entries.nctxt_extra = 0;
 	}
 	/*
@@ -12585,7 +12831,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
 
 	/* CceIntMap */
 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
-		write_csr(dd, CCE_INT_MAP+(8*i), 0);
+		write_csr(dd, CCE_INT_MAP + (8 * i), 0);
 
 	/* SendCtxtCreditReturnAddr */
 	for (i = 0; i < dd->chip_send_contexts; i++)
@@ -12593,8 +12839,10 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
 
 	/* PIO Send buffers */
 	/* SDMA Send buffers */
-	/* These are not normally read, and (presently) have no method
-	   to be read, so are not pre-initialized */
+	/*
+	 * These are not normally read, and (presently) have no method
+	 * to be read, so are not pre-initialized
+	 */
 
 	/* RcvHdrAddr */
 	/* RcvHdrTailAddr */
@@ -12603,13 +12851,13 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
 		write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
 		write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
 		for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
-			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
+			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j), 0);
 	}
 
 	/* RcvArray */
 	for (i = 0; i < dd->chip_rcv_array_count; i++)
-		write_csr(dd, RCV_ARRAY + (8*i),
-					RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
+		write_csr(dd, RCV_ARRAY + (8 * i),
+			  RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
 
 	/* RcvQPMapTable */
 	for (i = 0; i < 32; i++)
@@ -12641,8 +12889,8 @@ static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
 			return;
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(dd,
-				"Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
-				status_bits, reg & status_bits);
+				   "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
+				   status_bits, reg & status_bits);
 			return;
 		}
 		udelay(1);
@@ -12674,7 +12922,7 @@ static void reset_cce_csrs(struct hfi1_devdata *dd)
 	for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
 		write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
 		write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
-					CCE_MSIX_TABLE_UPPER_RESETCSR);
+			  CCE_MSIX_TABLE_UPPER_RESETCSR);
 	}
 	for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
 		/* CCE_MSIX_PBA read-only */
@@ -12694,91 +12942,6 @@ static void reset_cce_csrs(struct hfi1_devdata *dd)
 		write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
 }
 
-/* set ASIC CSRs to chip reset defaults */
-static void reset_asic_csrs(struct hfi1_devdata *dd)
-{
-	int i;
-
-	/*
-	 * If the HFIs are shared between separate nodes or VMs,
-	 * then more will need to be done here.  One idea is a module
-	 * parameter that returns early, letting the first power-on or
-	 * a known first load do the reset and blocking all others.
-	 */
-
-	if (!(dd->flags & HFI1_DO_INIT_ASIC))
-		return;
-
-	if (dd->icode != ICODE_FPGA_EMULATION) {
-		/* emulation does not have an SBus - leave these alone */
-		/*
-		 * All writes to ASIC_CFG_SBUS_REQUEST do something.
-		 * Notes:
-		 * o The reset is not zero if aimed at the core.  See the
-		 *   SBus documentation for details.
-		 * o If the SBus firmware has been updated (e.g. by the BIOS),
-		 *   will the reset revert that?
-		 */
-		/* ASIC_CFG_SBUS_REQUEST leave alone */
-		write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
-	}
-	/* ASIC_SBUS_RESULT read-only */
-	write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
-	for (i = 0; i < ASIC_NUM_SCRATCH; i++)
-		write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
-	write_csr(dd, ASIC_CFG_MUTEX, 0);	/* this will clear it */
-
-	/* We might want to retain this state across FLR if we ever use it */
-	write_csr(dd, ASIC_CFG_DRV_STR, 0);
-
-	/* ASIC_CFG_THERM_POLL_EN leave alone */
-	/* ASIC_STS_THERM read-only */
-	/* ASIC_CFG_RESET leave alone */
-
-	write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
-	/* ASIC_PCIE_SD_HOST_STATUS read-only */
-	write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
-	write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
-	/* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
-	write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
-	/* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
-	/* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
-	for (i = 0; i < 16; i++)
-		write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
-
-	/* ASIC_GPIO_IN read-only */
-	write_csr(dd, ASIC_GPIO_OE, 0);
-	write_csr(dd, ASIC_GPIO_INVERT, 0);
-	write_csr(dd, ASIC_GPIO_OUT, 0);
-	write_csr(dd, ASIC_GPIO_MASK, 0);
-	/* ASIC_GPIO_STATUS read-only */
-	write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
-	/* ASIC_GPIO_FORCE leave alone */
-
-	/* ASIC_QSFP1_IN read-only */
-	write_csr(dd, ASIC_QSFP1_OE, 0);
-	write_csr(dd, ASIC_QSFP1_INVERT, 0);
-	write_csr(dd, ASIC_QSFP1_OUT, 0);
-	write_csr(dd, ASIC_QSFP1_MASK, 0);
-	/* ASIC_QSFP1_STATUS read-only */
-	write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
-	/* ASIC_QSFP1_FORCE leave alone */
-
-	/* ASIC_QSFP2_IN read-only */
-	write_csr(dd, ASIC_QSFP2_OE, 0);
-	write_csr(dd, ASIC_QSFP2_INVERT, 0);
-	write_csr(dd, ASIC_QSFP2_OUT, 0);
-	write_csr(dd, ASIC_QSFP2_MASK, 0);
-	/* ASIC_QSFP2_STATUS read-only */
-	write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
-	/* ASIC_QSFP2_FORCE leave alone */
-
-	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
-	/* this also writes a NOP command, clearing paging mode */
-	write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
-	write_csr(dd, ASIC_EEP_DATA, 0);
-}
-
 /* set MISC CSRs to chip reset defaults */
 static void reset_misc_csrs(struct hfi1_devdata *dd)
 {
@@ -12789,8 +12952,10 @@ static void reset_misc_csrs(struct hfi1_devdata *dd)
 		write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
 		write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
 	}
-	/* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
-	   only be written 128-byte chunks */
+	/*
+	 * MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
+	 * only be written 128-byte chunks
+	 */
 	/* init RSA engine to clear lingering errors */
 	write_csr(dd, MISC_CFG_RSA_CMD, 1);
 	write_csr(dd, MISC_CFG_RSA_MU, 0);
@@ -12846,18 +13011,17 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
 	write_csr(dd, SEND_ERR_CLEAR, ~0ull);
 	/* SEND_ERR_FORCE read-only */
 	for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
-		write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
+		write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0);
 	for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
-		write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
-	for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
-		write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
+		write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0);
+	for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++)
+		write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0);
 	for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
-		write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
+		write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0);
 	for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
-		write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
+		write_csr(dd, SEND_COUNTER_ARRAY64 + (8 * i), 0);
 	write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
-	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
-					SEND_CM_GLOBAL_CREDIT_RESETCSR);
+	write_csr(dd, SEND_CM_GLOBAL_CREDIT, SEND_CM_GLOBAL_CREDIT_RESETCSR);
 	/* SEND_CM_CREDIT_USED_STATUS read-only */
 	write_csr(dd, SEND_CM_TIMER_CTRL, 0);
 	write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
@@ -12865,7 +13029,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
 	write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
 	write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
-		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
+		write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
 	/* SEND_CM_CREDIT_USED_VL read-only */
 	/* SEND_CM_CREDIT_USED_VL15 read-only */
@@ -12951,8 +13115,8 @@ static void init_rbufs(struct hfi1_devdata *dd)
 		 */
 		if (count++ > 500) {
 			dd_dev_err(dd,
-				"%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
-				__func__, reg);
+				   "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
+				   __func__, reg);
 			break;
 		}
 		udelay(2); /* do not busy-wait the CSR */
@@ -12981,8 +13145,8 @@ static void init_rbufs(struct hfi1_devdata *dd)
 		/* give up after 100us - slowest possible at 33MHz is 73us */
 		if (count++ > 50) {
 			dd_dev_err(dd,
-				"%s: RcvStatus.RxRbufInit not set, continuing\n",
-				__func__);
+				   "%s: RcvStatus.RxRbufInit not set, continuing\n",
+				   __func__);
 			break;
 		}
 	}
@@ -13008,7 +13172,7 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
 	write_csr(dd, RCV_VL15, 0);
 	/* this is a clear-down */
 	write_csr(dd, RCV_ERR_INFO,
-			RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
+		  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
 	/* RCV_ERR_STATUS read-only */
 	write_csr(dd, RCV_ERR_MASK, 0);
 	write_csr(dd, RCV_ERR_CLEAR, ~0ull);
@@ -13054,8 +13218,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
 		write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
 		/* RCV_EGR_OFFSET_TAIL read-only */
 		for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
-			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
-				0);
+			write_uctxt_csr(dd, i,
+					RCV_TID_FLOW_TABLE + (8 * j), 0);
 		}
 	}
 }
@@ -13157,7 +13321,7 @@ static void init_chip(struct hfi1_devdata *dd)
 		write_csr(dd, RCV_CTXT_CTRL, 0);
 	/* mask all interrupt sources */
 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-		write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
+		write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
 
 	/*
 	 * DC Reset: do a full DC reset before the register clear.
@@ -13166,7 +13330,7 @@ static void init_chip(struct hfi1_devdata *dd)
 	 * across the clear.
 	 */
 	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
-	(void) read_csr(dd, CCE_DC_CTRL);
+	(void)read_csr(dd, CCE_DC_CTRL);
 
 	if (use_flr) {
 		/*
@@ -13187,22 +13351,19 @@ static void init_chip(struct hfi1_devdata *dd)
 			hfi1_pcie_flr(dd);
 			restore_pci_variables(dd);
 		}
-
-		reset_asic_csrs(dd);
 	} else {
 		dd_dev_info(dd, "Resetting CSRs with writes\n");
 		reset_cce_csrs(dd);
 		reset_txe_csrs(dd);
 		reset_rxe_csrs(dd);
-		reset_asic_csrs(dd);
 		reset_misc_csrs(dd);
 	}
 	/* clear the DC reset */
 	write_csr(dd, CCE_DC_CTRL, 0);
 
 	/* Set the LED off */
-	if (is_ax(dd))
-		setextled(dd, 0);
+	setextled(dd, 0);
+
 	/*
 	 * Clear the QSFP reset.
 	 * An FLR enforces a 0 on all out pins. The driver does not touch
@@ -13215,6 +13376,7 @@ static void init_chip(struct hfi1_devdata *dd)
 	 */
 	write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
 	write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
+	init_chip_resources(dd);
 }
 
 static void init_early_variables(struct hfi1_devdata *dd)
@@ -13255,12 +13417,12 @@ static void init_kdeth_qp(struct hfi1_devdata *dd)
 		kdeth_qp = DEFAULT_KDETH_QP;
 
 	write_csr(dd, SEND_BTH_QP,
-			(kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
-				<< SEND_BTH_QP_KDETH_QP_SHIFT);
+		  (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) <<
+		  SEND_BTH_QP_KDETH_QP_SHIFT);
 
 	write_csr(dd, RCV_BTH_QP,
-			(kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
-				<< RCV_BTH_QP_KDETH_QP_SHIFT);
+		  (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) <<
+		  RCV_BTH_QP_KDETH_QP_SHIFT);
 }
 
 /**
@@ -13385,22 +13547,21 @@ static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
 		write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
 	/* add rule0 */
 	write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
-		RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
-			<< RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
-		2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
+		  RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK <<
+		  RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
+		  2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
 	write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
-		LRH_BTH_MATCH_OFFSET
-			<< RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
-		LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
-		LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
-		((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
-		QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
-		((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
+		  LRH_BTH_MATCH_OFFSET << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
+		  LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
+		  LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
+		  ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
+		  QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
+		  ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
 	write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
-		LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
-		LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
-		LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
-		LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
+		  LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
+		  LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
+		  LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
+		  LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
 	/* Enable RSM */
 	add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
 	kfree(rsmmap);
@@ -13418,9 +13579,8 @@ static void init_rxe(struct hfi1_devdata *dd)
 	/* enable all receive errors */
 	write_csr(dd, RCV_ERR_MASK, ~0ull);
 	/* setup QPN map table - start where VL15 context leaves off */
-	init_qos(
-		dd,
-		dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
+	init_qos(dd, dd->n_krcv_queues > MIN_KERNEL_KCTXTS ?
+		 MIN_KERNEL_KCTXTS : 0);
 	/*
 	 * make sure RcvCtrl.RcvWcb <= PCIe Device Control
 	 * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
@@ -13457,36 +13617,33 @@ static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
 			       u32 csr0to3, u32 csr4to7)
 {
 	write_csr(dd, csr0to3,
-		   0ull <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
-		|  1ull <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
-		|  2ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
-		|  4ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
+		  0ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT |
+		  1ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT |
+		  2ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT |
+		  4ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
 	write_csr(dd, csr4to7,
-		   8ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
-		| 16ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
-		| 32ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
-		| 64ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
-
+		  8ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT |
+		  16ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT |
+		  32ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT |
+		  64ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
 }
 
 static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
 {
 	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
-					SEND_CM_LOCAL_AU_TABLE4_TO7);
+			   SEND_CM_LOCAL_AU_TABLE4_TO7);
 }
 
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
 {
 	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
-					SEND_CM_REMOTE_AU_TABLE4_TO7);
+			   SEND_CM_REMOTE_AU_TABLE4_TO7);
 }
 
 static void init_txe(struct hfi1_devdata *dd)
@@ -13537,7 +13694,6 @@ int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
 	write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
 	/*
 	 * Enable send-side J_KEY integrity check, unless this is A0 h/w
-	 * (due to A0 erratum).
 	 */
 	if (!is_ax(dd)) {
 		reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
@@ -13590,9 +13746,9 @@ int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
 	int ret = 0;
 	u64 reg;
 
-	if (ctxt < dd->num_rcv_contexts)
+	if (ctxt < dd->num_rcv_contexts) {
 		rcd = dd->rcd[ctxt];
-	else {
+	} else {
 		ret = -EINVAL;
 		goto done;
 	}
@@ -13618,9 +13774,9 @@ int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
 	int ret = 0;
 	u64 reg;
 
-	if (ctxt < dd->num_rcv_contexts)
+	if (ctxt < dd->num_rcv_contexts) {
 		rcd = dd->rcd[ctxt];
-	else {
+	} else {
 		ret = -EINVAL;
 		goto done;
 	}
@@ -13643,24 +13799,26 @@ done:
  */
 void hfi1_start_cleanup(struct hfi1_devdata *dd)
 {
+	aspm_exit(dd);
 	free_cntrs(dd);
 	free_rcverr(dd);
 	clean_up_interrupts(dd);
+	finish_chip_resources(dd);
 }
 
 #define HFI_BASE_GUID(dev) \
 	((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
 
 /*
- * Certain chip functions need to be initialized only once per asic
- * instead of per-device. This function finds the peer device and
- * checks whether that chip initialization needs to be done by this
- * device.
+ * Information can be shared between the two HFIs on the same ASIC
+ * in the same OS.  This function finds the peer device and sets
+ * up a shared structure.
  */
-static void asic_should_init(struct hfi1_devdata *dd)
+static int init_asic_data(struct hfi1_devdata *dd)
 {
 	unsigned long flags;
 	struct hfi1_devdata *tmp, *peer = NULL;
+	int ret = 0;
 
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
 	/* Find our peer device */
@@ -13672,13 +13830,21 @@ static void asic_should_init(struct hfi1_devdata *dd)
 		}
 	}
 
-	/*
-	 * "Claim" the ASIC for initialization if it hasn't been
-	 " "claimed" yet.
-	 */
-	if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
-		dd->flags |= HFI1_DO_INIT_ASIC;
+	if (peer) {
+		dd->asic_data = peer->asic_data;
+	} else {
+		dd->asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL);
+		if (!dd->asic_data) {
+			ret = -ENOMEM;
+			goto done;
+		}
+		mutex_init(&dd->asic_data->asic_resource_mutex);
+	}
+	dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
+
+done:
 	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+	return ret;
 }
 
 /*
@@ -13698,7 +13864,7 @@ static int obtain_boardname(struct hfi1_devdata *dd)
 	ret = read_hfi1_efi_var(dd, "description", &size,
 				(void **)&dd->boardname);
 	if (ret) {
-		dd_dev_err(dd, "Board description not found\n");
+		dd_dev_info(dd, "Board description not found\n");
 		/* use generic description */
 		dd->boardname = kstrdup(generic, GFP_KERNEL);
 		if (!dd->boardname)
@@ -13707,6 +13873,50 @@ static int obtain_boardname(struct hfi1_devdata *dd)
 	return 0;
 }
 
+/*
+ * Check the interrupt registers to make sure that they are mapped correctly.
+ * It is intended to help user identify any mismapping by VMM when the driver
+ * is running in a VM. This function should only be called before interrupt
+ * is set up properly.
+ *
+ * Return 0 on success, -EINVAL on failure.
+ */
+static int check_int_registers(struct hfi1_devdata *dd)
+{
+	u64 reg;
+	u64 all_bits = ~(u64)0;
+	u64 mask;
+
+	/* Clear CceIntMask[0] to avoid raising any interrupts */
+	mask = read_csr(dd, CCE_INT_MASK);
+	write_csr(dd, CCE_INT_MASK, 0ull);
+	reg = read_csr(dd, CCE_INT_MASK);
+	if (reg)
+		goto err_exit;
+
+	/* Clear all interrupt status bits */
+	write_csr(dd, CCE_INT_CLEAR, all_bits);
+	reg = read_csr(dd, CCE_INT_STATUS);
+	if (reg)
+		goto err_exit;
+
+	/* Set all interrupt status bits */
+	write_csr(dd, CCE_INT_FORCE, all_bits);
+	reg = read_csr(dd, CCE_INT_STATUS);
+	if (reg != all_bits)
+		goto err_exit;
+
+	/* Restore the interrupt mask */
+	write_csr(dd, CCE_INT_CLEAR, all_bits);
+	write_csr(dd, CCE_INT_MASK, mask);
+
+	return 0;
+err_exit:
+	write_csr(dd, CCE_INT_MASK, mask);
+	dd_dev_err(dd, "Interrupt registers not properly mapped by VMM\n");
+	return -EINVAL;
+}
+
 /**
  * Allocate and initialize the device structure for the hfi.
  * @dev: the pci_dev for hfi1_ib device
@@ -13731,9 +13941,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 		"RTL FPGA emulation",
 		"Functional simulator"
 	};
+	struct pci_dev *parent = pdev->bus->self;
 
-	dd = hfi1_alloc_devdata(pdev,
-		NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
+	dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+				sizeof(struct hfi1_pportdata));
 	if (IS_ERR(dd))
 		goto bail;
 	ppd = dd->pport;
@@ -13754,8 +13965,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 		/* link width active is 0 when link is down */
 		/* link width downgrade active is 0 when link is down */
 
-		if (num_vls < HFI1_MIN_VLS_SUPPORTED
-			|| num_vls > HFI1_MAX_VLS_SUPPORTED) {
+		if (num_vls < HFI1_MIN_VLS_SUPPORTED ||
+		    num_vls > HFI1_MAX_VLS_SUPPORTED) {
 			hfi1_early_err(&pdev->dev,
 				       "Invalid num_vls %u, using %u VLs\n",
 				    num_vls, HFI1_MAX_VLS_SUPPORTED);
@@ -13763,6 +13974,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 		}
 		ppd->vls_supported = num_vls;
 		ppd->vls_operational = ppd->vls_supported;
+		ppd->actual_vls_operational = ppd->vls_supported;
 		/* Set the default MTU. */
 		for (vl = 0; vl < num_vls; vl++)
 			dd->vld[vl].mtu = hfi1_max_mtu;
@@ -13782,6 +13994,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 		/* start in offline */
 		ppd->host_link_state = HLS_DN_OFFLINE;
 		init_vl_arb_caches(ppd);
+		ppd->last_pstate = 0xff; /* invalid value */
 	}
 
 	dd->link_default = HLS_DN_POLL;
@@ -13807,8 +14020,21 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
 			& CCE_REVISION_CHIP_REV_MINOR_MASK;
 
-	/* obtain the hardware ID - NOT related to unit, which is a
-	   software enumeration */
+	/*
+	 * Check interrupt registers mapping if the driver has no access to
+	 * the upstream component. In this case, it is likely that the driver
+	 * is running in a VM.
+	 */
+	if (!parent) {
+		ret = check_int_registers(dd);
+		if (ret)
+			goto bail_cleanup;
+	}
+
+	/*
+	 * obtain the hardware ID - NOT related to unit, which is a
+	 * software enumeration
+	 */
 	reg = read_csr(dd, CCE_REVISION2);
 	dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
 					& CCE_REVISION2_HFI_ID_MASK;
@@ -13816,8 +14042,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
 	dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
 	dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
-		dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
-		(int)dd->irev);
+		    dd->icode < ARRAY_SIZE(inames) ?
+		    inames[dd->icode] : "unknown", (int)dd->irev);
 
 	/* speeds the hardware can support */
 	dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
@@ -13846,6 +14072,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 			   num_vls, dd->chip_sdma_engines);
 		num_vls = dd->chip_sdma_engines;
 		ppd->vls_supported = dd->chip_sdma_engines;
+		ppd->vls_operational = ppd->vls_supported;
 	}
 
 	/*
@@ -13867,8 +14094,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	/* needs to be done before we look for the peer device */
 	read_guid(dd);
 
-	/* should this device init the ASIC block? */
-	asic_should_init(dd);
+	/* set up shared ASIC data with peer device */
+	ret = init_asic_data(dd);
+	if (ret)
+		goto bail_cleanup;
 
 	/* obtain chip sizes, reset chip CSRs */
 	init_chip(dd);
@@ -13878,6 +14107,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	if (ret)
 		goto bail_cleanup;
 
+	/* Needs to be called before hfi1_firmware_init */
+	get_platform_config(dd);
+
 	/* read in firmware */
 	ret = hfi1_firmware_init(dd);
 	if (ret)
@@ -13929,6 +14161,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	/* set up KDETH QP prefix in both RX and TX CSRs */
 	init_kdeth_qp(dd);
 
+	ret = hfi1_dev_affinity_init(dd);
+	if (ret)
+		goto bail_cleanup;
+
 	/* send contexts must be set up before receive contexts */
 	ret = init_send_contexts(dd);
 	if (ret)
@@ -14026,7 +14262,6 @@ static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
 	return (u16)delta_cycles;
 }
 
-
 /**
  * create_pbc - build a pbc for transmission
  * @flags: special case flags or-ed in built pbc
@@ -14082,10 +14317,15 @@ static int thermal_init(struct hfi1_devdata *dd)
 	int ret = 0;
 
 	if (dd->icode != ICODE_RTL_SILICON ||
-	    !(dd->flags & HFI1_DO_INIT_ASIC))
+	    check_chip_resource(dd, CR_THERM_INIT, NULL))
 		return ret;
 
-	acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+	if (ret) {
+		THERM_FAILURE(dd, ret, "Acquire SBus");
+		return ret;
+	}
+
 	dd_dev_info(dd, "Initializing thermal sensor\n");
 	/* Disable polling of thermal readings */
 	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x0);
@@ -14132,8 +14372,14 @@ static int thermal_init(struct hfi1_devdata *dd)
 
 	/* Enable polling of thermal readings */
 	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
+
+	/* Set initialized flag */
+	ret = acquire_chip_resource(dd, CR_THERM_INIT, 0);
+	if (ret)
+		THERM_FAILURE(dd, ret, "Unable to set thermal init flag");
+
 done:
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_SBUS);
 	return ret;
 }
 
@@ -14148,7 +14394,7 @@ static void handle_temp_err(struct hfi1_devdata *dd)
 	dd_dev_emerg(dd,
 		     "Critical temperature reached! Forcing device into freeze mode!\n");
 	dd->flags |= HFI1_FORCED_FREEZE;
-	start_freeze_handling(ppd, FREEZE_SELF|FREEZE_ABORT);
+	start_freeze_handling(ppd, FREEZE_SELF | FREEZE_ABORT);
 	/*
 	 * Shut DC down as much and as quickly as possible.
 	 *
@@ -14162,8 +14408,8 @@ static void handle_temp_err(struct hfi1_devdata *dd)
 	 */
 	ppd->driver_link_ready = 0;
 	ppd->link_enabled = 0;
-	set_physical_link_state(dd, PLS_OFFLINE |
-				(OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
+	set_physical_link_state(dd, (OPA_LINKDOWN_REASON_SMA_DISABLED << 8) |
+				PLS_OFFLINE);
 	/*
 	 * Step 2: Shutdown LCB and 8051
 	 *         After shutdown, do not restore DC_CFG_RESET value.
diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h
index 5b375ddc345d..4f3b878e43eb 100644
--- a/drivers/staging/rdma/hfi1/chip.h
+++ b/drivers/staging/rdma/hfi1/chip.h
@@ -1,14 +1,13 @@
 #ifndef _CHIP_H
 #define _CHIP_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -79,8 +76,10 @@
 #define PIO_CMASK 0x7ff	/* counter mask for free and fill counters */
 #define MAX_EAGER_ENTRIES    2048	/* max receive eager entries */
 #define MAX_TID_PAIR_ENTRIES 1024	/* max receive expected pairs */
-/* Virtual? Allocation Unit, defined as AU = 8*2^vAU, 64 bytes, AU is fixed
-   at 64 bytes for all generation one devices */
+/*
+ * Virtual? Allocation Unit, defined as AU = 8*2^vAU, 64 bytes, AU is fixed
+ * at 64 bytes for all generation one devices
+ */
 #define CM_VAU 3
 /* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */
 #define CM_GLOBAL_CREDITS 0x940
@@ -93,15 +92,15 @@
 #define TXE_PIO_SEND (TXE + TXE_PIO_SEND_OFFSET)
 
 /* PBC flags */
-#define PBC_INTR		(1ull << 31)
+#define PBC_INTR		BIT_ULL(31)
 #define PBC_DC_INFO_SHIFT	(30)
-#define PBC_DC_INFO		(1ull << PBC_DC_INFO_SHIFT)
-#define PBC_TEST_EBP	(1ull << 29)
-#define PBC_PACKET_BYPASS	(1ull << 28)
-#define PBC_CREDIT_RETURN	(1ull << 25)
-#define PBC_INSERT_BYPASS_ICRC (1ull << 24)
-#define PBC_TEST_BAD_ICRC	(1ull << 23)
-#define PBC_FECN		(1ull << 22)
+#define PBC_DC_INFO		BIT_ULL(PBC_DC_INFO_SHIFT)
+#define PBC_TEST_EBP		BIT_ULL(29)
+#define PBC_PACKET_BYPASS	BIT_ULL(28)
+#define PBC_CREDIT_RETURN	BIT_ULL(25)
+#define PBC_INSERT_BYPASS_ICRC	BIT_ULL(24)
+#define PBC_TEST_BAD_ICRC	BIT_ULL(23)
+#define PBC_FECN		BIT_ULL(22)
 
 /* PbcInsertHcrc field settings */
 #define PBC_IHCRC_LKDETH 0x0	/* insert @ local KDETH offset */
@@ -212,7 +211,7 @@
 #define PLS_CONFIGPHY_DEBOUCE		   0x40
 #define PLS_CONFIGPHY_ESTCOMM		   0x41
 #define PLS_CONFIGPHY_ESTCOMM_TXRX_HUNT	   0x42
-#define PLS_CONFIGPHY_ESTcOMM_LOCAL_COMPLETE   0x43
+#define PLS_CONFIGPHY_ESTCOMM_LOCAL_COMPLETE   0x43
 #define PLS_CONFIGPHY_OPTEQ			   0x44
 #define PLS_CONFIGPHY_OPTEQ_OPTIMIZING	   0x44
 #define PLS_CONFIGPHY_OPTEQ_LOCAL_COMPLETE	   0x45
@@ -242,36 +241,37 @@
 #define HCMD_SUCCESS 2
 
 /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR - error flags */
-#define SPICO_ROM_FAILED		    (1 <<  0)
-#define UNKNOWN_FRAME		    (1 <<  1)
-#define TARGET_BER_NOT_MET		    (1 <<  2)
-#define FAILED_SERDES_INTERNAL_LOOPBACK (1 <<  3)
-#define FAILED_SERDES_INIT		    (1 <<  4)
-#define FAILED_LNI_POLLING		    (1 <<  5)
-#define FAILED_LNI_DEBOUNCE		    (1 <<  6)
-#define FAILED_LNI_ESTBCOMM		    (1 <<  7)
-#define FAILED_LNI_OPTEQ		    (1 <<  8)
-#define FAILED_LNI_VERIFY_CAP1	    (1 <<  9)
-#define FAILED_LNI_VERIFY_CAP2	    (1 << 10)
-#define FAILED_LNI_CONFIGLT		    (1 << 11)
+#define SPICO_ROM_FAILED		BIT(0)
+#define UNKNOWN_FRAME			BIT(1)
+#define TARGET_BER_NOT_MET		BIT(2)
+#define FAILED_SERDES_INTERNAL_LOOPBACK	BIT(3)
+#define FAILED_SERDES_INIT		BIT(4)
+#define FAILED_LNI_POLLING		BIT(5)
+#define FAILED_LNI_DEBOUNCE		BIT(6)
+#define FAILED_LNI_ESTBCOMM		BIT(7)
+#define FAILED_LNI_OPTEQ		BIT(8)
+#define FAILED_LNI_VERIFY_CAP1		BIT(9)
+#define FAILED_LNI_VERIFY_CAP2		BIT(10)
+#define FAILED_LNI_CONFIGLT		BIT(11)
+#define HOST_HANDSHAKE_TIMEOUT		BIT(12)
 
 #define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \
 			| FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \
 			| FAILED_LNI_VERIFY_CAP1 \
 			| FAILED_LNI_VERIFY_CAP2 \
-			| FAILED_LNI_CONFIGLT)
+			| FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT)
 
 /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */
-#define HOST_REQ_DONE	   (1 << 0)
-#define BC_PWR_MGM_MSG	   (1 << 1)
-#define BC_SMA_MSG		   (1 << 2)
-#define BC_BCC_UNKOWN_MSG	   (1 << 3)
-#define BC_IDLE_UNKNOWN_MSG	   (1 << 4)
-#define EXT_DEVICE_CFG_REQ	   (1 << 5)
-#define VERIFY_CAP_FRAME	   (1 << 6)
-#define LINKUP_ACHIEVED	   (1 << 7)
-#define LINK_GOING_DOWN	   (1 << 8)
-#define LINK_WIDTH_DOWNGRADED  (1 << 9)
+#define HOST_REQ_DONE		BIT(0)
+#define BC_PWR_MGM_MSG		BIT(1)
+#define BC_SMA_MSG		BIT(2)
+#define BC_BCC_UNKNOWN_MSG	BIT(3)
+#define BC_IDLE_UNKNOWN_MSG	BIT(4)
+#define EXT_DEVICE_CFG_REQ	BIT(5)
+#define VERIFY_CAP_FRAME	BIT(6)
+#define LINKUP_ACHIEVED		BIT(7)
+#define LINK_GOING_DOWN		BIT(8)
+#define LINK_WIDTH_DOWNGRADED	BIT(9)
 
 /* DC_DC8051_CFG_EXT_DEV_1.REQ_TYPE - 8051 host requests */
 #define HREQ_LOAD_CONFIG	0x01
@@ -335,14 +335,14 @@
  * the CSR fields hold multiples of this value.
  */
 #define RCV_SHIFT 3
-#define RCV_INCREMENT (1 << RCV_SHIFT)
+#define RCV_INCREMENT BIT(RCV_SHIFT)
 
 /*
  * Receive header queue entry increment - the CSR holds multiples of
  * this value.
  */
 #define HDRQ_SIZE_SHIFT 5
-#define HDRQ_INCREMENT (1 << HDRQ_SIZE_SHIFT)
+#define HDRQ_INCREMENT BIT(HDRQ_SIZE_SHIFT)
 
 /*
  * Freeze handling flags
@@ -371,6 +371,9 @@
 #define NUM_LANE_FIELDS    0x8
 
 /* 8051 general register Field IDs */
+#define LINK_OPTIMIZATION_SETTINGS   0x00
+#define LINK_TUNING_PARAMETERS	     0x02
+#define DC_HOST_COMM_SETTINGS	     0x03
 #define TX_SETTINGS		     0x06
 #define VERIFY_CAP_LOCAL_PHY	     0x07
 #define VERIFY_CAP_LOCAL_FABRIC	     0x08
@@ -387,6 +390,10 @@
 #define LINK_QUALITY_INFO            0x14
 #define REMOTE_DEVICE_ID	     0x15
 
+/* 8051 lane specific register field IDs */
+#define TX_EQ_SETTINGS		0x00
+#define CHANNEL_LOSS_SETTINGS	0x05
+
 /* Lane ID for general configuration registers */
 #define GENERAL_CONFIG 4
 
@@ -511,8 +518,10 @@ enum {
 #define LCB_CRC_48B			0x2	/* 48b CRC */
 #define LCB_CRC_12B_16B_PER_LANE	0x3	/* 12b-16b per lane CRC */
 
-/* the following enum is (almost) a copy/paste of the definition
- * in the OPA spec, section 20.2.2.6.8 (PortInfo) */
+/*
+ * the following enum is (almost) a copy/paste of the definition
+ * in the OPA spec, section 20.2.2.6.8 (PortInfo)
+ */
 enum {
 	PORT_LTP_CRC_MODE_NONE = 0,
 	PORT_LTP_CRC_MODE_14 = 1, /* 14-bit LTP CRC mode (optional) */
@@ -614,6 +623,8 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32);
 #define NUM_PCIE_SERDES 16	/* number of PCIe serdes on the SBus */
 extern const u8 pcie_serdes_broadcast[];
 extern const u8 pcie_pcs_addrs[2][NUM_PCIE_SERDES];
+extern uint platform_config_load;
+
 /* SBus commands */
 #define RESET_SBUS_RECEIVER 0x20
 #define WRITE_SBUS_RECEIVER 0x21
@@ -629,6 +640,42 @@ int load_firmware(struct hfi1_devdata *dd);
 void dispose_firmware(void);
 int acquire_hw_mutex(struct hfi1_devdata *dd);
 void release_hw_mutex(struct hfi1_devdata *dd);
+
+/*
+ * Bitmask of dynamic access for ASIC block chip resources.  Each HFI has its
+ * own range of bits for the resource so it can clear its own bits on
+ * starting and exiting.  If either HFI has the resource bit set, the
+ * resource is in use.  The separate bit ranges are:
+ *	HFI0 bits  7:0
+ *	HFI1 bits 15:8
+ */
+#define CR_SBUS  0x01	/* SBUS, THERM, and PCIE registers */
+#define CR_EPROM 0x02	/* EEP, GPIO registers */
+#define CR_I2C1  0x04	/* QSFP1_OE register */
+#define CR_I2C2  0x08	/* QSFP2_OE register */
+#define CR_DYN_SHIFT 8	/* dynamic flag shift */
+#define CR_DYN_MASK  ((1ull << CR_DYN_SHIFT) - 1)
+
+/*
+ * Bitmask of static ASIC states these are outside of the dynamic ASIC
+ * block chip resources above.  These are to be set once and never cleared.
+ * Must be holding the SBus dynamic flag when setting.
+ */
+#define CR_THERM_INIT	0x010000
+
+int acquire_chip_resource(struct hfi1_devdata *dd, u32 resource, u32 mswait);
+void release_chip_resource(struct hfi1_devdata *dd, u32 resource);
+bool check_chip_resource(struct hfi1_devdata *dd, u32 resource,
+			 const char *func);
+void init_chip_resources(struct hfi1_devdata *dd);
+void finish_chip_resources(struct hfi1_devdata *dd);
+
+/* ms wait time for access to an SBus resoure */
+#define SBUS_TIMEOUT 4000 /* long enough for a FW download and SBR */
+
+/* ms wait time for a qsfp (i2c) chain to become available */
+#define QSFP_WAIT 20000 /* long enough for FW update to the F4 uc */
+
 void fabric_serdes_reset(struct hfi1_devdata *dd);
 int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
 
@@ -644,13 +691,17 @@ void handle_verify_cap(struct work_struct *work);
 void handle_freeze(struct work_struct *work);
 void handle_link_up(struct work_struct *work);
 void handle_link_down(struct work_struct *work);
+void handle_8051_request(struct work_struct *work);
 void handle_link_downgrade(struct work_struct *work);
 void handle_link_bounce(struct work_struct *work);
 void handle_sma_message(struct work_struct *work);
+void reset_qsfp(struct hfi1_pportdata *ppd);
+void qsfp_event(struct work_struct *work);
 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
 int send_idle_sma(struct hfi1_devdata *dd, u64 message);
+int load_8051_config(struct hfi1_devdata *, u8, u8, u32);
+int read_8051_config(struct hfi1_devdata *, u8, u8, u32 *);
 int start_link(struct hfi1_pportdata *ppd);
-void init_qsfp(struct hfi1_pportdata *ppd);
 int bringup_serdes(struct hfi1_pportdata *ppd);
 void set_intr_state(struct hfi1_devdata *dd, u32 enable);
 void apply_link_downgrade_policy(struct hfi1_pportdata *ppd,
@@ -690,6 +741,8 @@ u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl);
 u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data);
 u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl);
 u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data);
+u32 read_logical_state(struct hfi1_devdata *dd);
+void force_recv_intr(struct hfi1_ctxtdata *rcd);
 
 /* Per VL indexes */
 enum {
@@ -785,8 +838,14 @@ enum {
 	C_SW_CPU_RCV_LIM,
 	C_SW_VTX_WAIT,
 	C_SW_PIO_WAIT,
+	C_SW_PIO_DRAIN,
 	C_SW_KMEM_WAIT,
 	C_SW_SEND_SCHED,
+	C_SDMA_DESC_FETCHED_CNT,
+	C_SDMA_INT_CNT,
+	C_SDMA_ERR_CNT,
+	C_SDMA_IDLE_INT_CNT,
+	C_SDMA_PROGRESS_INT_CNT,
 /* MISC_ERR_STATUS */
 	C_MISC_PLL_LOCK_FAIL_ERR,
 	C_MISC_MBIST_FAIL_ERR,
@@ -1275,10 +1334,8 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
 		  u32 type, unsigned long pa, u16 order);
 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt);
-u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
-		    u64 **cntrp);
-u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
-			char **namep, u64 **cntrp);
+u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp);
+u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp);
 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd);
 int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which);
 int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/staging/rdma/hfi1/chip_registers.h
index 701e9e1012a6..770f05c9b8de 100644
--- a/drivers/staging/rdma/hfi1/chip_registers.h
+++ b/drivers/staging/rdma/hfi1/chip_registers.h
@@ -2,14 +2,13 @@
 #define DEF_CHIP_REG
 
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -21,8 +20,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -551,6 +548,17 @@
 #define CCE_MSIX_TABLE_UPPER (CCE + 0x000000100008)
 #define CCE_MSIX_TABLE_UPPER_RESETCSR 0x0000000100000000ull
 #define CCE_MSIX_VEC_CLR_WITHOUT_INT (CCE + 0x000000110400)
+#define CCE_PCIE_CTRL (CCE + 0x0000000000C0)
+#define CCE_PCIE_CTRL_PCIE_LANE_BUNDLE_MASK 0x3ull
+#define CCE_PCIE_CTRL_PCIE_LANE_BUNDLE_SHIFT 0
+#define CCE_PCIE_CTRL_PCIE_LANE_DELAY_MASK 0xFull
+#define CCE_PCIE_CTRL_PCIE_LANE_DELAY_SHIFT 2
+#define CCE_PCIE_CTRL_XMT_MARGIN_OVERWRITE_ENABLE_SHIFT 8
+#define CCE_PCIE_CTRL_XMT_MARGIN_SHIFT 9
+#define CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_OVERWRITE_ENABLE_MASK 0x1ull
+#define CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_OVERWRITE_ENABLE_SHIFT 12
+#define CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_MASK 0x7ull
+#define CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_SHIFT 13
 #define CCE_REVISION (CCE + 0x000000000000)
 #define CCE_REVISION2 (CCE + 0x000000000008)
 #define CCE_REVISION2_HFI_ID_MASK 0x1ull
@@ -1270,6 +1278,9 @@
 #define SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SHIFT 0
 #define SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SMASK 0xFFFFull
 #define PCIE_CFG_REG_PL2 (PCIE + 0x000000000708)
+#define PCIE_CFG_REG_PL3 (PCIE + 0x00000000070C)
+#define PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT 27
+#define PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK 0x38000000
 #define PCIE_CFG_REG_PL102 (PCIE + 0x000000000898)
 #define PCIE_CFG_REG_PL102_GEN3_EQ_POST_CURSOR_PSET_SHIFT 12
 #define PCIE_CFG_REG_PL102_GEN3_EQ_CURSOR_PSET_SHIFT 6
@@ -1290,5 +1301,6 @@
 #define CCE_INT_BLOCKED (CCE + 0x000000110C00)
 #define SEND_DMA_IDLE_CNT (TXE + 0x000000200040)
 #define SEND_DMA_DESC_FETCHED_CNT (TXE + 0x000000200058)
+#define CCE_MSIX_PBA_OFFSET 0X0110000
 
 #endif          /* DEF_CHIP_REG */
diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/staging/rdma/hfi1/common.h
index 5dd92720faae..e9b6bb322025 100644
--- a/drivers/staging/rdma/hfi1/common.h
+++ b/drivers/staging/rdma/hfi1/common.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -341,19 +338,16 @@ struct hfi1_message_header {
 #define FULL_MGMT_P_KEY      0xFFFF
 
 #define DEFAULT_P_KEY LIM_MGMT_P_KEY
-#define HFI1_PERMISSIVE_LID 0xFFFF
 #define HFI1_AETH_CREDIT_SHIFT 24
 #define HFI1_AETH_CREDIT_MASK 0x1F
 #define HFI1_AETH_CREDIT_INVAL 0x1F
 #define HFI1_MSN_MASK 0xFFFFFF
-#define HFI1_QPN_MASK 0xFFFFFF
 #define HFI1_FECN_SHIFT 31
 #define HFI1_FECN_MASK 1
-#define HFI1_FECN_SMASK (1 << HFI1_FECN_SHIFT)
+#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
 #define HFI1_BECN_SHIFT 30
 #define HFI1_BECN_MASK 1
-#define HFI1_BECN_SMASK (1 << HFI1_BECN_SHIFT)
-#define HFI1_MULTICAST_LID_BASE 0xC000
+#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
 
 static inline __u64 rhf_to_cpu(const __le32 *rbuf)
 {
diff --git a/drivers/staging/rdma/hfi1/cq.c b/drivers/staging/rdma/hfi1/cq.c
deleted file mode 100644
index 4f046ffe7e60..000000000000
--- a/drivers/staging/rdma/hfi1/cq.c
+++ /dev/null
@@ -1,558 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/kthread.h>
-
-#include "verbs.h"
-#include "hfi.h"
-
-/**
- * hfi1_cq_enter - add a new entry to the completion queue
- * @cq: completion queue
- * @entry: work completion entry to add
- * @sig: true if @entry is a solicited entry
- *
- * This may be called with qp->s_lock held.
- */
-void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
-{
-	struct hfi1_cq_wc *wc;
-	unsigned long flags;
-	u32 head;
-	u32 next;
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	/*
-	 * Note that the head pointer might be writable by user processes.
-	 * Take care to verify it is a sane value.
-	 */
-	wc = cq->queue;
-	head = wc->head;
-	if (head >= (unsigned) cq->ibcq.cqe) {
-		head = cq->ibcq.cqe;
-		next = 0;
-	} else
-		next = head + 1;
-	if (unlikely(next == wc->tail)) {
-		spin_unlock_irqrestore(&cq->lock, flags);
-		if (cq->ibcq.event_handler) {
-			struct ib_event ev;
-
-			ev.device = cq->ibcq.device;
-			ev.element.cq = &cq->ibcq;
-			ev.event = IB_EVENT_CQ_ERR;
-			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
-		}
-		return;
-	}
-	if (cq->ip) {
-		wc->uqueue[head].wr_id = entry->wr_id;
-		wc->uqueue[head].status = entry->status;
-		wc->uqueue[head].opcode = entry->opcode;
-		wc->uqueue[head].vendor_err = entry->vendor_err;
-		wc->uqueue[head].byte_len = entry->byte_len;
-		wc->uqueue[head].ex.imm_data =
-			(__u32 __force)entry->ex.imm_data;
-		wc->uqueue[head].qp_num = entry->qp->qp_num;
-		wc->uqueue[head].src_qp = entry->src_qp;
-		wc->uqueue[head].wc_flags = entry->wc_flags;
-		wc->uqueue[head].pkey_index = entry->pkey_index;
-		wc->uqueue[head].slid = entry->slid;
-		wc->uqueue[head].sl = entry->sl;
-		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
-		wc->uqueue[head].port_num = entry->port_num;
-		/* Make sure entry is written before the head index. */
-		smp_wmb();
-	} else
-		wc->kqueue[head] = *entry;
-	wc->head = next;
-
-	if (cq->notify == IB_CQ_NEXT_COMP ||
-	    (cq->notify == IB_CQ_SOLICITED &&
-	     (solicited || entry->status != IB_WC_SUCCESS))) {
-		struct kthread_worker *worker;
-		/*
-		 * This will cause send_complete() to be called in
-		 * another thread.
-		 */
-		smp_read_barrier_depends(); /* see hfi1_cq_exit */
-		worker = cq->dd->worker;
-		if (likely(worker)) {
-			cq->notify = IB_CQ_NONE;
-			cq->triggered++;
-			queue_kthread_work(worker, &cq->comptask);
-		}
-	}
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-}
-
-/**
- * hfi1_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-	struct hfi1_cq *cq = to_icq(ibcq);
-	struct hfi1_cq_wc *wc;
-	unsigned long flags;
-	int npolled;
-	u32 tail;
-
-	/* The kernel can only poll a kernel completion queue */
-	if (cq->ip) {
-		npolled = -EINVAL;
-		goto bail;
-	}
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	wc = cq->queue;
-	tail = wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (tail == wc->head)
-			break;
-		/* The kernel doesn't need a RMB since it has the lock. */
-		*entry = wc->kqueue[tail];
-		if (tail >= cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	wc->tail = tail;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-	return npolled;
-}
-
-static void send_complete(struct kthread_work *work)
-{
-	struct hfi1_cq *cq = container_of(work, struct hfi1_cq, comptask);
-
-	/*
-	 * The completion handler will most likely rearm the notification
-	 * and poll for all pending entries.  If a new completion entry
-	 * is added while we are in this routine, queue_work()
-	 * won't call us again until we return so we check triggered to
-	 * see if we need to call the handler again.
-	 */
-	for (;;) {
-		u8 triggered = cq->triggered;
-
-		/*
-		 * IPoIB connected mode assumes the callback is from a
-		 * soft IRQ. We simulate this by blocking "bottom halves".
-		 * See the implementation for ipoib_cm_handle_tx_wc(),
-		 * netif_tx_lock_bh() and netif_tx_lock().
-		 */
-		local_bh_disable();
-		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
-		local_bh_enable();
-
-		if (cq->triggered == triggered)
-			return;
-	}
-}
-
-/**
- * hfi1_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
- * @attr: creation attributes
- * @context: unused by the driver
- * @udata: user data for libibverbs.so
- *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
- * Called by ib_create_cq() in the generic verbs code.
- */
-struct ib_cq *hfi1_create_cq(
-	struct ib_device *ibdev,
-	const struct ib_cq_init_attr *attr,
-	struct ib_ucontext *context,
-	struct ib_udata *udata)
-{
-	struct hfi1_ibdev *dev = to_idev(ibdev);
-	struct hfi1_cq *cq;
-	struct hfi1_cq_wc *wc;
-	struct ib_cq *ret;
-	u32 sz;
-	unsigned int entries = attr->cqe;
-
-	if (attr->flags)
-		return ERR_PTR(-EINVAL);
-
-	if (entries < 1 || entries > hfi1_max_cqes)
-		return ERR_PTR(-EINVAL);
-
-	/* Allocate the completion queue structure. */
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-	if (!cq)
-		return ERR_PTR(-ENOMEM);
-
-	/*
-	 * Allocate the completion queue entries and head/tail pointers.
-	 * This is allocated separately so that it can be resized and
-	 * also mapped into user space.
-	 * We need to use vmalloc() in order to support mmap and large
-	 * numbers of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
-	else
-		sz += sizeof(struct ib_wc) * (entries + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_cq;
-	}
-
-	/*
-	 * Return the address of the WC as the offset to mmap.
-	 * See hfi1_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-
-		cq->ip = hfi1_create_mmap_info(dev, sz, context, wc);
-		if (!cq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wc;
-		}
-
-		err = ib_copy_to_udata(udata, &cq->ip->offset,
-				       sizeof(cq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		cq->ip = NULL;
-
-	spin_lock(&dev->n_cqs_lock);
-	if (dev->n_cqs_allocated == hfi1_max_cqs) {
-		spin_unlock(&dev->n_cqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_cqs_allocated++;
-	spin_unlock(&dev->n_cqs_lock);
-
-	if (cq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	/*
-	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
-	 * The number of entries should be >= the number requested or return
-	 * an error.
-	 */
-	cq->dd = dd_from_dev(dev);
-	cq->ibcq.cqe = entries;
-	cq->notify = IB_CQ_NONE;
-	cq->triggered = 0;
-	spin_lock_init(&cq->lock);
-	init_kthread_work(&cq->comptask, send_complete);
-	wc->head = 0;
-	wc->tail = 0;
-	cq->queue = wc;
-
-	ret = &cq->ibcq;
-
-	goto done;
-
-bail_ip:
-	kfree(cq->ip);
-bail_wc:
-	vfree(wc);
-bail_cq:
-	kfree(cq);
-done:
-	return ret;
-}
-
-/**
- * hfi1_destroy_cq - destroy a completion queue
- * @ibcq: the completion queue to destroy.
- *
- * Returns 0 for success.
- *
- * Called by ib_destroy_cq() in the generic verbs code.
- */
-int hfi1_destroy_cq(struct ib_cq *ibcq)
-{
-	struct hfi1_ibdev *dev = to_idev(ibcq->device);
-	struct hfi1_cq *cq = to_icq(ibcq);
-
-	flush_kthread_work(&cq->comptask);
-	spin_lock(&dev->n_cqs_lock);
-	dev->n_cqs_allocated--;
-	spin_unlock(&dev->n_cqs_lock);
-	if (cq->ip)
-		kref_put(&cq->ip->ref, hfi1_release_mmap_info);
-	else
-		vfree(cq->queue);
-	kfree(cq);
-
-	return 0;
-}
-
-/**
- * hfi1_req_notify_cq - change the notification type for a completion queue
- * @ibcq: the completion queue
- * @notify_flags: the type of notification to request
- *
- * Returns 0 for success.
- *
- * This may be called from interrupt context.  Also called by
- * ib_req_notify_cq() in the generic verbs code.
- */
-int hfi1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-	struct hfi1_cq *cq = to_icq(ibcq);
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&cq->lock, flags);
-	/*
-	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
-	 */
-	if (cq->notify != IB_CQ_NEXT_COMP)
-		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
-
-	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
-	    cq->queue->head != cq->queue->tail)
-		ret = 1;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	return ret;
-}
-
-/**
- * hfi1_resize_cq - change the size of the CQ
- * @ibcq: the completion queue
- *
- * Returns 0 for success.
- */
-int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
-{
-	struct hfi1_cq *cq = to_icq(ibcq);
-	struct hfi1_cq_wc *old_wc;
-	struct hfi1_cq_wc *wc;
-	u32 head, tail, n;
-	int ret;
-	u32 sz;
-
-	if (cqe < 1 || cqe > hfi1_max_cqes) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
-	else
-		sz += sizeof(struct ib_wc) * (cqe + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	/* Check that we can write the offset to mmap. */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		__u64 offset = 0;
-
-		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
-		if (ret)
-			goto bail_free;
-	}
-
-	spin_lock_irq(&cq->lock);
-	/*
-	 * Make sure head and tail are sane since they
-	 * might be user writable.
-	 */
-	old_wc = cq->queue;
-	head = old_wc->head;
-	if (head > (u32) cq->ibcq.cqe)
-		head = (u32) cq->ibcq.cqe;
-	tail = old_wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	if (head < tail)
-		n = cq->ibcq.cqe + 1 + head - tail;
-	else
-		n = head - tail;
-	if (unlikely((u32)cqe < n)) {
-		ret = -EINVAL;
-		goto bail_unlock;
-	}
-	for (n = 0; tail != head; n++) {
-		if (cq->ip)
-			wc->uqueue[n] = old_wc->uqueue[tail];
-		else
-			wc->kqueue[n] = old_wc->kqueue[tail];
-		if (tail == (u32) cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	cq->ibcq.cqe = cqe;
-	wc->head = n;
-	wc->tail = 0;
-	cq->queue = wc;
-	spin_unlock_irq(&cq->lock);
-
-	vfree(old_wc);
-
-	if (cq->ip) {
-		struct hfi1_ibdev *dev = to_idev(ibcq->device);
-		struct hfi1_mmap_info *ip = cq->ip;
-
-		hfi1_update_mmap_info(dev, ip, sz, wc);
-
-		/*
-		 * Return the offset to mmap.
-		 * See hfi1_mmap() for details.
-		 */
-		if (udata && udata->outlen >= sizeof(__u64)) {
-			ret = ib_copy_to_udata(udata, &ip->offset,
-					       sizeof(ip->offset));
-			if (ret)
-				goto bail;
-		}
-
-		spin_lock_irq(&dev->pending_lock);
-		if (list_empty(&ip->pending_mmaps))
-			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = 0;
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&cq->lock);
-bail_free:
-	vfree(wc);
-bail:
-	return ret;
-}
-
-int hfi1_cq_init(struct hfi1_devdata *dd)
-{
-	int ret = 0;
-	int cpu;
-	struct task_struct *task;
-
-	if (dd->worker)
-		return 0;
-	dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
-	if (!dd->worker)
-		return -ENOMEM;
-	init_kthread_worker(dd->worker);
-	task = kthread_create_on_node(
-		kthread_worker_fn,
-		dd->worker,
-		dd->assigned_node_id,
-		"hfi1_cq%d", dd->unit);
-	if (IS_ERR(task))
-		goto task_fail;
-	cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
-	kthread_bind(task, cpu);
-	wake_up_process(task);
-out:
-	return ret;
-task_fail:
-	ret = PTR_ERR(task);
-	kfree(dd->worker);
-	dd->worker = NULL;
-	goto out;
-}
-
-void hfi1_cq_exit(struct hfi1_devdata *dd)
-{
-	struct kthread_worker *worker;
-
-	worker = dd->worker;
-	if (!worker)
-		return;
-	/* blocks future queuing from send_complete() */
-	dd->worker = NULL;
-	smp_wmb(); /* See hfi1_cq_enter */
-	flush_kthread_worker(worker);
-	kthread_stop(worker->task);
-	kfree(worker);
-}
diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c
index acd2269e9f14..dbab9d9cc288 100644
--- a/drivers/staging/rdma/hfi1/debugfs.c
+++ b/drivers/staging/rdma/hfi1/debugfs.c
@@ -1,13 +1,12 @@
 #ifdef CONFIG_DEBUG_FS
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -19,8 +18,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,6 +49,7 @@
 #include <linux/seq_file.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
+#include <linux/module.h>
 
 #include "hfi.h"
 #include "debugfs.h"
@@ -71,6 +69,7 @@ static const struct seq_operations _##name##_seq_ops = { \
 	.stop  = _##name##_seq_stop, \
 	.show  = _##name##_seq_show \
 }
+
 #define DEBUGFS_SEQ_FILE_OPEN(name) \
 static int _##name##_open(struct inode *inode, struct file *s) \
 { \
@@ -102,7 +101,6 @@ do { \
 		pr_warn("create of %s failed\n", name); \
 } while (0)
 
-
 #define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
 	DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
 
@@ -127,7 +125,6 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
 	return pos;
 }
 
-
 static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
 __releases(RCU)
 {
@@ -151,8 +148,8 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v)
 	if (!n_packets && !n_bytes)
 		return SEQ_SKIP;
 	seq_printf(s, "%02llx %llu/%llu\n", i,
-		(unsigned long long) n_packets,
-		(unsigned long long) n_bytes);
+		   (unsigned long long)n_packets,
+		   (unsigned long long)n_bytes);
 
 	return 0;
 }
@@ -247,7 +244,7 @@ __acquires(RCU)
 }
 
 static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
-				   loff_t *pos)
+				loff_t *pos)
 {
 	struct qp_iter *iter = iter_ptr;
 
@@ -308,7 +305,6 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos)
 	return pos;
 }
 
-
 static void _sdes_seq_stop(struct seq_file *s, void *v)
 __releases(RCU)
 {
@@ -341,7 +337,7 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf,
 
 	rcu_read_lock();
 	dd = private2dd(file);
-	avail = hfi1_read_cntrs(dd, *ppos, NULL, &counters);
+	avail = hfi1_read_cntrs(dd, NULL, &counters);
 	rval =  simple_read_from_buffer(buf, count, ppos, counters, avail);
 	rcu_read_unlock();
 	return rval;
@@ -358,7 +354,7 @@ static ssize_t dev_names_read(struct file *file, char __user *buf,
 
 	rcu_read_lock();
 	dd = private2dd(file);
-	avail = hfi1_read_cntrs(dd, *ppos, &names, NULL);
+	avail = hfi1_read_cntrs(dd, &names, NULL);
 	rval =  simple_read_from_buffer(buf, count, ppos, names, avail);
 	rcu_read_unlock();
 	return rval;
@@ -385,8 +381,7 @@ static ssize_t portnames_read(struct file *file, char __user *buf,
 
 	rcu_read_lock();
 	dd = private2dd(file);
-	/* port number n/a here since names are constant */
-	avail = hfi1_read_portcntrs(dd, *ppos, 0, &names, NULL);
+	avail = hfi1_read_portcntrs(dd->pport, &names, NULL);
 	rval = simple_read_from_buffer(buf, count, ppos, names, avail);
 	rcu_read_unlock();
 	return rval;
@@ -394,28 +389,150 @@ static ssize_t portnames_read(struct file *file, char __user *buf,
 
 /* read the per-port counters */
 static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf,
-				size_t count, loff_t *ppos)
+				      size_t count, loff_t *ppos)
 {
 	u64 *counters;
 	size_t avail;
-	struct hfi1_devdata *dd;
 	struct hfi1_pportdata *ppd;
 	ssize_t rval;
 
 	rcu_read_lock();
 	ppd = private2ppd(file);
-	dd = ppd->dd;
-	avail = hfi1_read_portcntrs(dd, *ppos, ppd->port - 1, NULL, &counters);
+	avail = hfi1_read_portcntrs(ppd, NULL, &counters);
 	rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
 	rcu_read_unlock();
 	return rval;
 }
 
+static void check_dyn_flag(u64 scratch0, char *p, int size, int *used,
+			   int this_hfi, int hfi, u32 flag, const char *what)
+{
+	u32 mask;
+
+	mask = flag << (hfi ? CR_DYN_SHIFT : 0);
+	if (scratch0 & mask) {
+		*used += scnprintf(p + *used, size - *used,
+				   "  0x%08x - HFI%d %s in use, %s device\n",
+				   mask, hfi, what,
+				   this_hfi == hfi ? "this" : "other");
+	}
+}
+
+static ssize_t asic_flags_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct hfi1_pportdata *ppd;
+	struct hfi1_devdata *dd;
+	u64 scratch0;
+	char *tmp;
+	int ret = 0;
+	int size;
+	int used;
+	int i;
+
+	rcu_read_lock();
+	ppd = private2ppd(file);
+	dd = ppd->dd;
+	size = PAGE_SIZE;
+	used = 0;
+	tmp = kmalloc(size, GFP_KERNEL);
+	if (!tmp) {
+		rcu_read_unlock();
+		return -ENOMEM;
+	}
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	used += scnprintf(tmp + used, size - used,
+			  "Resource flags: 0x%016llx\n", scratch0);
+
+	/* check permanent flag */
+	if (scratch0 & CR_THERM_INIT) {
+		used += scnprintf(tmp + used, size - used,
+				  "  0x%08x - thermal monitoring initialized\n",
+				  (u32)CR_THERM_INIT);
+	}
+
+	/* check each dynamic flag on each HFI */
+	for (i = 0; i < 2; i++) {
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_SBUS, "SBus");
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_EPROM, "EPROM");
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_I2C1, "i2c chain 1");
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_I2C2, "i2c chain 2");
+	}
+	used += scnprintf(tmp + used, size - used, "Write bits to clear\n");
+
+	ret = simple_read_from_buffer(buf, count, ppos, tmp, used);
+	rcu_read_unlock();
+	kfree(tmp);
+	return ret;
+}
+
+static ssize_t asic_flags_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct hfi1_pportdata *ppd;
+	struct hfi1_devdata *dd;
+	char *buff;
+	int ret;
+	unsigned long long value;
+	u64 scratch0;
+	u64 clear;
+
+	rcu_read_lock();
+	ppd = private2ppd(file);
+	dd = ppd->dd;
+
+	buff = kmalloc(count + 1, GFP_KERNEL);
+	if (!buff) {
+		ret = -ENOMEM;
+		goto do_return;
+	}
+
+	ret = copy_from_user(buff, buf, count);
+	if (ret > 0) {
+		ret = -EFAULT;
+		goto do_free;
+	}
+
+	/* zero terminate and read the expected integer */
+	buff[count] = 0;
+	ret = kstrtoull(buff, 0, &value);
+	if (ret)
+		goto do_free;
+	clear = value;
+
+	/* obtain exclusive access */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+	acquire_hw_mutex(dd);
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	scratch0 &= ~clear;
+	write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+	/* force write to be visible to other HFI on another OS */
+	(void)read_csr(dd, ASIC_CFG_SCRATCH);
+
+	release_hw_mutex(dd);
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+
+	/* return the number of bytes written */
+	ret = count;
+
+ do_free:
+	kfree(buff);
+ do_return:
+	rcu_read_unlock();
+	return ret;
+}
+
 /*
  * read the per-port QSFP data for ppd
  */
 static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
-			   size_t count, loff_t *ppos)
+				 size_t count, loff_t *ppos)
 {
 	struct hfi1_pportdata *ppd;
 	char *tmp;
@@ -439,7 +556,7 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
 
 /* Do an i2c write operation on the chain for the given HFI. */
 static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos, u32 target)
+				   size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -451,6 +568,16 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
 	rcu_read_lock();
 	ppd = private2ppd(file);
 
+	/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
+	i2c_addr = (*ppos >> 16) & 0xffff;
+	offset = *ppos & 0xffff;
+
+	/* explicitly reject invalid address 0 to catch cp and cat */
+	if (i2c_addr == 0) {
+		ret = -EINVAL;
+		goto _return;
+	}
+
 	buff = kmalloc(count, GFP_KERNEL);
 	if (!buff) {
 		ret = -ENOMEM;
@@ -463,9 +590,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
 		goto _free;
 	}
 
-	i2c_addr = (*ppos >> 16) & 0xff;
-	offset = *ppos & 0xffff;
-
 	total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count);
 	if (total_written < 0) {
 		ret = total_written;
@@ -485,21 +609,21 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
 
 /* Do an i2c write operation on chain for HFI 0. */
 static ssize_t i2c1_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_write(file, buf, count, ppos, 0);
 }
 
 /* Do an i2c write operation on chain for HFI 1. */
 static ssize_t i2c2_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_write(file, buf, count, ppos, 1);
 }
 
 /* Do an i2c read operation on the chain for the given HFI. */
 static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos, u32 target)
+				  size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -511,15 +635,22 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
 	rcu_read_lock();
 	ppd = private2ppd(file);
 
+	/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
+	i2c_addr = (*ppos >> 16) & 0xffff;
+	offset = *ppos & 0xffff;
+
+	/* explicitly reject invalid address 0 to catch cp and cat */
+	if (i2c_addr == 0) {
+		ret = -EINVAL;
+		goto _return;
+	}
+
 	buff = kmalloc(count, GFP_KERNEL);
 	if (!buff) {
 		ret = -ENOMEM;
 		goto _return;
 	}
 
-	i2c_addr = (*ppos >> 16) & 0xff;
-	offset = *ppos & 0xffff;
-
 	total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count);
 	if (total_read < 0) {
 		ret = total_read;
@@ -545,21 +676,21 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
 
 /* Do an i2c read operation on chain for HFI 0. */
 static ssize_t i2c1_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				 size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_read(file, buf, count, ppos, 0);
 }
 
 /* Do an i2c read operation on chain for HFI 1. */
 static ssize_t i2c2_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				 size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_read(file, buf, count, ppos, 1);
 }
 
 /* Do a QSFP write operation on the i2c chain for the given HFI. */
 static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos, u32 target)
+				    size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -605,21 +736,21 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
 
 /* Do a QSFP write operation on i2c chain for HFI 0. */
 static ssize_t qsfp1_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				   size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_write(file, buf, count, ppos, 0);
 }
 
 /* Do a QSFP write operation on i2c chain for HFI 1. */
 static ssize_t qsfp2_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				   size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_write(file, buf, count, ppos, 1);
 }
 
 /* Do a QSFP read operation on the i2c chain for the given HFI. */
 static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos, u32 target)
+				   size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -665,18 +796,116 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
 
 /* Do a QSFP read operation on i2c chain for HFI 0. */
 static ssize_t qsfp1_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_read(file, buf, count, ppos, 0);
 }
 
 /* Do a QSFP read operation on i2c chain for HFI 1. */
 static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_read(file, buf, count, ppos, 1);
 }
 
+static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+	int ret;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	ppd = private2ppd(fp);
+
+	ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
+	if (ret) /* failed - release the module */
+		module_put(THIS_MODULE);
+
+	return ret;
+}
+
+static int i2c1_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_open(in, fp, 0);
+}
+
+static int i2c2_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_open(in, fp, 1);
+}
+
+static int __i2c_debugfs_release(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+
+	ppd = private2ppd(fp);
+
+	release_chip_resource(ppd->dd, i2c_target(target));
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+static int i2c1_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_release(in, fp, 0);
+}
+
+static int i2c2_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_release(in, fp, 1);
+}
+
+static int __qsfp_debugfs_open(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+	int ret;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	ppd = private2ppd(fp);
+
+	ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
+	if (ret) /* failed - release the module */
+		module_put(THIS_MODULE);
+
+	return ret;
+}
+
+static int qsfp1_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_open(in, fp, 0);
+}
+
+static int qsfp2_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_open(in, fp, 1);
+}
+
+static int __qsfp_debugfs_release(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+
+	ppd = private2ppd(fp);
+
+	release_chip_resource(ppd->dd, i2c_target(target));
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+static int qsfp1_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_release(in, fp, 0);
+}
+
+static int qsfp2_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_release(in, fp, 1);
+}
+
 #define DEBUGFS_OPS(nm, readroutine, writeroutine)	\
 { \
 	.name = nm, \
@@ -687,6 +916,18 @@ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf,
 	}, \
 }
 
+#define DEBUGFS_XOPS(nm, readf, writef, openf, releasef) \
+{ \
+	.name = nm, \
+	.ops = { \
+		.read = readf, \
+		.write = writef, \
+		.llseek = generic_file_llseek, \
+		.open = openf, \
+		.release = releasef \
+	}, \
+}
+
 static const struct counter_info cntr_ops[] = {
 	DEBUGFS_OPS("counter_names", dev_names_read, NULL),
 	DEBUGFS_OPS("counters", dev_counters_read, NULL),
@@ -695,11 +936,16 @@ static const struct counter_info cntr_ops[] = {
 
 static const struct counter_info port_cntr_ops[] = {
 	DEBUGFS_OPS("port%dcounters", portcntrs_debugfs_read, NULL),
-	DEBUGFS_OPS("i2c1", i2c1_debugfs_read, i2c1_debugfs_write),
-	DEBUGFS_OPS("i2c2", i2c2_debugfs_read, i2c2_debugfs_write),
+	DEBUGFS_XOPS("i2c1", i2c1_debugfs_read, i2c1_debugfs_write,
+		     i2c1_debugfs_open, i2c1_debugfs_release),
+	DEBUGFS_XOPS("i2c2", i2c2_debugfs_read, i2c2_debugfs_write,
+		     i2c2_debugfs_open, i2c2_debugfs_release),
 	DEBUGFS_OPS("qsfp_dump%d", qsfp_debugfs_dump, NULL),
-	DEBUGFS_OPS("qsfp1", qsfp1_debugfs_read, qsfp1_debugfs_write),
-	DEBUGFS_OPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write),
+	DEBUGFS_XOPS("qsfp1", qsfp1_debugfs_read, qsfp1_debugfs_write,
+		     qsfp1_debugfs_open, qsfp1_debugfs_release),
+	DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write,
+		     qsfp2_debugfs_open, qsfp2_debugfs_release),
+	DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
 };
 
 void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
@@ -747,8 +993,8 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
 					    ibd->hfi1_ibdev_dbg,
 					    ppd,
 					    &port_cntr_ops[i].ops,
-					    port_cntr_ops[i].ops.write == NULL ?
-					    S_IRUGO : S_IRUGO|S_IWUSR);
+					    !port_cntr_ops[i].ops.write ?
+					    S_IRUGO : S_IRUGO | S_IWUSR);
 		}
 }
 
diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/staging/rdma/hfi1/debugfs.h
index 92d6fe146714..b6fb6814f1b8 100644
--- a/drivers/staging/rdma/hfi1/debugfs.h
+++ b/drivers/staging/rdma/hfi1/debugfs.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_DEBUGFS_H
 #define _HFI1_DEBUGFS_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/staging/rdma/hfi1/device.c
index 58472e5ac4e5..c05c39da83b1 100644
--- a/drivers/staging/rdma/hfi1/device.c
+++ b/drivers/staging/rdma/hfi1/device.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/staging/rdma/hfi1/device.h
index 2850ff739d81..5bb3e83cf2da 100644
--- a/drivers/staging/rdma/hfi1/device.h
+++ b/drivers/staging/rdma/hfi1/device.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_DEVICE_H
 #define _HFI1_DEVICE_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c
index 0c8831705664..c5b520bf610e 100644
--- a/drivers/staging/rdma/hfi1/diag.c
+++ b/drivers/staging/rdma/hfi1/diag.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -70,6 +67,7 @@
 #include "hfi.h"
 #include "device.h"
 #include "common.h"
+#include "verbs_txreq.h"
 #include "trace.h"
 
 #undef pr_fmt
@@ -80,15 +78,15 @@
 /* Snoop option mask */
 #define SNOOP_DROP_SEND		BIT(0)
 #define SNOOP_USE_METADATA	BIT(1)
+#define SNOOP_SET_VL0TOVL15     BIT(2)
 
 static u8 snoop_flags;
 
 /*
  * Extract packet length from LRH header.
- * Why & 0x7FF? Because len is only 11 bits in case it wasn't 0'd we throw the
- * bogus bits away. This is in Dwords so multiply by 4 to get size in bytes
+ * This is in Dwords so multiply by 4 to get size in bytes
  */
-#define HFI1_GET_PKT_LEN(x)      (((be16_to_cpu((x)->lrh[2]) & 0x7FF)) << 2)
+#define HFI1_GET_PKT_LEN(x)      (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2)
 
 enum hfi1_filter_status {
 	HFI1_FILTER_HIT,
@@ -257,7 +255,7 @@ static int hfi1_filter_ib_service_level(void *ibhdr, void *packet_data,
 static int hfi1_filter_ib_pkey(void *ibhdr, void *packet_data, void *value);
 static int hfi1_filter_direction(void *ibhdr, void *packet_data, void *value);
 
-static struct hfi1_filter_array hfi1_filters[] = {
+static const struct hfi1_filter_array hfi1_filters[] = {
 	{ hfi1_filter_lid },
 	{ hfi1_filter_dlid },
 	{ hfi1_filter_mad_mgmt_class },
@@ -860,7 +858,7 @@ static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data,
 			vl = sc4;
 		} else {
 			sl = (byte_two >> 4) & 0xf;
-			ibp = to_iport(&dd->verbs_dev.ibdev, 1);
+			ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1);
 			sc5 = ibp->sl_to_sc[sl];
 			vl = sc_to_vlt(dd, sc5);
 			if (vl != sc4) {
@@ -966,6 +964,65 @@ static ssize_t hfi1_snoop_read(struct file *fp, char __user *data,
 	return ret;
 }
 
+/**
+ * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others
+ * @ppd : ptr to hfi1 port data
+ * @value : options from user space
+ *
+ * Assumes the rest of the CM credit registers are zero from a
+ * previous global or credit reset.
+ * Leave shared count at zero for both global and all vls.
+ * In snoop mode ideally we don't use shared credits
+ * Reserve 8.5k for VL15
+ * If total credits less than 8.5kbytes return error.
+ * Divide the rest of the credits across VL0 to VL7 and if
+ * each of these levels has less than 34 credits (at least 2048 + 128 bytes)
+ * return with an error.
+ * The credit registers will be reset to zero on link negotiation or link up
+ * so this function should be activated from user space only if the port has
+ * gone past link negotiation and link up.
+ *
+ * Return -- 0 if successful else error condition
+ *
+ */
+static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd,
+					   int value)
+{
+#define  OPA_MIN_PER_VL_CREDITS  34  /* 2048 + 128 bytes */
+	struct buffer_control t;
+	int i;
+	struct hfi1_devdata *dd = ppd->dd;
+	u16  total_credits = (value >> 16) & 0xffff;
+	u16  vl15_credits = dd->vl15_init / 2;
+	u16  per_vl_credits;
+	__be16 be_per_vl_credits;
+
+	if (!(ppd->host_link_state & HLS_UP))
+		goto err_exit;
+	if (total_credits  <  vl15_credits)
+		goto err_exit;
+
+	per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL;
+
+	if (per_vl_credits < OPA_MIN_PER_VL_CREDITS)
+		goto err_exit;
+
+	memset(&t, 0, sizeof(t));
+	be_per_vl_credits = cpu_to_be16(per_vl_credits);
+
+	for (i = 0; i < TXE_NUM_DATA_VL; i++)
+		t.vl[i].dedicated = be_per_vl_credits;
+
+	t.vl[15].dedicated  = cpu_to_be16(vl15_credits);
+	return set_buffer_control(ppd, &t);
+
+err_exit:
+	snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d",
+		  ppd->host_link_state, total_credits, vl15_credits);
+
+	return -EINVAL;
+}
+
 static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
 {
 	struct hfi1_devdata *dd;
@@ -1192,6 +1249,10 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
 			snoop_flags |= SNOOP_DROP_SEND;
 		if (value & SNOOP_USE_METADATA)
 			snoop_flags |= SNOOP_USE_METADATA;
+		if (value & (SNOOP_SET_VL0TOVL15)) {
+			ppd = &dd->pport[0];  /* first port will do */
+			ret = hfi1_assign_snoop_link_credits(ppd, value);
+		}
 		break;
 	default:
 		return -ENOTTY;
@@ -1603,7 +1664,7 @@ int snoop_recv_handler(struct hfi1_packet *packet)
 /*
  * Handle snooping and capturing packets when sdma is being used.
  */
-int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc)
 {
 	pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n");
@@ -1616,20 +1677,19 @@ int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
  * bypass packets. The only way to send a bypass packet currently is to use the
  * diagpkt interface. When that interface is enable snoop/capture is not.
  */
-int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc)
 {
-	struct ahg_ib_header *ahdr = qp->s_hdr;
 	u32 hdrwords = qp->s_hdrwords;
-	struct hfi1_sge_state *ss = qp->s_cur_sge;
+	struct rvt_sge_state *ss = qp->s_cur_sge;
 	u32 len = qp->s_cur_size;
 	u32 dwords = (len + 3) >> 2;
 	u32 plen = hdrwords + dwords + 2; /* includes pbc */
 	struct hfi1_pportdata *ppd = ps->ppd;
 	struct snoop_packet *s_packet = NULL;
-	u32 *hdr = (u32 *)&ahdr->ibh;
+	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
 	u32 length = 0;
-	struct hfi1_sge_state temp_ss;
+	struct rvt_sge_state temp_ss;
 	void *data = NULL;
 	void *data_start = NULL;
 	int ret;
@@ -1638,7 +1698,7 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 	struct capture_md md;
 	u32 vl;
 	u32 hdr_len = hdrwords << 2;
-	u32 tlen = HFI1_GET_PKT_LEN(&ahdr->ibh);
+	u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr);
 
 	md.u.pbc = 0;
 
@@ -1665,7 +1725,7 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 		md.port = 1;
 		md.dir = PKT_DIR_EGRESS;
 		if (likely(pbc == 0)) {
-			vl = be16_to_cpu(ahdr->ibh.lrh[0]) >> 12;
+			vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12;
 			md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen);
 		} else {
 			md.u.pbc = 0;
@@ -1727,7 +1787,7 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 		ret = HFI1_FILTER_HIT;
 	} else {
 		ret = ppd->dd->hfi1_snoop.filter_callback(
-					&ahdr->ibh,
+					&ps->s_txreq->phdr.hdr,
 					NULL,
 					ppd->dd->hfi1_snoop.filter_value);
 	}
@@ -1759,9 +1819,16 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 				spin_unlock_irqrestore(&qp->s_lock, flags);
 			} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 				spin_lock_irqsave(&qp->s_lock, flags);
-				hfi1_rc_send_complete(qp, &ahdr->ibh);
+				hfi1_rc_send_complete(qp,
+						      &ps->s_txreq->phdr.hdr);
 				spin_unlock_irqrestore(&qp->s_lock, flags);
 			}
+
+			/*
+			 * If snoop is dropping the packet we need to put the
+			 * txreq back because no one else will.
+			 */
+			hfi1_put_txreq(ps->s_txreq);
 			return 0;
 		}
 		break;
diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/staging/rdma/hfi1/dma.c
index e03bd735173c..7e8dab892848 100644
--- a/drivers/staging/rdma/hfi1/dma.c
+++ b/drivers/staging/rdma/hfi1/dma.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,7 +49,7 @@
 
 #include "verbs.h"
 
-#define BAD_DMA_ADDRESS ((u64) 0)
+#define BAD_DMA_ADDRESS ((u64)0)
 
 /*
  * The following functions implement driver specific replacements
@@ -74,7 +71,7 @@ static u64 hfi1_dma_map_single(struct ib_device *dev, void *cpu_addr,
 	if (WARN_ON(!valid_dma_direction(direction)))
 		return BAD_DMA_ADDRESS;
 
-	return (u64) cpu_addr;
+	return (u64)cpu_addr;
 }
 
 static void hfi1_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
@@ -95,7 +92,7 @@ static u64 hfi1_dma_map_page(struct ib_device *dev, struct page *page,
 	if (offset + size > PAGE_SIZE)
 		return BAD_DMA_ADDRESS;
 
-	addr = (u64) page_address(page);
+	addr = (u64)page_address(page);
 	if (addr)
 		addr += offset;
 
@@ -120,7 +117,7 @@ static int hfi1_map_sg(struct ib_device *dev, struct scatterlist *sgl,
 		return BAD_DMA_ADDRESS;
 
 	for_each_sg(sgl, sg, nents, i) {
-		addr = (u64) page_address(sg_page(sg));
+		addr = (u64)page_address(sg_page(sg));
 		if (!addr) {
 			ret = 0;
 			break;
@@ -161,14 +158,14 @@ static void *hfi1_dma_alloc_coherent(struct ib_device *dev, size_t size,
 	if (p)
 		addr = page_address(p);
 	if (dma_handle)
-		*dma_handle = (u64) addr;
+		*dma_handle = (u64)addr;
 	return addr;
 }
 
 static void hfi1_dma_free_coherent(struct ib_device *dev, size_t size,
 				   void *cpu_addr, u64 dma_handle)
 {
-	free_pages((unsigned long) cpu_addr, get_order(size));
+	free_pages((unsigned long)cpu_addr, get_order(size));
 }
 
 struct ib_dma_mapping_ops hfi1_dma_mapping_ops = {
diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c
index 8485de1fce08..34511e5df1d5 100644
--- a/drivers/staging/rdma/hfi1/driver.c
+++ b/drivers/staging/rdma/hfi1/driver.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -56,6 +53,7 @@
 #include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/prefetch.h>
+#include <rdma/ib_verbs.h>
 
 #include "hfi.h"
 #include "trace.h"
@@ -162,6 +160,22 @@ const char *get_unit_name(int unit)
 	return iname;
 }
 
+const char *get_card_name(struct rvt_dev_info *rdi)
+{
+	struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+	struct hfi1_devdata *dd = container_of(ibdev,
+					       struct hfi1_devdata, verbs_dev);
+	return get_unit_name(dd->unit);
+}
+
+struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
+{
+	struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+	struct hfi1_devdata *dd = container_of(ibdev,
+					       struct hfi1_devdata, verbs_dev);
+	return dd->pcidev;
+}
+
 /*
  * Return count of units with at least one port ACTIVE.
  */
@@ -246,7 +260,7 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
  */
 inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
 {
-	if (unlikely(!IS_ALIGNED(size, PAGE_SIZE)))
+	if (unlikely(!PAGE_ALIGNED(size)))
 		return 0;
 	if (unlikely(size < MIN_EAGER_BUFFER))
 		return 0;
@@ -265,6 +279,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 	u32 rte = rhf_rcv_type_err(packet->rhf);
 	int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
 	struct hfi1_ibport *ibp = &ppd->ibport_data;
+	struct hfi1_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 
 	if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
 		return;
@@ -283,9 +299,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 			goto drop;
 
 		/* Check for GRH */
-		if (lnh == HFI1_LRH_BTH)
+		if (lnh == HFI1_LRH_BTH) {
 			ohdr = &hdr->u.oth;
-		else if (lnh == HFI1_LRH_GRH) {
+		} else if (lnh == HFI1_LRH_GRH) {
 			u32 vtf;
 
 			ohdr = &hdr->u.l.oth;
@@ -295,17 +311,17 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 				goto drop;
 			rcv_flags |= HFI1_HAS_GRH;
-		} else
+		} else {
 			goto drop;
-
+		}
 		/* Get the destination QP number. */
-		qp_num = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
-		if (lid < HFI1_MULTICAST_LID_BASE) {
-			struct hfi1_qp *qp;
+		qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+		if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+			struct rvt_qp *qp;
 			unsigned long flags;
 
 			rcu_read_lock();
-			qp = hfi1_lookup_qpn(ibp, qp_num);
+			qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 			if (!qp) {
 				rcu_read_unlock();
 				goto drop;
@@ -318,9 +334,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 			spin_lock_irqsave(&qp->r_lock, flags);
 
 			/* Check for valid receive state. */
-			if (!(ib_hfi1_state_ops[qp->state] &
-			      HFI1_PROCESS_RECV_OK)) {
-				ibp->n_pkt_drops++;
+			if (!(ib_rvt_state_ops[qp->state] &
+			      RVT_PROCESS_RECV_OK)) {
+				ibp->rvp.n_pkt_drops++;
 			}
 
 			switch (qp->ibqp.qp_type) {
@@ -352,7 +368,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 		if (rhf_use_egr_bfr(packet->rhf))
 			ebuf = packet->ebuf;
 
-		if (ebuf == NULL)
+		if (!ebuf)
 			goto drop; /* this should never happen */
 
 		if (lnh == HFI1_LRH_BTH)
@@ -368,9 +384,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 		if (opcode == IB_OPCODE_CNP) {
 			/*
 			 * Only in pre-B0 h/w is the CNP_OPCODE handled
-			 * via this code path (errata 291394).
+			 * via this code path.
 			 */
-			struct hfi1_qp *qp = NULL;
+			struct rvt_qp *qp = NULL;
 			u32 lqpn, rqpn;
 			u16 rlid;
 			u8 svc_type, sl, sc5;
@@ -380,10 +396,10 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 				sc5 |= 0x10;
 			sl = ibp->sc_to_sl[sc5];
 
-			lqpn = be32_to_cpu(bth[1]) & HFI1_QPN_MASK;
+			lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
 			rcu_read_lock();
-			qp = hfi1_lookup_qpn(ibp, lqpn);
-			if (qp == NULL) {
+			qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
+			if (!qp) {
 				rcu_read_unlock();
 				goto drop;
 			}
@@ -419,9 +435,8 @@ drop:
 }
 
 static inline void init_packet(struct hfi1_ctxtdata *rcd,
-			      struct hfi1_packet *packet)
+			       struct hfi1_packet *packet)
 {
-
 	packet->rsize = rcd->rcvhdrqentsize; /* words */
 	packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
 	packet->rcd = rcd;
@@ -434,12 +449,7 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd,
 	packet->rcv_flags = 0;
 }
 
-#ifndef CONFIG_PRESCAN_RXQ
-static void prescan_rxq(struct hfi1_packet *packet) {}
-#else /* !CONFIG_PRESCAN_RXQ */
-static int prescan_receive_queue;
-
-static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
+static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr,
 			struct hfi1_other_headers *ohdr,
 			u64 rhf, u32 bth1, struct ib_grh *grh)
 {
@@ -453,7 +463,7 @@ static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
 	case IB_QPT_GSI:
 	case IB_QPT_UD:
 		rlid = be16_to_cpu(hdr->lrh[3]);
-		rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK;
+		rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
 		svc_type = IB_CC_SVCTYPE_UD;
 		break;
 	case IB_QPT_UC:
@@ -483,7 +493,7 @@ static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
 
 	if (bth1 & HFI1_BECN_SMASK) {
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-		u32 lqpn = bth1 & HFI1_QPN_MASK;
+		u32 lqpn = bth1 & RVT_QPN_MASK;
 		u8 sl = ibp->sc_to_sl[sc5];
 
 		process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
@@ -562,26 +572,31 @@ static inline void update_ps_mdata(struct ps_mdata *mdata,
  * containing Excplicit Congestion Notifications (FECNs, or BECNs).
  * When an ECN is found, process the Congestion Notification, and toggle
  * it off.
+ * This is declared as a macro to allow quick checking of the port to avoid
+ * the overhead of a function call if not enabled.
  */
-static void prescan_rxq(struct hfi1_packet *packet)
+#define prescan_rxq(rcd, packet) \
+	do { \
+		if (rcd->ppd->cc_prescan) \
+			__prescan_rxq(packet); \
+	} while (0)
+static void __prescan_rxq(struct hfi1_packet *packet)
 {
 	struct hfi1_ctxtdata *rcd = packet->rcd;
 	struct ps_mdata mdata;
 
-	if (!prescan_receive_queue)
-		return;
-
 	init_ps_mdata(&mdata, packet);
 
 	while (1) {
 		struct hfi1_devdata *dd = rcd->dd;
 		struct hfi1_ibport *ibp = &rcd->ppd->ibport_data;
-		__le32 *rhf_addr = (__le32 *) rcd->rcvhdrq + mdata.ps_head +
+		__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
 					 dd->rhf_offset;
-		struct hfi1_qp *qp;
+		struct rvt_qp *qp;
 		struct hfi1_ib_header *hdr;
 		struct hfi1_other_headers *ohdr;
 		struct ib_grh *grh = NULL;
+		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 		u64 rhf = rhf_to_cpu(rhf_addr);
 		u32 etype = rhf_rcv_type(rhf), qpn, bth1;
 		int is_ecn = 0;
@@ -600,25 +615,25 @@ static void prescan_rxq(struct hfi1_packet *packet)
 			hfi1_get_msgheader(dd, rhf_addr);
 		lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 
-		if (lnh == HFI1_LRH_BTH)
+		if (lnh == HFI1_LRH_BTH) {
 			ohdr = &hdr->u.oth;
-		else if (lnh == HFI1_LRH_GRH) {
+		} else if (lnh == HFI1_LRH_GRH) {
 			ohdr = &hdr->u.l.oth;
 			grh = &hdr->u.l.grh;
-		} else
+		} else {
 			goto next; /* just in case */
-
+		}
 		bth1 = be32_to_cpu(ohdr->bth[1]);
 		is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
 
 		if (!is_ecn)
 			goto next;
 
-		qpn = bth1 & HFI1_QPN_MASK;
+		qpn = bth1 & RVT_QPN_MASK;
 		rcu_read_lock();
-		qp = hfi1_lookup_qpn(ibp, qpn);
+		qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
 
-		if (qp == NULL) {
+		if (!qp) {
 			rcu_read_unlock();
 			goto next;
 		}
@@ -633,7 +648,6 @@ next:
 		update_ps_mdata(&mdata, rcd);
 	}
 }
-#endif /* CONFIG_PRESCAN_RXQ */
 
 static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
 {
@@ -683,8 +697,9 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 		 * The +2 is the size of the RHF.
 		 */
 		prefetch_range(packet->ebuf,
-			packet->tlen - ((packet->rcd->rcvhdrqentsize -
-				  (rhf_hdrq_offset(packet->rhf)+2)) * 4));
+			       packet->tlen - ((packet->rcd->rcvhdrqentsize -
+					       (rhf_hdrq_offset(packet->rhf)
+						+ 2)) * 4));
 	}
 
 	/*
@@ -712,7 +727,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 		}
 	}
 
-	packet->rhf_addr = (__le32 *) packet->rcd->rcvhdrq + packet->rhqoff +
+	packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
 				      packet->rcd->dd->rhf_offset;
 	packet->rhf = rhf_to_cpu(packet->rhf_addr);
 
@@ -737,7 +752,6 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet)
 
 static inline void finish_packet(struct hfi1_packet *packet)
 {
-
 	/*
 	 * Nothing we need to free for the packet.
 	 *
@@ -746,14 +760,12 @@ static inline void finish_packet(struct hfi1_packet *packet)
 	 */
 	update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
 		       packet->etail, rcv_intr_dynamic, packet->numpkt);
-
 }
 
 static inline void process_rcv_qp_work(struct hfi1_packet *packet)
 {
-
 	struct hfi1_ctxtdata *rcd;
-	struct hfi1_qp *qp, *nqp;
+	struct rvt_qp *qp, *nqp;
 
 	rcd = packet->rcd;
 	rcd->head = packet->rhqoff;
@@ -764,17 +776,17 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet)
 	 */
 	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
 		list_del_init(&qp->rspwait);
-		if (qp->r_flags & HFI1_R_RSP_DEFERED_ACK) {
-			qp->r_flags &= ~HFI1_R_RSP_DEFERED_ACK;
+		if (qp->r_flags & RVT_R_RSP_NAK) {
+			qp->r_flags &= ~RVT_R_RSP_NAK;
 			hfi1_send_rc_ack(rcd, qp, 0);
 		}
-		if (qp->r_flags & HFI1_R_RSP_SEND) {
+		if (qp->r_flags & RVT_R_RSP_SEND) {
 			unsigned long flags;
 
-			qp->r_flags &= ~HFI1_R_RSP_SEND;
+			qp->r_flags &= ~RVT_R_RSP_SEND;
 			spin_lock_irqsave(&qp->s_lock, flags);
-			if (ib_hfi1_state_ops[qp->state] &
-					HFI1_PROCESS_OR_FLUSH_SEND)
+			if (ib_rvt_state_ops[qp->state] &
+					RVT_PROCESS_OR_FLUSH_SEND)
 				hfi1_schedule_send(qp);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 		}
@@ -799,7 +811,7 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 		goto bail;
 	}
 
-	prescan_rxq(&packet);
+	prescan_rxq(rcd, &packet);
 
 	while (last == RCV_PKT_OK) {
 		last = process_rcv_packet(&packet, thread);
@@ -830,7 +842,7 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 	}
 	smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
 
-	prescan_rxq(&packet);
+	prescan_rxq(rcd, &packet);
 
 	while (last == RCV_PKT_OK) {
 		last = process_rcv_packet(&packet, thread);
@@ -862,6 +874,37 @@ static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
 			&handle_receive_interrupt_dma_rtail;
 }
 
+void set_all_slowpath(struct hfi1_devdata *dd)
+{
+	int i;
+
+	/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
+	for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+		dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
+}
+
+static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
+				      struct hfi1_packet packet,
+				      struct hfi1_devdata *dd)
+{
+	struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
+	struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd,
+							     packet.rhf_addr);
+
+	if (hdr2sc(hdr, packet.rhf) != 0xf) {
+		int hwstate = read_logical_state(dd);
+
+		if (hwstate != LSTATE_ACTIVE) {
+			dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
+			return 0;
+		}
+
+		queue_work(rcd->ppd->hfi1_wq, lsaw);
+		return 1;
+	}
+	return 0;
+}
+
 /*
  * handle_receive_interrupt - receive a packet
  * @rcd: the context
@@ -910,17 +953,17 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 		}
 	}
 
-	prescan_rxq(&packet);
+	prescan_rxq(rcd, &packet);
 
 	while (last == RCV_PKT_OK) {
-
-		if (unlikely(dd->do_drop && atomic_xchg(&dd->drop_packet,
-			DROP_PACKET_OFF) == DROP_PACKET_ON)) {
+		if (unlikely(dd->do_drop &&
+			     atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
+			     DROP_PACKET_ON)) {
 			dd->do_drop = 0;
 
 			/* On to the next packet */
 			packet.rhqoff += packet.rsize;
-			packet.rhf_addr = (__le32 *) rcd->rcvhdrq +
+			packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
 					  packet.rhqoff +
 					  dd->rhf_offset;
 			packet.rhf = rhf_to_cpu(packet.rhf_addr);
@@ -929,6 +972,11 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 			last = skip_rcv_packet(&packet, thread);
 			skip_pkt = 0;
 		} else {
+			/* Auto activate link on non-SC15 packet receive */
+			if (unlikely(rcd->ppd->host_link_state ==
+				     HLS_UP_ARMED) &&
+			    set_armed_to_active(rcd, packet, dd))
+				goto bail;
 			last = process_rcv_packet(&packet, thread);
 		}
 
@@ -940,8 +988,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 			if (seq != rcd->seq_cnt)
 				last = RCV_PKT_DONE;
 			if (needset) {
-				dd_dev_info(dd,
-					"Switching to NO_DMA_RTAIL\n");
+				dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
 				set_all_nodma_rtail(dd);
 				needset = 0;
 			}
@@ -984,6 +1031,42 @@ bail:
 }
 
 /*
+ * We may discover in the interrupt that the hardware link state has
+ * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
+ * and we need to update the driver's notion of the link state.  We cannot
+ * run set_link_state from interrupt context, so we queue this function on
+ * a workqueue.
+ *
+ * We delay the regular interrupt processing until after the state changes
+ * so that the link will be in the correct state by the time any application
+ * we wake up attempts to send a reply to any message it received.
+ * (Subsequent receive interrupts may possibly force the wakeup before we
+ * update the link state.)
+ *
+ * The rcd is freed in hfi1_free_ctxtdata after hfi1_postinit_cleanup invokes
+ * dd->f_cleanup(dd) to disable the interrupt handler and flush workqueues,
+ * so we're safe from use-after-free of the rcd.
+ */
+void receive_interrupt_work(struct work_struct *work)
+{
+	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+						  linkstate_active_work);
+	struct hfi1_devdata *dd = ppd->dd;
+	int i;
+
+	/* Received non-SC15 packet implies neighbor_normal */
+	ppd->neighbor_normal = 1;
+	set_link_state(ppd, HLS_UP_ACTIVE);
+
+	/*
+	 * Interrupt all kernel contexts that could have had an
+	 * interrupt during auto activation.
+	 */
+	for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
+		force_recv_intr(dd->rcd[i]);
+}
+
+/*
  * Convert a given MTU size to the on-wire MAD packet enumeration.
  * Return -1 if the size is invalid.
  */
@@ -1037,9 +1120,9 @@ int set_mtu(struct hfi1_pportdata *ppd)
 	ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd);
 
 	mutex_lock(&ppd->hls_lock);
-	if (ppd->host_link_state == HLS_UP_INIT
-			|| ppd->host_link_state == HLS_UP_ARMED
-			|| ppd->host_link_state == HLS_UP_ACTIVE)
+	if (ppd->host_link_state == HLS_UP_INIT ||
+	    ppd->host_link_state == HLS_UP_ARMED ||
+	    ppd->host_link_state == HLS_UP_ACTIVE)
 		is_up = 1;
 
 	drain = !is_ax(dd) && is_up;
@@ -1082,79 +1165,80 @@ int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
 	return 0;
 }
 
-/*
- * Following deal with the "obviously simple" task of overriding the state
- * of the LEDs, which normally indicate link physical and logical status.
- * The complications arise in dealing with different hardware mappings
- * and the board-dependent routine being called from interrupts.
- * and then there's the requirement to _flash_ them.
- */
-#define LED_OVER_FREQ_SHIFT 8
-#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
-/* Below is "non-zero" to force override, but both actual LEDs are off */
-#define LED_OVER_BOTH_OFF (8)
+void shutdown_led_override(struct hfi1_pportdata *ppd)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+
+	/*
+	 * This pairs with the memory barrier in hfi1_start_led_override to
+	 * ensure that we read the correct state of LED beaconing represented
+	 * by led_override_timer_active
+	 */
+	smp_rmb();
+	if (atomic_read(&ppd->led_override_timer_active)) {
+		del_timer_sync(&ppd->led_override_timer);
+		atomic_set(&ppd->led_override_timer_active, 0);
+		/* Ensure the atomic_set is visible to all CPUs */
+		smp_wmb();
+	}
+
+	/* Hand control of the LED to the DC for normal operation */
+	write_csr(dd, DCC_CFG_LED_CNTRL, 0);
+}
 
 static void run_led_override(unsigned long opaque)
 {
 	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque;
 	struct hfi1_devdata *dd = ppd->dd;
-	int timeoff;
-	int ph_idx;
+	unsigned long timeout;
+	int phase_idx;
 
 	if (!(dd->flags & HFI1_INITTED))
 		return;
 
-	ph_idx = ppd->led_override_phase++ & 1;
-	ppd->led_override = ppd->led_override_vals[ph_idx];
-	timeoff = ppd->led_override_timeoff;
+	phase_idx = ppd->led_override_phase & 1;
 
-	/*
-	 * don't re-fire the timer if user asked for it to be off; we let
-	 * it fire one more time after they turn it off to simplify
-	 */
-	if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
-		mod_timer(&ppd->led_override_timer, jiffies + timeoff);
+	setextled(dd, phase_idx);
+
+	timeout = ppd->led_override_vals[phase_idx];
+
+	/* Set up for next phase */
+	ppd->led_override_phase = !ppd->led_override_phase;
+
+	mod_timer(&ppd->led_override_timer, jiffies + timeout);
 }
 
-void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val)
+/*
+ * To have the LED blink in a particular pattern, provide timeon and timeoff
+ * in milliseconds.
+ * To turn off custom blinking and return to normal operation, use
+ * shutdown_led_override()
+ */
+void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
+			     unsigned int timeoff)
 {
-	struct hfi1_devdata *dd = ppd->dd;
-	int timeoff, freq;
-
-	if (!(dd->flags & HFI1_INITTED))
+	if (!(ppd->dd->flags & HFI1_INITTED))
 		return;
 
-	/* First check if we are blinking. If not, use 1HZ polling */
-	timeoff = HZ;
-	freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
+	/* Convert to jiffies for direct use in timer */
+	ppd->led_override_vals[0] = msecs_to_jiffies(timeoff);
+	ppd->led_override_vals[1] = msecs_to_jiffies(timeon);
 
-	if (freq) {
-		/* For blink, set each phase from one nybble of val */
-		ppd->led_override_vals[0] = val & 0xF;
-		ppd->led_override_vals[1] = (val >> 4) & 0xF;
-		timeoff = (HZ << 4)/freq;
-	} else {
-		/* Non-blink set both phases the same. */
-		ppd->led_override_vals[0] = val & 0xF;
-		ppd->led_override_vals[1] = val & 0xF;
-	}
-	ppd->led_override_timeoff = timeoff;
+	/* Arbitrarily start from LED on phase */
+	ppd->led_override_phase = 1;
 
 	/*
 	 * If the timer has not already been started, do so. Use a "quick"
-	 * timeout so the function will be called soon, to look at our request.
+	 * timeout so the handler will be called soon to look at our request.
 	 */
-	if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
-		/* Need to start timer */
+	if (!timer_pending(&ppd->led_override_timer)) {
 		setup_timer(&ppd->led_override_timer, run_led_override,
-				(unsigned long)ppd);
-
+			    (unsigned long)ppd);
 		ppd->led_override_timer.expires = jiffies + 1;
 		add_timer(&ppd->led_override_timer);
-	} else {
-		if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
-			mod_timer(&ppd->led_override_timer, jiffies + 1);
-		atomic_dec(&ppd->led_override_timer_active);
+		atomic_set(&ppd->led_override_timer_active, 1);
+		/* Ensure the atomic_set is visible to all CPUs */
+		smp_wmb();
 	}
 }
 
@@ -1184,8 +1268,8 @@ int hfi1_reset_device(int unit)
 
 	if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
 		dd_dev_info(dd,
-			"Invalid unit number %u or not initialized or not present\n",
-			unit);
+			    "Invalid unit number %u or not initialized or not present\n",
+			    unit);
 		ret = -ENXIO;
 		goto bail;
 	}
@@ -1203,14 +1287,8 @@ int hfi1_reset_device(int unit)
 
 	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 		ppd = dd->pport + pidx;
-		if (atomic_read(&ppd->led_override_timer_active)) {
-			/* Need to stop LED timer, _then_ shut off LEDs */
-			del_timer_sync(&ppd->led_override_timer);
-			atomic_set(&ppd->led_override_timer_active, 0);
-		}
 
-		/* Shut off LEDs after we are sure timer is not running */
-		ppd->led_override = LED_OVER_BOTH_OFF;
+		shutdown_led_override(ppd);
 	}
 	if (dd->flags & HFI1_HAS_SEND_DMA)
 		sdma_exit(dd);
@@ -1221,11 +1299,11 @@ int hfi1_reset_device(int unit)
 
 	if (ret)
 		dd_dev_err(dd,
-			"Reinitialize unit %u after reset failed with %d\n",
-			unit, ret);
+			   "Reinitialize unit %u after reset failed with %d\n",
+			   unit, ret);
 	else
 		dd_dev_info(dd, "Reinitialized unit %u after resetting\n",
-			unit);
+			    unit);
 
 bail:
 	return ret;
@@ -1282,7 +1360,7 @@ int process_receive_bypass(struct hfi1_packet *packet)
 		handle_eflags(packet);
 
 	dd_dev_err(packet->rcd->dd,
-	   "Bypass packets are not supported in normal operation. Dropping\n");
+		   "Bypass packets are not supported in normal operation. Dropping\n");
 	return RHF_RCV_CONTINUE;
 }
 
@@ -1320,6 +1398,6 @@ int kdeth_process_eager(struct hfi1_packet *packet)
 int process_receive_invalid(struct hfi1_packet *packet)
 {
 	dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
-		rhf_rcv_type(packet->rhf));
+		   rhf_rcv_type(packet->rhf));
 	return RHF_RCV_CONTINUE;
 }
diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/staging/rdma/hfi1/efivar.c
index 7dc5bae220e0..106349fc1fb9 100644
--- a/drivers/staging/rdma/hfi1/efivar.c
+++ b/drivers/staging/rdma/hfi1/efivar.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -83,8 +80,7 @@ static int read_efi_var(const char *name, unsigned long *size,
 	if (!efi_enabled(EFI_RUNTIME_SERVICES))
 		return -EOPNOTSUPP;
 
-	uni_name = kzalloc(sizeof(efi_char16_t) * (strlen(name) + 1),
-			   GFP_KERNEL);
+	uni_name = kcalloc(strlen(name) + 1, sizeof(efi_char16_t), GFP_KERNEL);
 	temp_buffer = kzalloc(EFI_DATA_SIZE, GFP_KERNEL);
 
 	if (!uni_name || !temp_buffer) {
@@ -128,13 +124,12 @@ static int read_efi_var(const char *name, unsigned long *size,
 	 * temporary buffer.  Now allocate a correctly sized
 	 * buffer.
 	 */
-	data = kmalloc(temp_size, GFP_KERNEL);
+	data = kmemdup(temp_buffer, temp_size, GFP_KERNEL);
 	if (!data) {
 		ret = -ENOMEM;
 		goto fail;
 	}
 
-	memcpy(data, temp_buffer, temp_size);
 	*size = temp_size;
 	*return_data = data;
 
diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/staging/rdma/hfi1/efivar.h
index 070706225c51..94e9e70de568 100644
--- a/drivers/staging/rdma/hfi1/efivar.h
+++ b/drivers/staging/rdma/hfi1/efivar.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c
index fb620c97f592..bd8771570f81 100644
--- a/drivers/staging/rdma/hfi1/eprom.c
+++ b/drivers/staging/rdma/hfi1/eprom.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -99,17 +96,17 @@
 
 /* sleep length while waiting for controller */
 #define WAIT_SLEEP_US 100	/* must be larger than 5 (see usage) */
-#define COUNT_DELAY_SEC(n) ((n) * (1000000/WAIT_SLEEP_US))
+#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US))
 
 /* GPIO pins */
-#define EPROM_WP_N (1ull << 14)	/* EPROM write line */
+#define EPROM_WP_N BIT_ULL(14)	/* EPROM write line */
 
 /*
- * Use the EP mutex to guard against other callers from within the driver.
- * Also covers usage of eprom_available.
+ * How long to wait for the EPROM to become available, in ms.
+ * The spec 32 Mb EPROM takes around 40s to erase then write.
+ * Double it for safety.
  */
-static DEFINE_MUTEX(eprom_mutex);
-static int eprom_available;	/* default: not available */
+#define EPROM_TIMEOUT 80000 /* ms */
 
 /*
  * Turn on external enable line that allows writing on the flash.
@@ -117,11 +114,9 @@ static int eprom_available;	/* default: not available */
 static void write_enable(struct hfi1_devdata *dd)
 {
 	/* raise signal */
-	write_csr(dd, ASIC_GPIO_OUT,
-		read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
 	/* raise enable */
-	write_csr(dd, ASIC_GPIO_OE,
-		read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
 }
 
 /*
@@ -130,11 +125,9 @@ static void write_enable(struct hfi1_devdata *dd)
 static void write_disable(struct hfi1_devdata *dd)
 {
 	/* lower signal */
-	write_csr(dd, ASIC_GPIO_OUT,
-		read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
 	/* lower enable */
-	write_csr(dd, ASIC_GPIO_OE,
-		read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
 }
 
 /*
@@ -212,8 +205,8 @@ static int erase_range(struct hfi1_devdata *dd, u32 start, u32 len)
 	/* check the end points for the minimum erase */
 	if ((start & MASK_4KB) || (end & MASK_4KB)) {
 		dd_dev_err(dd,
-			"%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n",
-			__func__, start, end);
+			   "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n",
+			   __func__, start, end);
 		return -EINVAL;
 	}
 
@@ -256,7 +249,7 @@ static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
 	int i;
 
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
-	for (i = 0; i < EP_PAGE_SIZE/sizeof(u32); i++)
+	for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++)
 		result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
 }
@@ -267,7 +260,7 @@ static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
 static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
 {
 	u32 offset;
-	u32 buffer[EP_PAGE_SIZE/sizeof(u32)];
+	u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
 	int ret = 0;
 
 	/* reject anything not on an EPROM page boundary */
@@ -277,7 +270,7 @@ static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
 	for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
 		read_page(dd, start + offset, buffer);
 		if (copy_to_user((void __user *)(addr + offset),
-						buffer, EP_PAGE_SIZE)) {
+				 buffer, EP_PAGE_SIZE)) {
 			ret = -EFAULT;
 			goto done;
 		}
@@ -298,7 +291,7 @@ static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data)
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
 	write_csr(dd, ASIC_EEP_DATA, data[0]);
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset));
-	for (i = 1; i < EP_PAGE_SIZE/sizeof(u32); i++)
+	for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++)
 		write_csr(dd, ASIC_EEP_DATA, data[i]);
 	/* will close the open page */
 	return wait_for_not_busy(dd);
@@ -310,7 +303,7 @@ static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data)
 static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
 {
 	u32 offset;
-	u32 buffer[EP_PAGE_SIZE/sizeof(u32)];
+	u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
 	int ret = 0;
 
 	/* reject anything not on an EPROM page boundary */
@@ -321,7 +314,7 @@ static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
 
 	for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
 		if (copy_from_user(buffer, (void __user *)(addr + offset),
-						EP_PAGE_SIZE)) {
+				   EP_PAGE_SIZE)) {
 			ret = -EFAULT;
 			goto done;
 		}
@@ -353,44 +346,42 @@ static inline u32 extract_rstart(u32 composite)
  *
  * Return 0 on success, -ERRNO on error
  */
-int handle_eprom_command(const struct hfi1_cmd *cmd)
+int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd)
 {
 	struct hfi1_devdata *dd;
 	u32 dev_id;
 	u32 rlen;	/* range length */
 	u32 rstart;	/* range start */
+	int i_minor;
 	int ret = 0;
 
 	/*
-	 * The EPROM is per-device, so use unit 0 as that will always
-	 * exist.
+	 * Map the device file to device data using the relative minor.
+	 * The device file minor number is the unit number + 1.  0 is
+	 * the generic device file - reject it.
 	 */
-	dd = hfi1_lookup(0);
+	i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
+	if (i_minor <= 0)
+		return -EINVAL;
+	dd = hfi1_lookup(i_minor - 1);
 	if (!dd) {
-		pr_err("%s: cannot find unit 0!\n", __func__);
+		pr_err("%s: cannot find unit %d!\n", __func__, i_minor);
 		return -EINVAL;
 	}
 
-	/* lock against other callers touching the ASIC block */
-	mutex_lock(&eprom_mutex);
-
-	/* some platforms do not have an EPROM */
-	if (!eprom_available) {
-		ret = -ENOSYS;
-		goto done_asic;
-	}
+	/* some devices do not have an EPROM */
+	if (!dd->eprom_available)
+		return -EOPNOTSUPP;
 
-	/* lock against the other HFI on another OS */
-	ret = acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
 	if (ret) {
-		dd_dev_err(dd,
-			"%s: unable to acquire hw mutex, no EPROM support\n",
-			__func__);
+		dd_dev_err(dd, "%s: unable to acquire EPROM resource\n",
+			   __func__);
 		goto done_asic;
 	}
 
 	dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n",
-		__func__, cmd->type, cmd->len, cmd->addr);
+		    __func__, cmd->type, cmd->len, cmd->addr);
 
 	switch (cmd->type) {
 	case HFI1_CMD_EP_INFO:
@@ -401,7 +392,7 @@ int handle_eprom_command(const struct hfi1_cmd *cmd)
 		dev_id = read_device_id(dd);
 		/* addr points to a u32 user buffer */
 		if (copy_to_user((void __user *)cmd->addr, &dev_id,
-								sizeof(u32)))
+				 sizeof(u32)))
 			ret = -EFAULT;
 		break;
 
@@ -429,14 +420,13 @@ int handle_eprom_command(const struct hfi1_cmd *cmd)
 
 	default:
 		dd_dev_err(dd, "%s: unexpected command %d\n",
-			__func__, cmd->type);
+			   __func__, cmd->type);
 		ret = -EINVAL;
 		break;
 	}
 
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_EPROM);
 done_asic:
-	mutex_unlock(&eprom_mutex);
 	return ret;
 }
 
@@ -447,44 +437,35 @@ int eprom_init(struct hfi1_devdata *dd)
 {
 	int ret = 0;
 
-	/* only the discrete chip has an EPROM, nothing to do */
+	/* only the discrete chip has an EPROM */
 	if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0)
 		return 0;
 
-	/* lock against other callers */
-	mutex_lock(&eprom_mutex);
-	if (eprom_available)	/* already initialized */
-		goto done_asic;
-
 	/*
-	 * Lock against the other HFI on another OS - the mutex above
-	 * would have caught anything in this driver.  It is OK if
-	 * both OSes reset the EPROM - as long as they don't do it at
-	 * the same time.
+	 * It is OK if both HFIs reset the EPROM as long as they don't
+	 * do it at the same time.
 	 */
-	ret = acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
 	if (ret) {
 		dd_dev_err(dd,
-			"%s: unable to acquire hw mutex, no EPROM support\n",
-			__func__);
+			   "%s: unable to acquire EPROM resource, no EPROM support\n",
+			   __func__);
 		goto done_asic;
 	}
 
 	/* reset EPROM to be sure it is in a good state */
 
 	/* set reset */
-	write_csr(dd, ASIC_EEP_CTL_STAT,
-					ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
+	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
 	/* clear reset, set speed */
 	write_csr(dd, ASIC_EEP_CTL_STAT,
-			EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
+		  EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
 
 	/* wake the device with command "release powerdown NoID" */
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID);
 
-	eprom_available = 1;
-	release_hw_mutex(dd);
+	dd->eprom_available = true;
+	release_chip_resource(dd, CR_EPROM);
 done_asic:
-	mutex_unlock(&eprom_mutex);
 	return ret;
 }
diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/staging/rdma/hfi1/eprom.h
index 64a64276be81..d41f0b1afb15 100644
--- a/drivers/staging/rdma/hfi1/eprom.h
+++ b/drivers/staging/rdma/hfi1/eprom.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,4 +49,4 @@ struct hfi1_cmd;
 struct hfi1_devdata;
 
 int eprom_init(struct hfi1_devdata *dd);
-int handle_eprom_command(const struct hfi1_cmd *cmd);
+int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd);
diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c
index d57d549052c8..8396dc5fb6c1 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/staging/rdma/hfi1/file_ops.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -60,6 +57,8 @@
 #include "user_sdma.h"
 #include "user_exp_rcv.h"
 #include "eprom.h"
+#include "aspm.h"
+#include "mmu_rb.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -96,9 +95,6 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
 static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
 static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
 static int vma_fault(struct vm_area_struct *, struct vm_fault *);
-static int exp_tid_setup(struct file *, struct hfi1_tid_info *);
-static int exp_tid_free(struct file *, struct hfi1_tid_info *);
-static void unlock_exp_tids(struct hfi1_ctxtdata *);
 
 static const struct file_operations hfi1_file_ops = {
 	.owner = THIS_MODULE,
@@ -164,7 +160,6 @@ enum mmap_types {
 #define dbg(fmt, ...)				\
 	pr_info(fmt, ##__VA_ARGS__)
 
-
 static inline int is_valid_mmap(u64 token)
 {
 	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
@@ -188,6 +183,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 	struct hfi1_cmd cmd;
 	struct hfi1_user_info uinfo;
 	struct hfi1_tid_info tinfo;
+	unsigned long addr;
 	ssize_t consumed = 0, copy = 0, ret = 0;
 	void *dest = NULL;
 	__u64 user_val = 0;
@@ -219,6 +215,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 		break;
 	case HFI1_CMD_TID_UPDATE:
 	case HFI1_CMD_TID_FREE:
+	case HFI1_CMD_TID_INVAL_READ:
 		copy = sizeof(tinfo);
 		dest = &tinfo;
 		break;
@@ -294,9 +291,8 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 			sc_return_credits(uctxt->sc);
 		break;
 	case HFI1_CMD_TID_UPDATE:
-		ret = exp_tid_setup(fp, &tinfo);
+		ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
 		if (!ret) {
-			unsigned long addr;
 			/*
 			 * Copy the number of tidlist entries we used
 			 * and the length of the buffer we registered.
@@ -311,8 +307,25 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 				ret = -EFAULT;
 		}
 		break;
+	case HFI1_CMD_TID_INVAL_READ:
+		ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
+		if (ret)
+			break;
+		addr = (unsigned long)cmd.addr +
+			offsetof(struct hfi1_tid_info, tidcnt);
+		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+				 sizeof(tinfo.tidcnt)))
+			ret = -EFAULT;
+		break;
 	case HFI1_CMD_TID_FREE:
-		ret = exp_tid_free(fp, &tinfo);
+		ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
+		if (ret)
+			break;
+		addr = (unsigned long)cmd.addr +
+			offsetof(struct hfi1_tid_info, tidcnt);
+		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+				 sizeof(tinfo.tidcnt)))
+			ret = -EFAULT;
 		break;
 	case HFI1_CMD_RECV_CTRL:
 		ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val);
@@ -373,8 +386,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 				break;
 			}
 			if (dd->flags & HFI1_FORCED_FREEZE) {
-				/* Don't allow context reset if we are into
-				 * forced freeze */
+				/*
+				 * Don't allow context reset if we are into
+				 * forced freeze
+				 */
 				ret = -ENODEV;
 				break;
 			}
@@ -382,8 +397,9 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 			ret = sc_enable(sc);
 			hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
 				     uctxt->ctxt);
-		} else
+		} else {
 			ret = sc_restart(sc);
+		}
 		if (!ret)
 			sc_return_credits(sc);
 		break;
@@ -393,7 +409,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 	case HFI1_CMD_EP_ERASE_RANGE:
 	case HFI1_CMD_EP_READ_RANGE:
 	case HFI1_CMD_EP_WRITE_RANGE:
-		ret = handle_eprom_command(&cmd);
+		ret = handle_eprom_command(fp, &cmd);
 		break;
 	}
 
@@ -487,8 +503,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
 		 * Map only the amount allocated to the context, not the
 		 * entire available context's PIO space.
 		 */
-		memlen = ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE,
-			       PAGE_SIZE);
+		memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
 		flags &= ~VM_MAYREAD;
 		flags |= VM_DONTCOPY | VM_DONTEXPAND;
 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
@@ -638,7 +653,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
 			goto done;
 		}
 		memaddr = (u64)cq->comps;
-		memlen = ALIGN(sizeof(*cq->comps) * cq->nentries, PAGE_SIZE);
+		memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
 		flags |= VM_IO | VM_DONTEXPAND;
 		vmf = 1;
 		break;
@@ -733,6 +748,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 	/* drain user sdma queue */
 	hfi1_user_sdma_free_queues(fdata);
 
+	/* release the cpu */
+	hfi1_put_proc_affinity(dd, fdata->rec_cpu_num);
+
 	/*
 	 * Clear any left over, unhandled events so the next process that
 	 * gets this context doesn't get confused.
@@ -756,6 +774,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
 		     HFI1_RCVCTRL_TIDFLOW_DIS |
 		     HFI1_RCVCTRL_INTRAVAIL_DIS |
+		     HFI1_RCVCTRL_TAILUPD_DIS |
 		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
 		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
 		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
@@ -778,14 +797,12 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 	uctxt->pionowait = 0;
 	uctxt->event_flags = 0;
 
-	hfi1_clear_tids(uctxt);
+	hfi1_user_exp_rcv_free(fdata);
 	hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
 
-	if (uctxt->tid_pg_list)
-		unlock_exp_tids(uctxt);
-
 	hfi1_stats.sps_ctxts--;
-	dd->freectxts++;
+	if (++dd->freectxts == dd->num_user_contexts)
+		aspm_enable_all(dd);
 	mutex_unlock(&hfi1_mutex);
 	hfi1_free_ctxtdata(dd, uctxt);
 done:
@@ -827,8 +844,16 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
 
 	mutex_lock(&hfi1_mutex);
 	/* First, lets check if we need to setup a shared context? */
-	if (uinfo->subctxt_cnt)
+	if (uinfo->subctxt_cnt) {
+		struct hfi1_filedata *fd = fp->private_data;
+
 		ret = find_shared_ctxt(fp, uinfo);
+		if (ret < 0)
+			goto done_unlock;
+		if (ret)
+			fd->rec_cpu_num = hfi1_get_proc_affinity(
+				fd->uctxt->dd, fd->uctxt->numa_id);
+	}
 
 	/*
 	 * We execute the following block if we couldn't find a
@@ -838,6 +863,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
 		i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
 		ret = get_user_context(fp, uinfo, i_minor - 1, alg);
 	}
+done_unlock:
 	mutex_unlock(&hfi1_mutex);
 done:
 	return ret;
@@ -963,7 +989,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
 	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt;
 	unsigned ctxt;
-	int ret;
+	int ret, numa;
 
 	if (dd->flags & HFI1_FROZEN) {
 		/*
@@ -983,17 +1009,26 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
 	if (ctxt == dd->num_rcv_contexts)
 		return -EBUSY;
 
-	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt);
+	fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1);
+	if (fd->rec_cpu_num != -1)
+		numa = cpu_to_node(fd->rec_cpu_num);
+	else
+		numa = numa_node_id();
+	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
 	if (!uctxt) {
 		dd_dev_err(dd,
 			   "Unable to allocate ctxtdata memory, failing open\n");
 		return -ENOMEM;
 	}
+	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
+		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
+		  uctxt->numa_id);
+
 	/*
 	 * Allocate and enable a PIO send context.
 	 */
 	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
-			     uctxt->numa_id);
+			     uctxt->dd->node);
 	if (!uctxt->sc)
 		return -ENOMEM;
 
@@ -1027,7 +1062,12 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
 	INIT_LIST_HEAD(&uctxt->sdma_queues);
 	spin_lock_init(&uctxt->sdma_qlock);
 	hfi1_stats.sps_ctxts++;
-	dd->freectxts--;
+	/*
+	 * Disable ASPM when there are open user/PSM contexts to avoid
+	 * issues with ASPM L1 exit latency
+	 */
+	if (dd->freectxts-- == dd->num_user_contexts)
+		aspm_disable_all(dd);
 	fd->uctxt = uctxt;
 
 	return 0;
@@ -1036,22 +1076,19 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
 static int init_subctxts(struct hfi1_ctxtdata *uctxt,
 			 const struct hfi1_user_info *uinfo)
 {
-	int ret = 0;
 	unsigned num_subctxts;
 
 	num_subctxts = uinfo->subctxt_cnt;
-	if (num_subctxts > HFI1_MAX_SHARED_CTXTS) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
+		return -EINVAL;
 
 	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
 	uctxt->subctxt_id = uinfo->subctxt_id;
 	uctxt->active_slaves = 1;
 	uctxt->redirect_seq_cnt = 1;
 	set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
-bail:
-	return ret;
+
+	return 0;
 }
 
 static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
@@ -1106,10 +1143,10 @@ static int user_init(struct file *fp)
 	 * has done it.
 	 */
 	if (fd->subctxt) {
-		ret = wait_event_interruptible(uctxt->wait,
-			!test_bit(HFI1_CTXT_MASTER_UNINIT,
-			&uctxt->event_flags));
-		goto done;
+		ret = wait_event_interruptible(uctxt->wait, !test_bit(
+					       HFI1_CTXT_MASTER_UNINIT,
+					       &uctxt->event_flags));
+		goto expected;
 	}
 
 	/* initialize poll variables... */
@@ -1147,8 +1184,16 @@ static int user_init(struct file *fp)
 		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
 	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
 		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+	/*
+	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
+	 * We can't rely on the correct value to be set from prior
+	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
+	 * for both cases.
+	 */
 	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
 		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+	else
+		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
 	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
 
 	/* Notify any waiting slaves */
@@ -1156,8 +1201,18 @@ static int user_init(struct file *fp)
 		clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
 		wake_up(&uctxt->wait);
 	}
-	ret = 0;
 
+expected:
+	/*
+	 * Expected receive has to be setup for all processes (including
+	 * shared contexts). However, it has to be done after the master
+	 * context has been fully configured as it depends on the
+	 * eager/expected split of the RcvArray entries.
+	 * Setting it up here ensures that the subcontexts will be waiting
+	 * (due to the above wait_event_interruptible() until the master
+	 * is setup.
+	 */
+	ret = hfi1_user_exp_rcv_init(fp);
 done:
 	return ret;
 }
@@ -1227,46 +1282,6 @@ static int setup_ctxt(struct file *fp)
 			if (ret)
 				goto done;
 		}
-		/* Setup Expected Rcv memories */
-		uctxt->tid_pg_list = vzalloc(uctxt->expected_count *
-					     sizeof(struct page **));
-		if (!uctxt->tid_pg_list) {
-			ret = -ENOMEM;
-			goto done;
-		}
-		uctxt->physshadow = vzalloc(uctxt->expected_count *
-					    sizeof(*uctxt->physshadow));
-		if (!uctxt->physshadow) {
-			ret = -ENOMEM;
-			goto done;
-		}
-		/* allocate expected TID map and initialize the cursor */
-		atomic_set(&uctxt->tidcursor, 0);
-		uctxt->numtidgroups = uctxt->expected_count /
-			dd->rcv_entries.group_size;
-		uctxt->tidmapcnt = uctxt->numtidgroups / BITS_PER_LONG +
-			!!(uctxt->numtidgroups % BITS_PER_LONG);
-		uctxt->tidusemap = kzalloc_node(uctxt->tidmapcnt *
-						sizeof(*uctxt->tidusemap),
-						GFP_KERNEL, uctxt->numa_id);
-		if (!uctxt->tidusemap) {
-			ret = -ENOMEM;
-			goto done;
-		}
-		/*
-		 * In case that the number of groups is not a multiple of
-		 * 64 (the number of groups in a tidusemap element), mark
-		 * the extra ones as used. This will effectively make them
-		 * permanently used and should never be assigned. Otherwise,
-		 * the code which checks how many free groups we have will
-		 * get completely confused about the state of the bits.
-		 */
-		if (uctxt->numtidgroups % BITS_PER_LONG)
-			uctxt->tidusemap[uctxt->tidmapcnt - 1] =
-				~((1ULL << (uctxt->numtidgroups %
-					    BITS_PER_LONG)) - 1);
-		trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0,
-				       uctxt->tidusemap, uctxt->tidmapcnt);
 	}
 	ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
 	if (ret)
@@ -1392,8 +1407,9 @@ static unsigned int poll_next(struct file *fp,
 		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
 		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
 		pollflag = 0;
-	} else
+	} else {
 		pollflag = POLLIN | POLLRDNORM;
+	}
 	spin_unlock_irq(&dd->uctxt_lock);
 
 	return pollflag;
@@ -1471,8 +1487,9 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
 		if (uctxt->rcvhdrtail_kvaddr)
 			clear_rcvhdrtail(uctxt);
 		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
-	} else
+	} else {
 		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
+	}
 	hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
 	/* always; new head should be equal to new tail; see above */
 bail:
@@ -1505,367 +1522,6 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
 	return 0;
 }
 
-#define num_user_pages(vaddr, len)					\
-	(1 + (((((unsigned long)(vaddr) +				\
-		 (unsigned long)(len) - 1) & PAGE_MASK) -		\
-	       ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
-
-/**
- * tzcnt - count the number of trailing zeros in a 64bit value
- * @value: the value to be examined
- *
- * Returns the number of trailing least significant zeros in the
- * the input value. If the value is zero, return the number of
- * bits of the value.
- */
-static inline u8 tzcnt(u64 value)
-{
-	return value ? __builtin_ctzl(value) : sizeof(value) * 8;
-}
-
-static inline unsigned num_free_groups(unsigned long map, u16 *start)
-{
-	unsigned free;
-	u16 bitidx = *start;
-
-	if (bitidx >= BITS_PER_LONG)
-		return 0;
-	/* "Turn off" any bits set before our bit index */
-	map &= ~((1ULL << bitidx) - 1);
-	free = tzcnt(map) - bitidx;
-	while (!free && bitidx < BITS_PER_LONG) {
-		/* Zero out the last set bit so we look at the rest */
-		map &= ~(1ULL << bitidx);
-		/*
-		 * Account for the previously checked bits and advance
-		 * the bit index. We don't have to check for bitidx
-		 * getting bigger than BITS_PER_LONG here as it would
-		 * mean extra instructions that we don't need. If it
-		 * did happen, it would push free to a negative value
-		 * which will break the loop.
-		 */
-		free = tzcnt(map) - ++bitidx;
-	}
-	*start = bitidx;
-	return free;
-}
-
-static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo)
-{
-	int ret = 0;
-	struct hfi1_filedata *fd = fp->private_data;
-	struct hfi1_ctxtdata *uctxt = fd->uctxt;
-	struct hfi1_devdata *dd = uctxt->dd;
-	unsigned tid, mapped = 0, npages, ngroups, exp_groups,
-		tidpairs = uctxt->expected_count / 2;
-	struct page **pages;
-	unsigned long vaddr, tidmap[uctxt->tidmapcnt];
-	dma_addr_t *phys;
-	u32 tidlist[tidpairs], pairidx = 0, tidcursor;
-	u16 useidx, idx, bitidx, tidcnt = 0;
-
-	vaddr = tinfo->vaddr;
-
-	if (offset_in_page(vaddr)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	npages = num_user_pages(vaddr, tinfo->length);
-	if (!npages) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
-		       npages * PAGE_SIZE)) {
-		dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
-			   (void *)vaddr, npages);
-		ret = -EFAULT;
-		goto bail;
-	}
-
-	memset(tidmap, 0, sizeof(tidmap[0]) * uctxt->tidmapcnt);
-	memset(tidlist, 0, sizeof(tidlist[0]) * tidpairs);
-
-	exp_groups = uctxt->expected_count / dd->rcv_entries.group_size;
-	/* which group set do we look at first? */
-	tidcursor = atomic_read(&uctxt->tidcursor);
-	useidx = (tidcursor >> 16) & 0xffff;
-	bitidx = tidcursor & 0xffff;
-
-	/*
-	 * Keep going until we've mapped all pages or we've exhausted all
-	 * RcvArray entries.
-	 * This iterates over the number of tidmaps + 1
-	 * (idx <= uctxt->tidmapcnt) so we check the bitmap which we
-	 * started from one more time for any free bits before the
-	 * starting point bit.
-	 */
-	for (mapped = 0, idx = 0;
-	     mapped < npages && idx <= uctxt->tidmapcnt;) {
-		u64 i, offset = 0;
-		unsigned free, pinned, pmapped = 0, bits_used;
-		u16 grp;
-
-		/*
-		 * "Reserve" the needed group bits under lock so other
-		 * processes can't step in the middle of it. Once
-		 * reserved, we don't need the lock anymore since we
-		 * are guaranteed the groups.
-		 */
-		spin_lock(&uctxt->exp_lock);
-		if (uctxt->tidusemap[useidx] == -1ULL ||
-		    bitidx >= BITS_PER_LONG) {
-			/* no free groups in the set, use the next */
-			useidx = (useidx + 1) % uctxt->tidmapcnt;
-			idx++;
-			bitidx = 0;
-			spin_unlock(&uctxt->exp_lock);
-			continue;
-		}
-		ngroups = ((npages - mapped) / dd->rcv_entries.group_size) +
-			!!((npages - mapped) % dd->rcv_entries.group_size);
-
-		/*
-		 * If we've gotten here, the current set of groups does have
-		 * one or more free groups.
-		 */
-		free = num_free_groups(uctxt->tidusemap[useidx], &bitidx);
-		if (!free) {
-			/*
-			 * Despite the check above, free could still come back
-			 * as 0 because we don't check the entire bitmap but
-			 * we start from bitidx.
-			 */
-			spin_unlock(&uctxt->exp_lock);
-			continue;
-		}
-		bits_used = min(free, ngroups);
-		tidmap[useidx] |= ((1ULL << bits_used) - 1) << bitidx;
-		uctxt->tidusemap[useidx] |= tidmap[useidx];
-		spin_unlock(&uctxt->exp_lock);
-
-		/*
-		 * At this point, we know where in the map we have free bits.
-		 * properly offset into the various "shadow" arrays and compute
-		 * the RcvArray entry index.
-		 */
-		offset = ((useidx * BITS_PER_LONG) + bitidx) *
-			dd->rcv_entries.group_size;
-		pages = uctxt->tid_pg_list + offset;
-		phys = uctxt->physshadow + offset;
-		tid = uctxt->expected_base + offset;
-
-		/* Calculate how many pages we can pin based on free bits */
-		pinned = min((bits_used * dd->rcv_entries.group_size),
-			     (npages - mapped));
-		/*
-		 * Now that we know how many free RcvArray entries we have,
-		 * we can pin that many user pages.
-		 */
-		ret = hfi1_acquire_user_pages(vaddr + (mapped * PAGE_SIZE),
-					      pinned, true, pages);
-		if (ret) {
-			/*
-			 * We can't continue because the pages array won't be
-			 * initialized. This should never happen,
-			 * unless perhaps the user has mpin'ed the pages
-			 * themselves.
-			 */
-			dd_dev_info(dd,
-				    "Failed to lock addr %p, %u pages: errno %d\n",
-				    (void *) vaddr, pinned, -ret);
-			/*
-			 * Let go of the bits that we reserved since we are not
-			 * going to use them.
-			 */
-			spin_lock(&uctxt->exp_lock);
-			uctxt->tidusemap[useidx] &=
-				~(((1ULL << bits_used) - 1) << bitidx);
-			spin_unlock(&uctxt->exp_lock);
-			goto done;
-		}
-		/*
-		 * How many groups do we need based on how many pages we have
-		 * pinned?
-		 */
-		ngroups = (pinned / dd->rcv_entries.group_size) +
-			!!(pinned % dd->rcv_entries.group_size);
-		/*
-		 * Keep programming RcvArray entries for all the <ngroups> free
-		 * groups.
-		 */
-		for (i = 0, grp = 0; grp < ngroups; i++, grp++) {
-			unsigned j;
-			u32 pair_size = 0, tidsize;
-			/*
-			 * This inner loop will program an entire group or the
-			 * array of pinned pages (which ever limit is hit
-			 * first).
-			 */
-			for (j = 0; j < dd->rcv_entries.group_size &&
-				     pmapped < pinned; j++, pmapped++, tid++) {
-				tidsize = PAGE_SIZE;
-				phys[pmapped] = hfi1_map_page(dd->pcidev,
-						   pages[pmapped], 0,
-						   tidsize, PCI_DMA_FROMDEVICE);
-				trace_hfi1_exp_rcv_set(uctxt->ctxt,
-						       fd->subctxt,
-						       tid, vaddr,
-						       phys[pmapped],
-						       pages[pmapped]);
-				/*
-				 * Each RcvArray entry is programmed with one
-				 * page * worth of memory. This will handle
-				 * the 8K MTU as well as anything smaller
-				 * due to the fact that both entries in the
-				 * RcvTidPair are programmed with a page.
-				 * PSM currently does not handle anything
-				 * bigger than 8K MTU, so should we even worry
-				 * about 10K here?
-				 */
-				hfi1_put_tid(dd, tid, PT_EXPECTED,
-					     phys[pmapped],
-					     ilog2(tidsize >> PAGE_SHIFT) + 1);
-				pair_size += tidsize >> PAGE_SHIFT;
-				EXP_TID_RESET(tidlist[pairidx], LEN, pair_size);
-				if (!(tid % 2)) {
-					tidlist[pairidx] |=
-					   EXP_TID_SET(IDX,
-						(tid - uctxt->expected_base)
-						       / 2);
-					tidlist[pairidx] |=
-						EXP_TID_SET(CTRL, 1);
-					tidcnt++;
-				} else {
-					tidlist[pairidx] |=
-						EXP_TID_SET(CTRL, 2);
-					pair_size = 0;
-					pairidx++;
-				}
-			}
-			/*
-			 * We've programmed the entire group (or as much of the
-			 * group as we'll use. Now, it's time to push it out...
-			 */
-			flush_wc();
-		}
-		mapped += pinned;
-		atomic_set(&uctxt->tidcursor,
-			   (((useidx & 0xffffff) << 16) |
-			    ((bitidx + bits_used) & 0xffffff)));
-	}
-	trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0, uctxt->tidusemap,
-			       uctxt->tidmapcnt);
-
-done:
-	/* If we've mapped anything, copy relevant info to user */
-	if (mapped) {
-		if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
-				 tidlist, sizeof(tidlist[0]) * tidcnt)) {
-			ret = -EFAULT;
-			goto done;
-		}
-		/* copy TID info to user */
-		if (copy_to_user((void __user *)(unsigned long)tinfo->tidmap,
-				 tidmap, sizeof(tidmap[0]) * uctxt->tidmapcnt))
-			ret = -EFAULT;
-	}
-bail:
-	/*
-	 * Calculate mapped length. New Exp TID protocol does not "unwind" and
-	 * report an error if it can't map the entire buffer. It just reports
-	 * the length that was mapped.
-	 */
-	tinfo->length = mapped * PAGE_SIZE;
-	tinfo->tidcnt = tidcnt;
-	return ret;
-}
-
-static int exp_tid_free(struct file *fp, struct hfi1_tid_info *tinfo)
-{
-	struct hfi1_filedata *fd = fp->private_data;
-	struct hfi1_ctxtdata *uctxt = fd->uctxt;
-	struct hfi1_devdata *dd = uctxt->dd;
-	unsigned long tidmap[uctxt->tidmapcnt];
-	struct page **pages;
-	dma_addr_t *phys;
-	u16 idx, bitidx, tid;
-	int ret = 0;
-
-	if (copy_from_user(&tidmap, (void __user *)(unsigned long)
-			   tinfo->tidmap,
-			   sizeof(tidmap[0]) * uctxt->tidmapcnt)) {
-		ret = -EFAULT;
-		goto done;
-	}
-	for (idx = 0; idx < uctxt->tidmapcnt; idx++) {
-		unsigned long map;
-
-		bitidx = 0;
-		if (!tidmap[idx])
-			continue;
-		map = tidmap[idx];
-		while ((bitidx = tzcnt(map)) < BITS_PER_LONG) {
-			int i, pcount = 0;
-			struct page *pshadow[dd->rcv_entries.group_size];
-			unsigned offset = ((idx * BITS_PER_LONG) + bitidx) *
-				dd->rcv_entries.group_size;
-
-			pages = uctxt->tid_pg_list + offset;
-			phys = uctxt->physshadow + offset;
-			tid = uctxt->expected_base + offset;
-			for (i = 0; i < dd->rcv_entries.group_size;
-			     i++, tid++) {
-				if (pages[i]) {
-					hfi1_put_tid(dd, tid, PT_INVALID,
-						      0, 0);
-					trace_hfi1_exp_rcv_free(uctxt->ctxt,
-								fd->subctxt,
-								tid, phys[i],
-								pages[i]);
-					pci_unmap_page(dd->pcidev, phys[i],
-					      PAGE_SIZE, PCI_DMA_FROMDEVICE);
-					pshadow[pcount] = pages[i];
-					pages[i] = NULL;
-					pcount++;
-					phys[i] = 0;
-				}
-			}
-			flush_wc();
-			hfi1_release_user_pages(pshadow, pcount, true);
-			clear_bit(bitidx, &uctxt->tidusemap[idx]);
-			map &= ~(1ULL<<bitidx);
-		}
-	}
-	trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 1, uctxt->tidusemap,
-			       uctxt->tidmapcnt);
-done:
-	return ret;
-}
-
-static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt)
-{
-	struct hfi1_devdata *dd = uctxt->dd;
-	unsigned tid;
-
-	dd_dev_info(dd, "ctxt %u unlocking any locked expTID pages\n",
-		    uctxt->ctxt);
-	for (tid = 0; tid < uctxt->expected_count; tid++) {
-		struct page *p = uctxt->tid_pg_list[tid];
-		dma_addr_t phys;
-
-		if (!p)
-			continue;
-
-		phys = uctxt->physshadow[tid];
-		uctxt->physshadow[tid] = 0;
-		uctxt->tid_pg_list[tid] = NULL;
-		pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE);
-		hfi1_release_user_pages(&p, 1, true);
-	}
-}
-
 static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
 			 u16 pkey)
 {
@@ -1934,10 +1590,9 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence)
 	return filp->f_pos;
 }
 
-
 /* NOTE: assumes unsigned long is 8 bytes */
 static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
-			loff_t *f_pos)
+		       loff_t *f_pos)
 {
 	struct hfi1_devdata *dd = filp->private_data;
 	void __iomem *base = dd->kregbase;
@@ -1973,12 +1628,12 @@ static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
 		 * them.  These registers are defined as having a read value
 		 * of 0.
 		 */
-		else if (csr_off == ASIC_GPIO_CLEAR
-				|| csr_off == ASIC_GPIO_FORCE
-				|| csr_off == ASIC_QSFP1_CLEAR
-				|| csr_off == ASIC_QSFP1_FORCE
-				|| csr_off == ASIC_QSFP2_CLEAR
-				|| csr_off == ASIC_QSFP2_FORCE)
+		else if (csr_off == ASIC_GPIO_CLEAR ||
+			 csr_off == ASIC_GPIO_FORCE ||
+			 csr_off == ASIC_QSFP1_CLEAR ||
+			 csr_off == ASIC_QSFP1_FORCE ||
+			 csr_off == ASIC_QSFP2_CLEAR ||
+			 csr_off == ASIC_QSFP2_FORCE)
 			data = 0;
 		else if (csr_off >= barlen) {
 			/*
diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c
index 28ae42faa018..3040162cb326 100644
--- a/drivers/staging/rdma/hfi1/firmware.c
+++ b/drivers/staging/rdma/hfi1/firmware.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -77,7 +74,13 @@ static uint fw_8051_load = 1;
 static uint fw_fabric_serdes_load = 1;
 static uint fw_pcie_serdes_load = 1;
 static uint fw_sbus_load = 1;
-static uint platform_config_load = 1;
+
+/*
+ * Access required in platform.c
+ * Maintains state of whether the platform config was fetched via the
+ * fallback option
+ */
+uint platform_config_load;
 
 /* Firmware file names get set in hfi1_firmware_init() based on the above */
 static char *fw_8051_name;
@@ -107,6 +110,7 @@ struct css_header {
 	u32 exponent_size;	/* in DWORDs */
 	u32 reserved[22];
 };
+
 /* expected field values */
 #define CSS_MODULE_TYPE	   0x00000006
 #define CSS_HEADER_LEN	   0x000000a1
@@ -166,6 +170,7 @@ enum fw_state {
 	FW_FINAL,
 	FW_ERR
 };
+
 static enum fw_state fw_state = FW_EMPTY;
 static int fw_err;
 static struct firmware_details fw_8051;
@@ -193,7 +198,7 @@ static const struct firmware *platform_config;
 #define RSA_ENGINE_TIMEOUT 100 /* ms */
 
 /* hardware mutex timeout, in ms */
-#define HM_TIMEOUT 4000 /* 4 s */
+#define HM_TIMEOUT 10 /* ms */
 
 /* 8051 memory access timeout, in us */
 #define DC8051_ACCESS_TIMEOUT 100 /* us */
@@ -233,6 +238,8 @@ static const u8 all_pcie_serdes_broadcast = 0xe0;
 
 /* forwards */
 static void dispose_one_firmware(struct firmware_details *fdet);
+static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
+				       struct firmware_details *fdet);
 
 /*
  * Read a single 64-bit value from 8051 data memory.
@@ -372,8 +379,8 @@ static int invalid_header(struct hfi1_devdata *dd, const char *what,
 		return 0;
 
 	dd_dev_err(dd,
-		"invalid firmware header field %s: expected 0x%x, actual 0x%x\n",
-		what, expected, actual);
+		   "invalid firmware header field %s: expected 0x%x, actual 0x%x\n",
+		   what, expected, actual);
 	return 1;
 }
 
@@ -383,19 +390,19 @@ static int invalid_header(struct hfi1_devdata *dd, const char *what,
 static int verify_css_header(struct hfi1_devdata *dd, struct css_header *css)
 {
 	/* verify CSS header fields (most sizes are in DW, so add /4) */
-	if (invalid_header(dd, "module_type", css->module_type, CSS_MODULE_TYPE)
-			|| invalid_header(dd, "header_len", css->header_len,
-					(sizeof(struct firmware_file)/4))
-			|| invalid_header(dd, "header_version",
-					css->header_version, CSS_HEADER_VERSION)
-			|| invalid_header(dd, "module_vendor",
-					css->module_vendor, CSS_MODULE_VENDOR)
-			|| invalid_header(dd, "key_size",
-					css->key_size, KEY_SIZE/4)
-			|| invalid_header(dd, "modulus_size",
-					css->modulus_size, KEY_SIZE/4)
-			|| invalid_header(dd, "exponent_size",
-					css->exponent_size, EXPONENT_SIZE/4)) {
+	if (invalid_header(dd, "module_type", css->module_type,
+			   CSS_MODULE_TYPE) ||
+	    invalid_header(dd, "header_len", css->header_len,
+			   (sizeof(struct firmware_file) / 4)) ||
+	    invalid_header(dd, "header_version", css->header_version,
+			   CSS_HEADER_VERSION) ||
+	    invalid_header(dd, "module_vendor", css->module_vendor,
+			   CSS_MODULE_VENDOR) ||
+	    invalid_header(dd, "key_size", css->key_size, KEY_SIZE / 4) ||
+	    invalid_header(dd, "modulus_size", css->modulus_size,
+			   KEY_SIZE / 4) ||
+	    invalid_header(dd, "exponent_size", css->exponent_size,
+			   EXPONENT_SIZE / 4)) {
 		return -EINVAL;
 	}
 	return 0;
@@ -410,8 +417,8 @@ static int payload_check(struct hfi1_devdata *dd, const char *name,
 	/* make sure we have some payload */
 	if (prefix_size >= file_size) {
 		dd_dev_err(dd,
-			"firmware \"%s\", size %ld, must be larger than %ld bytes\n",
-			name, file_size, prefix_size);
+			   "firmware \"%s\", size %ld, must be larger than %ld bytes\n",
+			   name, file_size, prefix_size);
 		return -EINVAL;
 	}
 
@@ -433,8 +440,8 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
 
 	ret = request_firmware(&fdet->fw, name, &dd->pcidev->dev);
 	if (ret) {
-		dd_dev_err(dd, "cannot find firmware \"%s\", err %d\n",
-			   name, ret);
+		dd_dev_warn(dd, "cannot find firmware \"%s\", err %d\n",
+			    name, ret);
 		return ret;
 	}
 
@@ -480,14 +487,14 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
 	ret = verify_css_header(dd, css);
 	if (ret) {
 		dd_dev_info(dd, "Invalid CSS header for \"%s\"\n", name);
-	} else if ((css->size*4) == fdet->fw->size) {
+	} else if ((css->size * 4) == fdet->fw->size) {
 		/* non-augmented firmware file */
 		struct firmware_file *ff = (struct firmware_file *)
 							fdet->fw->data;
 
 		/* make sure there are bytes in the payload */
 		ret = payload_check(dd, name, fdet->fw->size,
-						sizeof(struct firmware_file));
+				    sizeof(struct firmware_file));
 		if (ret == 0) {
 			fdet->css_header = css;
 			fdet->modulus = ff->modulus;
@@ -505,14 +512,14 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
 			dd_dev_err(dd, "driver is unable to validate firmware without r2 and mu (not in firmware file)\n");
 			ret = -EINVAL;
 		}
-	} else if ((css->size*4) + AUGMENT_SIZE == fdet->fw->size) {
+	} else if ((css->size * 4) + AUGMENT_SIZE == fdet->fw->size) {
 		/* augmented firmware file */
 		struct augmented_firmware_file *aff =
 			(struct augmented_firmware_file *)fdet->fw->data;
 
 		/* make sure there are bytes in the payload */
 		ret = payload_check(dd, name, fdet->fw->size,
-					sizeof(struct augmented_firmware_file));
+				    sizeof(struct augmented_firmware_file));
 		if (ret == 0) {
 			fdet->css_header = css;
 			fdet->modulus = aff->modulus;
@@ -527,9 +534,10 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
 	} else {
 		/* css->size check failed */
 		dd_dev_err(dd,
-			"invalid firmware header field size: expected 0x%lx or 0x%lx, actual 0x%x\n",
-			fdet->fw->size/4, (fdet->fw->size - AUGMENT_SIZE)/4,
-			css->size);
+			   "invalid firmware header field size: expected 0x%lx or 0x%lx, actual 0x%x\n",
+			   fdet->fw->size / 4,
+			   (fdet->fw->size - AUGMENT_SIZE) / 4,
+			   css->size);
 
 		ret = -EINVAL;
 	}
@@ -572,7 +580,7 @@ retry:
 		 * We tried the original and it failed.  Move to the
 		 * alternate.
 		 */
-		dd_dev_info(dd, "using alternate firmware names\n");
+		dd_dev_warn(dd, "using alternate firmware names\n");
 		/*
 		 * Let others run.  Some systems, when missing firmware, does
 		 * something that holds for 30 seconds.  If we do that twice
@@ -593,27 +601,27 @@ retry:
 		fw_pcie_serdes_name = ALT_FW_PCIE_NAME;
 	}
 
-	if (fw_8051_load) {
-		err = obtain_one_firmware(dd, fw_8051_name, &fw_8051);
+	if (fw_sbus_load) {
+		err = obtain_one_firmware(dd, fw_sbus_name, &fw_sbus);
 		if (err)
 			goto done;
 	}
 
-	if (fw_fabric_serdes_load) {
-		err = obtain_one_firmware(dd, fw_fabric_serdes_name,
-			&fw_fabric);
+	if (fw_pcie_serdes_load) {
+		err = obtain_one_firmware(dd, fw_pcie_serdes_name, &fw_pcie);
 		if (err)
 			goto done;
 	}
 
-	if (fw_sbus_load) {
-		err = obtain_one_firmware(dd, fw_sbus_name, &fw_sbus);
+	if (fw_fabric_serdes_load) {
+		err = obtain_one_firmware(dd, fw_fabric_serdes_name,
+					  &fw_fabric);
 		if (err)
 			goto done;
 	}
 
-	if (fw_pcie_serdes_load) {
-		err = obtain_one_firmware(dd, fw_pcie_serdes_name, &fw_pcie);
+	if (fw_8051_load) {
+		err = obtain_one_firmware(dd, fw_8051_name, &fw_8051);
 		if (err)
 			goto done;
 	}
@@ -621,16 +629,18 @@ retry:
 done:
 	if (err) {
 		/* oops, had problems obtaining a firmware */
-		if (fw_state == FW_EMPTY) {
-			/* retry with alternate */
+		if (fw_state == FW_EMPTY && dd->icode == ICODE_RTL_SILICON) {
+			/* retry with alternate (RTL only) */
 			fw_state = FW_TRY;
 			goto retry;
 		}
+		dd_dev_err(dd, "unable to obtain working firmware\n");
 		fw_state = FW_ERR;
 		fw_err = -ENOENT;
 	} else {
 		/* success */
-		if (fw_state == FW_EMPTY)
+		if (fw_state == FW_EMPTY &&
+		    dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
 			fw_state = FW_TRY;	/* may retry later */
 		else
 			fw_state = FW_FINAL;	/* cannot try again */
@@ -673,10 +683,15 @@ static int obtain_firmware(struct hfi1_devdata *dd)
 	}
 	/* not in FW_TRY state */
 
-	if (fw_state == FW_FINAL)
+	if (fw_state == FW_FINAL) {
+		if (platform_config) {
+			dd->platform_config.data = platform_config->data;
+			dd->platform_config.size = platform_config->size;
+		}
 		goto done;	/* already acquired */
-	else if (fw_state == FW_ERR)
+	} else if (fw_state == FW_ERR) {
 		goto done;	/* already tried and failed */
+	}
 	/* fw_state is FW_EMPTY */
 
 	/* set fw_state to FW_TRY, FW_FINAL, or FW_ERR, and fw_err */
@@ -685,9 +700,13 @@ static int obtain_firmware(struct hfi1_devdata *dd)
 	if (platform_config_load) {
 		platform_config = NULL;
 		err = request_firmware(&platform_config, platform_config_name,
-						&dd->pcidev->dev);
-		if (err)
+				       &dd->pcidev->dev);
+		if (err) {
 			platform_config = NULL;
+			goto done;
+		}
+		dd->platform_config.data = platform_config->data;
+		dd->platform_config.size = platform_config->size;
 	}
 
 done:
@@ -761,7 +780,7 @@ static int retry_firmware(struct hfi1_devdata *dd, int load_result)
 static void write_rsa_data(struct hfi1_devdata *dd, int what,
 			   const u8 *data, int nbytes)
 {
-	int qw_size = nbytes/8;
+	int qw_size = nbytes / 8;
 	int i;
 
 	if (((unsigned long)data & 0x7) == 0) {
@@ -769,14 +788,14 @@ static void write_rsa_data(struct hfi1_devdata *dd, int what,
 		u64 *ptr = (u64 *)data;
 
 		for (i = 0; i < qw_size; i++, ptr++)
-			write_csr(dd, what + (8*i), *ptr);
+			write_csr(dd, what + (8 * i), *ptr);
 	} else {
 		/* not aligned */
 		for (i = 0; i < qw_size; i++, data += 8) {
 			u64 value;
 
 			memcpy(&value, data, 8);
-			write_csr(dd, what + (8*i), value);
+			write_csr(dd, what + (8 * i), value);
 		}
 	}
 }
@@ -789,7 +808,7 @@ static void write_streamed_rsa_data(struct hfi1_devdata *dd, int what,
 				    const u8 *data, int nbytes)
 {
 	u64 *ptr = (u64 *)data;
-	int qw_size = nbytes/8;
+	int qw_size = nbytes / 8;
 
 	for (; qw_size > 0; qw_size--, ptr++)
 		write_csr(dd, what, *ptr);
@@ -822,7 +841,7 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who,
 			     >> MISC_CFG_FW_CTRL_RSA_STATUS_SHIFT;
 	if (status != RSA_STATUS_IDLE) {
 		dd_dev_err(dd, "%s security engine not idle - giving up\n",
-			who);
+			   who);
 		return -EBUSY;
 	}
 
@@ -859,7 +878,7 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who,
 		if (status == RSA_STATUS_IDLE) {
 			/* should not happen */
 			dd_dev_err(dd, "%s firmware security bad idle state\n",
-				who);
+				   who);
 			ret = -EINVAL;
 			break;
 		} else if (status == RSA_STATUS_DONE) {
@@ -893,19 +912,20 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who,
 	 * is not keeping the error high.
 	 */
 	write_csr(dd, MISC_ERR_CLEAR,
-			MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK
-			| MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK);
+		  MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK |
+		  MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK);
 	/*
-	 * All that is left are the current errors.  Print failure details,
-	 * if any.
+	 * All that is left are the current errors.  Print warnings on
+	 * authorization failure details, if any.  Firmware authorization
+	 * can be retried, so these are only warnings.
 	 */
 	reg = read_csr(dd, MISC_ERR_STATUS);
 	if (ret) {
 		if (reg & MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK)
-			dd_dev_err(dd, "%s firmware authorization failed\n",
-				who);
+			dd_dev_warn(dd, "%s firmware authorization failed\n",
+				    who);
 		if (reg & MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK)
-			dd_dev_err(dd, "%s firmware key mismatch\n", who);
+			dd_dev_warn(dd, "%s firmware key mismatch\n", who);
 	}
 
 	return ret;
@@ -922,7 +942,8 @@ static void load_security_variables(struct hfi1_devdata *dd,
 	write_rsa_data(dd, MISC_CFG_RSA_MU, fdet->mu, MU_SIZE);
 	/* Security variables d.  Write the header */
 	write_streamed_rsa_data(dd, MISC_CFG_SHA_PRELOAD,
-			(u8 *)fdet->css_header, sizeof(struct css_header));
+				(u8 *)fdet->css_header,
+				sizeof(struct css_header));
 }
 
 /* return the 8051 firmware state */
@@ -1002,7 +1023,7 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
 
 	/* Firmware load steps 3-5 */
 	ret = write_8051(dd, 1/*code*/, 0, fdet->firmware_ptr,
-							fdet->firmware_len);
+			 fdet->firmware_len);
 	if (ret)
 		return ret;
 
@@ -1029,13 +1050,13 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
 	ret = wait_fm_ready(dd, TIMEOUT_8051_START);
 	if (ret) { /* timed out */
 		dd_dev_err(dd, "8051 start timeout, current state 0x%x\n",
-			get_firmware_state(dd));
+			   get_firmware_state(dd));
 		return -ETIMEDOUT;
 	}
 
 	read_misc_status(dd, &ver_a, &ver_b);
 	dd_dev_info(dd, "8051 firmware version %d.%d\n",
-		(int)ver_b, (int)ver_a);
+		    (int)ver_b, (int)ver_a);
 	dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
 
 	return 0;
@@ -1050,11 +1071,11 @@ void sbus_request(struct hfi1_devdata *dd,
 		  u8 receiver_addr, u8 data_addr, u8 command, u32 data_in)
 {
 	write_csr(dd, ASIC_CFG_SBUS_REQUEST,
-		((u64)data_in << ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT)
-		| ((u64)command << ASIC_CFG_SBUS_REQUEST_COMMAND_SHIFT)
-		| ((u64)data_addr << ASIC_CFG_SBUS_REQUEST_DATA_ADDR_SHIFT)
-		| ((u64)receiver_addr
-			<< ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT));
+		  ((u64)data_in << ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT) |
+		  ((u64)command << ASIC_CFG_SBUS_REQUEST_COMMAND_SHIFT) |
+		  ((u64)data_addr << ASIC_CFG_SBUS_REQUEST_DATA_ADDR_SHIFT) |
+		  ((u64)receiver_addr <<
+		   ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT));
 }
 
 /*
@@ -1072,14 +1093,14 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags)
 		return;
 
 	dd_dev_info(dd, "Turning off spicos:%s%s\n",
-		flags & SPICO_SBUS ? " SBus" : "",
-		flags & SPICO_FABRIC ? " fabric" : "");
+		    flags & SPICO_SBUS ? " SBus" : "",
+		    flags & SPICO_FABRIC ? " fabric" : "");
 
 	write_csr(dd, MISC_CFG_FW_CTRL, ENABLE_SPICO_SMASK);
 	/* disable SBus spico */
 	if (flags & SPICO_SBUS)
 		sbus_request(dd, SBUS_MASTER_BROADCAST, 0x01,
-			WRITE_SBUS_RECEIVER, 0x00000040);
+			     WRITE_SBUS_RECEIVER, 0x00000040);
 
 	/* disable the fabric serdes spicos */
 	if (flags & SPICO_FABRIC)
@@ -1089,29 +1110,60 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags)
 }
 
 /*
- *  Reset all of the fabric serdes for our HFI.
+ * Reset all of the fabric serdes for this HFI in preparation to take the
+ * link to Polling.
+ *
+ * To do a reset, we need to write to to the serdes registers.  Unfortunately,
+ * the fabric serdes download to the other HFI on the ASIC will have turned
+ * off the firmware validation on this HFI.  This means we can't write to the
+ * registers to reset the serdes.  Work around this by performing a complete
+ * re-download and validation of the fabric serdes firmware.  This, as a
+ * by-product, will reset the serdes.  NOTE: the re-download requires that
+ * the 8051 be in the Offline state.  I.e. not actively trying to use the
+ * serdes.  This routine is called at the point where the link is Offline and
+ * is getting ready to go to Polling.
  */
 void fabric_serdes_reset(struct hfi1_devdata *dd)
 {
-	u8 ra;
+	int ret;
 
-	if (dd->icode != ICODE_RTL_SILICON) /* only for RTL */
+	if (!fw_fabric_serdes_load)
 		return;
 
-	ra = fabric_serdes_broadcast[dd->hfi1_id];
-
-	acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+	if (ret) {
+		dd_dev_err(dd,
+			   "Cannot acquire SBus resource to reset fabric SerDes - perhaps you should reboot\n");
+		return;
+	}
 	set_sbus_fast_mode(dd);
-	/* place SerDes in reset and disable SPICO */
-	sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000011);
-	/* wait 100 refclk cycles @ 156.25MHz => 640ns */
-	udelay(1);
-	/* remove SerDes reset */
-	sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000010);
-	/* turn SPICO enable on */
-	sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000002);
+
+	if (is_ax(dd)) {
+		/* A0 serdes do not work with a re-download */
+		u8 ra = fabric_serdes_broadcast[dd->hfi1_id];
+
+		/* place SerDes in reset and disable SPICO */
+		sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000011);
+		/* wait 100 refclk cycles @ 156.25MHz => 640ns */
+		udelay(1);
+		/* remove SerDes reset */
+		sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000010);
+		/* turn SPICO enable on */
+		sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000002);
+	} else {
+		turn_off_spicos(dd, SPICO_FABRIC);
+		/*
+		 * No need for firmware retry - what to download has already
+		 * been decided.
+		 * No need to pay attention to the load return - the only
+		 * failure is a validation failure, which has already been
+		 * checked by the initial download.
+		 */
+		(void)load_fabric_serdes_firmware(dd, &fw_fabric);
+	}
+
 	clear_sbus_fast_mode(dd);
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_SBUS);
 }
 
 /* Access to the SBus in this routine should probably be serialized */
@@ -1120,6 +1172,9 @@ int sbus_request_slow(struct hfi1_devdata *dd,
 {
 	u64 reg, count = 0;
 
+	/* make sure fast mode is clear */
+	clear_sbus_fast_mode(dd);
+
 	sbus_request(dd, receiver_addr, data_addr, command, data_in);
 	write_csr(dd, ASIC_CFG_SBUS_EXECUTE,
 		  ASIC_CFG_SBUS_EXECUTE_EXECUTE_SMASK);
@@ -1177,7 +1232,7 @@ static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
 	/* step 5: download SerDes machine code */
 	for (i = 0; i < fdet->firmware_len; i += 4) {
 		sbus_request(dd, ra, 0x0a, WRITE_SBUS_RECEIVER,
-					*(u32 *)&fdet->firmware_ptr[i]);
+			     *(u32 *)&fdet->firmware_ptr[i]);
 	}
 	/* step 6: IMEM override off */
 	sbus_request(dd, ra, 0x00, WRITE_SBUS_RECEIVER, 0x00000000);
@@ -1216,7 +1271,7 @@ static int load_sbus_firmware(struct hfi1_devdata *dd,
 	/* step 5: download the SBus Master machine code */
 	for (i = 0; i < fdet->firmware_len; i += 4) {
 		sbus_request(dd, ra, 0x14, WRITE_SBUS_RECEIVER,
-					*(u32 *)&fdet->firmware_ptr[i]);
+			     *(u32 *)&fdet->firmware_ptr[i]);
 	}
 	/* step 6: set IMEM_CNTL_EN off */
 	sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000040);
@@ -1249,19 +1304,23 @@ static int load_pcie_serdes_firmware(struct hfi1_devdata *dd,
 	/* step 3: enable XDMEM access */
 	sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000d40);
 	/* step 4: load firmware into SBus Master XDMEM */
-	/* NOTE: the dmem address, write_en, and wdata are all pre-packed,
-	   we only need to pick up the bytes and write them */
+	/*
+	 * NOTE: the dmem address, write_en, and wdata are all pre-packed,
+	 * we only need to pick up the bytes and write them
+	 */
 	for (i = 0; i < fdet->firmware_len; i += 4) {
 		sbus_request(dd, ra, 0x04, WRITE_SBUS_RECEIVER,
-					*(u32 *)&fdet->firmware_ptr[i]);
+			     *(u32 *)&fdet->firmware_ptr[i]);
 	}
 	/* step 5: disable XDMEM access */
 	sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000140);
 	/* step 6: allow SBus Spico to run */
 	sbus_request(dd, ra, 0x05, WRITE_SBUS_RECEIVER, 0x00000000);
 
-	/* steps 7-11: run RSA, if it succeeds, firmware is available to
-	   be swapped */
+	/*
+	 * steps 7-11: run RSA, if it succeeds, firmware is available to
+	 * be swapped
+	 */
 	return run_rsa(dd, "PCIe serdes", fdet->signature);
 }
 
@@ -1285,7 +1344,7 @@ static void set_serdes_broadcast(struct hfi1_devdata *dd, u8 bg1, u8 bg2,
 		 *	23:16	BROADCAST_GROUP_2 (default 0xff)
 		 */
 		sbus_request(dd, addrs[count], 0xfd, WRITE_SBUS_RECEIVER,
-				(u32)bg1 << 4 | (u32)bg2 << 16);
+			     (u32)bg1 << 4 | (u32)bg2 << 16);
 	}
 }
 
@@ -1310,8 +1369,8 @@ retry:
 
 	/* timed out */
 	dd_dev_err(dd,
-		"Unable to acquire hardware mutex, mutex mask %u, my mask %u (%s)\n",
-		(u32)user, (u32)mask, (try == 0) ? "retrying" : "giving up");
+		   "Unable to acquire hardware mutex, mutex mask %u, my mask %u (%s)\n",
+		   (u32)user, (u32)mask, (try == 0) ? "retrying" : "giving up");
 
 	if (try == 0) {
 		/* break mutex and retry */
@@ -1328,10 +1387,197 @@ void release_hw_mutex(struct hfi1_devdata *dd)
 	write_csr(dd, ASIC_CFG_MUTEX, 0);
 }
 
+/* return the given resource bit(s) as a mask for the given HFI */
+static inline u64 resource_mask(u32 hfi1_id, u32 resource)
+{
+	return ((u64)resource) << (hfi1_id ? CR_DYN_SHIFT : 0);
+}
+
+static void fail_mutex_acquire_message(struct hfi1_devdata *dd,
+				       const char *func)
+{
+	dd_dev_err(dd,
+		   "%s: hardware mutex stuck - suggest rebooting the machine\n",
+		   func);
+}
+
+/*
+ * Acquire access to a chip resource.
+ *
+ * Return 0 on success, -EBUSY if resource busy, -EIO if mutex acquire failed.
+ */
+static int __acquire_chip_resource(struct hfi1_devdata *dd, u32 resource)
+{
+	u64 scratch0, all_bits, my_bit;
+	int ret;
+
+	if (resource & CR_DYN_MASK) {
+		/* a dynamic resource is in use if either HFI has set the bit */
+		all_bits = resource_mask(0, resource) |
+						resource_mask(1, resource);
+		my_bit = resource_mask(dd->hfi1_id, resource);
+	} else {
+		/* non-dynamic resources are not split between HFIs */
+		all_bits = resource;
+		my_bit = resource;
+	}
+
+	/* lock against other callers within the driver wanting a resource */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+	ret = acquire_hw_mutex(dd);
+	if (ret) {
+		fail_mutex_acquire_message(dd, __func__);
+		ret = -EIO;
+		goto done;
+	}
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	if (scratch0 & all_bits) {
+		ret = -EBUSY;
+	} else {
+		write_csr(dd, ASIC_CFG_SCRATCH, scratch0 | my_bit);
+		/* force write to be visible to other HFI on another OS */
+		(void)read_csr(dd, ASIC_CFG_SCRATCH);
+	}
+
+	release_hw_mutex(dd);
+
+done:
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+	return ret;
+}
+
+/*
+ * Acquire access to a chip resource, wait up to mswait milliseconds for
+ * the resource to become available.
+ *
+ * Return 0 on success, -EBUSY if busy (even after wait), -EIO if mutex
+ * acquire failed.
+ */
+int acquire_chip_resource(struct hfi1_devdata *dd, u32 resource, u32 mswait)
+{
+	unsigned long timeout;
+	int ret;
+
+	timeout = jiffies + msecs_to_jiffies(mswait);
+	while (1) {
+		ret = __acquire_chip_resource(dd, resource);
+		if (ret != -EBUSY)
+			return ret;
+		/* resource is busy, check our timeout */
+		if (time_after_eq(jiffies, timeout))
+			return -EBUSY;
+		usleep_range(80, 120);	/* arbitrary delay */
+	}
+}
+
+/*
+ * Release access to a chip resource
+ */
+void release_chip_resource(struct hfi1_devdata *dd, u32 resource)
+{
+	u64 scratch0, bit;
+
+	/* only dynamic resources should ever be cleared */
+	if (!(resource & CR_DYN_MASK)) {
+		dd_dev_err(dd, "%s: invalid resource 0x%x\n", __func__,
+			   resource);
+		return;
+	}
+	bit = resource_mask(dd->hfi1_id, resource);
+
+	/* lock against other callers within the driver wanting a resource */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+	if (acquire_hw_mutex(dd)) {
+		fail_mutex_acquire_message(dd, __func__);
+		goto done;
+	}
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	if ((scratch0 & bit) != 0) {
+		scratch0 &= ~bit;
+		write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+		/* force write to be visible to other HFI on another OS */
+		(void)read_csr(dd, ASIC_CFG_SCRATCH);
+	} else {
+		dd_dev_warn(dd, "%s: id %d, resource 0x%x: bit not set\n",
+			    __func__, dd->hfi1_id, resource);
+	}
+
+	release_hw_mutex(dd);
+
+done:
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+}
+
+/*
+ * Return true if resource is set, false otherwise.  Print a warning
+ * if not set and a function is supplied.
+ */
+bool check_chip_resource(struct hfi1_devdata *dd, u32 resource,
+			 const char *func)
+{
+	u64 scratch0, bit;
+
+	if (resource & CR_DYN_MASK)
+		bit = resource_mask(dd->hfi1_id, resource);
+	else
+		bit = resource;
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	if ((scratch0 & bit) == 0) {
+		if (func)
+			dd_dev_warn(dd,
+				    "%s: id %d, resource 0x%x, not acquired!\n",
+				    func, dd->hfi1_id, resource);
+		return false;
+	}
+	return true;
+}
+
+static void clear_chip_resources(struct hfi1_devdata *dd, const char *func)
+{
+	u64 scratch0;
+
+	/* lock against other callers within the driver wanting a resource */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+	if (acquire_hw_mutex(dd)) {
+		fail_mutex_acquire_message(dd, func);
+		goto done;
+	}
+
+	/* clear all dynamic access bits for this HFI */
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	scratch0 &= ~resource_mask(dd->hfi1_id, CR_DYN_MASK);
+	write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+	/* force write to be visible to other HFI on another OS */
+	(void)read_csr(dd, ASIC_CFG_SCRATCH);
+
+	release_hw_mutex(dd);
+
+done:
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+}
+
+void init_chip_resources(struct hfi1_devdata *dd)
+{
+	/* clear any holds left by us */
+	clear_chip_resources(dd, __func__);
+}
+
+void finish_chip_resources(struct hfi1_devdata *dd)
+{
+	/* clear any holds left by us */
+	clear_chip_resources(dd, __func__);
+}
+
 void set_sbus_fast_mode(struct hfi1_devdata *dd)
 {
 	write_csr(dd, ASIC_CFG_SBUS_EXECUTE,
-				ASIC_CFG_SBUS_EXECUTE_FAST_MODE_SMASK);
+		  ASIC_CFG_SBUS_EXECUTE_FAST_MODE_SMASK);
 }
 
 void clear_sbus_fast_mode(struct hfi1_devdata *dd)
@@ -1354,23 +1600,23 @@ int load_firmware(struct hfi1_devdata *dd)
 	int ret;
 
 	if (fw_fabric_serdes_load) {
-		ret = acquire_hw_mutex(dd);
+		ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
 		if (ret)
 			return ret;
 
 		set_sbus_fast_mode(dd);
 
 		set_serdes_broadcast(dd, all_fabric_serdes_broadcast,
-				fabric_serdes_broadcast[dd->hfi1_id],
-				fabric_serdes_addrs[dd->hfi1_id],
-				NUM_FABRIC_SERDES);
+				     fabric_serdes_broadcast[dd->hfi1_id],
+				     fabric_serdes_addrs[dd->hfi1_id],
+				     NUM_FABRIC_SERDES);
 		turn_off_spicos(dd, SPICO_FABRIC);
 		do {
 			ret = load_fabric_serdes_firmware(dd, &fw_fabric);
 		} while (retry_firmware(dd, ret));
 
 		clear_sbus_fast_mode(dd);
-		release_hw_mutex(dd);
+		release_chip_resource(dd, CR_SBUS);
 		if (ret)
 			return ret;
 	}
@@ -1419,18 +1665,57 @@ int hfi1_firmware_init(struct hfi1_devdata *dd)
 	return obtain_firmware(dd);
 }
 
+/*
+ * This function is a helper function for parse_platform_config(...) and
+ * does not check for validity of the platform configuration cache
+ * (because we know it is invalid as we are building up the cache).
+ * As such, this should not be called from anywhere other than
+ * parse_platform_config
+ */
+static int check_meta_version(struct hfi1_devdata *dd, u32 *system_table)
+{
+	u32 meta_ver, meta_ver_meta, ver_start, ver_len, mask;
+	struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
+
+	if (!system_table)
+		return -EINVAL;
+
+	meta_ver_meta =
+	*(pcfgcache->config_tables[PLATFORM_CONFIG_SYSTEM_TABLE].table_metadata
+	+ SYSTEM_TABLE_META_VERSION);
+
+	mask = ((1 << METADATA_TABLE_FIELD_START_LEN_BITS) - 1);
+	ver_start = meta_ver_meta & mask;
+
+	meta_ver_meta >>= METADATA_TABLE_FIELD_LEN_SHIFT;
+
+	mask = ((1 << METADATA_TABLE_FIELD_LEN_LEN_BITS) - 1);
+	ver_len = meta_ver_meta & mask;
+
+	ver_start /= 8;
+	meta_ver = *((u8 *)system_table + ver_start) & ((1 << ver_len) - 1);
+
+	if (meta_ver < 5) {
+		dd_dev_info(
+			dd, "%s:Please update platform config\n", __func__);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 int parse_platform_config(struct hfi1_devdata *dd)
 {
 	struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
 	u32 *ptr = NULL;
-	u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0;
+	u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0, file_length = 0;
 	u32 record_idx = 0, table_type = 0, table_length_dwords = 0;
+	int ret = -EINVAL; /* assume failure */
 
-	if (platform_config == NULL) {
+	if (!dd->platform_config.data) {
 		dd_dev_info(dd, "%s: Missing config file\n", __func__);
 		goto bail;
 	}
-	ptr = (u32 *)platform_config->data;
+	ptr = (u32 *)dd->platform_config.data;
 
 	magic_num = *ptr;
 	ptr++;
@@ -1439,12 +1724,32 @@ int parse_platform_config(struct hfi1_devdata *dd)
 		goto bail;
 	}
 
-	while (ptr < (u32 *)(platform_config->data + platform_config->size)) {
+	/* Field is file size in DWORDs */
+	file_length = (*ptr) * 4;
+	ptr++;
+
+	if (file_length > dd->platform_config.size) {
+		dd_dev_info(dd, "%s:File claims to be larger than read size\n",
+			    __func__);
+		goto bail;
+	} else if (file_length < dd->platform_config.size) {
+		dd_dev_info(dd,
+			    "%s:File claims to be smaller than read size, continuing\n",
+			    __func__);
+	}
+	/* exactly equal, perfection */
+
+	/*
+	 * In both cases where we proceed, using the self-reported file length
+	 * is the safer option
+	 */
+	while (ptr < (u32 *)(dd->platform_config.data + file_length)) {
 		header1 = *ptr;
 		header2 = *(ptr + 1);
 		if (header1 != ~header2) {
 			dd_dev_info(dd, "%s: Failed validation at offset %ld\n",
-				__func__, (ptr - (u32 *)platform_config->data));
+				    __func__, (ptr - (u32 *)
+					       dd->platform_config.data));
 			goto bail;
 		}
 
@@ -1467,6 +1772,9 @@ int parse_platform_config(struct hfi1_devdata *dd)
 			case PLATFORM_CONFIG_SYSTEM_TABLE:
 				pcfgcache->config_tables[table_type].num_table =
 									1;
+				ret = check_meta_version(dd, ptr);
+				if (ret)
+					goto bail;
 				break;
 			case PLATFORM_CONFIG_PORT_TABLE:
 				pcfgcache->config_tables[table_type].num_table =
@@ -1484,9 +1792,10 @@ int parse_platform_config(struct hfi1_devdata *dd)
 				break;
 			default:
 				dd_dev_info(dd,
-				      "%s: Unknown data table %d, offset %ld\n",
-					__func__, table_type,
-				       (ptr - (u32 *)platform_config->data));
+					    "%s: Unknown data table %d, offset %ld\n",
+					    __func__, table_type,
+					    (ptr - (u32 *)
+					     dd->platform_config.data));
 				goto bail; /* We don't trust this file now */
 			}
 			pcfgcache->config_tables[table_type].table = ptr;
@@ -1507,9 +1816,10 @@ int parse_platform_config(struct hfi1_devdata *dd)
 				break;
 			default:
 				dd_dev_info(dd,
-				  "%s: Unknown metadata table %d, offset %ld\n",
-				  __func__, table_type,
-				  (ptr - (u32 *)platform_config->data));
+					    "%s: Unknown meta table %d, offset %ld\n",
+					    __func__, table_type,
+					    (ptr -
+					     (u32 *)dd->platform_config.data));
 				goto bail; /* We don't trust this file now */
 			}
 			pcfgcache->config_tables[table_type].table_metadata =
@@ -1518,14 +1828,16 @@ int parse_platform_config(struct hfi1_devdata *dd)
 
 		/* Calculate and check table crc */
 		crc = crc32_le(~(u32)0, (unsigned char const *)ptr,
-				(table_length_dwords * 4));
+			       (table_length_dwords * 4));
 		crc ^= ~(u32)0;
 
 		/* Jump the table */
 		ptr += table_length_dwords;
 		if (crc != *ptr) {
 			dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n",
-				__func__, (ptr - (u32 *)platform_config->data));
+				    __func__, (ptr -
+					       (u32 *)
+					       dd->platform_config.data));
 			goto bail;
 		}
 		/* Jump the CRC DWORD */
@@ -1536,11 +1848,12 @@ int parse_platform_config(struct hfi1_devdata *dd)
 	return 0;
 bail:
 	memset(pcfgcache, 0, sizeof(struct platform_config_cache));
-	return -EINVAL;
+	return ret;
 }
 
 static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
-		int field, u32 *field_len_bits, u32 *field_start_bits)
+					  int field, u32 *field_len_bits,
+					  u32 *field_start_bits)
 {
 	struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
 	u32 *src_ptr = NULL;
@@ -1600,8 +1913,9 @@ static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
  * @len: length of memory pointed by @data in bytes.
  */
 int get_platform_config_field(struct hfi1_devdata *dd,
-			enum platform_config_table_type_encoding table_type,
-			int table_index, int field_index, u32 *data, u32 len)
+			      enum platform_config_table_type_encoding
+			      table_type, int table_index, int field_index,
+			      u32 *data, u32 len)
 {
 	int ret = 0, wlen = 0, seek = 0;
 	u32 field_len_bits = 0, field_start_bits = 0, *src_ptr = NULL;
@@ -1613,7 +1927,8 @@ int get_platform_config_field(struct hfi1_devdata *dd,
 		return -EINVAL;
 
 	ret = get_platform_fw_field_metadata(dd, table_type, field_index,
-					&field_len_bits, &field_start_bits);
+					     &field_len_bits,
+					     &field_start_bits);
 	if (ret)
 		return -EINVAL;
 
@@ -1629,19 +1944,21 @@ int get_platform_config_field(struct hfi1_devdata *dd,
 			if (len < field_len_bits)
 				return -EINVAL;
 
-			seek = field_start_bits/8;
-			wlen = field_len_bits/8;
+			seek = field_start_bits / 8;
+			wlen = field_len_bits / 8;
 
 			src_ptr = (u32 *)((u8 *)src_ptr + seek);
 
-			/* We expect the field to be byte aligned and whole byte
-			 * lengths if we are here */
+			/*
+			 * We expect the field to be byte aligned and whole byte
+			 * lengths if we are here
+			 */
 			memcpy(data, src_ptr, wlen);
 			return 0;
 		}
 		break;
 	case PLATFORM_CONFIG_PORT_TABLE:
-		/* Port table is 4 DWORDS in META_VERSION 0 */
+		/* Port table is 4 DWORDS */
 		src_ptr = dd->hfi1_id ?
 			pcfgcache->config_tables[table_type].table + 4 :
 			pcfgcache->config_tables[table_type].table;
@@ -1669,7 +1986,7 @@ int get_platform_config_field(struct hfi1_devdata *dd,
 	if (!src_ptr || len < field_len_bits)
 		return -EINVAL;
 
-	src_ptr += (field_start_bits/32);
+	src_ptr += (field_start_bits / 32);
 	*data = (*src_ptr >> (field_start_bits % 32)) &
 			((1 << field_len_bits) - 1);
 
@@ -1680,7 +1997,7 @@ int get_platform_config_field(struct hfi1_devdata *dd,
  * Download the firmware needed for the Gen3 PCIe SerDes.  An update
  * to the SBus firmware is needed before updating the PCIe firmware.
  *
- * Note: caller must be holding the HW mutex.
+ * Note: caller must be holding the SBus resource.
  */
 int load_pcie_firmware(struct hfi1_devdata *dd)
 {
@@ -1701,9 +2018,9 @@ int load_pcie_firmware(struct hfi1_devdata *dd)
 	if (fw_pcie_serdes_load) {
 		dd_dev_info(dd, "Setting PCIe SerDes broadcast\n");
 		set_serdes_broadcast(dd, all_pcie_serdes_broadcast,
-					pcie_serdes_broadcast[dd->hfi1_id],
-					pcie_serdes_addrs[dd->hfi1_id],
-					NUM_PCIE_SERDES);
+				     pcie_serdes_broadcast[dd->hfi1_id],
+				     pcie_serdes_addrs[dd->hfi1_id],
+				     NUM_PCIE_SERDES);
 		do {
 			ret = load_pcie_serdes_firmware(dd, &fw_pcie);
 		} while (retry_firmware(dd, ret));
@@ -1724,9 +2041,9 @@ void read_guid(struct hfi1_devdata *dd)
 {
 	/* Take the DC out of reset to get a valid GUID value */
 	write_csr(dd, CCE_DC_CTRL, 0);
-	(void) read_csr(dd, CCE_DC_CTRL);
+	(void)read_csr(dd, CCE_DC_CTRL);
 
 	dd->base_guid = read_csr(dd, DC_DC8051_CFG_LOCAL_GUID);
 	dd_dev_info(dd, "GUID %llx",
-		(unsigned long long)dd->base_guid);
+		    (unsigned long long)dd->base_guid);
 }
diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h
index 2611bb2e764d..16cbdc4073e0 100644
--- a/drivers/staging/rdma/hfi1/hfi.h
+++ b/drivers/staging/rdma/hfi1/hfi.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_KERNEL_H
 #define _HFI1_KERNEL_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -65,6 +62,7 @@
 #include <linux/cdev.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
+#include <rdma/rdma_vt.h>
 
 #include "chip_registers.h"
 #include "common.h"
@@ -73,7 +71,8 @@
 #include "chip.h"
 #include "mad.h"
 #include "qsfp.h"
-#include "platform_config.h"
+#include "platform.h"
+#include "affinity.h"
 
 /* bumped 1 from s/w major version of TrueScale */
 #define HFI1_CHIP_VERS_MAJ 3U
@@ -98,6 +97,8 @@ extern unsigned long hfi1_cap_mask;
 #define HFI1_CAP_IS_USET(cap) (!!HFI1_CAP_UGET(cap))
 #define HFI1_MISC_GET() ((hfi1_cap_mask >> HFI1_CAP_MISC_SHIFT) & \
 			HFI1_CAP_MISC_MASK)
+/* Offline Disabled Reason is 4-bits */
+#define HFI1_ODR_MASK(rsn) ((rsn) & OPA_PI_MASK_OFFLINE_REASON)
 
 /*
  * Control context is always 0 and handles the error packets.
@@ -177,6 +178,11 @@ struct ctxt_eager_bufs {
 	} *rcvtids;
 };
 
+struct exp_tid_set {
+	struct list_head list;
+	u32 count;
+};
+
 struct hfi1_ctxtdata {
 	/* shadow the ctxt's RcvCtrl register */
 	u64 rcvctrl;
@@ -233,20 +239,13 @@ struct hfi1_ctxtdata {
 	u32 expected_count;
 	/* index of first expected TID entry. */
 	u32 expected_base;
-	/* cursor into the exp group sets */
-	atomic_t tidcursor;
-	/* number of exp TID groups assigned to the ctxt */
-	u16 numtidgroups;
-	/* size of exp TID group fields in tidusemap */
-	u16 tidmapcnt;
-	/* exp TID group usage bitfield array */
-	unsigned long *tidusemap;
-	/* pinned pages for exp sends, allocated at open */
-	struct page **tid_pg_list;
-	/* dma handles for exp tid pages */
-	dma_addr_t *physshadow;
+
+	struct exp_tid_set tid_group_list;
+	struct exp_tid_set tid_used_list;
+	struct exp_tid_set tid_full_list;
+
 	/* lock protecting all Expected TID data */
-	spinlock_t exp_lock;
+	struct mutex exp_lock;
 	/* number of pio bufs for this ctxt (all procs, if shared) */
 	u32 piocnt;
 	/* first pio buffer for this ctxt */
@@ -311,8 +310,24 @@ struct hfi1_ctxtdata {
 	 */
 	struct task_struct *progress;
 	struct list_head sdma_queues;
+	/* protect sdma queues */
 	spinlock_t sdma_qlock;
 
+	/* Is ASPM interrupt supported for this context */
+	bool aspm_intr_supported;
+	/* ASPM state (enabled/disabled) for this context */
+	bool aspm_enabled;
+	/* Timer for re-enabling ASPM if interrupt activity quietens down */
+	struct timer_list aspm_timer;
+	/* Lock to serialize between intr, timer intr and user threads */
+	spinlock_t aspm_lock;
+	/* Is ASPM processing enabled for this context (in intr context) */
+	bool aspm_intr_enable;
+	/* Last interrupt timestamp */
+	ktime_t aspm_ts_last_intr;
+	/* Last timestamp at which we scheduled a timer for this context */
+	ktime_t aspm_ts_timer_sched;
+
 	/*
 	 * The interrupt handler for a particular receive context can vary
 	 * throughout it's lifetime. This is not a lock protected data member so
@@ -335,7 +350,7 @@ struct hfi1_packet {
 	void *hdr;
 	struct hfi1_ctxtdata *rcd;
 	__le32 *rhf_addr;
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
 	struct hfi1_other_headers *ohdr;
 	u64 rhf;
 	u32 maxcnt;
@@ -363,6 +378,7 @@ struct hfi1_snoop_data {
 	int mode_flag;
 	struct cdev cdev;
 	struct device *class_dev;
+	/* protect snoop data */
 	spinlock_t snoop_lock;
 	struct list_head queue;
 	wait_queue_head_t waitq;
@@ -375,7 +391,7 @@ struct hfi1_snoop_data {
 #define HFI1_PORT_SNOOP_MODE     1U
 #define HFI1_PORT_CAPTURE_MODE   2U
 
-struct hfi1_sge_state;
+struct rvt_sge_state;
 
 /*
  * Get/Set IB link-level config parameters for f_get/set_ib_cfg()
@@ -424,17 +440,17 @@ struct hfi1_sge_state;
 #define __HLS_GOING_OFFLINE_BP  9
 #define __HLS_LINK_COOLDOWN_BP 10
 
-#define HLS_UP_INIT	  (1 << __HLS_UP_INIT_BP)
-#define HLS_UP_ARMED	  (1 << __HLS_UP_ARMED_BP)
-#define HLS_UP_ACTIVE	  (1 << __HLS_UP_ACTIVE_BP)
-#define HLS_DN_DOWNDEF	  (1 << __HLS_DN_DOWNDEF_BP) /* link down default */
-#define HLS_DN_POLL	  (1 << __HLS_DN_POLL_BP)
-#define HLS_DN_DISABLE	  (1 << __HLS_DN_DISABLE_BP)
-#define HLS_DN_OFFLINE	  (1 << __HLS_DN_OFFLINE_BP)
-#define HLS_VERIFY_CAP	  (1 << __HLS_VERIFY_CAP_BP)
-#define HLS_GOING_UP	  (1 << __HLS_GOING_UP_BP)
-#define HLS_GOING_OFFLINE (1 << __HLS_GOING_OFFLINE_BP)
-#define HLS_LINK_COOLDOWN (1 << __HLS_LINK_COOLDOWN_BP)
+#define HLS_UP_INIT	  BIT(__HLS_UP_INIT_BP)
+#define HLS_UP_ARMED	  BIT(__HLS_UP_ARMED_BP)
+#define HLS_UP_ACTIVE	  BIT(__HLS_UP_ACTIVE_BP)
+#define HLS_DN_DOWNDEF	  BIT(__HLS_DN_DOWNDEF_BP) /* link down default */
+#define HLS_DN_POLL	  BIT(__HLS_DN_POLL_BP)
+#define HLS_DN_DISABLE	  BIT(__HLS_DN_DISABLE_BP)
+#define HLS_DN_OFFLINE	  BIT(__HLS_DN_OFFLINE_BP)
+#define HLS_VERIFY_CAP	  BIT(__HLS_VERIFY_CAP_BP)
+#define HLS_GOING_UP	  BIT(__HLS_GOING_UP_BP)
+#define HLS_GOING_OFFLINE BIT(__HLS_GOING_OFFLINE_BP)
+#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP)
 
 #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
 
@@ -490,6 +506,7 @@ struct hfi1_sge_state;
 #define CNTR_DISABLED		0x2 /* Disable this counter */
 #define CNTR_32BIT		0x4 /* Simulate 64 bits for this counter */
 #define CNTR_VL			0x8 /* Per VL counter */
+#define CNTR_SDMA              0x10
 #define CNTR_INVALID_VL		-1  /* Specifies invalid VL */
 #define CNTR_MODE_W		0x0
 #define CNTR_MODE_R		0x1
@@ -512,10 +529,11 @@ static inline void incr_cntr32(u32 *cntr)
 
 #define MAX_NAME_SIZE 64
 struct hfi1_msix_entry {
+	enum irq_type type;
 	struct msix_entry msix;
 	void *arg;
 	char name[MAX_NAME_SIZE];
-	cpumask_var_t mask;
+	cpumask_t mask;
 };
 
 /* per-SL CCA information */
@@ -542,6 +560,7 @@ enum {
 };
 
 struct vl_arb_cache {
+	/* protect vl arb cache */
 	spinlock_t lock;
 	struct ib_vl_weight_elem table[VL_ARB_TABLE_SIZE];
 };
@@ -561,7 +580,8 @@ struct hfi1_pportdata {
 	struct kobject sl2sc_kobj;
 	struct kobject vl2mtu_kobj;
 
-	/* QSFP support */
+	/* PHY support */
+	u32 port_type;
 	struct qsfp_data qsfp_info;
 
 	/* GUID for this interface, in host order */
@@ -586,6 +606,7 @@ struct hfi1_pportdata {
 	struct work_struct link_vc_work;
 	struct work_struct link_up_work;
 	struct work_struct link_down_work;
+	struct work_struct dc_host_req_work;
 	struct work_struct sma_message_work;
 	struct work_struct freeze_work;
 	struct work_struct link_downgrade_work;
@@ -623,6 +644,7 @@ struct hfi1_pportdata {
 	u16 link_speed_active;
 	u8 vls_supported;
 	u8 vls_operational;
+	u8 actual_vls_operational;
 	/* LID mask control */
 	u8 lmc;
 	/* Rx Polarity inversion (compensate for ~tx on partner) */
@@ -642,19 +664,23 @@ struct hfi1_pportdata {
 	u8 link_enabled;	/* link enabled? */
 	u8 linkinit_reason;
 	u8 local_tx_rate;	/* rate given to 8051 firmware */
+	u8 last_pstate;		/* info only */
 
 	/* placeholders for IB MAD packet settings */
 	u8 overrun_threshold;
 	u8 phy_error_threshold;
 
-	/* used to override LED behavior */
-	u8 led_override;  /* Substituted for normal value, if non-zero */
-	u16 led_override_timeoff; /* delta to next timer event */
-	u8 led_override_vals[2]; /* Alternates per blink-frame */
-	u8 led_override_phase; /* Just counts, LSB picks from vals[] */
+	/* Used to override LED behavior for things like maintenance beaconing*/
+	/*
+	 * Alternates per phase of blink
+	 * [0] holds LED off duration, [1] holds LED on duration
+	 */
+	unsigned long led_override_vals[2];
+	u8 led_override_phase; /* LSB picks from vals[] */
 	atomic_t led_override_timer_active;
 	/* Used to flash LEDs in override mode */
 	struct timer_list led_override_timer;
+
 	u32 sm_trap_qp;
 	u32 sa_qp;
 
@@ -689,10 +715,12 @@ struct hfi1_pportdata {
 	/* CA's max number of 64 entry units in the congestion control table */
 	u8 cc_max_table_entries;
 
-	/* begin congestion log related entries
-	 * cc_log_lock protects all congestion log related data */
+	/*
+	 * begin congestion log related entries
+	 * cc_log_lock protects all congestion log related data
+	 */
 	spinlock_t cc_log_lock ____cacheline_aligned_in_smp;
-	u8 threshold_cong_event_map[OPA_MAX_SLS/8];
+	u8 threshold_cong_event_map[OPA_MAX_SLS / 8];
 	u16 threshold_event_counter;
 	struct opa_hfi1_cong_log_event_internal cc_events[OPA_CONG_LOG_ELEMS];
 	int cc_log_idx; /* index for logging events */
@@ -705,8 +733,9 @@ struct hfi1_pportdata {
 	u64 *cntrs;
 	/* port relative synthetic counter buffer */
 	u64 *scntrs;
-	/* we synthesize port_xmit_discards from several egress errors */
+	/* port_xmit_discards are synthesized from different egress errors */
 	u64 port_xmit_discards;
+	u64 port_xmit_discards_vl[C_VL_COUNT];
 	u64 port_xmit_constraint_errors;
 	u64 port_rcv_constraint_errors;
 	/* count of 'link_err' interrupts from DC */
@@ -728,6 +757,9 @@ struct hfi1_pportdata {
 	u8 remote_link_down_reason;
 	/* Error events that will cause a port bounce. */
 	u32 port_error_action;
+	struct work_struct linkstate_active_work;
+	/* Does this port need to prescan for FECNs */
+	bool cc_prescan;
 };
 
 typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
@@ -773,6 +805,12 @@ struct hfi1_temp {
 	u8 triggers;      /* temperature triggers */
 };
 
+/* common data between shared ASIC HFIs */
+struct hfi1_asic_data {
+	struct hfi1_devdata *dds[2];	/* back pointers */
+	struct mutex asic_resource_mutex;
+};
+
 /* device data struct now contains only "general per-device" info.
  * fields related to a physical IB port are in a hfi1_pportdata struct.
  */
@@ -782,6 +820,7 @@ struct sdma_vl_map;
 #define BOARD_VERS_MAX 96 /* how long the version string can be */
 #define SERIAL_MAX 16 /* length of the serial number */
 
+typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64);
 struct hfi1_devdata {
 	struct hfi1_ibdev verbs_dev;     /* must be first */
 	struct list_head list;
@@ -811,6 +850,12 @@ struct hfi1_devdata {
 	spinlock_t sc_lock;
 	/* Per VL data. Enough for all VLs but not all elements are set/used. */
 	struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
+	/* lock for pio_map */
+	spinlock_t pio_map_lock;
+	/* array of kernel send contexts */
+	struct send_context **kernel_send_context;
+	/* array of vl maps */
+	struct pio_vl_map __rcu *pio_map;
 	/* seqlock for sc2vl */
 	seqlock_t sc2vl_lock;
 	u64 sc2vl[4];
@@ -841,6 +886,8 @@ struct hfi1_devdata {
 	wait_queue_head_t		  sdma_unfreeze_wq;
 	atomic_t			  sdma_unfreeze_count;
 
+	/* common data between shared ASIC HFIs in this OS */
+	struct hfi1_asic_data *asic_data;
 
 	/* hfi1_pportdata, points to array of (physical) port-specific
 	 * data structs, indexed by pidx (0..n-1)
@@ -873,10 +920,11 @@ struct hfi1_devdata {
 	/* reset value */
 	u64 z_int_counter;
 	u64 z_rcv_limit;
+	u64 z_send_schedule;
 	/* percpu int_counter */
 	u64 __percpu *int_counter;
 	u64 __percpu *rcv_limit;
-
+	u64 __percpu *send_schedule;
 	/* number of receive contexts in use by the driver */
 	u32 num_rcv_contexts;
 	/* number of pio send contexts in use by the driver */
@@ -885,6 +933,8 @@ struct hfi1_devdata {
 	 * number of ctxts available for PSM open
 	 */
 	u32 freectxts;
+	/* total number of available user/PSM contexts */
+	u32 num_user_contexts;
 	/* base receive interrupt timeout, in CSR units */
 	u32 rcv_intr_timeout_csr;
 
@@ -996,9 +1046,8 @@ struct hfi1_devdata {
 	u16 irev;	/* implementation revision */
 	u16 dc8051_ver; /* 8051 firmware version */
 
+	struct platform_config platform_config;
 	struct platform_config_cache pcfg_cache;
-	/* control high-level access to qsfp */
-	struct mutex qsfp_i2c_mutex;
 
 	struct diag_client *diag_client;
 	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
@@ -1008,8 +1057,6 @@ struct hfi1_devdata {
 	u16 psxmitwait_check_rate;
 	/* high volume overflow errors deferred to tasklet */
 	struct tasklet_struct error_tasklet;
-	/* per device cq worker */
-	struct kthread_worker *worker;
 
 	/* MSI-X information */
 	struct hfi1_msix_entry *msix_entries;
@@ -1090,10 +1137,8 @@ struct hfi1_devdata {
 	 * Handlers for outgoing data so that snoop/capture does not
 	 * have to have its hooks in the send path
 	 */
-	int (*process_pio_send)(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
-				u64 pbc);
-	int (*process_dma_send)(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
-				u64 pbc);
+	send_routine process_pio_send;
+	send_routine process_dma_send;
 	void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
 				u64 pbc, const void *from, size_t count);
 
@@ -1105,7 +1150,6 @@ struct hfi1_devdata {
 	struct timer_list rcverr_timer;
 	u32 rcv_ovfl_cnt;
 
-	int assigned_node_id;
 	wait_queue_head_t event_queue;
 
 	/* Save the enabled LCB error bits */
@@ -1115,6 +1159,16 @@ struct hfi1_devdata {
 	/* receive context tail dummy address */
 	__le64 *rcvhdrtail_dummy_kvaddr;
 	dma_addr_t rcvhdrtail_dummy_physaddr;
+
+	bool eprom_available;	/* true if EPROM is available for this device */
+	bool aspm_supported;	/* Does HW support ASPM */
+	bool aspm_enabled;	/* ASPM state: enabled/disabled */
+	/* Serialize ASPM enable/disable between multiple verbs contexts */
+	spinlock_t aspm_lock;
+	/* Number of verbs contexts which have disabled ASPM */
+	atomic_t aspm_disabled_cnt;
+
+	struct hfi1_affinity *affinity;
 };
 
 /* 8051 firmware version helper */
@@ -1125,6 +1179,9 @@ struct hfi1_devdata {
 #define PT_EAGER    1
 #define PT_INVALID  2
 
+struct tid_rb_node;
+struct mmu_rb_node;
+
 /* Private data for file operations */
 struct hfi1_filedata {
 	struct hfi1_ctxtdata *uctxt;
@@ -1133,6 +1190,16 @@ struct hfi1_filedata {
 	struct hfi1_user_sdma_pkt_q *pq;
 	/* for cpu affinity; -1 if none */
 	int rec_cpu_num;
+	u32 tid_n_pinned;
+	struct rb_root tid_rb_root;
+	struct tid_rb_node **entry_to_rb;
+	spinlock_t tid_lock; /* protect tid_[limit,used] counters */
+	u32 tid_limit;
+	u32 tid_used;
+	u32 *invalid_tids;
+	u32 invalid_tid_idx;
+	/* protect invalid_tids array and invalid_tid_idx */
+	spinlock_t invalid_lock;
 };
 
 extern struct list_head hfi1_dev_list;
@@ -1156,7 +1223,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd);
 int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *);
 int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *);
 int hfi1_create_ctxts(struct hfi1_devdata *dd);
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32);
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int);
 void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *,
 			 struct hfi1_devdata *, u8, u8);
 void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
@@ -1164,6 +1231,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
 int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
 int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
 int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
+void set_all_slowpath(struct hfi1_devdata *dd);
 
 /* receive packet handler dispositions */
 #define RCV_PKT_OK      0x0 /* keep going */
@@ -1184,6 +1252,15 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
 	return ppd->lstate; /* use the cached value */
 }
 
+void receive_interrupt_work(struct work_struct *work);
+
+/* extract service channel from header and rhf */
+static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
+{
+	return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
+	       ((!!(rhf & RHF_DC_INFO_MASK)) << 4);
+}
+
 static inline u16 generate_jkey(kuid_t uid)
 {
 	return from_kuid(current_user_ns(), uid) & 0xffff;
@@ -1253,7 +1330,7 @@ static inline u32 egress_cycles(u32 len, u32 rate)
 void set_link_ipg(struct hfi1_pportdata *ppd);
 void process_becn(struct hfi1_pportdata *ppd, u8 sl,  u16 rlid, u32 lqpn,
 		  u32 rqpn, u8 svc_type);
-void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
+void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
 		u32 pkey, u32 slid, u32 dlid, u8 sc5,
 		const struct ib_grh *old_grh);
 
@@ -1424,6 +1501,7 @@ static inline int valid_ib_mtu(unsigned int mtu)
 		mtu == 1024 || mtu == 2048 ||
 		mtu == 4096;
 }
+
 static inline int valid_opa_max_mtu(unsigned int mtu)
 {
 	return mtu >= 2048 &&
@@ -1445,12 +1523,13 @@ void reset_link_credits(struct hfi1_devdata *dd);
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
 
 int snoop_recv_handler(struct hfi1_packet *packet);
-int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc);
-int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc);
 void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
 			   u64 pbc, const void *from, size_t count);
+int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc);
 
 static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd)
 {
@@ -1472,6 +1551,11 @@ static inline struct hfi1_pportdata *ppd_from_ibp(struct hfi1_ibport *ibp)
 	return container_of(ibp, struct hfi1_pportdata, ibport_data);
 }
 
+static inline struct hfi1_ibdev *dev_from_rdi(struct rvt_dev_info *rdi)
+{
+	return container_of(rdi, struct hfi1_ibdev, rdi);
+}
+
 static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -1515,12 +1599,10 @@ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
 #define HFI1_HAS_SDMA_TIMEOUT  0x8
 #define HFI1_HAS_SEND_DMA      0x10   /* Supports Send DMA */
 #define HFI1_FORCED_FREEZE     0x80   /* driver forced freeze mode */
-#define HFI1_DO_INIT_ASIC      0x100  /* This device will init the ASIC */
 
 /* IB dword length mask in PBC (lower 11 bits); same for all chips */
 #define HFI1_PBC_LENGTH_MASK                     ((1 << 11) - 1)
 
-
 /* ctxt_flag bit offsets */
 		/* context has been setup */
 #define HFI1_CTXT_SETUP_DONE 1
@@ -1538,14 +1620,10 @@ void hfi1_free_devdata(struct hfi1_devdata *);
 void cc_state_reclaim(struct rcu_head *rcu);
 struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
 
-/*
- * Set LED override, only the two LSBs have "public" meaning, but
- * any non-zero value substitutes them for the Link and LinkTrain
- * LED states.
- */
-#define HFI1_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
-#define HFI1_LED_LOG 2  /* Logical (link) YELLOW LED */
-void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val);
+/* LED beaconing functions */
+void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
+			     unsigned int timeoff);
+void shutdown_led_override(struct hfi1_pportdata *ppd);
 
 #define HFI1_CREDIT_RETURN_RATE (100)
 
@@ -1587,12 +1665,13 @@ void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val);
  */
 #define DEFAULT_RCVHDR_ENTSIZE 32
 
+bool hfi1_can_pin_pages(struct hfi1_devdata *, u32, u32);
 int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **);
-void hfi1_release_user_pages(struct page **, size_t, bool);
+void hfi1_release_user_pages(struct mm_struct *, struct page **, size_t, bool);
 
 static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
 {
-	*((u64 *) rcd->rcvhdrtail_kvaddr) = 0ULL;
+	*((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL;
 }
 
 static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
@@ -1601,7 +1680,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
 	 * volatile because it's a DMA target from the chip, routine is
 	 * inlined, and don't want register caching or reordering.
 	 */
-	return (u32) le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
+	return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
 }
 
 /*
@@ -1633,12 +1712,13 @@ void restore_pci_variables(struct hfi1_devdata *dd);
 int do_pcie_gen3_transition(struct hfi1_devdata *dd);
 int parse_platform_config(struct hfi1_devdata *dd);
 int get_platform_config_field(struct hfi1_devdata *dd,
-			enum platform_config_table_type_encoding table_type,
-			int table_index, int field_index, u32 *data, u32 len);
+			      enum platform_config_table_type_encoding
+			      table_type, int table_index, int field_index,
+			      u32 *data, u32 len);
 
-dma_addr_t hfi1_map_page(struct pci_dev *, struct page *, unsigned long,
-			 size_t, int);
 const char *get_unit_name(int unit);
+const char *get_card_name(struct rvt_dev_info *rdi);
+struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi);
 
 /*
  * Flush write combining store buffers (if present) and perform a write
@@ -1659,7 +1739,7 @@ int process_receive_invalid(struct hfi1_packet *packet);
 
 extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8];
 
-void update_sge(struct hfi1_sge_state *ss, u32 length);
+void update_sge(struct rvt_sge_state *ss, u32 length);
 
 /* global module parameter variables */
 extern unsigned int hfi1_max_mtu;
@@ -1667,7 +1747,7 @@ extern unsigned int hfi1_cu;
 extern unsigned int user_credit_return_threshold;
 extern int num_user_contexts;
 extern unsigned n_krcvqs;
-extern u8 krcvqs[];
+extern uint krcvqs[];
 extern int krcvqsset;
 extern uint kdeth_qp;
 extern uint loopback;
@@ -1730,7 +1810,7 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
 		base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
 
 	if (is_ax(dd))
-		/* turn off send-side job key checks - A0 erratum */
+		/* turn off send-side job key checks - A0 */
 		return base_sc_integrity &
 		       ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
 	return base_sc_integrity;
@@ -1757,7 +1837,7 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
 	| SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK;
 
 	if (is_ax(dd))
-		/* turn off send-side job key checks - A0 erratum */
+		/* turn off send-side job key checks - A0 */
 		return base_sdma_integrity &
 		       ~SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
 	return base_sdma_integrity;
@@ -1794,6 +1874,10 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
 	dev_info(&(dd)->pcidev->dev, "%s: " fmt, \
 			get_unit_name((dd)->unit), ##__VA_ARGS__)
 
+#define dd_dev_dbg(dd, fmt, ...) \
+	dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \
+		get_unit_name((dd)->unit), ##__VA_ARGS__)
+
 #define hfi1_dev_porterr(dd, port, fmt, ...) \
 	dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
 			get_unit_name((dd)->unit), (dd)->unit, (port), \
@@ -1825,13 +1909,14 @@ static inline void hfi1_reset_cpu_counters(struct hfi1_devdata *dd)
 
 	dd->z_int_counter = get_all_cpu_total(dd->int_counter);
 	dd->z_rcv_limit = get_all_cpu_total(dd->rcv_limit);
+	dd->z_send_schedule = get_all_cpu_total(dd->send_schedule);
 
 	ppd = (struct hfi1_pportdata *)(dd + 1);
 	for (i = 0; i < dd->num_pports; i++, ppd++) {
-		ppd->ibport_data.z_rc_acks =
-			get_all_cpu_total(ppd->ibport_data.rc_acks);
-		ppd->ibport_data.z_rc_qacks =
-			get_all_cpu_total(ppd->ibport_data.rc_qacks);
+		ppd->ibport_data.rvp.z_rc_acks =
+			get_all_cpu_total(ppd->ibport_data.rvp.rc_acks);
+		ppd->ibport_data.rvp.z_rc_qacks =
+			get_all_cpu_total(ppd->ibport_data.rvp.rc_qacks);
 	}
 }
 
@@ -1844,6 +1929,18 @@ static inline void setextled(struct hfi1_devdata *dd, u32 on)
 		write_csr(dd, DCC_CFG_LED_CNTRL, 0x10);
 }
 
+/* return the i2c resource given the target */
+static inline u32 i2c_target(u32 target)
+{
+	return target ? CR_I2C2 : CR_I2C1;
+}
+
+/* return the i2c chain chip resource that this HFI uses for QSFP */
+static inline u32 qsfp_resource(struct hfi1_devdata *dd)
+{
+	return i2c_target(dd->hfi1_id);
+}
+
 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
 
 #endif                          /* _HFI1_KERNEL_H */
diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c
index 4dd8051aba7e..cfcdc16b41c3 100644
--- a/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/staging/rdma/hfi1/init.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -56,6 +53,7 @@
 #include <linux/module.h>
 #include <linux/printk.h>
 #include <linux/hrtimer.h>
+#include <rdma/rdma_vt.h>
 
 #include "hfi.h"
 #include "device.h"
@@ -65,6 +63,7 @@
 #include "sdma.h"
 #include "debugfs.h"
 #include "verbs.h"
+#include "aspm.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -75,6 +74,7 @@
 #define HFI1_MIN_USER_CTXT_BUFCNT 7
 
 #define HFI1_MIN_HDRQ_EGRBUF_CNT 2
+#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
 #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
 #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
 
@@ -87,9 +87,9 @@ module_param_named(num_user_contexts, num_user_contexts, uint, S_IRUGO);
 MODULE_PARM_DESC(
 	num_user_contexts, "Set max number of user contexts to use");
 
-u8 krcvqs[RXE_NUM_DATA_VL];
+uint krcvqs[RXE_NUM_DATA_VL];
 int krcvqsset;
-module_param_array(krcvqs, byte, &krcvqsset, S_IRUGO);
+module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
 MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
 
 /* computed based on above array */
@@ -128,16 +128,12 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 {
 	unsigned i;
 	int ret;
-	int local_node_id = pcibus_to_node(dd->pcidev->bus);
 
 	/* Control context has to be always 0 */
 	BUILD_BUG_ON(HFI1_CTRL_CTXT != 0);
 
-	if (local_node_id < 0)
-		local_node_id = numa_node_id();
-	dd->assigned_node_id = local_node_id;
-
-	dd->rcd = kcalloc(dd->num_rcv_contexts, sizeof(*dd->rcd), GFP_KERNEL);
+	dd->rcd = kzalloc_node(dd->num_rcv_contexts * sizeof(*dd->rcd),
+			       GFP_KERNEL, dd->node);
 	if (!dd->rcd)
 		goto nomem;
 
@@ -147,10 +143,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		struct hfi1_ctxtdata *rcd;
 
 		ppd = dd->pport + (i % dd->num_pports);
-		rcd = hfi1_create_ctxtdata(ppd, i);
+		rcd = hfi1_create_ctxtdata(ppd, i, dd->node);
 		if (!rcd) {
 			dd_dev_err(dd,
-				"Unable to allocate kernel receive context, failing\n");
+				   "Unable to allocate kernel receive context, failing\n");
 			goto nomem;
 		}
 		/*
@@ -171,7 +167,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
 		if (!rcd->sc) {
 			dd_dev_err(dd,
-				"Unable to allocate kernel send context, failing\n");
+				   "Unable to allocate kernel send context, failing\n");
 			dd->rcd[rcd->ctxt] = NULL;
 			hfi1_free_ctxtdata(dd, rcd);
 			goto nomem;
@@ -189,6 +185,12 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		}
 	}
 
+	/*
+	 * Initialize aspm, to be done after gen3 transition and setting up
+	 * contexts and before enabling interrupts
+	 */
+	aspm_init(dd);
+
 	return 0;
 nomem:
 	ret = -ENOMEM;
@@ -201,7 +203,8 @@ bail:
 /*
  * Common code for user and kernel context setup.
  */
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
+					   int numa)
 {
 	struct hfi1_devdata *dd = ppd->dd;
 	struct hfi1_ctxtdata *rcd;
@@ -224,10 +227,10 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
 		rcd->cnt = 1;
 		rcd->ctxt = ctxt;
 		dd->rcd[ctxt] = rcd;
-		rcd->numa_id = numa_node_id();
+		rcd->numa_id = numa;
 		rcd->rcv_array_groups = dd->rcv_entries.ngroups;
 
-		spin_lock_init(&rcd->exp_lock);
+		mutex_init(&rcd->exp_lock);
 
 		/*
 		 * Calculate the context's RcvArray entry starting point.
@@ -260,7 +263,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
 		/* Validate and initialize Rcv Hdr Q variables */
 		if (rcvhdrcnt % HDRQ_INCREMENT) {
 			dd_dev_err(dd,
-				   "ctxt%u: header queue count %d must be divisible by %d\n",
+				   "ctxt%u: header queue count %d must be divisible by %lu\n",
 				   rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT);
 			goto bail;
 		}
@@ -332,7 +335,6 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
 	}
 	return rcd;
 bail:
-	kfree(rcd->opstats);
 	kfree(rcd->egrbufs.rcvtids);
 	kfree(rcd->egrbufs.buffers);
 	kfree(rcd);
@@ -380,7 +382,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd)
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL)
+	if (!cc_state)
 		/*
 		 * This should _never_ happen - rcu_read_lock() is held,
 		 * and set_link_ipg() should not be called if cc_state
@@ -432,7 +434,7 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return HRTIMER_NORESTART;
 	}
@@ -494,14 +496,19 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
 	INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
 	INIT_WORK(&ppd->link_up_work, handle_link_up);
 	INIT_WORK(&ppd->link_down_work, handle_link_down);
+	INIT_WORK(&ppd->dc_host_req_work, handle_8051_request);
 	INIT_WORK(&ppd->freeze_work, handle_freeze);
 	INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
 	INIT_WORK(&ppd->sma_message_work, handle_sma_message);
 	INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
+	INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
+	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
+
 	mutex_init(&ppd->hls_lock);
 	spin_lock_init(&ppd->sdma_alllock);
 	spin_lock_init(&ppd->qsfp_info.qsfp_lock);
 
+	ppd->qsfp_info.ppd = ppd;
 	ppd->sm_trap_qp = 0x0;
 	ppd->sa_qp = 0x1;
 
@@ -583,8 +590,8 @@ static void enable_chip(struct hfi1_devdata *dd)
 	 * Enable kernel ctxts' receive and receive interrupt.
 	 * Other ctxts done as user opens and initializes them.
 	 */
-	rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
 	for (i = 0; i < dd->first_user_ctxt; ++i) {
+		rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
 		rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
 			HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
 		if (!HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, MULTI_PKT_EGR))
@@ -730,14 +737,14 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
 			lastfail = hfi1_setup_eagerbufs(rcd);
 		if (lastfail)
 			dd_dev_err(dd,
-				"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
+				   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
 	}
 	if (lastfail)
 		ret = lastfail;
 
 	/* Allocate enough memory for user event notification. */
-	len = ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS *
-		    sizeof(*dd->events), PAGE_SIZE);
+	len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS *
+			 sizeof(*dd->events));
 	dd->events = vmalloc_user(len);
 	if (!dd->events)
 		dd_dev_err(dd, "Failed to allocate user events page\n");
@@ -763,7 +770,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
 	/* enable chip even if we have an error, so we can debug cause */
 	enable_chip(dd);
 
-	ret = hfi1_cq_init(dd);
 done:
 	/*
 	 * Set status even if port serdes is not initialized
@@ -780,20 +786,15 @@ done:
 		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 			ppd = dd->pport + pidx;
 
-			/* initialize the qsfp if it exists
-			 * Requires interrupts to be enabled so we are notified
-			 * when the QSFP completes reset, and has
-			 * to be done before bringing up the SERDES
+			/*
+			 * start the serdes - must be after interrupts are
+			 * enabled so we are notified when the link goes up
 			 */
-			init_qsfp(ppd);
-
-			/* start the serdes - must be after interrupts are
-			   enabled so we are notified when the link goes up */
 			lastfail = bringup_serdes(ppd);
 			if (lastfail)
 				dd_dev_info(dd,
-					"Failed to bring up port %u\n",
-					ppd->port);
+					    "Failed to bring up port %u\n",
+					    ppd->port);
 
 			/*
 			 * Set status even if port serdes is not initialized
@@ -905,6 +906,8 @@ static void shutdown_device(struct hfi1_devdata *dd)
 		/* disable the send device */
 		pio_send_control(dd, PSC_GLOBAL_DISABLE);
 
+		shutdown_led_override(ppd);
+
 		/*
 		 * Clear SerdesEnable.
 		 * We can't count on interrupts since we are stopping.
@@ -962,17 +965,33 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 	kfree(rcd->egrbufs.buffers);
 
 	sc_free(rcd->sc);
-	vfree(rcd->physshadow);
-	vfree(rcd->tid_pg_list);
 	vfree(rcd->user_event_mask);
 	vfree(rcd->subctxt_uregbase);
 	vfree(rcd->subctxt_rcvegrbuf);
 	vfree(rcd->subctxt_rcvhdr_base);
-	kfree(rcd->tidusemap);
 	kfree(rcd->opstats);
 	kfree(rcd);
 }
 
+/*
+ * Release our hold on the shared asic data.  If we are the last one,
+ * free the structure.  Must be holding hfi1_devs_lock.
+ */
+static void release_asic_data(struct hfi1_devdata *dd)
+{
+	int other;
+
+	if (!dd->asic_data)
+		return;
+	dd->asic_data->dds[dd->hfi1_id] = NULL;
+	other = dd->hfi1_id ? 0 : 1;
+	if (!dd->asic_data->dds[other]) {
+		/* we are the last holder, free it */
+		kfree(dd->asic_data);
+	}
+	dd->asic_data = NULL;
+}
+
 void hfi1_free_devdata(struct hfi1_devdata *dd)
 {
 	unsigned long flags;
@@ -980,12 +999,15 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
 	idr_remove(&hfi1_unit_table, dd->unit);
 	list_del(&dd->list);
+	release_asic_data(dd);
 	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-	hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+	free_platform_config(dd);
 	rcu_barrier(); /* wait for rcu callbacks to complete */
 	free_percpu(dd->int_counter);
 	free_percpu(dd->rcv_limit);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	hfi1_dev_affinity_free(dd);
+	free_percpu(dd->send_schedule);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 }
 
 /*
@@ -1000,19 +1022,19 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 {
 	unsigned long flags;
 	struct hfi1_devdata *dd;
-	int ret;
+	int ret, nports;
+
+	/* extra is * number of ports */
+	nports = extra / sizeof(struct hfi1_pportdata);
 
-	dd = (struct hfi1_devdata *)ib_alloc_device(sizeof(*dd) + extra);
+	dd = (struct hfi1_devdata *)rvt_alloc_device(sizeof(*dd) + extra,
+						     nports);
 	if (!dd)
 		return ERR_PTR(-ENOMEM);
-	/* extra is * number of ports */
-	dd->num_pports = extra / sizeof(struct hfi1_pportdata);
+	dd->num_pports = nports;
 	dd->pport = (struct hfi1_pportdata *)(dd + 1);
 
 	INIT_LIST_HEAD(&dd->list);
-	dd->node = dev_to_node(&pdev->dev);
-	if (dd->node < 0)
-		dd->node = 0;
 	idr_preload(GFP_KERNEL);
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
 
@@ -1042,9 +1064,9 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 	spin_lock_init(&dd->sc_init_lock);
 	spin_lock_init(&dd->dc8051_lock);
 	spin_lock_init(&dd->dc8051_memlock);
-	mutex_init(&dd->qsfp_i2c_mutex);
 	seqlock_init(&dd->sc2vl_lock);
 	spin_lock_init(&dd->sde_map_lock);
+	spin_lock_init(&dd->pio_map_lock);
 	init_waitqueue_head(&dd->event_queue);
 
 	dd->int_counter = alloc_percpu(u64);
@@ -1063,6 +1085,14 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 		goto bail;
 	}
 
+	dd->send_schedule = alloc_percpu(u64);
+	if (!dd->send_schedule) {
+		ret = -ENOMEM;
+		hfi1_early_err(&pdev->dev,
+			       "Could not allocate per-cpu int_counter\n");
+		goto bail;
+	}
+
 	if (!hfi1_cpulist_count) {
 		u32 count = num_online_cpus();
 
@@ -1075,13 +1105,12 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 			&pdev->dev,
 			"Could not alloc cpulist info, cpu affinity might be wrong\n");
 	}
-	hfi1_dbg_ibdev_init(&dd->verbs_dev);
 	return dd;
 
 bail:
 	if (!list_empty(&dd->list))
 		list_del_init(&dd->list);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 	return ERR_PTR(ret);
 }
 
@@ -1174,8 +1203,10 @@ static int __init hfi1_mod_init(void)
 		user_credit_return_threshold = 100;
 
 	compute_krcvqs();
-	/* sanitize receive interrupt count, time must wait until after
-	   the hardware type is known */
+	/*
+	 * sanitize receive interrupt count, time must wait until after
+	 * the hardware type is known
+	 */
 	if (rcv_intr_count > RCV_HDR_HEAD_COUNTER_MASK)
 		rcv_intr_count = RCV_HDR_HEAD_COUNTER_MASK;
 	/* reject invalid combinations */
@@ -1210,6 +1241,9 @@ static int __init hfi1_mod_init(void)
 	idr_init(&hfi1_unit_table);
 
 	hfi1_dbg_init();
+	ret = hfi1_wss_init();
+	if (ret < 0)
+		goto bail_wss;
 	ret = pci_register_driver(&hfi1_pci_driver);
 	if (ret < 0) {
 		pr_err("Unable to register driver: error %d\n", -ret);
@@ -1218,6 +1252,8 @@ static int __init hfi1_mod_init(void)
 	goto bail; /* all OK */
 
 bail_dev:
+	hfi1_wss_exit();
+bail_wss:
 	hfi1_dbg_exit();
 	idr_destroy(&hfi1_unit_table);
 	dev_cleanup();
@@ -1233,6 +1269,7 @@ module_init(hfi1_mod_init);
 static void __exit hfi1_mod_cleanup(void)
 {
 	pci_unregister_driver(&hfi1_pci_driver);
+	hfi1_wss_exit();
 	hfi1_dbg_exit();
 	hfi1_cpulist_count = 0;
 	kfree(hfi1_cpulist);
@@ -1304,16 +1341,18 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
 		}
 	}
 	kfree(tmp);
+	free_pio_map(dd);
 	/* must follow rcv context free - need to remove rcv's hooks */
 	for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++)
 		sc_free(dd->send_contexts[ctxt].sc);
 	dd->num_send_contexts = 0;
 	kfree(dd->send_contexts);
 	dd->send_contexts = NULL;
+	kfree(dd->hw_to_sw);
+	dd->hw_to_sw = NULL;
 	kfree(dd->boardname);
 	vfree(dd->events);
 	vfree(dd->status);
-	hfi1_cq_exit(dd);
 }
 
 /*
@@ -1347,6 +1386,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		ret = -EINVAL;
 		goto bail;
 	}
+	if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+		hfi1_early_err(&pdev->dev,
+			       "Receive header queue count cannot be greater than %u\n",
+			       HFI1_MAX_HDRQ_EGRBUF_CNT);
+		ret = -EINVAL;
+		goto bail;
+	}
 	/* use the encoding function as a sanitization check */
 	if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
 		hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
@@ -1423,8 +1469,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * we still create devices, so diags, etc. can be used
 	 * to determine cause of problem.
 	 */
-	if (!initfail && !ret)
+	if (!initfail && !ret) {
 		dd->flags |= HFI1_INITTED;
+		/* create debufs files after init and ib register */
+		hfi1_dbg_ibdev_init(&dd->verbs_dev);
+	}
 
 	j = hfi1_device_create(dd);
 	if (j)
@@ -1465,6 +1514,8 @@ static void remove_one(struct pci_dev *pdev)
 {
 	struct hfi1_devdata *dd = pci_get_drvdata(pdev);
 
+	/* close debugfs files before ib unregister */
+	hfi1_dbg_ibdev_exit(&dd->verbs_dev);
 	/* unregister from IB core */
 	hfi1_unregister_ib_device(dd);
 
@@ -1506,8 +1557,8 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 		 * rcvhdrqentsize is in DWs, so we have to convert to bytes
 		 * (* sizeof(u32)).
 		 */
-		amt = ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
-			    sizeof(u32), PAGE_SIZE);
+		amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
+				 sizeof(u32));
 
 		gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
 			GFP_USER : GFP_KERNEL;
@@ -1517,18 +1568,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 
 		if (!rcd->rcvhdrq) {
 			dd_dev_err(dd,
-				"attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
-				amt, rcd->ctxt);
+				   "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
+				   amt, rcd->ctxt);
 			goto bail;
 		}
 
-		/* Event mask is per device now and is in hfi1_devdata */
-		/*if (rcd->ctxt >= dd->first_user_ctxt) {
-			rcd->user_event_mask = vmalloc_user(PAGE_SIZE);
-			if (!rcd->user_event_mask)
-				goto bail_free_hdrq;
-				}*/
-
 		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
 			rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
 				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
@@ -1569,8 +1613,8 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 
 bail_free:
 	dd_dev_err(dd,
-		"attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
-		rcd->ctxt);
+		   "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
+		   rcd->ctxt);
 	vfree(rcd->user_event_mask);
 	rcd->user_event_mask = NULL;
 	dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
@@ -1660,7 +1704,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 			if (rcd->egrbufs.rcvtid_size == round_mtu ||
 			    !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
 				dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
-					rcd->ctxt);
+					   rcd->ctxt);
 				goto bail_rcvegrbuf_phys;
 			}
 
@@ -1695,8 +1739,9 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 				     rcd->egrbufs.buffers[j].len)) {
 					j++;
 					offset = 0;
-				} else
+				} else {
 					offset += new_size;
+				}
 			}
 			rcd->egrbufs.rcvtid_size = new_size;
 		}
@@ -1709,7 +1754,6 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 		  rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size,
 		  rcd->egrbufs.size);
 
-
 	/*
 	 * Set the contexts rcv array head update threshold to the closest
 	 * power of 2 (so we can use a mask instead of modulo) below half
@@ -1743,14 +1787,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 
 	for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
 		hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
-			      rcd->egrbufs.rcvtids[idx].phys, order);
+			     rcd->egrbufs.rcvtids[idx].phys, order);
 		cond_resched();
 	}
 	goto bail;
 
 bail_rcvegrbuf_phys:
 	for (idx = 0; idx < rcd->egrbufs.alloced &&
-		     rcd->egrbufs.buffers[idx].addr;
+	     rcd->egrbufs.buffers[idx].addr;
 	     idx++) {
 		dma_free_coherent(&dd->pcidev->dev,
 				  rcd->egrbufs.buffers[idx].len,
diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c
index 426582b9ab65..65348d16ab2f 100644
--- a/drivers/staging/rdma/hfi1/intr.c
+++ b/drivers/staging/rdma/hfi1/intr.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -98,7 +95,7 @@ static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev)
 	 */
 	if (!(dd->flags & HFI1_INITTED))
 		return;
-	event.device = &dd->verbs_dev.ibdev;
+	event.device = &dd->verbs_dev.rdi.ibdev;
 	event.element.port_num = ppd->port;
 	event.event = ev;
 	ib_dispatch_event(&event);
@@ -131,28 +128,26 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
 		 * NOTE: This uses this device's vAU, vCU, and vl15_init for
 		 * the remote values.  Both sides must be using the values.
 		 */
-		if (quick_linkup
-			    || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
+		if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
 			set_up_vl15(dd, dd->vau, dd->vl15_init);
 			assign_remote_cm_au_table(dd, dd->vcu);
 			ppd->neighbor_guid =
-				read_csr(dd,
-					DC_DC8051_STS_REMOTE_GUID);
+				read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
 			ppd->neighbor_type =
 				read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
 					DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
 			ppd->neighbor_port_number =
 				read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
-					DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
-			dd_dev_info(dd,
-				"Neighbor GUID: %llx Neighbor type %d\n",
-				ppd->neighbor_guid,
-				ppd->neighbor_type);
+					 DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
+			dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n",
+				    ppd->neighbor_guid,
+				    ppd->neighbor_type);
 		}
 
 		/* physical link went up */
 		ppd->linkup = 1;
-		ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
 
 		/* link widths are not available until the link is fully up */
 		get_linkup_link_widths(ppd);
@@ -165,7 +160,7 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
 		reset_link_credits(dd);
 
 		/* freeze after a link down to guarantee a clean egress */
-		start_freeze_handling(ppd, FREEZE_SELF|FREEZE_LINK_DOWN);
+		start_freeze_handling(ppd, FREEZE_SELF | FREEZE_LINK_DOWN);
 
 		ev = IB_EVENT_PORT_ERR;
 
@@ -177,8 +172,6 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
 		/* notify IB of the link change */
 		signal_ib_event(ppd, ev);
 	}
-
-
 }
 
 /*
diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/staging/rdma/hfi1/iowait.h
index e8ba5606d08d..2ec6ef38d389 100644
--- a/drivers/staging/rdma/hfi1/iowait.h
+++ b/drivers/staging/rdma/hfi1/iowait.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_IOWAIT_H
 #define _HFI1_IOWAIT_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -54,6 +51,8 @@
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 
+#include "sdma_txreq.h"
+
 /*
  * typedef (*restart_t)() - restart callback
  * @work: pointer to work structure
@@ -67,9 +66,11 @@ struct sdma_engine;
  * @list: used to add/insert into QP/PQ wait lists
  * @tx_head: overflow list of sdma_txreq's
  * @sleep: no space callback
- * @wakeup: space callback
+ * @wakeup: space callback wakeup
+ * @sdma_drained: sdma count drained
  * @iowork: workqueue overhead
  * @wait_dma: wait for sdma_busy == 0
+ * @wait_pio: wait for pio_busy == 0
  * @sdma_busy: # of packets in flight
  * @count: total number of descriptors in tx_head'ed list
  * @tx_limit: limit for overflow queuing
@@ -101,9 +102,12 @@ struct iowait {
 		struct sdma_txreq *tx,
 		unsigned seq);
 	void (*wakeup)(struct iowait *wait, int reason);
+	void (*sdma_drained)(struct iowait *wait);
 	struct work_struct iowork;
 	wait_queue_head_t wait_dma;
+	wait_queue_head_t wait_pio;
 	atomic_t sdma_busy;
+	atomic_t pio_busy;
 	u32 count;
 	u32 tx_limit;
 	u32 tx_count;
@@ -117,7 +121,7 @@ struct iowait {
  * @tx_limit: limit for overflow queuing
  * @func: restart function for workqueue
  * @sleep: sleep function for no space
- * @wakeup: wakeup function for no space
+ * @resume: wakeup function for no space
  *
  * This function initializes the iowait
  * structure embedded in the QP or PQ.
@@ -133,17 +137,21 @@ static inline void iowait_init(
 		struct iowait *wait,
 		struct sdma_txreq *tx,
 		unsigned seq),
-	void (*wakeup)(struct iowait *wait, int reason))
+	void (*wakeup)(struct iowait *wait, int reason),
+	void (*sdma_drained)(struct iowait *wait))
 {
 	wait->count = 0;
 	INIT_LIST_HEAD(&wait->list);
 	INIT_LIST_HEAD(&wait->tx_head);
 	INIT_WORK(&wait->iowork, func);
 	init_waitqueue_head(&wait->wait_dma);
+	init_waitqueue_head(&wait->wait_pio);
 	atomic_set(&wait->sdma_busy, 0);
+	atomic_set(&wait->pio_busy, 0);
 	wait->tx_limit = tx_limit;
 	wait->sleep = sleep;
 	wait->wakeup = wakeup;
+	wait->sdma_drained = sdma_drained;
 }
 
 /**
@@ -174,6 +182,88 @@ static inline void iowait_sdma_drain(struct iowait *wait)
 }
 
 /**
+ * iowait_sdma_pending() - return sdma pending count
+ *
+ * @wait: iowait structure
+ *
+ */
+static inline int iowait_sdma_pending(struct iowait *wait)
+{
+	return atomic_read(&wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_inc - note sdma io pending
+ * @wait: iowait structure
+ */
+static inline void iowait_sdma_inc(struct iowait *wait)
+{
+	atomic_inc(&wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_add - add count to pending
+ * @wait: iowait structure
+ */
+static inline void iowait_sdma_add(struct iowait *wait, int count)
+{
+	atomic_add(count, &wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_dec - note sdma complete
+ * @wait: iowait structure
+ */
+static inline int iowait_sdma_dec(struct iowait *wait)
+{
+	return atomic_dec_and_test(&wait->sdma_busy);
+}
+
+/**
+ * iowait_pio_drain() - wait for pios to drain
+ *
+ * @wait: iowait structure
+ *
+ * This will delay until the iowait pios have
+ * completed.
+ */
+static inline void iowait_pio_drain(struct iowait *wait)
+{
+	wait_event_timeout(wait->wait_pio,
+			   !atomic_read(&wait->pio_busy),
+			   HZ);
+}
+
+/**
+ * iowait_pio_pending() - return pio pending count
+ *
+ * @wait: iowait structure
+ *
+ */
+static inline int iowait_pio_pending(struct iowait *wait)
+{
+	return atomic_read(&wait->pio_busy);
+}
+
+/**
+ * iowait_pio_inc - note pio pending
+ * @wait: iowait structure
+ */
+static inline void iowait_pio_inc(struct iowait *wait)
+{
+	atomic_inc(&wait->pio_busy);
+}
+
+/**
+ * iowait_sdma_dec - note pio complete
+ * @wait: iowait structure
+ */
+static inline int iowait_pio_dec(struct iowait *wait)
+{
+	return atomic_dec_and_test(&wait->pio_busy);
+}
+
+/**
  * iowait_drain_wakeup() - trigger iowait_drain() waiter
  *
  * @wait: iowait structure
@@ -183,6 +273,28 @@ static inline void iowait_sdma_drain(struct iowait *wait)
 static inline void iowait_drain_wakeup(struct iowait *wait)
 {
 	wake_up(&wait->wait_dma);
+	wake_up(&wait->wait_pio);
+	if (wait->sdma_drained)
+		wait->sdma_drained(wait);
+}
+
+/**
+ * iowait_get_txhead() - get packet off of iowait list
+ *
+ * @wait wait struture
+ */
+static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
+{
+	struct sdma_txreq *tx = NULL;
+
+	if (!list_empty(&wait->tx_head)) {
+		tx = list_first_entry(
+			&wait->tx_head,
+			struct sdma_txreq,
+			list);
+		list_del_init(&tx->list);
+	}
+	return tx;
 }
 
 #endif
diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c
deleted file mode 100644
index cb4e6087dfdb..000000000000
--- a/drivers/staging/rdma/hfi1/keys.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include "hfi.h"
-
-/**
- * hfi1_alloc_lkey - allocate an lkey
- * @mr: memory region that this lkey protects
- * @dma_region: 0->normal key, 1->restricted DMA key
- *
- * Returns 0 if successful, otherwise returns -errno.
- *
- * Increments mr reference count as required.
- *
- * Sets the lkey field mr for non-dma regions.
- *
- */
-
-int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region)
-{
-	unsigned long flags;
-	u32 r;
-	u32 n;
-	int ret = 0;
-	struct hfi1_ibdev *dev = to_idev(mr->pd->device);
-	struct hfi1_lkey_table *rkt = &dev->lk_table;
-
-	hfi1_get_mr(mr);
-	spin_lock_irqsave(&rkt->lock, flags);
-
-	/* special case for dma_mr lkey == 0 */
-	if (dma_region) {
-		struct hfi1_mregion *tmr;
-
-		tmr = rcu_access_pointer(dev->dma_mr);
-		if (!tmr) {
-			rcu_assign_pointer(dev->dma_mr, mr);
-			mr->lkey_published = 1;
-		} else {
-			hfi1_put_mr(mr);
-		}
-		goto success;
-	}
-
-	/* Find the next available LKEY */
-	r = rkt->next;
-	n = r;
-	for (;;) {
-		if (!rcu_access_pointer(rkt->table[r]))
-			break;
-		r = (r + 1) & (rkt->max - 1);
-		if (r == n)
-			goto bail;
-	}
-	rkt->next = (r + 1) & (rkt->max - 1);
-	/*
-	 * Make sure lkey is never zero which is reserved to indicate an
-	 * unrestricted LKEY.
-	 */
-	rkt->gen++;
-	/*
-	 * bits are capped in verbs.c to ensure enough bits for
-	 * generation number
-	 */
-	mr->lkey = (r << (32 - hfi1_lkey_table_size)) |
-		((((1 << (24 - hfi1_lkey_table_size)) - 1) & rkt->gen)
-		 << 8);
-	if (mr->lkey == 0) {
-		mr->lkey |= 1 << 8;
-		rkt->gen++;
-	}
-	rcu_assign_pointer(rkt->table[r], mr);
-	mr->lkey_published = 1;
-success:
-	spin_unlock_irqrestore(&rkt->lock, flags);
-out:
-	return ret;
-bail:
-	hfi1_put_mr(mr);
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	ret = -ENOMEM;
-	goto out;
-}
-
-/**
- * hfi1_free_lkey - free an lkey
- * @mr: mr to free from tables
- */
-void hfi1_free_lkey(struct hfi1_mregion *mr)
-{
-	unsigned long flags;
-	u32 lkey = mr->lkey;
-	u32 r;
-	struct hfi1_ibdev *dev = to_idev(mr->pd->device);
-	struct hfi1_lkey_table *rkt = &dev->lk_table;
-	int freed = 0;
-
-	spin_lock_irqsave(&rkt->lock, flags);
-	if (!mr->lkey_published)
-		goto out;
-	if (lkey == 0)
-		RCU_INIT_POINTER(dev->dma_mr, NULL);
-	else {
-		r = lkey >> (32 - hfi1_lkey_table_size);
-		RCU_INIT_POINTER(rkt->table[r], NULL);
-	}
-	mr->lkey_published = 0;
-	freed++;
-out:
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	if (freed) {
-		synchronize_rcu();
-		hfi1_put_mr(mr);
-	}
-}
-
-/**
- * hfi1_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @pd: protection domain
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * increments the reference count upon success
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd,
-		 struct hfi1_sge *isge, struct ib_sge *sge, int acc)
-{
-	struct hfi1_mregion *mr;
-	unsigned n, m;
-	size_t off;
-
-	/*
-	 * We use LKEY == zero for kernel virtual addresses
-	 * (see hfi1_get_dma_mr and dma.c).
-	 */
-	rcu_read_lock();
-	if (sge->lkey == 0) {
-		struct hfi1_ibdev *dev = to_idev(pd->ibpd.device);
-
-		if (pd->user)
-			goto bail;
-		mr = rcu_dereference(dev->dma_mr);
-		if (!mr)
-			goto bail;
-		atomic_inc(&mr->refcount);
-		rcu_read_unlock();
-
-		isge->mr = mr;
-		isge->vaddr = (void *) sge->addr;
-		isge->length = sge->length;
-		isge->sge_length = sge->length;
-		isge->m = 0;
-		isge->n = 0;
-		goto ok;
-	}
-	mr = rcu_dereference(
-		rkt->table[(sge->lkey >> (32 - hfi1_lkey_table_size))]);
-	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
-		goto bail;
-
-	off = sge->addr - mr->user_base;
-	if (unlikely(sge->addr < mr->user_base ||
-		     off + sge->length > mr->length ||
-		     (mr->access_flags & acc) != acc))
-		goto bail;
-	atomic_inc(&mr->refcount);
-	rcu_read_unlock();
-
-	off += mr->offset;
-	if (mr->page_shift) {
-		/*
-		page sizes are uniform power of 2 so no loop is necessary
-		entries_spanned_by_off is the number of times the loop below
-		would have executed.
-		*/
-		size_t entries_spanned_by_off;
-
-		entries_spanned_by_off = off >> mr->page_shift;
-		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off / HFI1_SEGSZ;
-		n = entries_spanned_by_off % HFI1_SEGSZ;
-	} else {
-		m = 0;
-		n = 0;
-		while (off >= mr->map[m]->segs[n].length) {
-			off -= mr->map[m]->segs[n].length;
-			n++;
-			if (n >= HFI1_SEGSZ) {
-				m++;
-				n = 0;
-			}
-		}
-	}
-	isge->mr = mr;
-	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	isge->length = mr->map[m]->segs[n].length - off;
-	isge->sge_length = sge->length;
-	isge->m = m;
-	isge->n = n;
-ok:
-	return 1;
-bail:
-	rcu_read_unlock();
-	return 0;
-}
-
-/**
- * hfi1_rkey_ok - check the IB virtual address, length, and RKEY
- * @qp: qp for validation
- * @sge: SGE state
- * @len: length of data
- * @vaddr: virtual address to place data
- * @rkey: rkey to check
- * @acc: access flags
- *
- * Return 1 if successful, otherwise 0.
- *
- * increments the reference count upon success
- */
-int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge,
-		 u32 len, u64 vaddr, u32 rkey, int acc)
-{
-	struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-	struct hfi1_mregion *mr;
-	unsigned n, m;
-	size_t off;
-
-	/*
-	 * We use RKEY == zero for kernel virtual addresses
-	 * (see hfi1_get_dma_mr and dma.c).
-	 */
-	rcu_read_lock();
-	if (rkey == 0) {
-		struct hfi1_pd *pd = to_ipd(qp->ibqp.pd);
-		struct hfi1_ibdev *dev = to_idev(pd->ibpd.device);
-
-		if (pd->user)
-			goto bail;
-		mr = rcu_dereference(dev->dma_mr);
-		if (!mr)
-			goto bail;
-		atomic_inc(&mr->refcount);
-		rcu_read_unlock();
-
-		sge->mr = mr;
-		sge->vaddr = (void *) vaddr;
-		sge->length = len;
-		sge->sge_length = len;
-		sge->m = 0;
-		sge->n = 0;
-		goto ok;
-	}
-
-	mr = rcu_dereference(
-		rkt->table[(rkey >> (32 - hfi1_lkey_table_size))]);
-	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
-		goto bail;
-
-	off = vaddr - mr->iova;
-	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
-		     (mr->access_flags & acc) == 0))
-		goto bail;
-	atomic_inc(&mr->refcount);
-	rcu_read_unlock();
-
-	off += mr->offset;
-	if (mr->page_shift) {
-		/*
-		page sizes are uniform power of 2 so no loop is necessary
-		entries_spanned_by_off is the number of times the loop below
-		would have executed.
-		*/
-		size_t entries_spanned_by_off;
-
-		entries_spanned_by_off = off >> mr->page_shift;
-		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off / HFI1_SEGSZ;
-		n = entries_spanned_by_off % HFI1_SEGSZ;
-	} else {
-		m = 0;
-		n = 0;
-		while (off >= mr->map[m]->segs[n].length) {
-			off -= mr->map[m]->segs[n].length;
-			n++;
-			if (n >= HFI1_SEGSZ) {
-				m++;
-				n = 0;
-			}
-		}
-	}
-	sge->mr = mr;
-	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	sge->length = mr->map[m]->segs[n].length - off;
-	sge->sge_length = len;
-	sge->m = m;
-	sge->n = n;
-ok:
-	return 1;
-bail:
-	rcu_read_unlock();
-	return 0;
-}
diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c
index 4f5dbd14b5de..d1e7f4d7cf6f 100644
--- a/drivers/staging/rdma/hfi1/mad.c
+++ b/drivers/staging/rdma/hfi1/mad.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -55,6 +52,7 @@
 #include "hfi.h"
 #include "mad.h"
 #include "trace.h"
+#include "qp.h"
 
 /* the reset value from the FM is supposed to be 0xffff, handle both */
 #define OPA_LINK_WIDTH_RESET_OLD 0x0fff
@@ -91,7 +89,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
 	int pkey_idx;
 	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
 
-	agent = ibp->send_agent;
+	agent = ibp->rvp.send_agent;
 	if (!agent)
 		return;
 
@@ -100,7 +98,8 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
 		return;
 
 	/* o14-2 */
-	if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout))
+	if (ibp->rvp.trap_timeout && time_before(jiffies,
+						 ibp->rvp.trap_timeout))
 		return;
 
 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
@@ -121,42 +120,43 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
 	smp->class_version = OPA_SMI_CLASS_VERSION;
 	smp->method = IB_MGMT_METHOD_TRAP;
-	ibp->tid++;
-	smp->tid = cpu_to_be64(ibp->tid);
+	ibp->rvp.tid++;
+	smp->tid = cpu_to_be64(ibp->rvp.tid);
 	smp->attr_id = IB_SMP_ATTR_NOTICE;
 	/* o14-1: smp->mkey = 0; */
 	memcpy(smp->route.lid.data, data, len);
 
-	spin_lock_irqsave(&ibp->lock, flags);
-	if (!ibp->sm_ah) {
-		if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	if (!ibp->rvp.sm_ah) {
+		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
 			struct ib_ah *ah;
 
-			ah = hfi1_create_qp0_ah(ibp, ibp->sm_lid);
-			if (IS_ERR(ah))
+			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
+			if (IS_ERR(ah)) {
 				ret = PTR_ERR(ah);
-			else {
+			} else {
 				send_buf->ah = ah;
-				ibp->sm_ah = to_iah(ah);
+				ibp->rvp.sm_ah = ibah_to_rvtah(ah);
 				ret = 0;
 			}
-		} else
+		} else {
 			ret = -EINVAL;
+		}
 	} else {
-		send_buf->ah = &ibp->sm_ah->ibah;
+		send_buf->ah = &ibp->rvp.sm_ah->ibah;
 		ret = 0;
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	if (!ret)
 		ret = ib_post_send_mad(send_buf, NULL);
 	if (!ret) {
 		/* 4.096 usec. */
-		timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000;
-		ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout);
+		timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
+		ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
 	} else {
 		ib_free_send_mad(send_buf);
-		ibp->trap_timeout = 0;
+		ibp->rvp.trap_timeout = 0;
 	}
 }
 
@@ -174,10 +174,10 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
 	memset(&data, 0, sizeof(data));
 
 	if (trap_num == OPA_TRAP_BAD_P_KEY)
-		ibp->pkey_violations++;
+		ibp->rvp.pkey_violations++;
 	else
-		ibp->qkey_violations++;
-	ibp->n_pkt_drops++;
+		ibp->rvp.qkey_violations++;
+	ibp->rvp.n_pkt_drops++;
 
 	/* Send violation trap */
 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
@@ -233,9 +233,12 @@ static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
 /*
  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
  */
-void hfi1_cap_mask_chg(struct hfi1_ibport *ibp)
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
 {
 	struct opa_mad_notice_attr data;
+	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
 	u32 lid = ppd_from_ibp(ibp)->lid;
 
 	memset(&data, 0, sizeof(data));
@@ -245,7 +248,7 @@ void hfi1_cap_mask_chg(struct hfi1_ibport *ibp)
 	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
 	data.issuer_lid = cpu_to_be32(lid);
 	data.ntc_144.lid = data.issuer_lid;
-	data.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags);
+	data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
 
 	send_trap(ibp, &data, sizeof(data));
 }
@@ -407,37 +410,38 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
 	int ret = 0;
 
 	/* Is the mkey in the process of expiring? */
-	if (ibp->mkey_lease_timeout &&
-	    time_after_eq(jiffies, ibp->mkey_lease_timeout)) {
+	if (ibp->rvp.mkey_lease_timeout &&
+	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
 		/* Clear timeout and mkey protection field. */
-		ibp->mkey_lease_timeout = 0;
-		ibp->mkeyprot = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
+		ibp->rvp.mkeyprot = 0;
 	}
 
-	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->mkey == 0 ||
-	    ibp->mkey == mkey)
+	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
+	    ibp->rvp.mkey == mkey)
 		valid_mkey = 1;
 
 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
-	if (valid_mkey && ibp->mkey_lease_timeout &&
+	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
 	    (mad->method == IB_MGMT_METHOD_GET ||
 	     mad->method == IB_MGMT_METHOD_SET ||
 	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
-		ibp->mkey_lease_timeout = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
 
 	if (!valid_mkey) {
 		switch (mad->method) {
 		case IB_MGMT_METHOD_GET:
 			/* Bad mkey not a violation below level 2 */
-			if (ibp->mkeyprot < 2)
+			if (ibp->rvp.mkeyprot < 2)
 				break;
 		case IB_MGMT_METHOD_SET:
 		case IB_MGMT_METHOD_TRAP_REPRESS:
-			if (ibp->mkey_violations != 0xFFFF)
-				++ibp->mkey_violations;
-			if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
-				ibp->mkey_lease_timeout = jiffies +
-					ibp->mkey_lease_period * HZ;
+			if (ibp->rvp.mkey_violations != 0xFFFF)
+				++ibp->rvp.mkey_violations;
+			if (!ibp->rvp.mkey_lease_timeout &&
+			    ibp->rvp.mkey_lease_period)
+				ibp->rvp.mkey_lease_timeout = jiffies +
+					ibp->rvp.mkey_lease_period * HZ;
 			/* Generate a trap notice. */
 			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
 				 hop_cnt);
@@ -501,16 +505,6 @@ void read_ltp_rtt(struct hfi1_devdata *dd)
 		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
 }
 
-static u8 __opa_porttype(struct hfi1_pportdata *ppd)
-{
-	if (qsfp_mod_present(ppd)) {
-		if (ppd->qsfp_info.cache_valid)
-			return OPA_PORT_TYPE_STANDARD;
-		return OPA_PORT_TYPE_DISCONNECTED;
-	}
-	return OPA_PORT_TYPE_UNKNOWN;
-}
-
 static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 				   struct ib_device *ibdev, u8 port,
 				   u32 *resp_len)
@@ -522,6 +516,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	struct opa_port_info *pi = (struct opa_port_info *)data;
 	u8 mtu;
 	u8 credit_rate;
+	u8 is_beaconing_active;
 	u32 state;
 	u32 num_ports = OPA_AM_NPORT(am);
 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -538,8 +533,8 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	ppd = dd->pport + (port - 1);
 	ibp = &ppd->ibport_data;
 
-	if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
-		ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
+	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
+	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -548,14 +543,14 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 
 	/* Only return the mkey if the protection field allows it. */
 	if (!(smp->method == IB_MGMT_METHOD_GET &&
-	      ibp->mkey != smp->mkey &&
-	      ibp->mkeyprot == 1))
-		pi->mkey = ibp->mkey;
-
-	pi->subnet_prefix = ibp->gid_prefix;
-	pi->sm_lid = cpu_to_be32(ibp->sm_lid);
-	pi->ib_cap_mask = cpu_to_be32(ibp->port_cap_flags);
-	pi->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period);
+	      ibp->rvp.mkey != smp->mkey &&
+	      ibp->rvp.mkeyprot == 1))
+		pi->mkey = ibp->rvp.mkey;
+
+	pi->subnet_prefix = ibp->rvp.gid_prefix;
+	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
+	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
+	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
 	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
 	pi->sa_qp = cpu_to_be32(ppd->sa_qp);
 
@@ -581,38 +576,45 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	if (start_of_sm_config && (state == IB_PORT_INIT))
 		ppd->is_sm_config_started = 1;
 
-	pi->port_phys_conf = __opa_porttype(ppd) & 0xf;
+	pi->port_phys_conf = (ppd->port_type & 0xf);
 
 #if PI_LED_ENABLE_SUP
 	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
 	pi->port_states.ledenable_offlinereason |=
 		ppd->is_sm_config_started << 5;
+	/*
+	 * This pairs with the memory barrier in hfi1_start_led_override to
+	 * ensure that we read the correct state of LED beaconing represented
+	 * by led_override_timer_active
+	 */
+	smp_rmb();
+	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
+	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
 	pi->port_states.ledenable_offlinereason |=
-		ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON;
+		ppd->offline_disabled_reason;
 #else
 	pi->port_states.offline_reason = ppd->neighbor_normal << 4;
 	pi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
-	pi->port_states.offline_reason |= ppd->offline_disabled_reason &
-						OPA_PI_MASK_OFFLINE_REASON;
+	pi->port_states.offline_reason |= ppd->offline_disabled_reason;
 #endif /* PI_LED_ENABLE_SUP */
 
 	pi->port_states.portphysstate_portstate =
 		(hfi1_ibphys_portstate(ppd) << 4) | state;
 
-	pi->mkeyprotect_lmc = (ibp->mkeyprot << 6) | ppd->lmc;
+	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
 
 	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
 	for (i = 0; i < ppd->vls_supported; i++) {
 		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
 		if ((i % 2) == 0)
-			pi->neigh_mtu.pvlx_to_mtu[i/2] |= (mtu << 4);
+			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
 		else
-			pi->neigh_mtu.pvlx_to_mtu[i/2] |= mtu;
+			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
 	}
 	/* don't forget VL 15 */
 	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
-	pi->neigh_mtu.pvlx_to_mtu[15/2] |= mtu;
-	pi->smsl = ibp->sm_sl & OPA_PI_MASK_SMSL;
+	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
+	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
 	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
 	pi->partenforce_filterraw |=
 		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
@@ -620,17 +622,17 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
 	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
-	pi->mkey_violations = cpu_to_be16(ibp->mkey_violations);
+	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
 	/* P_KeyViolations are counted by hardware. */
-	pi->pkey_violations = cpu_to_be16(ibp->pkey_violations);
-	pi->qkey_violations = cpu_to_be16(ibp->qkey_violations);
+	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
+	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
 
 	pi->vl.cap = ppd->vls_supported;
-	pi->vl.high_limit = cpu_to_be16(ibp->vl_high_limit);
+	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
 	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
 	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
 
-	pi->clientrereg_subnettimeout = ibp->subnet_timeout;
+	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
 
 	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
 					  OPA_PORT_LINK_MODE_OPA << 5 |
@@ -701,8 +703,10 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
 	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
 
-	/* this counter is 16 bits wide, but the replay_depth.wire
-	 * variable is only 8 bits */
+	/*
+	 * this counter is 16 bits wide, but the replay_depth.wire
+	 * variable is only 8 bits
+	 */
 	if (tmp > 0xff)
 		tmp = 0xff;
 	pi->replay_depth.wire = tmp;
@@ -749,7 +753,7 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	n_blocks_avail = (u16) (npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
 
 	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
 
@@ -763,7 +767,7 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	p = (__be16 *) data;
+	p = (__be16 *)data;
 	q = (u16 *)data;
 	/* get the real pkeys if we are requesting the first block */
 	if (start_block == 0) {
@@ -772,9 +776,9 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 			p[i] = cpu_to_be16(q[i]);
 		if (resp_len)
 			*resp_len += size;
-	} else
+	} else {
 		smp->status |= IB_SMP_INVALID_FIELD;
-
+	}
 	return reply((struct ib_mad_hdr *)smp);
 }
 
@@ -901,8 +905,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
 	u32 logical_old = driver_logical_state(ppd);
 	int ret, logical_allowed, physical_allowed;
 
-	logical_allowed = ret =
-		logical_transition_allowed(logical_old, logical_new);
+	ret = logical_transition_allowed(logical_old, logical_new);
+	logical_allowed = ret;
 
 	if (ret == HFI_TRANSITION_DISALLOWED ||
 	    ret == HFI_TRANSITION_UNDEFINED) {
@@ -912,8 +916,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
 		return ret;
 	}
 
-	physical_allowed = ret =
-		physical_transition_allowed(physical_old, physical_new);
+	ret = physical_transition_allowed(physical_old, physical_new);
+	physical_allowed = ret;
 
 	if (ret == HFI_TRANSITION_DISALLOWED ||
 	    ret == HFI_TRANSITION_UNDEFINED) {
@@ -928,6 +932,14 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
 		return HFI_TRANSITION_IGNORED;
 
 	/*
+	 * A change request of Physical Port State from
+	 * 'Offline' to 'Polling' should be ignored.
+	 */
+	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
+	    (physical_new == IB_PORTPHYSSTATE_POLLING))
+		return HFI_TRANSITION_IGNORED;
+
+	/*
 	 * Either physical_allowed or logical_allowed is
 	 * HFI_TRANSITION_ALLOWED.
 	 */
@@ -972,16 +984,15 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
 			break;
 		/* FALLTHROUGH */
 	case IB_PORT_DOWN:
-		if (phys_state == IB_PORTPHYSSTATE_NOP)
+		if (phys_state == IB_PORTPHYSSTATE_NOP) {
 			link_state = HLS_DN_DOWNDEF;
-		else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
+		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
 			link_state = HLS_DN_POLL;
-			set_link_down_reason(ppd,
-			     OPA_LINKDOWN_REASON_FM_BOUNCE, 0,
-			     OPA_LINKDOWN_REASON_FM_BOUNCE);
-		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED)
+			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
+					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
+		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
 			link_state = HLS_DN_DISABLE;
-		else {
+		} else {
 			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
 				phys_state);
 			smp->status |= IB_SMP_INVALID_FIELD;
@@ -991,11 +1002,11 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
 		set_link_state(ppd, link_state);
 		if (link_state == HLS_DN_DISABLE &&
 		    (ppd->offline_disabled_reason >
-		     OPA_LINKDOWN_REASON_SMA_DISABLED ||
+		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
 		     ppd->offline_disabled_reason ==
-		     OPA_LINKDOWN_REASON_NONE))
+		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
 			ppd->offline_disabled_reason =
-			OPA_LINKDOWN_REASON_SMA_DISABLED;
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
 		/*
 		 * Don't send a reply if the response would be sent
 		 * through the disabled port.
@@ -1091,13 +1102,13 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 
 	ls_old = driver_lstate(ppd);
 
-	ibp->mkey = pi->mkey;
-	ibp->gid_prefix = pi->subnet_prefix;
-	ibp->mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
+	ibp->rvp.mkey = pi->mkey;
+	ibp->rvp.gid_prefix = pi->subnet_prefix;
+	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
 
 	/* Must be a valid unicast LID address. */
 	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
-	     lid >= HFI1_MULTICAST_LID_BASE) {
+	    lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
 			lid);
@@ -1130,23 +1141,23 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 
 	/* Must be a valid unicast LID address. */
 	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
-	     smlid >= HFI1_MULTICAST_LID_BASE) {
+	    smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
-	} else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
+	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
 		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
-		spin_lock_irqsave(&ibp->lock, flags);
-		if (ibp->sm_ah) {
-			if (smlid != ibp->sm_lid)
-				ibp->sm_ah->attr.dlid = smlid;
-			if (msl != ibp->sm_sl)
-				ibp->sm_ah->attr.sl = msl;
+		spin_lock_irqsave(&ibp->rvp.lock, flags);
+		if (ibp->rvp.sm_ah) {
+			if (smlid != ibp->rvp.sm_lid)
+				ibp->rvp.sm_ah->attr.dlid = smlid;
+			if (msl != ibp->rvp.sm_sl)
+				ibp->rvp.sm_ah->attr.sl = msl;
 		}
-		spin_unlock_irqrestore(&ibp->lock, flags);
-		if (smlid != ibp->sm_lid)
-			ibp->sm_lid = smlid;
-		if (msl != ibp->sm_sl)
-			ibp->sm_sl = msl;
+		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+		if (smlid != ibp->rvp.sm_lid)
+			ibp->rvp.sm_lid = smlid;
+		if (msl != ibp->rvp.sm_sl)
+			ibp->rvp.sm_sl = msl;
 		event.event = IB_EVENT_SM_CHANGE;
 		ib_dispatch_event(&event);
 	}
@@ -1167,8 +1178,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
 	lwe = be16_to_cpu(pi->link_width.enabled);
 	if (lwe) {
-		if (lwe == OPA_LINK_WIDTH_RESET
-				|| lwe == OPA_LINK_WIDTH_RESET_OLD)
+		if (lwe == OPA_LINK_WIDTH_RESET ||
+		    lwe == OPA_LINK_WIDTH_RESET_OLD)
 			set_link_width_enabled(ppd, ppd->link_width_supported);
 		else if ((lwe & ~ppd->link_width_supported) == 0)
 			set_link_width_enabled(ppd, lwe);
@@ -1177,19 +1188,21 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	}
 	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
 	/* LWD.E is always applied - 0 means "disabled" */
-	if (lwe == OPA_LINK_WIDTH_RESET
-			|| lwe == OPA_LINK_WIDTH_RESET_OLD) {
+	if (lwe == OPA_LINK_WIDTH_RESET ||
+	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
 		set_link_width_downgrade_enabled(ppd,
-				ppd->link_width_downgrade_supported);
+						 ppd->
+						 link_width_downgrade_supported
+						 );
 	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
 		/* only set and apply if something changed */
 		if (lwe != ppd->link_width_downgrade_enabled) {
 			set_link_width_downgrade_enabled(ppd, lwe);
 			call_link_downgrade_policy = 1;
 		}
-	} else
+	} else {
 		smp->status |= IB_SMP_INVALID_FIELD;
-
+	}
 	lse = be16_to_cpu(pi->link_speed.enabled);
 	if (lse) {
 		if (lse & be16_to_cpu(pi->link_speed.supported))
@@ -1198,22 +1211,24 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 			smp->status |= IB_SMP_INVALID_FIELD;
 	}
 
-	ibp->mkeyprot = (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
-	ibp->vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
+	ibp->rvp.mkeyprot =
+		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
+	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
-				    ibp->vl_high_limit);
+				    ibp->rvp.vl_high_limit);
 
-	if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
-		ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
+	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
+	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
 	for (i = 0; i < ppd->vls_supported; i++) {
 		if ((i % 2) == 0)
-			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i/2] >> 4)
-					  & 0xF);
+			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
+					   4) & 0xF);
 		else
-			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i/2] & 0xF);
+			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
+					  0xF);
 		if (mtu == 0xffff) {
 			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
 				mtu,
@@ -1223,8 +1238,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 		}
 		if (dd->vld[i].mtu != mtu) {
 			dd_dev_info(dd,
-				"MTU change on vl %d from %d to %d\n",
-				i, dd->vld[i].mtu, mtu);
+				    "MTU change on vl %d from %d to %d\n",
+				    i, dd->vld[i].mtu, mtu);
 			dd->vld[i].mtu = mtu;
 			call_set_mtu++;
 		}
@@ -1232,13 +1247,13 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	/* As per OPAV1 spec: VL15 must support and be configured
 	 * for operation with a 2048 or larger MTU.
 	 */
-	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15/2] & 0xF);
+	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
 	if (mtu < 2048 || mtu == 0xffff)
 		mtu = 2048;
 	if (dd->vld[15].mtu != mtu) {
 		dd_dev_info(dd,
-			"MTU change on vl 15 from %d to %d\n",
-			dd->vld[15].mtu, mtu);
+			    "MTU change on vl 15 from %d to %d\n",
+			    dd->vld[15].mtu, mtu);
 		dd->vld[15].mtu = mtu;
 		call_set_mtu++;
 	}
@@ -1254,21 +1269,21 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 			smp->status |= IB_SMP_INVALID_FIELD;
 		} else {
 			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
-						vls) == -EINVAL)
+					    vls) == -EINVAL)
 				smp->status |= IB_SMP_INVALID_FIELD;
 		}
 	}
 
 	if (pi->mkey_violations == 0)
-		ibp->mkey_violations = 0;
+		ibp->rvp.mkey_violations = 0;
 
 	if (pi->pkey_violations == 0)
-		ibp->pkey_violations = 0;
+		ibp->rvp.pkey_violations = 0;
 
 	if (pi->qkey_violations == 0)
-		ibp->qkey_violations = 0;
+		ibp->rvp.qkey_violations = 0;
 
-	ibp->subnet_timeout =
+	ibp->rvp.subnet_timeout =
 		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
 
 	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
@@ -1388,7 +1403,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
 
 		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev.ibdev;
+		event.device = &dd->verbs_dev.rdi.ibdev;
 		event.element.port_num = port;
 		ib_dispatch_event(&event);
 	}
@@ -1402,7 +1417,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u32 n_blocks_sent = OPA_AM_NBLK(am);
 	u32 start_block = am & 0x7ff;
-	u16 *p = (u16 *) data;
+	u16 *p = (u16 *)data;
 	__be16 *q = (__be16 *)data;
 	int i;
 	u16 n_blocks_avail;
@@ -1415,7 +1430,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	n_blocks_avail = (u16)(npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
 
 	if (start_block + n_blocks_sent > n_blocks_avail ||
 	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
@@ -1514,14 +1529,22 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	u8 *p = data;
 	int i;
+	u8 sc;
 
 	if (am) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++)
-		ibp->sl_to_sc[i] = *p++;
+	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
+		sc = *p++;
+		if (ibp->sl_to_sc[i] != sc) {
+			ibp->sl_to_sc[i] = sc;
+
+			/* Put all stale qps into error state */
+			hfi1_error_port_qps(ibp, i);
+		}
+	}
 
 	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
 }
@@ -1574,7 +1597,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
 {
 	u32 n_blocks = OPA_AM_NBLK(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	size_t size = 4 * sizeof(u64);
 
 	if (n_blocks != 1) {
@@ -1597,7 +1620,7 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
 	u32 n_blocks = OPA_AM_NBLK(am);
 	int async_update = OPA_AM_ASYNC(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	struct hfi1_pportdata *ppd;
 	int lstate;
 
@@ -1609,8 +1632,10 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
 	/* IB numbers ports from 1, hw from 0 */
 	ppd = dd->pport + (port - 1);
 	lstate = driver_lstate(ppd);
-	/* it's known that async_update is 0 by this point, but include
-	 * the explicit check for clarity */
+	/*
+	 * it's known that async_update is 0 by this point, but include
+	 * the explicit check for clarity
+	 */
 	if (!async_update &&
 	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
@@ -1629,7 +1654,7 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
 	u32 n_blocks = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	int size;
 
 	if (n_blocks != 1) {
@@ -1654,7 +1679,7 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
 	u32 n_blocks = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	int lstate;
 
 	if (n_blocks != 1) {
@@ -1687,7 +1712,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
 	u32 lstate;
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
-	struct opa_port_state_info *psi = (struct opa_port_state_info *) data;
+	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
 
 	if (nports != 1) {
 		smp->status |= IB_SMP_INVALID_FIELD;
@@ -1707,12 +1732,11 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
 	psi->port_states.ledenable_offlinereason |=
 		ppd->is_sm_config_started << 5;
 	psi->port_states.ledenable_offlinereason |=
-		ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON;
+		ppd->offline_disabled_reason;
 #else
 	psi->port_states.offline_reason = ppd->neighbor_normal << 4;
 	psi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
-	psi->port_states.offline_reason |= ppd->offline_disabled_reason &
-				OPA_PI_MASK_OFFLINE_REASON;
+	psi->port_states.offline_reason |= ppd->offline_disabled_reason;
 #endif /* PI_LED_ENABLE_SUP */
 
 	psi->port_states.portphysstate_portstate =
@@ -1737,7 +1761,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
 	u8 ls_new, ps_new;
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
-	struct opa_port_state_info *psi = (struct opa_port_state_info *) data;
+	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
 	int ret, invalid = 0;
 
 	if (nports != 1) {
@@ -1782,14 +1806,16 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
 	u32 len = OPA_AM_CI_LEN(am) + 1;
 	int ret;
 
-#define __CI_PAGE_SIZE (1 << 7) /* 128 bytes */
+#define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
 
-	/* check that addr is within spec, and
-	 * addr and (addr + len - 1) are on the same "page" */
+	/*
+	 * check that addr is within spec, and
+	 * addr and (addr + len - 1) are on the same "page"
+	 */
 	if (addr >= 4096 ||
-		(__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
+	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -1823,7 +1849,7 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
 	u32 num_ports = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	struct buffer_control *p = (struct buffer_control *) data;
+	struct buffer_control *p = (struct buffer_control *)data;
 	int size;
 
 	if (num_ports != 1) {
@@ -1846,7 +1872,7 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
 	u32 num_ports = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	struct buffer_control *p = (struct buffer_control *) data;
+	struct buffer_control *p = (struct buffer_control *)data;
 
 	if (num_ports != 1) {
 		smp->status |= IB_SMP_INVALID_FIELD;
@@ -1919,13 +1945,15 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
 
 	switch (section) {
 	case OPA_VLARB_LOW_ELEMENTS:
-		(void) fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
+		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
 		break;
 	case OPA_VLARB_HIGH_ELEMENTS:
-		(void) fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
+		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
 		break;
-	/* neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
-	 * can be changed from the default values */
+	/*
+	 * neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
+	 * can be changed from the default values
+	 */
 	case OPA_VLARB_PREEMPT_ELEMENTS:
 		/* FALLTHROUGH */
 	case OPA_VLARB_PREEMPT_MATRIX:
@@ -2137,8 +2165,10 @@ struct opa_port_data_counters_msg {
 };
 
 struct opa_port_error_counters64_msg {
-	/* Request contains first two fields, response contains the
-	 * whole magilla */
+	/*
+	 * Request contains first two fields, response contains the
+	 * whole magilla
+	 */
 	__be64 port_select_mask[4];
 	__be32 vl_select_mask;
 
@@ -2172,7 +2202,6 @@ struct opa_port_error_info_msg {
 	__be32 error_info_select_mask;
 	__be32 reserved1;
 	struct _port_ei {
-
 		u8 port_number;
 		u8 reserved2[7];
 
@@ -2251,7 +2280,7 @@ enum error_info_selects {
 };
 
 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
-				struct ib_device *ibdev, u32 *resp_len)
+				     struct ib_device *ibdev, u32 *resp_len)
 {
 	struct opa_class_port_info *p =
 		(struct opa_class_port_info *)pmp->data;
@@ -2279,23 +2308,29 @@ static void a0_portstatus(struct hfi1_pportdata *ppd,
 {
 	if (!is_bx(ppd->dd)) {
 		unsigned long vl;
-		u64 max_vl_xmit_wait = 0, tmp;
+		u64 sum_vl_xmit_wait = 0;
 		u32 vl_all_mask = VL_MASK_ALL;
 
 		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
 				 8 * sizeof(vl_all_mask)) {
-			tmp = read_port_cntr(ppd, C_TX_WAIT_VL,
-					     idx_from_vl(vl));
-			if (tmp > max_vl_xmit_wait)
-				max_vl_xmit_wait = tmp;
+			u64 tmp = sum_vl_xmit_wait +
+				  read_port_cntr(ppd, C_TX_WAIT_VL,
+						 idx_from_vl(vl));
+			if (tmp < sum_vl_xmit_wait) {
+				/* we wrapped */
+				sum_vl_xmit_wait = (u64)~0;
+				break;
+			}
+			sum_vl_xmit_wait = tmp;
 		}
-		rsp->port_xmit_wait = cpu_to_be64(max_vl_xmit_wait);
+		if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
+			rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
 	}
 }
 
-
 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				  struct ib_device *ibdev,
+				  u8 port, u32 *resp_len)
 {
 	struct opa_port_status_req *req =
 		(struct opa_port_status_req *)pmp->data;
@@ -2320,8 +2355,8 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
 		return reply((struct ib_mad_hdr *)pmp);
 	}
 
-	if (nports != 1 || (port_num && port_num != port)
-	    || num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
+	if (nports != 1 || (port_num && port_num != port) ||
+	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -2351,7 +2386,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
 					 CNTR_INVALID_VL));
 	rsp->port_multicast_xmit_pkts =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
-					CNTR_INVALID_VL));
+					  CNTR_INVALID_VL));
 	rsp->port_multicast_rcv_pkts =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
 					  CNTR_INVALID_VL));
@@ -2380,7 +2415,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
 	}
 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-					CNTR_INVALID_VL);
+				   CNTR_INVALID_VL);
 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
 		/* overflow/wrapped */
 		rsp->link_error_recovery = cpu_to_be32(~0);
@@ -2395,13 +2430,13 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
 					  CNTR_INVALID_VL));
 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
-					  CNTR_INVALID_VL));
+						      CNTR_INVALID_VL));
 
 	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
 
-	vlinfo = &(rsp->vls[0]);
+	vlinfo = &rsp->vls[0];
 	vfi = 0;
 	/* The vl_select_mask has been checked above, and we know
 	 * that it contains only entries which represent valid VLs.
@@ -2417,27 +2452,27 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
 
 		rsp->vls[vfi].port_vl_rcv_pkts =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
-					idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_data =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
-					idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_pkts =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
-					idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_wait =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
-					idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_fecn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
-					idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_becn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
-					idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		vlinfo++;
 		vfi++;
@@ -2467,7 +2502,7 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
 						CNTR_INVALID_VL);
 	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
-						CNTR_INVALID_VL);
+					       CNTR_INVALID_VL);
 	/* local link integrity must be right-shifted by the lli resolution */
 	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
 	tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
@@ -2477,10 +2512,10 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
 	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
 	error_counter_summary += (tmp >> res_ler);
 	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
-						CNTR_INVALID_VL);
+					       CNTR_INVALID_VL);
 	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
 	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
-						CNTR_INVALID_VL);
+					       CNTR_INVALID_VL);
 	/* ppd->link_downed is a 32-bit value */
 	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
 						CNTR_INVALID_VL);
@@ -2491,21 +2526,22 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
 	return error_counter_summary;
 }
 
-static void a0_datacounters(struct hfi1_devdata *dd, struct _port_dctrs *rsp,
+static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
 			    u32 vl_select_mask)
 {
-	if (!is_bx(dd)) {
+	if (!is_bx(ppd->dd)) {
 		unsigned long vl;
-		int vfi = 0;
 		u64 sum_vl_xmit_wait = 0;
+		u32 vl_all_mask = VL_MASK_ALL;
 
-		for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
-				8 * sizeof(vl_select_mask)) {
+		for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
+				 8 * sizeof(vl_all_mask)) {
 			u64 tmp = sum_vl_xmit_wait +
-				be64_to_cpu(rsp->vls[vfi++].port_vl_xmit_wait);
+				  read_port_cntr(ppd, C_TX_WAIT_VL,
+						 idx_from_vl(vl));
 			if (tmp < sum_vl_xmit_wait) {
 				/* we wrapped */
-				sum_vl_xmit_wait = (u64) ~0;
+				sum_vl_xmit_wait = (u64)~0;
 				break;
 			}
 			sum_vl_xmit_wait = tmp;
@@ -2515,8 +2551,30 @@ static void a0_datacounters(struct hfi1_devdata *dd, struct _port_dctrs *rsp,
 	}
 }
 
+static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
+				   struct _port_dctrs *rsp)
+{
+	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
+						CNTR_INVALID_VL));
+	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
+						CNTR_INVALID_VL));
+	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
+						CNTR_INVALID_VL));
+	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
+						CNTR_INVALID_VL));
+	rsp->port_multicast_xmit_pkts =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
+					  CNTR_INVALID_VL));
+	rsp->port_multicast_rcv_pkts =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
+					  CNTR_INVALID_VL));
+}
+
 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				    struct ib_device *ibdev,
+				    u8 port, u32 *resp_len)
 {
 	struct opa_port_data_counters_msg *req =
 		(struct opa_port_data_counters_msg *)pmp->data;
@@ -2572,7 +2630,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
 		return reply((struct ib_mad_hdr *)pmp);
 	}
 
-	rsp = (struct _port_dctrs *)&(req->port[0]);
+	rsp = &req->port[0];
 	memset(rsp, 0, sizeof(*rsp));
 
 	rsp->port_number = port;
@@ -2583,39 +2641,19 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
 	 */
 	hfi1_read_link_quality(dd, &lq);
 	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
+	pma_get_opa_port_dctrs(ibdev, rsp);
 
-	/* rsp->sw_port_congestion is 0 for HFIs */
-	/* rsp->port_xmit_time_cong is 0 for HFIs */
-	/* rsp->port_xmit_wasted_bw ??? */
-	/* rsp->port_xmit_wait_data ??? */
-	/* rsp->port_mark_fecn is 0 for HFIs */
-
-	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
-						CNTR_INVALID_VL));
-	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
-						CNTR_INVALID_VL));
-	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
-						CNTR_INVALID_VL));
-	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
-						CNTR_INVALID_VL));
-	rsp->port_multicast_xmit_pkts =
-		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
-						CNTR_INVALID_VL));
-	rsp->port_multicast_rcv_pkts =
-		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
-						CNTR_INVALID_VL));
 	rsp->port_xmit_wait =
 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
 	rsp->port_rcv_fecn =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
 	rsp->port_rcv_becn =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
-
 	rsp->port_error_counter_summary =
 		cpu_to_be64(get_error_counter_summary(ibdev, port,
 						      res_lli, res_ler));
 
-	vlinfo = &(rsp->vls[0]);
+	vlinfo = &rsp->vls[0];
 	vfi = 0;
 	/* The vl_select_mask has been checked above, and we know
 	 * that it contains only entries which represent valid VLs.
@@ -2623,49 +2661,50 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
 	 * any additional checks for vl.
 	 */
 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
-		 8 * sizeof(req->vl_select_mask)) {
+			 8 * sizeof(req->vl_select_mask)) {
 		memset(vlinfo, 0, sizeof(*vlinfo));
 
 		rsp->vls[vfi].port_vl_xmit_data =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
-							idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_data =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_pkts =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
-							idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_pkts =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_wait =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
-							idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_fecn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 		rsp->vls[vfi].port_vl_rcv_becn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
 		/* rsp->port_vl_xmit_wasted_bw ??? */
 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
-		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait */
+		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait
+		 */
 		/*rsp->vls[vfi].port_vl_mark_fecn =
-			cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
-				+ offset));
-		*/
+		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
+		 *		+ offset));
+		 */
 		vlinfo++;
 		vfi++;
 	}
 
-	a0_datacounters(dd, rsp, vl_select_mask);
+	a0_datacounters(ppd, rsp, vl_select_mask);
 
 	if (resp_len)
 		*resp_len += response_data_size;
@@ -2673,12 +2712,88 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
 	return reply((struct ib_mad_hdr *)pmp);
 }
 
+static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
+				       struct ib_device *ibdev, u8 port)
+{
+	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
+						pmp->data;
+	struct _port_dctrs rsp;
+
+	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+		goto bail;
+	}
+
+	memset(&rsp, 0, sizeof(rsp));
+	pma_get_opa_port_dctrs(ibdev, &rsp);
+
+	p->port_xmit_data = rsp.port_xmit_data;
+	p->port_rcv_data = rsp.port_rcv_data;
+	p->port_xmit_packets = rsp.port_xmit_pkts;
+	p->port_rcv_packets = rsp.port_rcv_pkts;
+	p->port_unicast_xmit_packets = 0;
+	p->port_unicast_rcv_packets =  0;
+	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
+	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
+
+bail:
+	return reply((struct ib_mad_hdr *)pmp);
+}
+
+static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
+				   struct _port_ectrs *rsp, u8 port)
+{
+	u64 tmp, tmp2;
+	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+	struct hfi1_ibport *ibp = to_iport(ibdev, port);
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+
+	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
+	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
+					CNTR_INVALID_VL);
+	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
+		/* overflow/wrapped */
+		rsp->link_error_recovery = cpu_to_be32(~0);
+	} else {
+		rsp->link_error_recovery = cpu_to_be32(tmp2);
+	}
+
+	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
+						CNTR_INVALID_VL));
+	rsp->port_rcv_errors =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
+	rsp->port_rcv_remote_physical_errors =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
+					  CNTR_INVALID_VL));
+	rsp->port_rcv_switch_relay_errors = 0;
+	rsp->port_xmit_discards =
+		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
+					   CNTR_INVALID_VL));
+	rsp->port_xmit_constraint_errors =
+		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
+					   CNTR_INVALID_VL));
+	rsp->port_rcv_constraint_errors =
+		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
+					   CNTR_INVALID_VL));
+	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
+	tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
+	if (tmp2 < tmp) {
+		/* overflow/wrapped */
+		rsp->local_link_integrity_errors = cpu_to_be64(~0);
+	} else {
+		rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
+	}
+	rsp->excessive_buffer_overruns =
+		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
+}
+
 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				  struct ib_device *ibdev,
+				  u8 port, u32 *resp_len)
 {
 	size_t response_data_size;
 	struct _port_ectrs *rsp;
-	unsigned long port_num;
+	u8 port_num;
 	struct opa_port_error_counters64_msg *req;
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u32 num_ports;
@@ -2688,7 +2803,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
 	struct hfi1_pportdata *ppd;
 	struct _vls_ectrs *vlinfo;
 	unsigned long vl;
-	u64 port_mask, tmp, tmp2;
+	u64 port_mask, tmp;
 	u32 vl_select_mask;
 	int vfi;
 
@@ -2717,62 +2832,34 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
 	 */
 	port_mask = be64_to_cpu(req->port_select_mask[3]);
 	port_num = find_first_bit((unsigned long *)&port_mask,
-					sizeof(port_mask));
+				  sizeof(port_mask));
 
-	if ((u8)port_num != port) {
+	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
 
-	rsp = (struct _port_ectrs *)&(req->port[0]);
+	rsp = &req->port[0];
 
 	ibp = to_iport(ibdev, port_num);
 	ppd = ppd_from_ibp(ibp);
 
 	memset(rsp, 0, sizeof(*rsp));
-	rsp->port_number = (u8)port_num;
+	rsp->port_number = port_num;
+
+	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
 
-	rsp->port_rcv_constraint_errors =
-		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
-					   CNTR_INVALID_VL));
-	/* port_rcv_switch_relay_errors is 0 for HFIs */
-	rsp->port_xmit_discards =
-		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
-						CNTR_INVALID_VL));
 	rsp->port_rcv_remote_physical_errors =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
-						CNTR_INVALID_VL));
-	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
-	tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
-	if (tmp2 < tmp) {
-		/* overflow/wrapped */
-		rsp->local_link_integrity_errors = cpu_to_be64(~0);
-	} else {
-		rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
-	}
-	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
-	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-					CNTR_INVALID_VL);
-	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
-		/* overflow/wrapped */
-		rsp->link_error_recovery = cpu_to_be32(~0);
-	} else {
-		rsp->link_error_recovery = cpu_to_be32(tmp2);
-	}
-	rsp->port_xmit_constraint_errors =
-		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
-					   CNTR_INVALID_VL));
-	rsp->excessive_buffer_overruns =
-		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
+					  CNTR_INVALID_VL));
 	rsp->fm_config_errors =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
-						CNTR_INVALID_VL));
-	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
-						CNTR_INVALID_VL));
+					  CNTR_INVALID_VL));
 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
+
 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
 
-	vlinfo = (struct _vls_ectrs *)&(rsp->vls[0]);
+	vlinfo = &rsp->vls[0];
 	vfi = 0;
 	vl_select_mask = be32_to_cpu(req->vl_select_mask);
 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
@@ -2789,8 +2876,94 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
 	return reply((struct ib_mad_hdr *)pmp);
 }
 
+static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
+				   struct ib_device *ibdev, u8 port)
+{
+	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
+		pmp->data;
+	struct _port_ectrs rsp;
+	u64 temp_link_overrun_errors;
+	u64 temp_64;
+	u32 temp_32;
+
+	memset(&rsp, 0, sizeof(rsp));
+	pma_get_opa_port_ectrs(ibdev, &rsp, port);
+
+	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+		goto bail;
+	}
+
+	p->symbol_error_counter = 0; /* N/A for OPA */
+
+	temp_32 = be32_to_cpu(rsp.link_error_recovery);
+	if (temp_32 > 0xFFUL)
+		p->link_error_recovery_counter = 0xFF;
+	else
+		p->link_error_recovery_counter = (u8)temp_32;
+
+	temp_32 = be32_to_cpu(rsp.link_downed);
+	if (temp_32 > 0xFFUL)
+		p->link_downed_counter = 0xFF;
+	else
+		p->link_downed_counter = (u8)temp_32;
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
+	if (temp_64 > 0xFFFFUL)
+		p->port_rcv_errors = cpu_to_be16(0xFFFF);
+	else
+		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
+	if (temp_64 > 0xFFFFUL)
+		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
+	else
+		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
+	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
+	if (temp_64 > 0xFFFFUL)
+		p->port_xmit_discards = cpu_to_be16(0xFFFF);
+	else
+		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
+	if (temp_64 > 0xFFUL)
+		p->port_xmit_constraint_errors = 0xFF;
+	else
+		p->port_xmit_constraint_errors = (u8)temp_64;
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
+	if (temp_64 > 0xFFUL)
+		p->port_rcv_constraint_errors = 0xFFUL;
+	else
+		p->port_rcv_constraint_errors = (u8)temp_64;
+
+	/* LocalLink: 7:4, BufferOverrun: 3:0 */
+	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
+	if (temp_64 > 0xFUL)
+		temp_64 = 0xFUL;
+
+	temp_link_overrun_errors = temp_64 << 4;
+
+	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
+	if (temp_64 > 0xFUL)
+		temp_64 = 0xFUL;
+	temp_link_overrun_errors |= temp_64;
+
+	p->link_overrun_errors = (u8)temp_link_overrun_errors;
+
+	p->vl15_dropped = 0; /* N/A for OPA */
+
+bail:
+	return reply((struct ib_mad_hdr *)pmp);
+}
+
 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				 struct ib_device *ibdev,
+				 u8 port, u32 *resp_len)
 {
 	size_t response_data_size;
 	struct _port_ei *rsp;
@@ -2798,12 +2971,12 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u64 port_mask;
 	u32 num_ports;
-	unsigned long port_num;
+	u8 port_num;
 	u8 num_pslm;
 	u64 reg;
 
 	req = (struct opa_port_error_info_msg *)pmp->data;
-	rsp = (struct _port_ei *)&(req->port[0]);
+	rsp = &req->port[0];
 
 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
@@ -2831,7 +3004,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
 	port_num = find_first_bit((unsigned long *)&port_mask,
 				  sizeof(port_mask));
 
-	if ((u8)port_num != port) {
+	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -2840,15 +3013,17 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
 	rsp->port_rcv_ei.status_and_code =
 		dd->err_info_rcvport.status_and_code;
 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
-		&dd->err_info_rcvport.packet_flit1, sizeof(u64));
+	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
-		&dd->err_info_rcvport.packet_flit2, sizeof(u64));
+	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
 
 	/* ExcessiverBufferOverrunInfo */
 	reg = read_csr(dd, RCV_ERR_INFO);
 	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
-		/* if the RcvExcessBufferOverrun bit is set, save SC of
-		 * first pkt that encountered an excess buffer overrun */
+		/*
+		 * if the RcvExcessBufferOverrun bit is set, save SC of
+		 * first pkt that encountered an excess buffer overrun
+		 */
 		u8 tmp = (u8)reg;
 
 		tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
@@ -2885,7 +3060,8 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
 }
 
 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				  struct ib_device *ibdev,
+				  u8 port, u32 *resp_len)
 {
 	struct opa_clear_port_status *req =
 		(struct opa_clear_port_status *)pmp->data;
@@ -2944,8 +3120,9 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
 		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
 
 	/* Only applicable for switch */
-	/*if (counter_select & CS_PORT_MARK_FECN)
-		write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);*/
+	/* if (counter_select & CS_PORT_MARK_FECN)
+	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
+	 */
 
 	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
 		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
@@ -2968,7 +3145,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
 	if (counter_select & CS_LINK_ERROR_RECOVERY) {
 		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
 		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-						CNTR_INVALID_VL, 0);
+			       CNTR_INVALID_VL, 0);
 	}
 
 	if (counter_select & CS_PORT_RCV_ERRORS)
@@ -2990,7 +3167,6 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
 
 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
 			 8 * sizeof(vl_select_mask)) {
-
 		if (counter_select & CS_PORT_XMIT_DATA)
 			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
 
@@ -3019,9 +3195,9 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
 		if (counter_select & CS_PORT_RCV_BUBBLE)
 			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
 
-		/*if (counter_select & CS_PORT_MARK_FECN)
-		     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
-		*/
+		/* if (counter_select & CS_PORT_MARK_FECN)
+		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
+		 */
 		/* port_vl_xmit_discards ??? */
 	}
 
@@ -3032,19 +3208,20 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
 }
 
 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				 struct ib_device *ibdev,
+				 u8 port, u32 *resp_len)
 {
 	struct _port_ei *rsp;
 	struct opa_port_error_info_msg *req;
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u64 port_mask;
 	u32 num_ports;
-	unsigned long port_num;
+	u8 port_num;
 	u8 num_pslm;
 	u32 error_info_select;
 
 	req = (struct opa_port_error_info_msg *)pmp->data;
-	rsp = (struct _port_ei *)&(req->port[0]);
+	rsp = &req->port[0];
 
 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
@@ -3064,7 +3241,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
 	port_num = find_first_bit((unsigned long *)&port_mask,
 				  sizeof(port_mask));
 
-	if ((u8)port_num != port) {
+	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -3078,8 +3255,10 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
 
 	/* ExcessiverBufferOverrunInfo */
 	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
-		/* status bit is essentially kept in the h/w - bit 5 of
-		 * RCV_ERR_INFO */
+		/*
+		 * status bit is essentially kept in the h/w - bit 5 of
+		 * RCV_ERR_INFO
+		 */
 		write_csr(dd, RCV_ERR_INFO,
 			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
 
@@ -3131,13 +3310,12 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
 }
 
 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
-					     u8 *data,
-					     struct ib_device *ibdev,
-					     u8 port, u32 *resp_len)
+				       u8 *data, struct ib_device *ibdev,
+				       u8 port, u32 *resp_len)
 {
 	int i;
 	struct opa_congestion_setting_attr *p =
-		(struct opa_congestion_setting_attr *) data;
+		(struct opa_congestion_setting_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct opa_congestion_setting_entry_shadow *entries;
@@ -3147,7 +3325,7 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -3176,7 +3354,7 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
 				       u32 *resp_len)
 {
 	struct opa_congestion_setting_attr *p =
-		(struct opa_congestion_setting_attr *) data;
+		(struct opa_congestion_setting_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct opa_congestion_setting_entry_shadow *entries;
@@ -3238,7 +3416,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
 			continue;
 		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
 		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
-			&cce->rqpn, 3);
+		       &cce->rqpn, 3);
 		cong_log->events[i].sl_svc_type_cn_entry =
 			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
 		cong_log->events[i].remote_lid_cn_entry =
@@ -3268,7 +3446,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 				   u32 *resp_len)
 {
 	struct ib_cc_table_attr *cc_table_attr =
-		(struct ib_cc_table_attr *) data;
+		(struct ib_cc_table_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	u32 start_block = OPA_AM_START_BLK(am);
@@ -3289,7 +3467,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -3309,7 +3487,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 	rcu_read_unlock();
 
 	if (resp_len)
-		*resp_len += sizeof(u16)*(IB_CCT_ENTRIES * n_blocks + 1);
+		*resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
 
 	return reply((struct ib_mad_hdr *)smp);
 }
@@ -3325,7 +3503,7 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 				   struct ib_device *ibdev, u8 port,
 				   u32 *resp_len)
 {
-	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *) data;
+	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	u32 start_block = OPA_AM_START_BLK(am);
@@ -3355,14 +3533,14 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 	}
 
 	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
-	if (new_cc_state == NULL)
+	if (!new_cc_state)
 		goto getit;
 
 	spin_lock(&ppd->cc_state_lock);
 
 	old_cc_state = get_cc_state(ppd);
 
-	if (old_cc_state == NULL) {
+	if (!old_cc_state) {
 		spin_unlock(&ppd->cc_state_lock);
 		kfree(new_cc_state);
 		return reply((struct ib_mad_hdr *)smp);
@@ -3402,26 +3580,31 @@ struct opa_led_info {
 };
 
 #define OPA_LED_SHIFT	31
-#define OPA_LED_MASK	(1 << OPA_LED_SHIFT)
+#define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
 
 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
 				   struct ib_device *ibdev, u8 port,
 				   u32 *resp_len)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct opa_led_info *p = (struct opa_led_info *) data;
+	struct hfi1_pportdata *ppd = dd->pport;
+	struct opa_led_info *p = (struct opa_led_info *)data;
 	u32 nport = OPA_AM_NPORT(am);
-	u64 reg;
+	u32 is_beaconing_active;
 
 	if (nport != 1) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	reg = read_csr(dd, DCC_CFG_LED_CNTRL);
-	if ((reg & DCC_CFG_LED_CNTRL_LED_CNTRL_SMASK) &&
-		((reg & DCC_CFG_LED_CNTRL_LED_SW_BLINK_RATE_SMASK) == 0xf))
-			p->rsvd_led_mask = cpu_to_be32(OPA_LED_MASK);
+	/*
+	 * This pairs with the memory barrier in hfi1_start_led_override to
+	 * ensure that we read the correct state of LED beaconing represented
+	 * by led_override_timer_active
+	 */
+	smp_rmb();
+	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
+	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
 
 	if (resp_len)
 		*resp_len += sizeof(struct opa_led_info);
@@ -3434,7 +3617,7 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
 				   u32 *resp_len)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct opa_led_info *p = (struct opa_led_info *) data;
+	struct opa_led_info *p = (struct opa_led_info *)data;
 	u32 nport = OPA_AM_NPORT(am);
 	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
 
@@ -3443,7 +3626,10 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	setextled(dd, on);
+	if (on)
+		hfi1_start_led_override(dd->pport, 2000, 1500);
+	else
+		shutdown_led_override(dd->pport);
 
 	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
 }
@@ -3486,7 +3672,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
 		break;
 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
 		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
-					       resp_len);
+						resp_len);
 		break;
 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
 		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
@@ -3525,9 +3711,9 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
 					      resp_len);
 		break;
 	case IB_SMP_ATTR_SM_INFO:
-		if (ibp->port_cap_flags & IB_PORT_SM_DISABLED)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-		if (ibp->port_cap_flags & IB_PORT_SM)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
 			return IB_MAD_RESULT_SUCCESS;
 		/* FALLTHROUGH */
 	default:
@@ -3568,7 +3754,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
 		break;
 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
 		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
-					       resp_len);
+						resp_len);
 		break;
 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
 		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
@@ -3595,9 +3781,9 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
 					      resp_len);
 		break;
 	case IB_SMP_ATTR_SM_INFO:
-		if (ibp->port_cap_flags & IB_PORT_SM_DISABLED)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-		if (ibp->port_cap_flags & IB_PORT_SM)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
 			return IB_MAD_RESULT_SUCCESS;
 		/* FALLTHROUGH */
 	default:
@@ -3647,14 +3833,13 @@ static int subn_get_opa_aggregate(struct opa_smp *smp,
 		/* zero the payload for this segment */
 		memset(next_smp + sizeof(*agg), 0, agg_data_len);
 
-		(void) subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
+		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
 					ibdev, port, NULL);
 		if (smp->status & ~IB_SMP_DIRECTION) {
 			set_aggr_error(agg);
 			return reply((struct ib_mad_hdr *)smp);
 		}
 		next_smp += agg_size;
-
 	}
 
 	return reply((struct ib_mad_hdr *)smp);
@@ -3691,14 +3876,13 @@ static int subn_set_opa_aggregate(struct opa_smp *smp,
 			return reply((struct ib_mad_hdr *)smp);
 		}
 
-		(void) subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
+		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
 					ibdev, port, NULL);
 		if (smp->status & ~IB_SMP_DIRECTION) {
 			set_aggr_error(agg);
 			return reply((struct ib_mad_hdr *)smp);
 		}
 		next_smp += agg_size;
-
 	}
 
 	return reply((struct ib_mad_hdr *)smp);
@@ -3816,7 +4000,7 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
 	if (smp->class_version != OPA_SMI_CLASS_VERSION) {
 		smp->status |= IB_SMP_UNSUP_VERSION;
 		ret = reply((struct ib_mad_hdr *)smp);
-		goto bail;
+		return ret;
 	}
 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
 			 smp->route.dr.dr_slid, smp->route.dr.return_path,
@@ -3836,13 +4020,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
 		     smp->method == IB_MGMT_METHOD_SET) &&
 		    port_num && port_num <= ibdev->phys_port_cnt &&
 		    port != port_num)
-			(void) check_mkey(to_iport(ibdev, port_num),
+			(void)check_mkey(to_iport(ibdev, port_num),
 					  (struct ib_mad_hdr *)smp, 0,
 					  smp->mkey, smp->route.dr.dr_slid,
 					  smp->route.dr.return_path,
 					  smp->hop_cnt);
 		ret = IB_MAD_RESULT_FAILURE;
-		goto bail;
+		return ret;
 	}
 
 	*resp_len = opa_get_smp_header_size(smp);
@@ -3854,23 +4038,25 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
 			clear_opa_smp_data(smp);
 			ret = subn_get_opa_sma(attr_id, smp, am, data,
 					       ibdev, port, resp_len);
-			goto bail;
+			break;
 		case OPA_ATTRIB_ID_AGGREGATE:
 			ret = subn_get_opa_aggregate(smp, ibdev, port,
 						     resp_len);
-			goto bail;
+			break;
 		}
+		break;
 	case IB_MGMT_METHOD_SET:
 		switch (attr_id) {
 		default:
 			ret = subn_set_opa_sma(attr_id, smp, am, data,
 					       ibdev, port, resp_len);
-			goto bail;
+			break;
 		case OPA_ATTRIB_ID_AGGREGATE:
 			ret = subn_set_opa_aggregate(smp, ibdev, port,
 						     resp_len);
-			goto bail;
+			break;
 		}
+		break;
 	case IB_MGMT_METHOD_TRAP:
 	case IB_MGMT_METHOD_REPORT:
 	case IB_MGMT_METHOD_REPORT_RESP:
@@ -3881,13 +4067,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
 		 * Just tell the caller to process it normally.
 		 */
 		ret = IB_MAD_RESULT_SUCCESS;
-		goto bail;
+		break;
 	default:
 		smp->status |= IB_SMP_UNSUP_METHOD;
 		ret = reply((struct ib_mad_hdr *)smp);
+		break;
 	}
 
-bail:
 	return ret;
 }
 
@@ -3903,7 +4089,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 	if (smp->class_version != 1) {
 		smp->status |= IB_SMP_UNSUP_VERSION;
 		ret = reply((struct ib_mad_hdr *)smp);
-		goto bail;
+		return ret;
 	}
 
 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
@@ -3924,13 +4110,13 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 		     smp->method == IB_MGMT_METHOD_SET) &&
 		    port_num && port_num <= ibdev->phys_port_cnt &&
 		    port != port_num)
-			(void) check_mkey(to_iport(ibdev, port_num),
-					  (struct ib_mad_hdr *)smp, 0,
-					  smp->mkey,
-					  (__force __be32)smp->dr_slid,
-					  smp->return_path, smp->hop_cnt);
+			(void)check_mkey(to_iport(ibdev, port_num),
+					 (struct ib_mad_hdr *)smp, 0,
+					 smp->mkey,
+					 (__force __be32)smp->dr_slid,
+					 smp->return_path, smp->hop_cnt);
 		ret = IB_MAD_RESULT_FAILURE;
-		goto bail;
+		return ret;
 	}
 
 	switch (smp->method) {
@@ -3938,15 +4124,77 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 		switch (smp->attr_id) {
 		case IB_SMP_ATTR_NODE_INFO:
 			ret = subn_get_nodeinfo(smp, ibdev, port);
-			goto bail;
+			break;
 		default:
 			smp->status |= IB_SMP_UNSUP_METH_ATTR;
 			ret = reply((struct ib_mad_hdr *)smp);
-			goto bail;
+			break;
 		}
+		break;
+	}
+
+	return ret;
+}
+
+static int process_perf(struct ib_device *ibdev, u8 port,
+			const struct ib_mad *in_mad,
+			struct ib_mad *out_mad)
+{
+	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
+	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
+						&pmp->data;
+	int ret = IB_MAD_RESULT_FAILURE;
+
+	*out_mad = *in_mad;
+	if (pmp->mad_hdr.class_version != 1) {
+		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
+		ret = reply((struct ib_mad_hdr *)pmp);
+		return ret;
+	}
+
+	switch (pmp->mad_hdr.method) {
+	case IB_MGMT_METHOD_GET:
+		switch (pmp->mad_hdr.attr_id) {
+		case IB_PMA_PORT_COUNTERS:
+			ret = pma_get_ib_portcounters(pmp, ibdev, port);
+			break;
+		case IB_PMA_PORT_COUNTERS_EXT:
+			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
+			break;
+		case IB_PMA_CLASS_PORT_INFO:
+			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+			ret = reply((struct ib_mad_hdr *)pmp);
+			break;
+		default:
+			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+			ret = reply((struct ib_mad_hdr *)pmp);
+			break;
+		}
+		break;
+
+	case IB_MGMT_METHOD_SET:
+		if (pmp->mad_hdr.attr_id) {
+			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+			ret = reply((struct ib_mad_hdr *)pmp);
+		}
+		break;
+
+	case IB_MGMT_METHOD_TRAP:
+	case IB_MGMT_METHOD_GET_RESP:
+		/*
+		 * The ib_mad module will call us to process responses
+		 * before checking for other consumers.
+		 * Just tell the caller to process it normally.
+		 */
+		ret = IB_MAD_RESULT_SUCCESS;
+		break;
+
+	default:
+		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
+		ret = reply((struct ib_mad_hdr *)pmp);
+		break;
 	}
 
-bail:
 	return ret;
 }
 
@@ -3971,44 +4219,46 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port,
 		switch (pmp->mad_hdr.attr_id) {
 		case IB_PMA_CLASS_PORT_INFO:
 			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
-			goto bail;
+			break;
 		case OPA_PM_ATTRIB_ID_PORT_STATUS:
 			ret = pma_get_opa_portstatus(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						     resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
 			ret = pma_get_opa_datacounters(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						       resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
 			ret = pma_get_opa_porterrors(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						     resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
 			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						    resp_len);
+			break;
 		default:
 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
 			ret = reply((struct ib_mad_hdr *)pmp);
-			goto bail;
+			break;
 		}
+		break;
 
 	case IB_MGMT_METHOD_SET:
 		switch (pmp->mad_hdr.attr_id) {
 		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
 			ret = pma_set_opa_portstatus(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						     resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
 			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						    resp_len);
+			break;
 		default:
 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
 			ret = reply((struct ib_mad_hdr *)pmp);
-			goto bail;
+			break;
 		}
+		break;
 
 	case IB_MGMT_METHOD_TRAP:
 	case IB_MGMT_METHOD_GET_RESP:
@@ -4018,14 +4268,14 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port,
 		 * Just tell the caller to process it normally.
 		 */
 		ret = IB_MAD_RESULT_SUCCESS;
-		goto bail;
+		break;
 
 	default:
 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
 		ret = reply((struct ib_mad_hdr *)pmp);
+		break;
 	}
 
-bail:
 	return ret;
 }
 
@@ -4090,12 +4340,15 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
 		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
-		goto bail;
+		break;
+	case IB_MGMT_CLASS_PERF_MGMT:
+		ret = process_perf(ibdev, port, in_mad, out_mad);
+		break;
 	default:
 		ret = IB_MAD_RESULT_SUCCESS;
+		break;
 	}
 
-bail:
 	return ret;
 }
 
@@ -4147,66 +4400,3 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
 
 	return IB_MAD_RESULT_FAILURE;
 }
-
-static void send_handler(struct ib_mad_agent *agent,
-			 struct ib_mad_send_wc *mad_send_wc)
-{
-	ib_free_send_mad(mad_send_wc->send_buf);
-}
-
-int hfi1_create_agents(struct hfi1_ibdev *dev)
-{
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct hfi1_ibport *ibp;
-	int p;
-	int ret;
-
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
-					      NULL, 0, send_handler,
-					      NULL, NULL, 0);
-		if (IS_ERR(agent)) {
-			ret = PTR_ERR(agent);
-			goto err;
-		}
-
-		ibp->send_agent = agent;
-	}
-
-	return 0;
-
-err:
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-	}
-
-	return ret;
-}
-
-void hfi1_free_agents(struct hfi1_ibdev *dev)
-{
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct hfi1_ibport *ibp;
-	int p;
-
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-		if (ibp->sm_ah) {
-			ib_destroy_ah(&ibp->sm_ah->ibah);
-			ibp->sm_ah = NULL;
-		}
-	}
-}
diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/staging/rdma/hfi1/mad.h
index f0317750e2fc..55ee08675333 100644
--- a/drivers/staging/rdma/hfi1/mad.h
+++ b/drivers/staging/rdma/hfi1/mad.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -51,8 +48,10 @@
 #define _HFI1_MAD_H
 
 #include <rdma/ib_pma.h>
-#define USE_PI_LED_ENABLE	1 /* use led enabled bit in struct
-				   * opa_port_states, if available */
+#define USE_PI_LED_ENABLE	1 /*
+				   * use led enabled bit in struct
+				   * opa_port_states, if available
+				   */
 #include <rdma/opa_smi.h>
 #include <rdma/opa_port_info.h>
 #ifndef PI_LED_ENABLE_SUP
@@ -235,7 +234,6 @@ struct ib_pma_portcounters_cong {
 #define IB_CC_SVCTYPE_RD 0x2
 #define IB_CC_SVCTYPE_UD 0x3
 
-
 /*
  * There should be an equivalent IB #define for the following, but
  * I cannot find it.
@@ -267,7 +265,7 @@ struct opa_hfi1_cong_log {
 	u8 congestion_flags;
 	__be16 threshold_event_counter;
 	__be32 current_time_stamp;
-	u8 threshold_cong_event_map[OPA_MAX_SLS/8];
+	u8 threshold_cong_event_map[OPA_MAX_SLS / 8];
 	struct opa_hfi1_cong_log_event events[OPA_CONG_LOG_ELEMS];
 } __packed;
 
diff --git a/drivers/staging/rdma/hfi1/mmap.c b/drivers/staging/rdma/hfi1/mmap.c
deleted file mode 100644
index 5173b1c60b3d..000000000000
--- a/drivers/staging/rdma/hfi1/mmap.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <asm/pgtable.h>
-
-#include "verbs.h"
-
-/**
- * hfi1_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct hfi1_mmap_info
- */
-void hfi1_release_mmap_info(struct kref *ref)
-{
-	struct hfi1_mmap_info *ip =
-		container_of(ref, struct hfi1_mmap_info, ref);
-	struct hfi1_ibdev *dev = to_idev(ip->context->device);
-
-	spin_lock_irq(&dev->pending_lock);
-	list_del(&ip->pending_mmaps);
-	spin_unlock_irq(&dev->pending_lock);
-
-	vfree(ip->obj);
-	kfree(ip);
-}
-
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void hfi1_vma_open(struct vm_area_struct *vma)
-{
-	struct hfi1_mmap_info *ip = vma->vm_private_data;
-
-	kref_get(&ip->ref);
-}
-
-static void hfi1_vma_close(struct vm_area_struct *vma)
-{
-	struct hfi1_mmap_info *ip = vma->vm_private_data;
-
-	kref_put(&ip->ref, hfi1_release_mmap_info);
-}
-
-static struct vm_operations_struct hfi1_vm_ops = {
-	.open =     hfi1_vma_open,
-	.close =    hfi1_vma_close,
-};
-
-/**
- * hfi1_mmap - create a new mmap region
- * @context: the IB user context of the process making the mmap() call
- * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
- */
-int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	struct hfi1_ibdev *dev = to_idev(context->device);
-	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long size = vma->vm_end - vma->vm_start;
-	struct hfi1_mmap_info *ip, *pp;
-	int ret = -EINVAL;
-
-	/*
-	 * Search the device's list of objects waiting for a mmap call.
-	 * Normally, this list is very short since a call to create a
-	 * CQ, QP, or SRQ is soon followed by a call to mmap().
-	 */
-	spin_lock_irq(&dev->pending_lock);
-	list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
-				 pending_mmaps) {
-		/* Only the creator is allowed to mmap the object */
-		if (context != ip->context || (__u64) offset != ip->offset)
-			continue;
-		/* Don't allow a mmap larger than the object. */
-		if (size > ip->size)
-			break;
-
-		list_del_init(&ip->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-
-		ret = remap_vmalloc_range(vma, ip->obj, 0);
-		if (ret)
-			goto done;
-		vma->vm_ops = &hfi1_vm_ops;
-		vma->vm_private_data = ip;
-		hfi1_vma_open(vma);
-		goto done;
-	}
-	spin_unlock_irq(&dev->pending_lock);
-done:
-	return ret;
-}
-
-/*
- * Allocate information for hfi1_mmap
- */
-struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev,
-					     u32 size,
-					     struct ib_ucontext *context,
-					     void *obj) {
-	struct hfi1_mmap_info *ip;
-
-	ip = kmalloc(sizeof(*ip), GFP_KERNEL);
-	if (!ip)
-		goto bail;
-
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	INIT_LIST_HEAD(&ip->pending_mmaps);
-	ip->size = size;
-	ip->context = context;
-	ip->obj = obj;
-	kref_init(&ip->ref);
-
-bail:
-	return ip;
-}
-
-void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip,
-			   u32 size, void *obj)
-{
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	ip->size = size;
-	ip->obj = obj;
-}
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c
new file mode 100644
index 000000000000..c7ad0164ea9a
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/mmu_rb.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/list.h>
+#include <linux/mmu_notifier.h>
+#include <linux/interval_tree_generic.h>
+
+#include "mmu_rb.h"
+#include "trace.h"
+
+struct mmu_rb_handler {
+	struct list_head list;
+	struct mmu_notifier mn;
+	struct rb_root *root;
+	spinlock_t lock;        /* protect the RB tree */
+	struct mmu_rb_ops *ops;
+};
+
+static LIST_HEAD(mmu_rb_handlers);
+static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */
+
+static unsigned long mmu_node_start(struct mmu_rb_node *);
+static unsigned long mmu_node_last(struct mmu_rb_node *);
+static struct mmu_rb_handler *find_mmu_handler(struct rb_root *);
+static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
+				     unsigned long);
+static inline void mmu_notifier_range_start(struct mmu_notifier *,
+					    struct mm_struct *,
+					    unsigned long, unsigned long);
+static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
+					unsigned long, unsigned long);
+static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
+					   unsigned long, unsigned long);
+
+static struct mmu_notifier_ops mn_opts = {
+	.invalidate_page = mmu_notifier_page,
+	.invalidate_range_start = mmu_notifier_range_start,
+};
+
+INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
+		     mmu_node_start, mmu_node_last, static, __mmu_int_rb);
+
+static unsigned long mmu_node_start(struct mmu_rb_node *node)
+{
+	return node->addr & PAGE_MASK;
+}
+
+static unsigned long mmu_node_last(struct mmu_rb_node *node)
+{
+	return PAGE_ALIGN((node->addr & PAGE_MASK) + node->len) - 1;
+}
+
+int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
+{
+	struct mmu_rb_handler *handlr;
+	unsigned long flags;
+
+	if (!ops->invalidate)
+		return -EINVAL;
+
+	handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
+	if (!handlr)
+		return -ENOMEM;
+
+	handlr->root = root;
+	handlr->ops = ops;
+	INIT_HLIST_NODE(&handlr->mn.hlist);
+	spin_lock_init(&handlr->lock);
+	handlr->mn.ops = &mn_opts;
+	spin_lock_irqsave(&mmu_rb_lock, flags);
+	list_add_tail(&handlr->list, &mmu_rb_handlers);
+	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+
+	return mmu_notifier_register(&handlr->mn, current->mm);
+}
+
+void hfi1_mmu_rb_unregister(struct rb_root *root)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	unsigned long flags;
+
+	if (!handler)
+		return;
+
+	spin_lock_irqsave(&mmu_rb_lock, flags);
+	list_del(&handler->list);
+	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+
+	if (!RB_EMPTY_ROOT(root)) {
+		struct rb_node *node;
+		struct mmu_rb_node *rbnode;
+
+		while ((node = rb_first(root))) {
+			rbnode = rb_entry(node, struct mmu_rb_node, node);
+			rb_erase(node, root);
+			if (handler->ops->remove)
+				handler->ops->remove(root, rbnode, false);
+		}
+	}
+
+	if (current->mm)
+		mmu_notifier_unregister(&handler->mn, current->mm);
+	kfree(handler);
+}
+
+int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	struct mmu_rb_node *node;
+	unsigned long flags;
+	int ret = 0;
+
+	if (!handler)
+		return -EINVAL;
+
+	spin_lock_irqsave(&handler->lock, flags);
+	hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr,
+		  mnode->len);
+	node = __mmu_rb_search(handler, mnode->addr, mnode->len);
+	if (node) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+	__mmu_int_rb_insert(mnode, root);
+
+	if (handler->ops->insert) {
+		ret = handler->ops->insert(root, mnode);
+		if (ret)
+			__mmu_int_rb_remove(mnode, root);
+	}
+unlock:
+	spin_unlock_irqrestore(&handler->lock, flags);
+	return ret;
+}
+
+/* Caller must host handler lock */
+static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
+					   unsigned long addr,
+					   unsigned long len)
+{
+	struct mmu_rb_node *node = NULL;
+
+	hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len);
+	if (!handler->ops->filter) {
+		node = __mmu_int_rb_iter_first(handler->root, addr,
+					       (addr + len) - 1);
+	} else {
+		for (node = __mmu_int_rb_iter_first(handler->root, addr,
+						    (addr + len) - 1);
+		     node;
+		     node = __mmu_int_rb_iter_next(node, addr,
+						   (addr + len) - 1)) {
+			if (handler->ops->filter(node, addr, len))
+				return node;
+		}
+	}
+	return node;
+}
+
+static void __mmu_rb_remove(struct mmu_rb_handler *handler,
+			    struct mmu_rb_node *node, bool arg)
+{
+	/* Validity of handler and node pointers has been checked by caller. */
+	hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
+		  node->len);
+	__mmu_int_rb_remove(node, handler->root);
+	if (handler->ops->remove)
+		handler->ops->remove(handler->root, node, arg);
+}
+
+struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
+				       unsigned long len)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	struct mmu_rb_node *node;
+	unsigned long flags;
+
+	if (!handler)
+		return ERR_PTR(-EINVAL);
+
+	spin_lock_irqsave(&handler->lock, flags);
+	node = __mmu_rb_search(handler, addr, len);
+	spin_unlock_irqrestore(&handler->lock, flags);
+
+	return node;
+}
+
+void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	unsigned long flags;
+
+	if (!handler || !node)
+		return;
+
+	spin_lock_irqsave(&handler->lock, flags);
+	__mmu_rb_remove(handler, node, false);
+	spin_unlock_irqrestore(&handler->lock, flags);
+}
+
+static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
+{
+	struct mmu_rb_handler *handler;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mmu_rb_lock, flags);
+	list_for_each_entry(handler, &mmu_rb_handlers, list) {
+		if (handler->root == root)
+			goto unlock;
+	}
+	handler = NULL;
+unlock:
+	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+	return handler;
+}
+
+static inline void mmu_notifier_page(struct mmu_notifier *mn,
+				     struct mm_struct *mm, unsigned long addr)
+{
+	mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE);
+}
+
+static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
+					    struct mm_struct *mm,
+					    unsigned long start,
+					    unsigned long end)
+{
+	mmu_notifier_mem_invalidate(mn, start, end);
+}
+
+static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
+					unsigned long start, unsigned long end)
+{
+	struct mmu_rb_handler *handler =
+		container_of(mn, struct mmu_rb_handler, mn);
+	struct rb_root *root = handler->root;
+	struct mmu_rb_node *node;
+	unsigned long flags;
+
+	spin_lock_irqsave(&handler->lock, flags);
+	for (node = __mmu_int_rb_iter_first(root, start, end - 1); node;
+	     node = __mmu_int_rb_iter_next(node, start, end - 1)) {
+		hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
+			  node->addr, node->len);
+		if (handler->ops->invalidate(root, node))
+			__mmu_rb_remove(handler, node, true);
+	}
+	spin_unlock_irqrestore(&handler->lock, flags);
+}
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h
new file mode 100644
index 000000000000..f8523fdb8a18
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/mmu_rb.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_MMU_RB_H
+#define _HFI1_MMU_RB_H
+
+#include "hfi.h"
+
+struct mmu_rb_node {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long __last;
+	struct rb_node node;
+};
+
+struct mmu_rb_ops {
+	bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long);
+	int (*insert)(struct rb_root *, struct mmu_rb_node *);
+	void (*remove)(struct rb_root *, struct mmu_rb_node *, bool);
+	int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
+};
+
+int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops);
+void hfi1_mmu_rb_unregister(struct rb_root *);
+int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
+void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
+struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long,
+				       unsigned long);
+
+#endif /* _HFI1_MMU_RB_H */
diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c
deleted file mode 100644
index a3f8b884fdd6..000000000000
--- a/drivers/staging/rdma/hfi1/mr.c
+++ /dev/null
@@ -1,473 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_smi.h>
-
-#include "hfi.h"
-
-/* Fast memory region */
-struct hfi1_fmr {
-	struct ib_fmr ibfmr;
-	struct hfi1_mregion mr;        /* must be last */
-};
-
-static inline struct hfi1_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-	return container_of(ibfmr, struct hfi1_fmr, ibfmr);
-}
-
-static int init_mregion(struct hfi1_mregion *mr, struct ib_pd *pd,
-			int count)
-{
-	int m, i = 0;
-	int rval = 0;
-
-	m = (count + HFI1_SEGSZ - 1) / HFI1_SEGSZ;
-	for (; i < m; i++) {
-		mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
-		if (!mr->map[i])
-			goto bail;
-	}
-	mr->mapsz = m;
-	init_completion(&mr->comp);
-	/* count returning the ptr to user */
-	atomic_set(&mr->refcount, 1);
-	mr->pd = pd;
-	mr->max_segs = count;
-out:
-	return rval;
-bail:
-	while (i)
-		kfree(mr->map[--i]);
-	rval = -ENOMEM;
-	goto out;
-}
-
-static void deinit_mregion(struct hfi1_mregion *mr)
-{
-	int i = mr->mapsz;
-
-	mr->mapsz = 0;
-	while (i)
-		kfree(mr->map[--i]);
-}
-
-
-/**
- * hfi1_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see dma.c).
- */
-struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc)
-{
-	struct hfi1_mr *mr = NULL;
-	struct ib_mr *ret;
-	int rval;
-
-	if (to_ipd(pd)->user) {
-		ret = ERR_PTR(-EPERM);
-		goto bail;
-	}
-
-	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	rval = init_mregion(&mr->mr, pd, 0);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail;
-	}
-
-
-	rval = hfi1_alloc_lkey(&mr->mr, 1);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail_mregion;
-	}
-
-	mr->mr.access_flags = acc;
-	ret = &mr->ibmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	goto done;
-}
-
-static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd)
-{
-	struct hfi1_mr *mr;
-	int rval = -ENOMEM;
-	int m;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (count + HFI1_SEGSZ - 1) / HFI1_SEGSZ;
-	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
-	if (!mr)
-		goto bail;
-
-	rval = init_mregion(&mr->mr, pd, count);
-	if (rval)
-		goto bail;
-
-	rval = hfi1_alloc_lkey(&mr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	mr->ibmr.lkey = mr->mr.lkey;
-	mr->ibmr.rkey = mr->mr.lkey;
-done:
-	return mr;
-
-bail_mregion:
-	deinit_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	mr = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * hfi1_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			       u64 virt_addr, int mr_access_flags,
-			       struct ib_udata *udata)
-{
-	struct hfi1_mr *mr;
-	struct ib_umem *umem;
-	struct scatterlist *sg;
-	int n, m, entry;
-	struct ib_mr *ret;
-
-	if (length == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	umem = ib_umem_get(pd->uobject->context, start, length,
-			   mr_access_flags, 0);
-	if (IS_ERR(umem))
-		return (void *) umem;
-
-	n = umem->nmap;
-
-	mr = alloc_mr(n, pd);
-	if (IS_ERR(mr)) {
-		ret = (struct ib_mr *)mr;
-		ib_umem_release(umem);
-		goto bail;
-	}
-
-	mr->mr.user_base = start;
-	mr->mr.iova = virt_addr;
-	mr->mr.length = length;
-	mr->mr.offset = ib_umem_offset(umem);
-	mr->mr.access_flags = mr_access_flags;
-	mr->umem = umem;
-
-	if (is_power_of_2(umem->page_size))
-		mr->mr.page_shift = ilog2(umem->page_size);
-	m = 0;
-	n = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		void *vaddr;
-
-		vaddr = page_address(sg_page(sg));
-		if (!vaddr) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		mr->mr.map[m]->segs[n].vaddr = vaddr;
-		mr->mr.map[m]->segs[n].length = umem->page_size;
-		n++;
-		if (n == HFI1_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	ret = &mr->ibmr;
-
-bail:
-	return ret;
-}
-
-/**
- * hfi1_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by hfi1_get_dma_mr()
- * or hfi1_reg_user_mr().
- */
-int hfi1_dereg_mr(struct ib_mr *ibmr)
-{
-	struct hfi1_mr *mr = to_imr(ibmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	hfi1_free_lkey(&mr->mr);
-
-	hfi1_put_mr(&mr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&mr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		dd_dev_err(
-			dd_from_ibdev(mr->mr.pd->device),
-			"hfi1_dereg_mr timeout mr %p pd %p refcount %u\n",
-			mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
-		hfi1_get_mr(&mr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_mregion(&mr->mr);
-	if (mr->umem)
-		ib_umem_release(mr->umem);
-	kfree(mr);
-out:
-	return ret;
-}
-
-/*
- * Allocate a memory region usable with the
- * IB_WR_REG_MR send work request.
- *
- * Return the memory region on success, otherwise return an errno.
- * FIXME: IB_WR_REG_MR is not supported
- */
-struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
-			    enum ib_mr_type mr_type,
-			    u32 max_num_sg)
-{
-	struct hfi1_mr *mr;
-
-	if (mr_type != IB_MR_TYPE_MEM_REG)
-		return ERR_PTR(-EINVAL);
-
-	mr = alloc_mr(max_num_sg, pd);
-	if (IS_ERR(mr))
-		return (struct ib_mr *)mr;
-
-	return &mr->ibmr;
-}
-
-/**
- * hfi1_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			      struct ib_fmr_attr *fmr_attr)
-{
-	struct hfi1_fmr *fmr;
-	int m;
-	struct ib_fmr *ret;
-	int rval = -ENOMEM;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (fmr_attr->max_pages + HFI1_SEGSZ - 1) / HFI1_SEGSZ;
-	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
-	if (!fmr)
-		goto bail;
-
-	rval = init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
-	if (rval)
-		goto bail;
-
-	/*
-	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
-	 * rkey.
-	 */
-	rval = hfi1_alloc_lkey(&fmr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	fmr->ibfmr.rkey = fmr->mr.lkey;
-	fmr->ibfmr.lkey = fmr->mr.lkey;
-	/*
-	 * Resources are allocated but no valid mapping (RKEY can't be
-	 * used).
-	 */
-	fmr->mr.access_flags = mr_access_flags;
-	fmr->mr.max_segs = fmr_attr->max_pages;
-	fmr->mr.page_shift = fmr_attr->page_shift;
-
-	ret = &fmr->ibfmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_mregion(&fmr->mr);
-bail:
-	kfree(fmr);
-	ret = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * hfi1_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		      int list_len, u64 iova)
-{
-	struct hfi1_fmr *fmr = to_ifmr(ibfmr);
-	struct hfi1_lkey_table *rkt;
-	unsigned long flags;
-	int m, n, i;
-	u32 ps;
-	int ret;
-
-	i = atomic_read(&fmr->mr.refcount);
-	if (i > 2)
-		return -EBUSY;
-
-	if (list_len > fmr->mr.max_segs) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	rkt = &to_idev(ibfmr->device)->lk_table;
-	spin_lock_irqsave(&rkt->lock, flags);
-	fmr->mr.user_base = iova;
-	fmr->mr.iova = iova;
-	ps = 1 << fmr->mr.page_shift;
-	fmr->mr.length = list_len * ps;
-	m = 0;
-	n = 0;
-	for (i = 0; i < list_len; i++) {
-		fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
-		fmr->mr.map[m]->segs[n].length = ps;
-		if (++n == HFI1_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * hfi1_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int hfi1_unmap_fmr(struct list_head *fmr_list)
-{
-	struct hfi1_fmr *fmr;
-	struct hfi1_lkey_table *rkt;
-	unsigned long flags;
-
-	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
-		rkt = &to_idev(fmr->ibfmr.device)->lk_table;
-		spin_lock_irqsave(&rkt->lock, flags);
-		fmr->mr.user_base = 0;
-		fmr->mr.iova = 0;
-		fmr->mr.length = 0;
-		spin_unlock_irqrestore(&rkt->lock, flags);
-	}
-	return 0;
-}
-
-/**
- * hfi1_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int hfi1_dealloc_fmr(struct ib_fmr *ibfmr)
-{
-	struct hfi1_fmr *fmr = to_ifmr(ibfmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	hfi1_free_lkey(&fmr->mr);
-	hfi1_put_mr(&fmr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&fmr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		hfi1_get_mr(&fmr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_mregion(&fmr->mr);
-	kfree(fmr);
-out:
-	return ret;
-}
diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/staging/rdma/hfi1/opa_compat.h
index f64eec1c2951..6ef3c1cbdcd7 100644
--- a/drivers/staging/rdma/hfi1/opa_compat.h
+++ b/drivers/staging/rdma/hfi1/opa_compat.h
@@ -1,14 +1,13 @@
 #ifndef _LINUX_H
 #define _LINUX_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -111,19 +108,4 @@ enum opa_port_phys_state {
 	/* values 12-15 are reserved/ignored */
 };
 
-/* OPA_PORT_TYPE_* definitions - these belong in opa_port_info.h */
-#define OPA_PORT_TYPE_UNKNOWN          0
-#define OPA_PORT_TYPE_DISCONNECTED     1
-/* port is not currently usable, CableInfo not available */
-#define OPA_PORT_TYPE_FIXED            2
-/* A fixed backplane port in a director class switch. All OPA ASICS */
-#define OPA_PORT_TYPE_VARIABLE         3
-/* A backplane port in a blade system, possibly mixed configuration */
-#define OPA_PORT_TYPE_STANDARD         4
-/* implies a SFF-8636 defined format for CableInfo (QSFP) */
-#define OPA_PORT_TYPE_SI_PHOTONICS      5
-/* A silicon photonics module implies TBD defined format for CableInfo
- * as defined by Intel SFO group */
-/* 6 - 15 are reserved */
-
 #endif /* _LINUX_H */
diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c
index 8317b07d722a..0bac21e6a658 100644
--- a/drivers/staging/rdma/hfi1/pcie.c
+++ b/drivers/staging/rdma/hfi1/pcie.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -57,6 +54,7 @@
 
 #include "hfi.h"
 #include "chip_registers.h"
+#include "aspm.h"
 
 /* link speed vector for Gen3 speed - not in Linux headers */
 #define GEN1_SPEED_VECTOR 0x1
@@ -122,8 +120,9 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 			goto bail;
 		}
 		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-	} else
+	} else {
 		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	}
 	if (ret) {
 		hfi1_early_err(&pdev->dev,
 			       "Unable to set DMA consistent mask: %d\n", ret);
@@ -131,13 +130,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	pci_set_master(pdev);
-	ret = pci_enable_pcie_error_reporting(pdev);
-	if (ret) {
-		hfi1_early_err(&pdev->dev,
-			       "Unable to enable pcie error reporting: %d\n",
-			      ret);
-		ret = 0;
-	}
+	(void)pci_enable_pcie_error_reporting(pdev);
 	goto done;
 
 bail:
@@ -222,10 +215,9 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev,
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl);
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl);
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2,
-							&dd->pcie_devctl2);
+				  &dd->pcie_devctl2);
 	pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0);
-	pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-							&dd->pci_lnkctl3);
+	pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3);
 	pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
 
 	return 0;
@@ -238,7 +230,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev,
  */
 void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
 {
-	u64 __iomem *base = (void __iomem *) dd->kregbase;
+	u64 __iomem *base = (void __iomem *)dd->kregbase;
 
 	dd->flags &= ~HFI1_PRESENT;
 	dd->kregbase = NULL;
@@ -247,8 +239,6 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
 		iounmap(dd->rcvarray_wc);
 	if (dd->piobase)
 		iounmap(dd->piobase);
-
-	pci_set_drvdata(dd->pcidev, NULL);
 }
 
 /*
@@ -276,7 +266,7 @@ void hfi1_pcie_flr(struct hfi1_devdata *dd)
 
 clear:
 	pcie_capability_set_word(dd->pcidev, PCI_EXP_DEVCTL,
-						PCI_EXP_DEVCTL_BCR_FLR);
+				 PCI_EXP_DEVCTL_BCR_FLR);
 	/* PCIe spec requires the function to be back within 100ms */
 	msleep(100);
 }
@@ -289,9 +279,11 @@ static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt,
 	struct msix_entry *msix_entry;
 	int i;
 
-	/* We can't pass hfi1_msix_entry array to msix_setup
+	/*
+	 * We can't pass hfi1_msix_entry array to msix_setup
 	 * so use a dummy msix_entry array and copy the allocated
-	 * irq back to the hfi1_msix_entry array. */
+	 * irq back to the hfi1_msix_entry array.
+	 */
 	msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL);
 	if (!msix_entry) {
 		ret = -ENOMEM;
@@ -321,7 +313,6 @@ do_intx:
 		   nvec, ret);
 	*msixcnt = 0;
 	hfi1_enable_intx(dd->pcidev);
-
 }
 
 /* return the PCIe link speed from the given link status */
@@ -369,6 +360,7 @@ static void update_lbus_info(struct hfi1_devdata *dd)
 int pcie_speeds(struct hfi1_devdata *dd)
 {
 	u32 linkcap;
+	struct pci_dev *parent = dd->pcidev->bus->self;
 
 	if (!pci_is_pcie(dd->pcidev)) {
 		dd_dev_err(dd, "Can't find PCI Express capability!\n");
@@ -381,15 +373,15 @@ int pcie_speeds(struct hfi1_devdata *dd)
 	pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
 	if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) {
 		dd_dev_info(dd,
-			"This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
-			linkcap & PCI_EXP_LNKCAP_SLS);
+			    "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
+			    linkcap & PCI_EXP_LNKCAP_SLS);
 		dd->link_gen3_capable = 0;
 	}
 
 	/*
 	 * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
 	 */
-	if (dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+	if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
 		dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
 		dd->link_gen3_capable = 0;
 	}
@@ -397,9 +389,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
 	/* obtain the link width and current speed */
 	update_lbus_info(dd);
 
-	/* check against expected pcie width and complain if "wrong" */
-	if (dd->lbus_width < 16)
-		dd_dev_err(dd, "PCIe width %u (x16 HFI)\n", dd->lbus_width);
+	dd_dev_info(dd, "%s\n", dd->lbus_info);
 
 	return 0;
 }
@@ -438,23 +428,18 @@ void hfi1_enable_intx(struct pci_dev *pdev)
 void restore_pci_variables(struct hfi1_devdata *dd)
 {
 	pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
-	pci_write_config_dword(dd->pcidev,
-				PCI_BASE_ADDRESS_0, dd->pcibar0);
-	pci_write_config_dword(dd->pcidev,
-				PCI_BASE_ADDRESS_1, dd->pcibar1);
-	pci_write_config_dword(dd->pcidev,
-				PCI_ROM_ADDRESS, dd->pci_rom);
+	pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0);
+	pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1);
+	pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
-							dd->pcie_devctl2);
+				   dd->pcie_devctl2);
 	pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
-	pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-							dd->pci_lnkctl3);
+	pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3);
 	pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
 }
 
-
 /*
  * BIOS may not set PCIe bus-utilization parameters for best performance.
  * Check and optionally adjust them to maximize our throughput.
@@ -463,6 +448,10 @@ static int hfi1_pcie_caps;
 module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
 MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
 
+uint aspm_mode = ASPM_MODE_DISABLED;
+module_param_named(aspm, aspm_mode, uint, S_IRUGO);
+MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
+
 static void tune_pcie_caps(struct hfi1_devdata *dd)
 {
 	struct pci_dev *parent;
@@ -481,6 +470,12 @@ static void tune_pcie_caps(struct hfi1_devdata *dd)
 	}
 	/* Find out supported and configured values for parent (root) */
 	parent = dd->pcidev->bus->self;
+	/*
+	 * The driver cannot perform the tuning if it does not have
+	 * access to the upstream component.
+	 */
+	if (!parent)
+		return;
 	if (!pci_is_root_bus(parent->bus)) {
 		dd_dev_info(dd, "Parent not root\n");
 		return;
@@ -534,6 +529,7 @@ static void tune_pcie_caps(struct hfi1_devdata *dd)
 		pcie_set_readrq(dd->pcidev, ep_mrrs);
 	}
 }
+
 /* End of PCIe capability tuning */
 
 /*
@@ -748,21 +744,22 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
 		c0 = fs - (eq[i][PREC] / div) - (eq[i][POST] / div);
 		c_plus1 = eq[i][POST] / div;
 		pci_write_config_dword(pdev, PCIE_CFG_REG_PL102,
-			eq_value(c_minus1, c0, c_plus1));
+				       eq_value(c_minus1, c0, c_plus1));
 		/* check if these coefficients violate EQ rules */
 		pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105,
-								&violation);
+				      &violation);
 		if (violation
 		    & PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){
 			if (hit_error == 0) {
 				dd_dev_err(dd,
-					"Gen3 EQ Table Coefficient rule violations\n");
+					   "Gen3 EQ Table Coefficient rule violations\n");
 				dd_dev_err(dd, "         prec   attn   post\n");
 			}
 			dd_dev_err(dd, "   p%02d:   %02x     %02x     %02x\n",
-				i, (u32)eq[i][0], (u32)eq[i][1], (u32)eq[i][2]);
+				   i, (u32)eq[i][0], (u32)eq[i][1],
+				   (u32)eq[i][2]);
 			dd_dev_err(dd, "            %02x     %02x     %02x\n",
-				(u32)c_minus1, (u32)c0, (u32)c_plus1);
+				   (u32)c_minus1, (u32)c0, (u32)c_plus1);
 			hit_error = 1;
 		}
 	}
@@ -774,7 +771,7 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
 /*
  * Steps to be done after the PCIe firmware is downloaded and
  * before the SBR for the Pcie Gen3.
- * The hardware mutex is already being held.
+ * The SBus resource is already being held.
  */
 static void pcie_post_steps(struct hfi1_devdata *dd)
 {
@@ -817,8 +814,8 @@ static int trigger_sbr(struct hfi1_devdata *dd)
 	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
 		if (pdev != dev) {
 			dd_dev_err(dd,
-				"%s: another device is on the same bus\n",
-				__func__);
+				   "%s: another device is on the same bus\n",
+				   __func__);
 			return -ENOTTY;
 		}
 
@@ -842,8 +839,8 @@ static void write_gasket_interrupt(struct hfi1_devdata *dd, int index,
 				   u16 code, u16 data)
 {
 	write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (index * 8),
-	    (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT)
-	    |((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT)));
+		  (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT) |
+		   ((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT)));
 }
 
 /*
@@ -853,25 +850,101 @@ static void arm_gasket_logic(struct hfi1_devdata *dd)
 {
 	u64 reg;
 
-	reg = (((u64)1 << dd->hfi1_id)
-			<< ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT)
-		| ((u64)pcie_serdes_broadcast[dd->hfi1_id]
-			<< ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT
-		| ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK
-		| ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK)
-			<< ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT
-		);
+	reg = (((u64)1 << dd->hfi1_id) <<
+	       ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT) |
+	      ((u64)pcie_serdes_broadcast[dd->hfi1_id] <<
+	       ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT |
+	       ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK |
+	       ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK) <<
+	       ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT);
 	write_csr(dd, ASIC_PCIE_SD_HOST_CMD, reg);
 	/* read back to push the write */
 	read_csr(dd, ASIC_PCIE_SD_HOST_CMD);
 }
 
 /*
+ * CCE_PCIE_CTRL long name helpers
+ * We redefine these shorter macros to use in the code while leaving
+ * chip_registers.h to be autogenerated from the hardware spec.
+ */
+#define LANE_BUNDLE_MASK              CCE_PCIE_CTRL_PCIE_LANE_BUNDLE_MASK
+#define LANE_BUNDLE_SHIFT             CCE_PCIE_CTRL_PCIE_LANE_BUNDLE_SHIFT
+#define LANE_DELAY_MASK               CCE_PCIE_CTRL_PCIE_LANE_DELAY_MASK
+#define LANE_DELAY_SHIFT              CCE_PCIE_CTRL_PCIE_LANE_DELAY_SHIFT
+#define MARGIN_OVERWRITE_ENABLE_SHIFT CCE_PCIE_CTRL_XMT_MARGIN_OVERWRITE_ENABLE_SHIFT
+#define MARGIN_SHIFT                  CCE_PCIE_CTRL_XMT_MARGIN_SHIFT
+#define MARGIN_G1_G2_OVERWRITE_MASK   CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_OVERWRITE_ENABLE_MASK
+#define MARGIN_G1_G2_OVERWRITE_SHIFT  CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_OVERWRITE_ENABLE_SHIFT
+#define MARGIN_GEN1_GEN2_MASK         CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_MASK
+#define MARGIN_GEN1_GEN2_SHIFT        CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_SHIFT
+
+ /*
+  * Write xmt_margin for full-swing (WFR-B) or half-swing (WFR-C).
+  */
+static void write_xmt_margin(struct hfi1_devdata *dd, const char *fname)
+{
+	u64 pcie_ctrl;
+	u64 xmt_margin;
+	u64 xmt_margin_oe;
+	u64 lane_delay;
+	u64 lane_bundle;
+
+	pcie_ctrl = read_csr(dd, CCE_PCIE_CTRL);
+
+	/*
+	 * For Discrete, use full-swing.
+	 *  - PCIe TX defaults to full-swing.
+	 *    Leave this register as default.
+	 * For Integrated, use half-swing
+	 *  - Copy xmt_margin and xmt_margin_oe
+	 *    from Gen1/Gen2 to Gen3.
+	 */
+	if (dd->pcidev->device == PCI_DEVICE_ID_INTEL1) { /* integrated */
+		/* extract initial fields */
+		xmt_margin = (pcie_ctrl >> MARGIN_GEN1_GEN2_SHIFT)
+			      & MARGIN_GEN1_GEN2_MASK;
+		xmt_margin_oe = (pcie_ctrl >> MARGIN_G1_G2_OVERWRITE_SHIFT)
+				 & MARGIN_G1_G2_OVERWRITE_MASK;
+		lane_delay = (pcie_ctrl >> LANE_DELAY_SHIFT) & LANE_DELAY_MASK;
+		lane_bundle = (pcie_ctrl >> LANE_BUNDLE_SHIFT)
+			       & LANE_BUNDLE_MASK;
+
+		/*
+		 * For A0, EFUSE values are not set.  Override with the
+		 * correct values.
+		 */
+		if (is_ax(dd)) {
+			/*
+			 * xmt_margin and OverwiteEnabel should be the
+			 * same for Gen1/Gen2 and Gen3
+			 */
+			xmt_margin = 0x5;
+			xmt_margin_oe = 0x1;
+			lane_delay = 0xF; /* Delay 240ns. */
+			lane_bundle = 0x0; /* Set to 1 lane. */
+		}
+
+		/* overwrite existing values */
+		pcie_ctrl = (xmt_margin << MARGIN_GEN1_GEN2_SHIFT)
+			| (xmt_margin_oe << MARGIN_G1_G2_OVERWRITE_SHIFT)
+			| (xmt_margin << MARGIN_SHIFT)
+			| (xmt_margin_oe << MARGIN_OVERWRITE_ENABLE_SHIFT)
+			| (lane_delay << LANE_DELAY_SHIFT)
+			| (lane_bundle << LANE_BUNDLE_SHIFT);
+
+		write_csr(dd, CCE_PCIE_CTRL, pcie_ctrl);
+	}
+
+	dd_dev_dbg(dd, "%s: program XMT margin, CcePcieCtrl 0x%llx\n",
+		   fname, pcie_ctrl);
+}
+
+/*
  * Do all the steps needed to transition the PCIe link to Gen3 speed.
  */
 int do_pcie_gen3_transition(struct hfi1_devdata *dd)
 {
-	struct pci_dev *parent;
+	struct pci_dev *parent = dd->pcidev->bus->self;
 	u64 fw_ctrl;
 	u64 reg, therm;
 	u32 reg32, fs, lf;
@@ -880,8 +953,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
 	int do_retry, retry_count = 0;
 	uint default_pset;
 	u16 target_vector, target_speed;
-	u16 lnkctl, lnkctl2, vendor;
-	u8 nsbr = 1;
+	u16 lnkctl2, vendor;
 	u8 div;
 	const u8 (*eq)[3];
 	int return_error = 0;
@@ -908,17 +980,21 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
 	/* if already at target speed, done (unless forced) */
 	if (dd->lbus_speed == target_speed) {
 		dd_dev_info(dd, "%s: PCIe already at gen%d, %s\n", __func__,
-			pcie_target,
-			pcie_force ? "re-doing anyway" : "skipping");
+			    pcie_target,
+			    pcie_force ? "re-doing anyway" : "skipping");
 		if (!pcie_force)
 			return 0;
 	}
 
 	/*
-	 * A0 needs an additional SBR
+	 * The driver cannot do the transition if it has no access to the
+	 * upstream component
 	 */
-	if (is_ax(dd))
-		nsbr++;
+	if (!parent) {
+		dd_dev_info(dd, "%s: No upstream, Can't do gen3 transition\n",
+			    __func__);
+		return 0;
+	}
 
 	/*
 	 * Do the Gen3 transition.  Steps are those of the PCIe Gen3
@@ -934,10 +1010,13 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
 		goto done_no_mutex;
 	}
 
-	/* hold the HW mutex across the firmware download and SBR */
-	ret = acquire_hw_mutex(dd);
-	if (ret)
+	/* hold the SBus resource across the firmware download and SBR */
+	ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+	if (ret) {
+		dd_dev_err(dd, "%s: unable to acquire SBus resource\n",
+			   __func__);
 		return ret;
+	}
 
 	/* make sure thermal polling is not causing interrupts */
 	therm = read_csr(dd, ASIC_CFG_THERM_POLL_EN);
@@ -955,8 +1034,11 @@ retry:
 	/* step 4: download PCIe Gen3 SerDes firmware */
 	dd_dev_info(dd, "%s: downloading firmware\n", __func__);
 	ret = load_pcie_firmware(dd);
-	if (ret)
+	if (ret) {
+		/* do not proceed if the firmware cannot be downloaded */
+		return_error = 1;
 		goto done;
+	}
 
 	/* step 5: set up device parameter settings */
 	dd_dev_info(dd, "%s: setting PCIe registers\n", __func__);
@@ -986,7 +1068,7 @@ retry:
 	 * PcieCfgRegPl100 - Gen3 Control
 	 *
 	 * turn off PcieCfgRegPl100.Gen3ZRxDcNonCompl
-	 * turn on PcieCfgRegPl100.EqEieosCnt (erratum)
+	 * turn on PcieCfgRegPl100.EqEieosCnt
 	 * Everything else zero.
 	 */
 	reg32 = PCIE_CFG_REG_PL100_EQ_EIEOS_CNT_SMASK;
@@ -1016,8 +1098,10 @@ retry:
 		default_pset = DEFAULT_MCP_PSET;
 	}
 	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101,
-		(fs << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT)
-		| (lf << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT));
+			       (fs <<
+				PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT) |
+			       (lf <<
+				PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT));
 	ret = load_eq_table(dd, eq, fs, div);
 	if (ret)
 		goto done;
@@ -1031,15 +1115,15 @@ retry:
 		pcie_pset = default_pset;
 	if (pcie_pset > 10) {	/* valid range is 0-10, inclusive */
 		dd_dev_err(dd, "%s: Invalid Eq Pset %u, setting to %d\n",
-			__func__, pcie_pset, default_pset);
+			   __func__, pcie_pset, default_pset);
 		pcie_pset = default_pset;
 	}
 	dd_dev_info(dd, "%s: using EQ Pset %u\n", __func__, pcie_pset);
 	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL106,
-		((1 << pcie_pset)
-			<< PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT)
-		| PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK
-		| PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
+			       ((1 << pcie_pset) <<
+			PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT) |
+			PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK |
+			PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
 
 	/*
 	 * step 5b: Do post firmware download steps via SBus
@@ -1064,17 +1148,15 @@ retry:
 
 	/*
 	 * step 5d: program XMT margin
-	 * Right now, leave the default alone.  To change, do a
-	 * read-modify-write of:
-	 *	CcePcieCtrl.XmtMargin
-	 *	CcePcieCtrl.XmitMarginOverwriteEnable
 	 */
+	write_xmt_margin(dd, __func__);
 
-	/* step 5e: disable active state power management (ASPM) */
+	/*
+	 * step 5e: disable active state power management (ASPM). It
+	 * will be enabled if required later
+	 */
 	dd_dev_info(dd, "%s: clearing ASPM\n", __func__);
-	pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &lnkctl);
-	lnkctl &= ~PCI_EXP_LNKCTL_ASPMC;
-	pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, lnkctl);
+	aspm_hw_disable_l1(dd);
 
 	/*
 	 * step 5f: clear DirectSpeedChange
@@ -1093,16 +1175,15 @@ retry:
 	 * that it is Gen3 capable earlier.
 	 */
 	dd_dev_info(dd, "%s: setting parent target link speed\n", __func__);
-	parent = dd->pcidev->bus->self;
 	pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
 	dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
-		(u32)lnkctl2);
+		    (u32)lnkctl2);
 	/* only write to parent if target is not as high as ours */
 	if ((lnkctl2 & LNKCTL2_TARGET_LINK_SPEED_MASK) < target_vector) {
 		lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
 		lnkctl2 |= target_vector;
 		dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
-			(u32)lnkctl2);
+			    (u32)lnkctl2);
 		pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2);
 	} else {
 		dd_dev_info(dd, "%s: ..target speed is OK\n", __func__);
@@ -1111,17 +1192,17 @@ retry:
 	dd_dev_info(dd, "%s: setting target link speed\n", __func__);
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
 	dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
-		(u32)lnkctl2);
+		    (u32)lnkctl2);
 	lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
 	lnkctl2 |= target_vector;
 	dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
-		(u32)lnkctl2);
+		    (u32)lnkctl2);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
 
 	/* step 5h: arm gasket logic */
 	/* hold DC in reset across the SBR */
 	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
-	(void) read_csr(dd, CCE_DC_CTRL); /* DC reset hold */
+	(void)read_csr(dd, CCE_DC_CTRL); /* DC reset hold */
 	/* save firmware control across the SBR */
 	fw_ctrl = read_csr(dd, MISC_CFG_FW_CTRL);
 
@@ -1152,8 +1233,8 @@ retry:
 	ret = pci_read_config_word(dd->pcidev, PCI_VENDOR_ID, &vendor);
 	if (ret) {
 		dd_dev_info(dd,
-			"%s: read of VendorID failed after SBR, err %d\n",
-			__func__, ret);
+			    "%s: read of VendorID failed after SBR, err %d\n",
+			    __func__, ret);
 		return_error = 1;
 		goto done;
 	}
@@ -1193,8 +1274,7 @@ retry:
 	write_csr(dd, CCE_DC_CTRL, 0);
 
 	/* Set the LED off */
-	if (is_ax(dd))
-		setextled(dd, 0);
+	setextled(dd, 0);
 
 	/* check for any per-lane errors */
 	pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
@@ -1205,8 +1285,8 @@ retry:
 			& ASIC_PCIE_SD_HOST_STATUS_FW_DNLD_STS_MASK;
 	if ((status & (1 << dd->hfi1_id)) == 0) {
 		dd_dev_err(dd,
-			"%s: gasket status 0x%x, expecting 0x%x\n",
-			__func__, status, 1 << dd->hfi1_id);
+			   "%s: gasket status 0x%x, expecting 0x%x\n",
+			   __func__, status, 1 << dd->hfi1_id);
 		ret = -EIO;
 		goto done;
 	}
@@ -1223,13 +1303,13 @@ retry:
 	/* update our link information cache */
 	update_lbus_info(dd);
 	dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
-		dd->lbus_info);
+		    dd->lbus_info);
 
 	if (dd->lbus_speed != target_speed) { /* not target */
 		/* maybe retry */
 		do_retry = retry_count < pcie_retry;
 		dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
-			pcie_target, do_retry ? ", retrying" : "");
+			   pcie_target, do_retry ? ", retrying" : "");
 		retry_count++;
 		if (do_retry) {
 			msleep(100); /* allow time to settle */
@@ -1245,7 +1325,7 @@ done:
 		dd_dev_info(dd, "%s: Re-enable therm polling\n",
 			    __func__);
 	}
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_SBUS);
 done_no_mutex:
 	/* return no error if it is OK to be at current speed */
 	if (ret && !return_error) {
diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c
index b51a4416312b..c6849ce9e5eb 100644
--- a/drivers/staging/rdma/hfi1/pio.c
+++ b/drivers/staging/rdma/hfi1/pio.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -101,7 +98,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
 	/* Fall through */
 	case PSC_DATA_VL_ENABLE:
 		/* Disallow sending on VLs not enabled */
-		mask = (((~0ull)<<num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK)<<
+		mask = (((~0ull) << num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK) <<
 				SEND_CTRL_UNSUPPORTED_VL_SHIFT;
 		reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask;
 		break;
@@ -130,7 +127,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
 	if (write) {
 		write_csr(dd, SEND_CTRL, reg);
 		if (flush)
-			(void) read_csr(dd, SEND_CTRL); /* flush write */
+			(void)read_csr(dd, SEND_CTRL); /* flush write */
 	}
 
 	spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
@@ -177,8 +174,10 @@ static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = {
 
 /* memory pool information, used when calculating final sizes */
 struct mem_pool_info {
-	int centipercent;	/* 100th of 1% of memory to use, -1 if blocks
-				   already set */
+	int centipercent;	/*
+				 * 100th of 1% of memory to use, -1 if blocks
+				 * already set
+				 */
 	int count;		/* count of contexts in the pool */
 	int blocks;		/* block size of the pool */
 	int size;		/* context size, in blocks */
@@ -312,7 +311,7 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
 		if (i == SC_ACK) {
 			count = dd->n_krcv_queues;
 		} else if (i == SC_KERNEL) {
-			count = num_vls + 1 /* VL15 */;
+			count = (INIT_SC_PER_VL * num_vls) + 1 /* VL15 */;
 		} else if (count == SCC_PER_CPU) {
 			count = dd->num_rcv_contexts - dd->n_krcv_queues;
 		} else if (count < 0) {
@@ -509,7 +508,7 @@ static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context)
 	sci = &dd->send_contexts[sw_index];
 	if (!sci->allocated) {
 		dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n",
-			__func__, sw_index, hw_context);
+			   __func__, sw_index, hw_context);
 	}
 	sci->allocated = 0;
 	dd->hw_to_sw[hw_context] = INVALID_SCI;
@@ -625,7 +624,7 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold)
 				& SC(CREDIT_CTRL_THRESHOLD_MASK))
 			   << SC(CREDIT_CTRL_THRESHOLD_SHIFT));
 		write_kctxt_csr(sc->dd, sc->hw_context,
-			SC(CREDIT_CTRL), sc->credit_ctrl);
+				SC(CREDIT_CTRL), sc->credit_ctrl);
 
 		/* force a credit return on change to avoid a possible stall */
 		force_return = 1;
@@ -700,7 +699,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
 	if (dd->flags & HFI1_FROZEN)
 		return NULL;
 
-	sc = kzalloc_node(sizeof(struct send_context), GFP_KERNEL, numa);
+	sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa);
 	if (!sc)
 		return NULL;
 
@@ -763,9 +762,9 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
 
 	/* set the default partition key */
 	write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY),
-		(DEFAULT_PKEY &
-			SC(CHECK_PARTITION_KEY_VALUE_MASK))
-		    << SC(CHECK_PARTITION_KEY_VALUE_SHIFT));
+			(SC(CHECK_PARTITION_KEY_VALUE_MASK) &
+			 DEFAULT_PKEY) <<
+			SC(CHECK_PARTITION_KEY_VALUE_SHIFT));
 
 	/* per context type checks */
 	if (type == SC_USER) {
@@ -778,8 +777,8 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
 
 	/* set the send context check opcode mask and value */
 	write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE),
-		((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
-		((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
+			((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
+			((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
 
 	/* set up credit return */
 	reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
@@ -797,7 +796,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
 		thresh = sc_percent_to_threshold(sc, 50);
 	} else if (type == SC_USER) {
 		thresh = sc_percent_to_threshold(sc,
-				user_credit_return_threshold);
+						 user_credit_return_threshold);
 	} else { /* kernel */
 		thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize);
 	}
@@ -852,7 +851,6 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
 		  sc->credit_ctrl,
 		  thresh);
 
-
 	return sc;
 }
 
@@ -971,11 +969,11 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
 		if (loop > 500) {
 			/* timed out - bounce the link */
 			dd_dev_err(dd,
-				"%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
-				__func__, sc->sw_index,
-				sc->hw_context, (u32)reg);
+				   "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
+				   __func__, sc->sw_index,
+				   sc->hw_context, (u32)reg);
 			queue_work(dd->pport->hfi1_wq,
-				&dd->pport->link_bounce_work);
+				   &dd->pport->link_bounce_work);
 			break;
 		}
 		loop++;
@@ -1021,7 +1019,7 @@ int sc_restart(struct send_context *sc)
 		return -EINVAL;
 
 	dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index,
-		sc->hw_context);
+		    sc->hw_context);
 
 	/*
 	 * Step 1: Wait for the context to actually halt.
@@ -1036,7 +1034,7 @@ int sc_restart(struct send_context *sc)
 			break;
 		if (loop > 100) {
 			dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n",
-				__func__, sc->sw_index, sc->hw_context);
+				   __func__, sc->sw_index, sc->hw_context);
 			return -ETIME;
 		}
 		loop++;
@@ -1062,9 +1060,9 @@ int sc_restart(struct send_context *sc)
 				break;
 			if (loop > 100) {
 				dd_dev_err(dd,
-					"%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n",
-					__func__, sc->sw_index,
-					sc->hw_context, count);
+					   "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n",
+					   __func__, sc->sw_index,
+					   sc->hw_context, count);
 			}
 			loop++;
 			udelay(1);
@@ -1177,18 +1175,18 @@ void pio_reset_all(struct hfi1_devdata *dd)
 	if (ret == -EIO) {
 		/* clear the error */
 		write_csr(dd, SEND_PIO_ERR_CLEAR,
-			SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
+			  SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
 	}
 
 	/* reset init all */
 	write_csr(dd, SEND_PIO_INIT_CTXT,
-			SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
+		  SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
 	udelay(2);
 	ret = pio_init_wait_progress(dd);
 	if (ret < 0) {
 		dd_dev_err(dd,
-			"PIO send context init %s while initializing all PIO blocks\n",
-			ret == -ETIMEDOUT ? "is stuck" : "had an error");
+			   "PIO send context init %s while initializing all PIO blocks\n",
+			   ret == -ETIMEDOUT ? "is stuck" : "had an error");
 	}
 }
 
@@ -1236,8 +1234,7 @@ int sc_enable(struct send_context *sc)
 	 */
 	reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS));
 	if (reg)
-		write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR),
-			reg);
+		write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg);
 
 	/*
 	 * The HW PIO initialization engine can handle only one init
@@ -1295,7 +1292,7 @@ void sc_return_credits(struct send_context *sc)
 
 	/* a 0->1 transition schedules a credit return */
 	write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE),
-		SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
+			SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
 	/*
 	 * Ensure that the write is flushed and the credit return is
 	 * scheduled. We care more about the 0 -> 1 transition.
@@ -1321,7 +1318,7 @@ void sc_drop(struct send_context *sc)
 		return;
 
 	dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
-			__func__, sc->sw_index, sc->hw_context);
+		    __func__, sc->sw_index, sc->hw_context);
 }
 
 /*
@@ -1346,7 +1343,7 @@ void sc_stop(struct send_context *sc, int flag)
 	wake_up(&sc->halt_wait);
 }
 
-#define BLOCK_DWORDS (PIO_BLOCK_SIZE/sizeof(u32))
+#define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32))
 #define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS)
 
 /*
@@ -1430,8 +1427,10 @@ retry:
 	next = head + 1;
 	if (next >= sc->sr_size)
 		next = 0;
-	/* update the head - must be last! - the releaser can look at fields
-	   in pbuf once we move the head */
+	/*
+	 * update the head - must be last! - the releaser can look at fields
+	 * in pbuf once we move the head
+	 */
 	smp_wmb();
 	sc->sr_head = next;
 	spin_unlock_irqrestore(&sc->alloc_lock, flags);
@@ -1469,7 +1468,7 @@ void sc_add_credit_return_intr(struct send_context *sc)
 	if (sc->credit_intr_count == 0) {
 		sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
 		write_kctxt_csr(sc->dd, sc->hw_context,
-			SC(CREDIT_CTRL), sc->credit_ctrl);
+				SC(CREDIT_CTRL), sc->credit_ctrl);
 	}
 	sc->credit_intr_count++;
 	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
@@ -1491,7 +1490,7 @@ void sc_del_credit_return_intr(struct send_context *sc)
 	if (sc->credit_intr_count == 0) {
 		sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
 		write_kctxt_csr(sc->dd, sc->hw_context,
-			SC(CREDIT_CTRL), sc->credit_ctrl);
+				SC(CREDIT_CTRL), sc->credit_ctrl);
 	}
 	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
 }
@@ -1526,8 +1525,9 @@ static void sc_piobufavail(struct send_context *sc)
 	struct hfi1_devdata *dd = sc->dd;
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
 	struct list_head *list;
-	struct hfi1_qp *qps[PIO_WAIT_BATCH_SIZE];
-	struct hfi1_qp *qp;
+	struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
+	struct rvt_qp *qp;
+	struct hfi1_qp_priv *priv;
 	unsigned long flags;
 	unsigned i, n = 0;
 
@@ -1545,24 +1545,28 @@ static void sc_piobufavail(struct send_context *sc)
 		struct iowait *wait;
 
 		if (n == ARRAY_SIZE(qps))
-			goto full;
+			break;
 		wait = list_first_entry(list, struct iowait, list);
-		qp = container_of(wait, struct hfi1_qp, s_iowait);
-		list_del_init(&qp->s_iowait.list);
+		qp = iowait_to_qp(wait);
+		priv = qp->priv;
+		list_del_init(&priv->s_iowait.list);
 		/* refcount held until actual wake up */
 		qps[n++] = qp;
 	}
 	/*
-	 * Counting: only call wantpiobuf_intr() if there were waiters and they
-	 * are now all gone.
+	 * If there had been waiters and there are more
+	 * insure that we redo the force to avoid a potential hang.
 	 */
-	if (n)
+	if (n) {
 		hfi1_sc_wantpiobuf_intr(sc, 0);
-full:
+		if (!list_empty(list))
+			hfi1_sc_wantpiobuf_intr(sc, 1);
+	}
 	write_sequnlock_irqrestore(&dev->iowait_lock, flags);
 
 	for (i = 0; i < n; i++)
-		hfi1_qp_wakeup(qps[i], HFI1_S_WAIT_PIO);
+		hfi1_qp_wakeup(qps[i],
+			       RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
 }
 
 /* translate a send credit update to a bit code of reasons */
@@ -1661,7 +1665,7 @@ void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context)
 	sw_index = dd->hw_to_sw[hw_context];
 	if (unlikely(sw_index >= dd->num_send_contexts)) {
 		dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n",
-			__func__, hw_context, sw_index);
+			   __func__, hw_context, sw_index);
 		goto done;
 	}
 	sc = dd->send_contexts[sw_index].sc;
@@ -1674,8 +1678,8 @@ void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context)
 		sw_index = dd->hw_to_sw[gc];
 		if (unlikely(sw_index >= dd->num_send_contexts)) {
 			dd_dev_err(dd,
-				"%s: invalid hw (%u) to sw (%u) mapping\n",
-				__func__, hw_context, sw_index);
+				   "%s: invalid hw (%u) to sw (%u) mapping\n",
+				   __func__, hw_context, sw_index);
 			continue;
 		}
 		sc_release_update(dd->send_contexts[sw_index].sc);
@@ -1684,11 +1688,217 @@ done:
 	spin_unlock(&dd->sc_lock);
 }
 
+/*
+ * pio_select_send_context_vl() - select send context
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @vl: this vl
+ *
+ * This function returns a send context based on the selector and a vl.
+ * The mapping fields are protected by RCU
+ */
+struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd,
+						u32 selector, u8 vl)
+{
+	struct pio_vl_map *m;
+	struct pio_map_elem *e;
+	struct send_context *rval;
+
+	/*
+	 * NOTE This should only happen if SC->VL changed after the initial
+	 * checks on the QP/AH
+	 * Default will return VL0's send context below
+	 */
+	if (unlikely(vl >= num_vls)) {
+		rval = NULL;
+		goto done;
+	}
+
+	rcu_read_lock();
+	m = rcu_dereference(dd->pio_map);
+	if (unlikely(!m)) {
+		rcu_read_unlock();
+		return dd->vld[0].sc;
+	}
+	e = m->map[vl & m->mask];
+	rval = e->ksc[selector & e->mask];
+	rcu_read_unlock();
+
+done:
+	rval = !rval ? dd->vld[0].sc : rval;
+	return rval;
+}
+
+/*
+ * pio_select_send_context_sc() - select send context
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @sc5: the 5 bit sc
+ *
+ * This function returns an send context based on the selector and an sc
+ */
+struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd,
+						u32 selector, u8 sc5)
+{
+	u8 vl = sc_to_vlt(dd, sc5);
+
+	return pio_select_send_context_vl(dd, selector, vl);
+}
+
+/*
+ * Free the indicated map struct
+ */
+static void pio_map_free(struct pio_vl_map *m)
+{
+	int i;
+
+	for (i = 0; m && i < m->actual_vls; i++)
+		kfree(m->map[i]);
+	kfree(m);
+}
+
+/*
+ * Handle RCU callback
+ */
+static void pio_map_rcu_callback(struct rcu_head *list)
+{
+	struct pio_vl_map *m = container_of(list, struct pio_vl_map, list);
+
+	pio_map_free(m);
+}
+
+/*
+ * pio_map_init - called when #vls change
+ * @dd: hfi1_devdata
+ * @port: port number
+ * @num_vls: number of vls
+ * @vl_scontexts: per vl send context mapping (optional)
+ *
+ * This routine changes the mapping based on the number of vls.
+ *
+ * vl_scontexts is used to specify a non-uniform vl/send context
+ * loading. NULL implies auto computing the loading and giving each
+ * VL an uniform distribution of send contexts per VL.
+ *
+ * The auto algorithm computers the sc_per_vl and the number of extra
+ * send contexts. Any extra send contexts are added from the last VL
+ * on down
+ *
+ * rcu locking is used here to control access to the mapping fields.
+ *
+ * If either the num_vls or num_send_contexts are non-power of 2, the
+ * array sizes in the struct pio_vl_map and the struct pio_map_elem are
+ * rounded up to the next highest power of 2 and the first entry is
+ * reused in a round robin fashion.
+ *
+ * If an error occurs the map change is not done and the mapping is not
+ * chaged.
+ *
+ */
+int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
+{
+	int i, j;
+	int extra, sc_per_vl;
+	int scontext = 1;
+	int num_kernel_send_contexts = 0;
+	u8 lvl_scontexts[OPA_MAX_VLS];
+	struct pio_vl_map *oldmap, *newmap;
+
+	if (!vl_scontexts) {
+		/* send context 0 reserved for VL15 */
+		for (i = 1; i < dd->num_send_contexts; i++)
+			if (dd->send_contexts[i].type == SC_KERNEL)
+				num_kernel_send_contexts++;
+		/* truncate divide */
+		sc_per_vl = num_kernel_send_contexts / num_vls;
+		/* extras */
+		extra = num_kernel_send_contexts % num_vls;
+		vl_scontexts = lvl_scontexts;
+		/* add extras from last vl down */
+		for (i = num_vls - 1; i >= 0; i--, extra--)
+			vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0);
+	}
+	/* build new map */
+	newmap = kzalloc(sizeof(*newmap) +
+			 roundup_pow_of_two(num_vls) *
+			 sizeof(struct pio_map_elem *),
+			 GFP_KERNEL);
+	if (!newmap)
+		goto bail;
+	newmap->actual_vls = num_vls;
+	newmap->vls = roundup_pow_of_two(num_vls);
+	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
+	for (i = 0; i < newmap->vls; i++) {
+		/* save for wrap around */
+		int first_scontext = scontext;
+
+		if (i < newmap->actual_vls) {
+			int sz = roundup_pow_of_two(vl_scontexts[i]);
+
+			/* only allocate once */
+			newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) +
+						 sz * sizeof(struct
+							     send_context *),
+						 GFP_KERNEL);
+			if (!newmap->map[i])
+				goto bail;
+			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
+			/* assign send contexts */
+			for (j = 0; j < sz; j++) {
+				if (dd->kernel_send_context[scontext])
+					newmap->map[i]->ksc[j] =
+					dd->kernel_send_context[scontext];
+				if (++scontext >= first_scontext +
+						  vl_scontexts[i])
+					/* wrap back to first send context */
+					scontext = first_scontext;
+			}
+		} else {
+			/* just re-use entry without allocating */
+			newmap->map[i] = newmap->map[i % num_vls];
+		}
+		scontext = first_scontext + vl_scontexts[i];
+	}
+	/* newmap in hand, save old map */
+	spin_lock_irq(&dd->pio_map_lock);
+	oldmap = rcu_dereference_protected(dd->pio_map,
+					   lockdep_is_held(&dd->pio_map_lock));
+
+	/* publish newmap */
+	rcu_assign_pointer(dd->pio_map, newmap);
+
+	spin_unlock_irq(&dd->pio_map_lock);
+	/* success, free any old map after grace period */
+	if (oldmap)
+		call_rcu(&oldmap->list, pio_map_rcu_callback);
+	return 0;
+bail:
+	/* free any partial allocation */
+	pio_map_free(newmap);
+	return -ENOMEM;
+}
+
+void free_pio_map(struct hfi1_devdata *dd)
+{
+	/* Free PIO map if allocated */
+	if (rcu_access_pointer(dd->pio_map)) {
+		spin_lock_irq(&dd->pio_map_lock);
+		pio_map_free(rcu_access_pointer(dd->pio_map));
+		RCU_INIT_POINTER(dd->pio_map, NULL);
+		spin_unlock_irq(&dd->pio_map_lock);
+		synchronize_rcu();
+	}
+	kfree(dd->kernel_send_context);
+	dd->kernel_send_context = NULL;
+}
+
 int init_pervl_scs(struct hfi1_devdata *dd)
 {
 	int i;
-	u64 mask, all_vl_mask = (u64) 0x80ff; /* VLs 0-7, 15 */
+	u64 mask, all_vl_mask = (u64)0x80ff; /* VLs 0-7, 15 */
+	u64 data_vls_mask = (u64)0x00ff; /* VLs 0-7 */
 	u32 ctxt;
+	struct hfi1_pportdata *ppd = dd->pport;
 
 	dd->vld[15].sc = sc_alloc(dd, SC_KERNEL,
 				  dd->rcd[0]->rcvhdrqentsize, dd->node);
@@ -1696,6 +1906,12 @@ int init_pervl_scs(struct hfi1_devdata *dd)
 		goto nomem;
 	hfi1_init_ctxt(dd->vld[15].sc);
 	dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);
+
+	dd->kernel_send_context = kmalloc_node(dd->num_send_contexts *
+					sizeof(struct send_context *),
+					GFP_KERNEL, dd->node);
+	dd->kernel_send_context[0] = dd->vld[15].sc;
+
 	for (i = 0; i < num_vls; i++) {
 		/*
 		 * Since this function does not deal with a specific
@@ -1708,12 +1924,19 @@ int init_pervl_scs(struct hfi1_devdata *dd)
 					 dd->rcd[0]->rcvhdrqentsize, dd->node);
 		if (!dd->vld[i].sc)
 			goto nomem;
-
+		dd->kernel_send_context[i + 1] = dd->vld[i].sc;
 		hfi1_init_ctxt(dd->vld[i].sc);
-
 		/* non VL15 start with the max MTU */
 		dd->vld[i].mtu = hfi1_max_mtu;
 	}
+	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
+		dd->kernel_send_context[i + 1] =
+		sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node);
+		if (!dd->kernel_send_context[i + 1])
+			goto nomem;
+		hfi1_init_ctxt(dd->kernel_send_context[i + 1]);
+	}
+
 	sc_enable(dd->vld[15].sc);
 	ctxt = dd->vld[15].sc->hw_context;
 	mask = all_vl_mask & ~(1LL << 15);
@@ -1721,17 +1944,29 @@ int init_pervl_scs(struct hfi1_devdata *dd)
 	dd_dev_info(dd,
 		    "Using send context %u(%u) for VL15\n",
 		    dd->vld[15].sc->sw_index, ctxt);
+
 	for (i = 0; i < num_vls; i++) {
 		sc_enable(dd->vld[i].sc);
 		ctxt = dd->vld[i].sc->hw_context;
-		mask = all_vl_mask & ~(1LL << i);
+		mask = all_vl_mask & ~(data_vls_mask);
 		write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
 	}
+	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
+		sc_enable(dd->kernel_send_context[i + 1]);
+		ctxt = dd->kernel_send_context[i + 1]->hw_context;
+		mask = all_vl_mask & ~(data_vls_mask);
+		write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
+	}
+
+	if (pio_map_init(dd, ppd->port - 1, num_vls, NULL))
+		goto nomem;
 	return 0;
 nomem:
 	sc_free(dd->vld[15].sc);
 	for (i = 0; i < num_vls; i++)
 		sc_free(dd->vld[i].sc);
+	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++)
+		sc_free(dd->kernel_send_context[i + 1]);
 	return -ENOMEM;
 }
 
@@ -1769,11 +2004,11 @@ int init_credit_return(struct hfi1_devdata *dd)
 					bytes,
 					&dd->cr_base[i].pa,
 					GFP_KERNEL);
-		if (dd->cr_base[i].va == NULL) {
+		if (!dd->cr_base[i].va) {
 			set_dev_node(&dd->pcidev->dev, dd->node);
 			dd_dev_err(dd,
-				"Unable to allocate credit return DMA range for NUMA %d\n",
-				i);
+				   "Unable to allocate credit return DMA range for NUMA %d\n",
+				   i);
 			ret = -ENOMEM;
 			goto done;
 		}
@@ -1797,10 +2032,10 @@ void free_credit_return(struct hfi1_devdata *dd)
 	for (i = 0; i < num_numa; i++) {
 		if (dd->cr_base[i].va) {
 			dma_free_coherent(&dd->pcidev->dev,
-				TXE_NUM_CONTEXTS
-					* sizeof(struct credit_return),
-				dd->cr_base[i].va,
-				dd->cr_base[i].pa);
+					  TXE_NUM_CONTEXTS *
+					  sizeof(struct credit_return),
+					  dd->cr_base[i].va,
+					  dd->cr_base[i].pa);
 		}
 	}
 	kfree(dd->cr_base);
diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/staging/rdma/hfi1/pio.h
index 53d3e0a79375..0026976ce4f6 100644
--- a/drivers/staging/rdma/hfi1/pio.h
+++ b/drivers/staging/rdma/hfi1/pio.h
@@ -1,14 +1,13 @@
 #ifndef _PIO_H
 #define _PIO_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -50,7 +47,6 @@
  *
  */
 
-
 /* send context types */
 #define SC_KERNEL 0
 #define SC_ACK    1
@@ -106,6 +102,7 @@ struct send_context {
 	struct hfi1_devdata *dd;		/* device */
 	void __iomem *base_addr;	/* start of PIO memory */
 	union pio_shadow_ring *sr;	/* shadow ring */
+
 	volatile __le64 *hw_free;	/* HW free counter */
 	struct work_struct halt_work;	/* halted context work queue entry */
 	unsigned long flags;		/* flags */
@@ -165,6 +162,112 @@ struct sc_config_sizes {
 	short int count;
 };
 
+/*
+ * The diagram below details the relationship of the mapping structures
+ *
+ * Since the mapping now allows for non-uniform send contexts per vl, the
+ * number of send contexts for a vl is either the vl_scontexts[vl] or
+ * a computation based on num_kernel_send_contexts/num_vls:
+ *
+ * For example:
+ * nactual = vl_scontexts ? vl_scontexts[vl] : num_kernel_send_contexts/num_vls
+ *
+ * n = roundup to next highest power of 2 using nactual
+ *
+ * In the case where there are num_kernel_send_contexts/num_vls doesn't divide
+ * evenly, the extras are added from the last vl downward.
+ *
+ * For the case where n > nactual, the send contexts are assigned
+ * in a round robin fashion wrapping back to the first send context
+ * for a particular vl.
+ *
+ *               dd->pio_map
+ *                    |                                   pio_map_elem[0]
+ *                    |                                +--------------------+
+ *                    v                                |       mask         |
+ *               pio_vl_map                            |--------------------|
+ *      +--------------------------+                   | ksc[0] -> sc 1     |
+ *      |    list (RCU)            |                   |--------------------|
+ *      |--------------------------|                 ->| ksc[1] -> sc 2     |
+ *      |    mask                  |              --/  |--------------------|
+ *      |--------------------------|            -/     |        *           |
+ *      |    actual_vls (max 8)    |          -/       |--------------------|
+ *      |--------------------------|       --/         | ksc[n] -> sc n     |
+ *      |    vls (max 8)           |     -/            +--------------------+
+ *      |--------------------------|  --/
+ *      |    map[0]                |-/
+ *      |--------------------------|                   +--------------------+
+ *      |    map[1]                |---                |       mask         |
+ *      |--------------------------|   \----           |--------------------|
+ *      |           *              |        \--        | ksc[0] -> sc 1+n   |
+ *      |           *              |           \----   |--------------------|
+ *      |           *              |                \->| ksc[1] -> sc 2+n   |
+ *      |--------------------------|                   |--------------------|
+ *      |   map[vls - 1]           |-                  |         *          |
+ *      +--------------------------+ \-                |--------------------|
+ *                                     \-              | ksc[m] -> sc m+n   |
+ *                                       \             +--------------------+
+ *                                        \-
+ *                                          \
+ *                                           \-        +--------------------+
+ *                                             \-      |       mask         |
+ *                                               \     |--------------------|
+ *                                                \-   | ksc[0] -> sc 1+m+n |
+ *                                                  \- |--------------------|
+ *                                                    >| ksc[1] -> sc 2+m+n |
+ *                                                     |--------------------|
+ *                                                     |         *          |
+ *                                                     |--------------------|
+ *                                                     | ksc[o] -> sc o+m+n |
+ *                                                     +--------------------+
+ *
+ */
+
+/* Initial number of send contexts per VL */
+#define INIT_SC_PER_VL 2
+
+/*
+ * struct pio_map_elem - mapping for a vl
+ * @mask - selector mask
+ * @ksc - array of kernel send contexts for this vl
+ *
+ * The mask is used to "mod" the selector to
+ * produce index into the trailing array of
+ * kscs
+ */
+struct pio_map_elem {
+	u32 mask;
+	struct send_context *ksc[0];
+};
+
+/*
+ * struct pio_vl_map - mapping for a vl
+ * @list - rcu head for free callback
+ * @mask - vl mask to "mod" the vl to produce an index to map array
+ * @actual_vls - number of vls
+ * @vls - numbers of vls rounded to next power of 2
+ * @map - array of pio_map_elem entries
+ *
+ * This is the parent mapping structure. The trailing members of the
+ * struct point to pio_map_elem entries, which in turn point to an
+ * array of kscs for that vl.
+ */
+struct pio_vl_map {
+	struct rcu_head list;
+	u32 mask;
+	u8 actual_vls;
+	u8 vls;
+	struct pio_map_elem *map[0];
+};
+
+int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls,
+		 u8 *vl_scontexts);
+void free_pio_map(struct hfi1_devdata *dd);
+struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd,
+						u32 selector, u8 vl);
+struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd,
+						u32 selector, u8 sc5);
+
 /* send context functions */
 int init_credit_return(struct hfi1_devdata *dd);
 void free_credit_return(struct hfi1_devdata *dd);
@@ -183,7 +286,7 @@ void sc_flush(struct send_context *sc);
 void sc_drop(struct send_context *sc);
 void sc_stop(struct send_context *sc, int bit);
 struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
-			pio_release_cb cb, void *arg);
+				pio_release_cb cb, void *arg);
 void sc_release_update(struct send_context *sc);
 void sc_return_credits(struct send_context *sc);
 void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context);
@@ -212,12 +315,11 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd);
 void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl);
 void pio_send_control(struct hfi1_devdata *dd, int op);
 
-
 /* PIO copy routines */
 void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
 	      const void *from, size_t count);
 void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
-					const void *from, size_t nbytes);
+			const void *from, size_t nbytes);
 void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes);
 void seg_pio_copy_end(struct pio_buf *pbuf);
 
diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/staging/rdma/hfi1/pio_copy.c
index ebb0bafc68cb..8c25e1b58849 100644
--- a/drivers/staging/rdma/hfi1/pio_copy.c
+++ b/drivers/staging/rdma/hfi1/pio_copy.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,9 +49,9 @@
 
 /* additive distance between non-SOP and SOP space */
 #define SOP_DISTANCE (TXE_PIO_SIZE / 2)
-#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE-1)
+#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
 /* number of QUADWORDs in a block */
-#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE/sizeof(u64))
+#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
 
 /**
  * pio_copy - copy data block to MMIO space
@@ -83,11 +80,13 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
 	dest += sizeof(u64);
 
 	/* calculate where the QWORD data ends - in SOP=1 space */
-	dend = dest + ((count>>1) * sizeof(u64));
+	dend = dest + ((count >> 1) * sizeof(u64));
 
 	if (dend < send) {
-		/* all QWORD data is within the SOP block, does *not*
-		   reach the end of the SOP block */
+		/*
+		 * all QWORD data is within the SOP block, does *not*
+		 * reach the end of the SOP block
+		 */
 
 		while (dest < dend) {
 			writeq(*(u64 *)from, dest);
@@ -152,8 +151,10 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
 		writeq(val.val64, dest);
 		dest += sizeof(u64);
 	}
-	/* fill in rest of block, no need to check pbuf->end
-	   as we only wrap on a block boundary */
+	/*
+	 * fill in rest of block, no need to check pbuf->end
+	 * as we only wrap on a block boundary
+	 */
 	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
 		writeq(0, dest);
 		dest += sizeof(u64);
@@ -177,7 +178,7 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
  * "zero" shift - bit shift used to zero out upper bytes.  Input is
  * the count of LSB bytes to preserve.
  */
-#define zshift(x) (8 * (8-(x)))
+#define zshift(x) (8 * (8 - (x)))
 
 /*
  * "merge" shift - bit shift used to merge with carry bytes.  Input is
@@ -196,7 +197,7 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
  * o nbytes must not span a QW boundary
  */
 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
-							unsigned int nbytes)
+				  unsigned int nbytes)
 {
 	unsigned long off;
 
@@ -223,7 +224,7 @@ static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
  * o nbytes may span a QW boundary
  */
 static inline void read_extra_bytes(struct pio_buf *pbuf,
-					const void *from, unsigned int nbytes)
+				    const void *from, unsigned int nbytes)
 {
 	unsigned long off = (unsigned long)from & 0x7;
 	unsigned int room, xbytes;
@@ -235,7 +236,7 @@ static inline void read_extra_bytes(struct pio_buf *pbuf,
 	while (nbytes) {
 		/* find the number of bytes in this u64 */
 		room = 8 - off;	/* this u64 has room for this many bytes */
-		xbytes = nbytes > room ? room : nbytes;
+		xbytes = min(room, nbytes);
 
 		/*
 		 * shift down to zero lower bytes, shift up to zero upper
@@ -244,7 +245,7 @@ static inline void read_extra_bytes(struct pio_buf *pbuf,
 		pbuf->carry.val64 |= (((*(u64 *)from)
 					>> mshift(off))
 					<< zshift(xbytes))
-					>> zshift(xbytes+pbuf->carry_bytes);
+					>> zshift(xbytes + pbuf->carry_bytes);
 		off = 0;
 		pbuf->carry_bytes += xbytes;
 		nbytes -= xbytes;
@@ -362,7 +363,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
  * o from may _not_ be u64 aligned.
  */
 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
-							unsigned int nbytes)
+				  unsigned int nbytes)
 {
 	jcopy(&pbuf->carry.val8[0], from, nbytes);
 	pbuf->carry_bytes = nbytes;
@@ -377,7 +378,7 @@ static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
  * o nbytes may span a QW boundary
  */
 static inline void read_extra_bytes(struct pio_buf *pbuf,
-					const void *from, unsigned int nbytes)
+				    const void *from, unsigned int nbytes)
 {
 	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
 	pbuf->carry_bytes += nbytes;
@@ -411,7 +412,7 @@ static inline void merge_write8(
 
 	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
 	writeq(pbuf->carry.val64, dest);
-	jcopy(&pbuf->carry.val8[0], src+remainder, pbuf->carry_bytes);
+	jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
 }
 
 /*
@@ -433,7 +434,7 @@ static inline int carry_write8(struct pio_buf *pbuf, void *dest)
 		u64 zero = 0;
 
 		jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
-						8 - pbuf->carry_bytes);
+		      8 - pbuf->carry_bytes);
 		writeq(pbuf->carry.val64, dest);
 		return 1;
 	}
@@ -453,7 +454,7 @@ static inline int carry_write8(struct pio_buf *pbuf, void *dest)
  * @nbytes: bytes to copy
  */
 void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
-				const void *from, size_t nbytes)
+			const void *from, size_t nbytes)
 {
 	void __iomem *dest = pbuf->start + SOP_DISTANCE;
 	void __iomem *send = dest + PIO_BLOCK_SIZE;
@@ -463,11 +464,13 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
 	dest += sizeof(u64);
 
 	/* calculate where the QWORD data ends - in SOP=1 space */
-	dend = dest + ((nbytes>>3) * sizeof(u64));
+	dend = dest + ((nbytes >> 3) * sizeof(u64));
 
 	if (dend < send) {
-		/* all QWORD data is within the SOP block, does *not*
-		   reach the end of the SOP block */
+		/*
+		 * all QWORD data is within the SOP block, does *not*
+		 * reach the end of the SOP block
+		 */
 
 		while (dest < dend) {
 			writeq(*(u64 *)from, dest);
@@ -562,10 +565,12 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
 		void __iomem *send;		/* SOP end */
 		void __iomem *xend;
 
-		/* calculate the end of data or end of block, whichever
-		   comes first */
+		/*
+		 * calculate the end of data or end of block, whichever
+		 * comes first
+		 */
 		send = pbuf->start + PIO_BLOCK_SIZE;
-		xend = send < dend ? send : dend;
+		xend = min(send, dend);
 
 		/* shift up to SOP=1 space */
 		dest += SOP_DISTANCE;
@@ -639,13 +644,13 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
  * Must handle nbytes < 8.
  */
 static void mid_copy_straight(struct pio_buf *pbuf,
-						const void *from, size_t nbytes)
+			      const void *from, size_t nbytes)
 {
 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
 	void __iomem *dend;			/* 8-byte data end */
 
 	/* calculate 8-byte data end */
-	dend = dest + ((nbytes>>3) * sizeof(u64));
+	dend = dest + ((nbytes >> 3) * sizeof(u64));
 
 	if (pbuf->qw_written < PIO_BLOCK_QWS) {
 		/*
@@ -656,10 +661,12 @@ static void mid_copy_straight(struct pio_buf *pbuf,
 		void __iomem *send;		/* SOP end */
 		void __iomem *xend;
 
-		/* calculate the end of data or end of block, whichever
-		   comes first */
+		/*
+		 * calculate the end of data or end of block, whichever
+		 * comes first
+		 */
 		send = pbuf->start + PIO_BLOCK_SIZE;
-		xend = send < dend ? send : dend;
+		xend = min(send, dend);
 
 		/* shift up to SOP=1 space */
 		dest += SOP_DISTANCE;
@@ -713,7 +720,7 @@ static void mid_copy_straight(struct pio_buf *pbuf,
 	/* we know carry_bytes was zero on entry to this routine */
 	read_low_bytes(pbuf, from, nbytes & 0x7);
 
-	pbuf->qw_written += nbytes>>3;
+	pbuf->qw_written += nbytes >> 3;
 }
 
 /*
diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c
new file mode 100644
index 000000000000..0a1d074583e4
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/platform.c
@@ -0,0 +1,893 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "efivar.h"
+
+void get_platform_config(struct hfi1_devdata *dd)
+{
+	int ret = 0;
+	unsigned long size = 0;
+	u8 *temp_platform_config = NULL;
+
+	ret = read_hfi1_efi_var(dd, "configuration", &size,
+				(void **)&temp_platform_config);
+	if (ret) {
+		dd_dev_info(dd,
+			    "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
+			    __func__);
+		/* fall back to request firmware */
+		platform_config_load = 1;
+		goto bail;
+	}
+
+	dd->platform_config.data = temp_platform_config;
+	dd->platform_config.size = size;
+
+bail:
+	/* exit */;
+}
+
+void free_platform_config(struct hfi1_devdata *dd)
+{
+	if (!platform_config_load) {
+		/*
+		 * was loaded from EFI, release memory
+		 * allocated by read_efi_var
+		 */
+		kfree(dd->platform_config.data);
+	}
+	/*
+	 * else do nothing, dispose_firmware will release
+	 * struct firmware platform_config on driver exit
+	 */
+}
+
+int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
+{
+	u8 tx_ctrl_byte = on ? 0x0 : 0xF;
+	int ret = 0;
+
+	ret = qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_TX_CTRL_BYTE_OFFS,
+			 &tx_ctrl_byte, 1);
+	/* we expected 1, so consider 0 an error */
+	if (ret == 0)
+		ret = -EIO;
+	else if (ret == 1)
+		ret = 0;
+	return ret;
+}
+
+static int qual_power(struct hfi1_pportdata *ppd)
+{
+	u32 cable_power_class = 0, power_class_max = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+	int ret = 0;
+
+	ret = get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_SYSTEM_TABLE, 0,
+		SYSTEM_TABLE_QSFP_POWER_CLASS_MAX, &power_class_max, 4);
+	if (ret)
+		return ret;
+
+	if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
+		cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
+	else
+		cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+
+	if (cable_power_class <= 3 && cable_power_class > (power_class_max - 1))
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
+	else if (cable_power_class > 4 && cable_power_class > (power_class_max))
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
+	/*
+	 * cable_power_class will never have value 4 as this simply
+	 * means the high power settings are unused
+	 */
+
+	if (ppd->offline_disabled_reason ==
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: Port disabled due to system power restrictions\n",
+			__func__);
+		ret = -EPERM;
+	}
+	return ret;
+}
+
+static int qual_bitrate(struct hfi1_pportdata *ppd)
+{
+	u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G) &&
+	    cache[QSFP_NOM_BIT_RATE_250_OFFS] < 0x64)
+		ppd->offline_disabled_reason =
+			   HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY);
+
+	if ((lss & OPA_LINK_SPEED_12_5G) && (lse & OPA_LINK_SPEED_12_5G) &&
+	    cache[QSFP_NOM_BIT_RATE_100_OFFS] < 0x7D)
+		ppd->offline_disabled_reason =
+			   HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY);
+
+	if (ppd->offline_disabled_reason ==
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: Cable failed bitrate check, disabling port\n",
+			__func__);
+		return -EPERM;
+	}
+	return 0;
+}
+
+static int set_qsfp_high_power(struct hfi1_pportdata *ppd)
+{
+	u8 cable_power_class = 0, power_ctrl_byte = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+	int ret;
+
+	if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
+		cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
+	else
+		cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+
+	if (cable_power_class) {
+		power_ctrl_byte = cache[QSFP_PWR_CTRL_BYTE_OFFS];
+
+		power_ctrl_byte |= 1;
+		power_ctrl_byte &= ~(0x2);
+
+		ret = qsfp_write(ppd, ppd->dd->hfi1_id,
+				 QSFP_PWR_CTRL_BYTE_OFFS,
+				 &power_ctrl_byte, 1);
+		if (ret != 1)
+			return -EIO;
+
+		if (cable_power_class > 3) {
+			/* > power class 4*/
+			power_ctrl_byte |= (1 << 2);
+			ret = qsfp_write(ppd, ppd->dd->hfi1_id,
+					 QSFP_PWR_CTRL_BYTE_OFFS,
+					 &power_ctrl_byte, 1);
+			if (ret != 1)
+				return -EIO;
+		}
+
+		/* SFF 8679 rev 1.7 LPMode Deassert time */
+		msleep(300);
+	}
+	return 0;
+}
+
+static void apply_rx_cdr(struct hfi1_pportdata *ppd,
+			 u32 rx_preset_index,
+			 u8 *cdr_ctrl_byte)
+{
+	u32 rx_preset;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	if (!((cache[QSFP_MOD_PWR_OFFS] & 0x4) &&
+	      (cache[QSFP_CDR_INFO_OFFS] & 0x40)))
+		return;
+
+	/* rx_preset preset to zero to catch error */
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+		rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
+		&rx_preset, 4);
+
+	if (!rx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: RX_CDR_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+		rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR,
+		&rx_preset, 4);
+
+	/* Expand cdr setting to all 4 lanes */
+	rx_preset = (rx_preset | (rx_preset << 1) |
+			(rx_preset << 2) | (rx_preset << 3));
+
+	if (rx_preset) {
+		*cdr_ctrl_byte |= rx_preset;
+	} else {
+		*cdr_ctrl_byte &= rx_preset;
+		/* Preserve current TX CDR status */
+		*cdr_ctrl_byte |= (cache[QSFP_CDR_CTRL_BYTE_OFFS] & 0xF0);
+	}
+}
+
+static void apply_tx_cdr(struct hfi1_pportdata *ppd,
+			 u32 tx_preset_index,
+			 u8 *ctr_ctrl_byte)
+{
+	u32 tx_preset;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	if (!((cache[QSFP_MOD_PWR_OFFS] & 0x8) &&
+	      (cache[QSFP_CDR_INFO_OFFS] & 0x80)))
+		return;
+
+	get_platform_config_field(
+		ppd->dd,
+		PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index,
+		TX_PRESET_TABLE_QSFP_TX_CDR_APPLY, &tx_preset, 4);
+
+	if (!tx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: TX_CDR_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+		ppd->dd,
+		PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index,
+		TX_PRESET_TABLE_QSFP_TX_CDR, &tx_preset, 4);
+
+	/* Expand cdr setting to all 4 lanes */
+	tx_preset = (tx_preset | (tx_preset << 1) |
+			(tx_preset << 2) | (tx_preset << 3));
+
+	if (tx_preset)
+		*ctr_ctrl_byte |= (tx_preset << 4);
+	else
+		/* Preserve current/determined RX CDR status */
+		*ctr_ctrl_byte &= ((tx_preset << 4) | 0xF);
+}
+
+static void apply_cdr_settings(
+		struct hfi1_pportdata *ppd, u32 rx_preset_index,
+		u32 tx_preset_index)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+	u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS];
+
+	apply_rx_cdr(ppd, rx_preset_index, &cdr_ctrl_byte);
+
+	apply_tx_cdr(ppd, tx_preset_index, &cdr_ctrl_byte);
+
+	qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS,
+		   &cdr_ctrl_byte, 1);
+}
+
+static void apply_tx_eq_auto(struct hfi1_pportdata *ppd)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+	u8 tx_eq;
+
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x8))
+		return;
+	/* Disable adaptive TX EQ if present */
+	tx_eq = cache[(128 * 3) + 241];
+	tx_eq &= 0xF0;
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 241, &tx_eq, 1);
+}
+
+static void apply_tx_eq_prog(struct hfi1_pportdata *ppd, u32 tx_preset_index)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+	u32 tx_preset;
+	u8 tx_eq;
+
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x4))
+		return;
+
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index, TX_PRESET_TABLE_QSFP_TX_EQ_APPLY,
+		&tx_preset, 4);
+	if (!tx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: TX_EQ_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+			tx_preset_index, TX_PRESET_TABLE_QSFP_TX_EQ,
+			&tx_preset, 4);
+
+	if (((cache[(128 * 3) + 224] & 0xF0) >> 4) < tx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: TX EQ %x unsupported\n",
+			__func__, tx_preset);
+
+		dd_dev_info(
+			ppd->dd,
+			"%s: Applying EQ %x\n",
+			__func__, cache[608] & 0xF0);
+
+		tx_preset = (cache[608] & 0xF0) >> 4;
+	}
+
+	tx_eq = tx_preset | (tx_preset << 4);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 234, &tx_eq, 1);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 235, &tx_eq, 1);
+}
+
+static void apply_rx_eq_emp(struct hfi1_pportdata *ppd, u32 rx_preset_index)
+{
+	u32 rx_preset;
+	u8 rx_eq, *cache = ppd->qsfp_info.cache;
+
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x2))
+		return;
+	get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+			rx_preset_index, RX_PRESET_TABLE_QSFP_RX_EMP_APPLY,
+			&rx_preset, 4);
+
+	if (!rx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: RX_EMP_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+		rx_preset_index, RX_PRESET_TABLE_QSFP_RX_EMP,
+		&rx_preset, 4);
+
+	if ((cache[(128 * 3) + 224] & 0xF) < rx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: Requested RX EMP %x\n",
+			__func__, rx_preset);
+
+		dd_dev_info(
+			ppd->dd,
+			"%s: Applying supported EMP %x\n",
+			__func__, cache[608] & 0xF);
+
+		rx_preset = cache[608] & 0xF;
+	}
+
+	rx_eq = rx_preset | (rx_preset << 4);
+
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 236, &rx_eq, 1);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 237, &rx_eq, 1);
+}
+
+static void apply_eq_settings(struct hfi1_pportdata *ppd,
+			      u32 rx_preset_index, u32 tx_preset_index)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+
+	/* no point going on w/o a page 3 */
+	if (cache[2] & 4) {
+		dd_dev_info(ppd->dd,
+			    "%s: Upper page 03 not present\n",
+			    __func__);
+		return;
+	}
+
+	apply_tx_eq_auto(ppd);
+
+	apply_tx_eq_prog(ppd, tx_preset_index);
+
+	apply_rx_eq_emp(ppd, rx_preset_index);
+}
+
+static void apply_rx_amplitude_settings(
+		struct hfi1_pportdata *ppd, u32 rx_preset_index,
+		u32 tx_preset_index)
+{
+	u32 rx_preset;
+	u8 rx_amp = 0, i = 0, preferred = 0, *cache = ppd->qsfp_info.cache;
+
+	/* no point going on w/o a page 3 */
+	if (cache[2] & 4) {
+		dd_dev_info(ppd->dd,
+			    "%s: Upper page 03 not present\n",
+			    __func__);
+		return;
+	}
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x1)) {
+		dd_dev_info(ppd->dd,
+			    "%s: RX_AMP_APPLY is set to disabled\n",
+			    __func__);
+		return;
+	}
+
+	get_platform_config_field(ppd->dd,
+				  PLATFORM_CONFIG_RX_PRESET_TABLE,
+				  rx_preset_index,
+				  RX_PRESET_TABLE_QSFP_RX_AMP_APPLY,
+				  &rx_preset, 4);
+
+	if (!rx_preset) {
+		dd_dev_info(ppd->dd,
+			    "%s: RX_AMP_APPLY is set to disabled\n",
+			    __func__);
+		return;
+	}
+	get_platform_config_field(ppd->dd,
+				  PLATFORM_CONFIG_RX_PRESET_TABLE,
+				  rx_preset_index,
+				  RX_PRESET_TABLE_QSFP_RX_AMP,
+				  &rx_preset, 4);
+
+	dd_dev_info(ppd->dd,
+		    "%s: Requested RX AMP %x\n",
+		    __func__,
+		    rx_preset);
+
+	for (i = 0; i < 4; i++) {
+		if (cache[(128 * 3) + 225] & (1 << i)) {
+			preferred = i;
+			if (preferred == rx_preset)
+				break;
+		}
+	}
+
+	/*
+	 * Verify that preferred RX amplitude is not just a
+	 * fall through of the default
+	 */
+	if (!preferred && !(cache[(128 * 3) + 225] & 0x1)) {
+		dd_dev_info(ppd->dd, "No supported RX AMP, not applying\n");
+		return;
+	}
+
+	dd_dev_info(ppd->dd,
+		    "%s: Applying RX AMP %x\n", __func__, preferred);
+
+	rx_amp = preferred | (preferred << 4);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 238, &rx_amp, 1);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 239, &rx_amp, 1);
+}
+
+#define OPA_INVALID_INDEX 0xFFF
+
+static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id,
+			   u32 config_data, const char *message)
+{
+	u8 i;
+	int ret = HCMD_SUCCESS;
+
+	for (i = 0; i < 4; i++) {
+		ret = load_8051_config(ppd->dd, field_id, i, config_data);
+		if (ret != HCMD_SUCCESS) {
+			dd_dev_err(
+				ppd->dd,
+				"%s: %s for lane %u failed\n",
+				message, __func__, i);
+		}
+	}
+}
+
+static void apply_tunings(
+		struct hfi1_pportdata *ppd, u32 tx_preset_index,
+		u8 tuning_method, u32 total_atten, u8 limiting_active)
+{
+	int ret = 0;
+	u32 config_data = 0, tx_preset = 0;
+	u8 precur = 0, attn = 0, postcur = 0, external_device_config = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	/* Enable external device config if channel is limiting active */
+	read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
+			 GENERAL_CONFIG, &config_data);
+	config_data |= limiting_active;
+	ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
+			       GENERAL_CONFIG, config_data);
+	if (ret != HCMD_SUCCESS)
+		dd_dev_err(
+			ppd->dd,
+			"%s: Failed to set enable external device config\n",
+			__func__);
+
+	config_data = 0; /* re-init  */
+	/* Pass tuning method to 8051 */
+	read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
+			 &config_data);
+	config_data |= tuning_method;
+	ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
+			       config_data);
+	if (ret != HCMD_SUCCESS)
+		dd_dev_err(ppd->dd, "%s: Failed to set tuning method\n",
+			   __func__);
+
+	/* Set same channel loss for both TX and RX */
+	config_data = 0 | (total_atten << 16) | (total_atten << 24);
+	apply_tx_lanes(ppd, CHANNEL_LOSS_SETTINGS, config_data,
+		       "Setting channel loss");
+
+	/* Inform 8051 of cable capabilities */
+	if (ppd->qsfp_info.cache_valid) {
+		external_device_config =
+			((cache[QSFP_MOD_PWR_OFFS] & 0x4) << 3) |
+			((cache[QSFP_MOD_PWR_OFFS] & 0x8) << 2) |
+			((cache[QSFP_EQ_INFO_OFFS] & 0x2) << 1) |
+			(cache[QSFP_EQ_INFO_OFFS] & 0x4);
+		ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
+				       GENERAL_CONFIG, &config_data);
+		/* Clear, then set the external device config field */
+		config_data &= ~(0xFF << 24);
+		config_data |= (external_device_config << 24);
+		ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
+				       GENERAL_CONFIG, config_data);
+		if (ret != HCMD_SUCCESS)
+			dd_dev_info(ppd->dd,
+				    "%s: Failed set ext device config params\n",
+				    __func__);
+	}
+
+	if (tx_preset_index == OPA_INVALID_INDEX) {
+		if (ppd->port_type == PORT_TYPE_QSFP && limiting_active)
+			dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n",
+				    __func__);
+		return;
+	}
+
+	/* Following for limiting active channels only */
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index,
+		TX_PRESET_TABLE_PRECUR, &tx_preset, 4);
+	precur = tx_preset;
+
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index, TX_PRESET_TABLE_ATTN, &tx_preset, 4);
+	attn = tx_preset;
+
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4);
+	postcur = tx_preset;
+
+	config_data = precur | (attn << 8) | (postcur << 16);
+
+	apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data,
+		       "Applying TX settings");
+}
+
+static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
+			    u32 *ptr_rx_preset, u32 *ptr_total_atten)
+{
+	int ret;
+	u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
+	if (ret) {
+		dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n",
+			   __func__, (int)ppd->dd->hfi1_id);
+		return ret;
+	}
+
+	ppd->qsfp_info.limiting_active = 1;
+
+	ret = set_qsfp_tx(ppd, 0);
+	if (ret)
+		goto bail_unlock;
+
+	ret = qual_power(ppd);
+	if (ret)
+		goto bail_unlock;
+
+	ret = qual_bitrate(ppd);
+	if (ret)
+		goto bail_unlock;
+
+	if (ppd->qsfp_info.reset_needed) {
+		reset_qsfp(ppd);
+		ppd->qsfp_info.reset_needed = 0;
+		refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+	} else {
+		ppd->qsfp_info.reset_needed = 1;
+	}
+
+	ret = set_qsfp_high_power(ppd);
+	if (ret)
+		goto bail_unlock;
+
+	if (cache[QSFP_EQ_INFO_OFFS] & 0x4) {
+		ret = get_platform_config_field(
+			ppd->dd,
+			PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ,
+			ptr_tx_preset, 4);
+		if (ret) {
+			*ptr_tx_preset = OPA_INVALID_INDEX;
+			goto bail_unlock;
+		}
+	} else {
+		ret = get_platform_config_field(
+			ppd->dd,
+			PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ,
+			ptr_tx_preset, 4);
+		if (ret) {
+			*ptr_tx_preset = OPA_INVALID_INDEX;
+			goto bail_unlock;
+		}
+	}
+
+	ret = get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+		PORT_TABLE_RX_PRESET_IDX, ptr_rx_preset, 4);
+	if (ret) {
+		*ptr_rx_preset = OPA_INVALID_INDEX;
+		goto bail_unlock;
+	}
+
+	if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G))
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_25G, ptr_total_atten, 4);
+	else if ((lss & OPA_LINK_SPEED_12_5G) && (lse & OPA_LINK_SPEED_12_5G))
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_12G, ptr_total_atten, 4);
+
+	apply_cdr_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+	apply_eq_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+	apply_rx_amplitude_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+	ret = set_qsfp_tx(ppd, 1);
+
+bail_unlock:
+	release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+	return ret;
+}
+
+static int tune_qsfp(struct hfi1_pportdata *ppd,
+		     u32 *ptr_tx_preset, u32 *ptr_rx_preset,
+		     u8 *ptr_tuning_method, u32 *ptr_total_atten)
+{
+	u32 cable_atten = 0, remote_atten = 0, platform_atten = 0;
+	u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+	int ret = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
+	case 0xA ... 0xB:
+		ret = get_platform_config_field(
+			ppd->dd,
+			PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_25G,
+			&platform_atten, 4);
+		if (ret)
+			return ret;
+
+		if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G))
+			cable_atten = cache[QSFP_CU_ATTEN_12G_OFFS];
+		else if ((lss & OPA_LINK_SPEED_12_5G) &&
+			 (lse & OPA_LINK_SPEED_12_5G))
+			cable_atten = cache[QSFP_CU_ATTEN_7G_OFFS];
+
+		/* Fallback to configured attenuation if cable memory is bad */
+		if (cable_atten == 0 || cable_atten > 36) {
+			ret = get_platform_config_field(
+				ppd->dd,
+				PLATFORM_CONFIG_SYSTEM_TABLE, 0,
+				SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G,
+				&cable_atten, 4);
+			if (ret)
+				return ret;
+		}
+
+		ret = get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_REMOTE_ATTEN_25G, &remote_atten, 4);
+		if (ret)
+			return ret;
+
+		*ptr_total_atten = platform_atten + cable_atten + remote_atten;
+
+		*ptr_tuning_method = OPA_PASSIVE_TUNING;
+		break;
+	case 0x0 ... 0x9: /* fallthrough */
+	case 0xC: /* fallthrough */
+	case 0xE:
+		ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset,
+				       ptr_total_atten);
+		if (ret)
+			return ret;
+
+		*ptr_tuning_method = OPA_ACTIVE_TUNING;
+		break;
+	case 0xD: /* fallthrough */
+	case 0xF:
+	default:
+		dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n",
+			    __func__);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * This function communicates its success or failure via ppd->driver_link_ready
+ * Thus, it depends on its association with start_link(...) which checks
+ * driver_link_ready before proceeding with the link negotiation and
+ * initialization process.
+ */
+void tune_serdes(struct hfi1_pportdata *ppd)
+{
+	int ret = 0;
+	u32 total_atten = 0;
+	u32 remote_atten = 0, platform_atten = 0;
+	u32 rx_preset_index, tx_preset_index;
+	u8 tuning_method = 0, limiting_active = 0;
+	struct hfi1_devdata *dd = ppd->dd;
+
+	rx_preset_index = OPA_INVALID_INDEX;
+	tx_preset_index = OPA_INVALID_INDEX;
+
+	/* the link defaults to enabled */
+	ppd->link_enabled = 1;
+	/* the driver link ready state defaults to not ready */
+	ppd->driver_link_ready = 0;
+	ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
+
+	/* Skip the tuning for testing (loopback != none) and simulations */
+	if (loopback != LOOPBACK_NONE ||
+	    ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
+		ppd->driver_link_ready = 1;
+		return;
+	}
+
+	ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+					PORT_TABLE_PORT_TYPE, &ppd->port_type,
+					4);
+	if (ret)
+		ppd->port_type = PORT_TYPE_UNKNOWN;
+
+	switch (ppd->port_type) {
+	case PORT_TYPE_DISCONNECTED:
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED);
+		dd_dev_info(dd, "%s: Port disconnected, disabling port\n",
+			    __func__);
+		goto bail;
+	case PORT_TYPE_FIXED:
+		/* platform_atten, remote_atten pre-zeroed to catch error */
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_25G, &platform_atten, 4);
+
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_REMOTE_ATTEN_25G, &remote_atten, 4);
+
+		total_atten = platform_atten + remote_atten;
+
+		tuning_method = OPA_PASSIVE_TUNING;
+		break;
+	case PORT_TYPE_VARIABLE:
+		if (qsfp_mod_present(ppd)) {
+			/*
+			 * platform_atten, remote_atten pre-zeroed to
+			 * catch error
+			 */
+			get_platform_config_field(
+				ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+				PORT_TABLE_LOCAL_ATTEN_25G,
+				&platform_atten, 4);
+
+			get_platform_config_field(
+				ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+				PORT_TABLE_REMOTE_ATTEN_25G,
+				&remote_atten, 4);
+
+			total_atten = platform_atten + remote_atten;
+
+			tuning_method = OPA_PASSIVE_TUNING;
+		} else
+			ppd->offline_disabled_reason =
+			     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_CHASSIS_CONFIG);
+		break;
+	case PORT_TYPE_QSFP:
+		if (qsfp_mod_present(ppd)) {
+			refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+
+			if (ppd->qsfp_info.cache_valid) {
+				ret = tune_qsfp(ppd,
+						&tx_preset_index,
+						&rx_preset_index,
+						&tuning_method,
+						&total_atten);
+
+				/*
+				 * We may have modified the QSFP memory, so
+				 * update the cache to reflect the changes
+				 */
+				refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+				if (ret)
+					goto bail;
+
+				limiting_active =
+						ppd->qsfp_info.limiting_active;
+			} else {
+				dd_dev_err(dd,
+					   "%s: Reading QSFP memory failed\n",
+					   __func__);
+				goto bail;
+			}
+		} else
+			ppd->offline_disabled_reason =
+			   HFI1_ODR_MASK(
+				OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
+		break;
+	default:
+		dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
+		ppd->port_type = PORT_TYPE_UNKNOWN;
+		tuning_method = OPA_UNKNOWN_TUNING;
+		total_atten = 0;
+		limiting_active = 0;
+		tx_preset_index = OPA_INVALID_INDEX;
+		break;
+	}
+
+	if (ppd->offline_disabled_reason ==
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
+		apply_tunings(ppd, tx_preset_index, tuning_method,
+			      total_atten, limiting_active);
+
+	if (!ret)
+		ppd->driver_link_ready = 1;
+
+	return;
+bail:
+	ppd->driver_link_ready = 0;
+}
diff --git a/drivers/staging/rdma/hfi1/platform_config.h b/drivers/staging/rdma/hfi1/platform.h
index 8a94a8342052..19620cf546d5 100644
--- a/drivers/staging/rdma/hfi1/platform_config.h
+++ b/drivers/staging/rdma/hfi1/platform.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -47,8 +44,8 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  */
-#ifndef __PLATFORM_CONFIG_H
-#define __PLATFORM_CONFIG_H
+#ifndef __PLATFORM_H
+#define __PLATFORM_H
 
 #define METADATA_TABLE_FIELD_START_SHIFT		0
 #define METADATA_TABLE_FIELD_START_LEN_BITS		15
@@ -94,17 +91,18 @@ enum platform_config_system_table_fields {
 enum platform_config_port_table_fields {
 	PORT_TABLE_RESERVED,
 	PORT_TABLE_PORT_TYPE,
-	PORT_TABLE_ATTENUATION_12G,
-	PORT_TABLE_ATTENUATION_25G,
+	PORT_TABLE_LOCAL_ATTEN_12G,
+	PORT_TABLE_LOCAL_ATTEN_25G,
 	PORT_TABLE_LINK_SPEED_SUPPORTED,
 	PORT_TABLE_LINK_WIDTH_SUPPORTED,
+	PORT_TABLE_AUTO_LANE_SHEDDING_ENABLED,
+	PORT_TABLE_EXTERNAL_LOOPBACK_ALLOWED,
 	PORT_TABLE_VL_CAP,
 	PORT_TABLE_MTU_CAP,
 	PORT_TABLE_TX_LANE_ENABLE_MASK,
 	PORT_TABLE_LOCAL_MAX_TIMEOUT,
-	PORT_TABLE_AUTO_LANE_SHEDDING_ENABLED,
-	PORT_TABLE_EXTERNAL_LOOPBACK_ALLOWED,
-	PORT_TABLE_TX_PRESET_IDX_PASSIVE_CU,
+	PORT_TABLE_REMOTE_ATTEN_12G,
+	PORT_TABLE_REMOTE_ATTEN_25G,
 	PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ,
 	PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ,
 	PORT_TABLE_RX_PRESET_IDX,
@@ -115,10 +113,10 @@ enum platform_config_port_table_fields {
 enum platform_config_rx_preset_table_fields {
 	RX_PRESET_TABLE_RESERVED,
 	RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
-	RX_PRESET_TABLE_QSFP_RX_EQ_APPLY,
+	RX_PRESET_TABLE_QSFP_RX_EMP_APPLY,
 	RX_PRESET_TABLE_QSFP_RX_AMP_APPLY,
 	RX_PRESET_TABLE_QSFP_RX_CDR,
-	RX_PRESET_TABLE_QSFP_RX_EQ,
+	RX_PRESET_TABLE_QSFP_RX_EMP,
 	RX_PRESET_TABLE_QSFP_RX_AMP,
 	RX_PRESET_TABLE_MAX
 };
@@ -149,6 +147,11 @@ enum platform_config_variable_settings_table_fields {
 	VARIABLE_SETTINGS_TABLE_MAX
 };
 
+struct platform_config {
+	size_t size;
+	const u8 *data;
+};
+
 struct platform_config_data {
 	u32 *table;
 	u32 *table_metadata;
@@ -179,9 +182,11 @@ static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
  * fields defined for each table above
  */
 
-/*=====================================================
+/*
+ * =====================================================
  *  System table encodings
- *====================================================*/
+ * =====================================================
+ */
 #define PLATFORM_CONFIG_MAGIC_NUM		0x3d4f5041
 #define PLATFORM_CONFIG_MAGIC_NUMBER_LEN	4
 
@@ -199,12 +204,13 @@ enum platform_config_qsfp_power_class_encoding {
 	QSFP_POWER_CLASS_7
 };
 
-
-/*=====================================================
+/*
+ * ====================================================
  *  Port table encodings
- *==================================================== */
+ * ====================================================
+ */
 enum platform_config_port_type_encoding {
-	PORT_TYPE_RESERVED,
+	PORT_TYPE_UNKNOWN,
 	PORT_TYPE_DISCONNECTED,
 	PORT_TYPE_FIXED,
 	PORT_TYPE_VARIABLE,
@@ -283,4 +289,16 @@ enum platform_config_local_max_timeout_encoding {
 	LOCAL_MAX_TIMEOUT_1000_S
 };
 
-#endif			/*__PLATFORM_CONFIG_H*/
+enum link_tuning_encoding {
+	OPA_PASSIVE_TUNING,
+	OPA_ACTIVE_TUNING,
+	OPA_UNKNOWN_TUNING
+};
+
+/* platform.c */
+void get_platform_config(struct hfi1_devdata *dd);
+void free_platform_config(struct hfi1_devdata *dd);
+int set_qsfp_tx(struct hfi1_pportdata *ppd, int on);
+void tune_serdes(struct hfi1_pportdata *ppd);
+
+#endif			/*__PLATFORM_H*/
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c
index ce036810d576..29a5ad28019b 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/staging/rdma/hfi1/qp.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -54,31 +51,32 @@
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/seq_file.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
 
 #include "hfi.h"
 #include "qp.h"
 #include "trace.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 
-#define BITS_PER_PAGE           (PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK      (BITS_PER_PAGE-1)
-
-static unsigned int hfi1_qp_table_size = 256;
+unsigned int hfi1_qp_table_size = 256;
 module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
 
-static void flush_tx_list(struct hfi1_qp *qp);
+static void flush_tx_list(struct rvt_qp *qp);
 static int iowait_sleep(
 	struct sdma_engine *sde,
 	struct iowait *wait,
 	struct sdma_txreq *stx,
 	unsigned seq);
 static void iowait_wakeup(struct iowait *wait, int reason);
+static void iowait_sdma_drained(struct iowait *wait);
+static void qp_pio_drain(struct rvt_qp *qp);
 
-static inline unsigned mk_qpn(struct hfi1_qpn_table *qpt,
-			      struct qpn_map *map, unsigned off)
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+			      struct rvt_qpn_map *map, unsigned off)
 {
-	return (map - qpt->map) * BITS_PER_PAGE + off;
+	return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
 }
 
 /*
@@ -118,437 +116,15 @@ static const u16 credit_table[31] = {
 	32768                   /* 1E */
 };
 
-static void get_map_page(struct hfi1_qpn_table *qpt, struct qpn_map *map)
-{
-	unsigned long page = get_zeroed_page(GFP_KERNEL);
-
-	/*
-	 * Free the page if someone raced with us installing it.
-	 */
-
-	spin_lock(&qpt->lock);
-	if (map->page)
-		free_page(page);
-	else
-		map->page = (void *)page;
-	spin_unlock(&qpt->lock);
-}
-
-/*
- * Allocate the next available QPN or
- * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
- */
-static int alloc_qpn(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt,
-		     enum ib_qp_type type, u8 port)
-{
-	u32 i, offset, max_scan, qpn;
-	struct qpn_map *map;
-	u32 ret;
-
-	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
-		unsigned n;
-
-		ret = type == IB_QPT_GSI;
-		n = 1 << (ret + 2 * (port - 1));
-		spin_lock(&qpt->lock);
-		if (qpt->flags & n)
-			ret = -EINVAL;
-		else
-			qpt->flags |= n;
-		spin_unlock(&qpt->lock);
-		goto bail;
-	}
-
-	qpn = qpt->last + qpt->incr;
-	if (qpn >= QPN_MAX)
-		qpn = qpt->incr | ((qpt->last & 1) ^ 1);
-	/* offset carries bit 0 */
-	offset = qpn & BITS_PER_PAGE_MASK;
-	map = &qpt->map[qpn / BITS_PER_PAGE];
-	max_scan = qpt->nmaps - !offset;
-	for (i = 0;;) {
-		if (unlikely(!map->page)) {
-			get_map_page(qpt, map);
-			if (unlikely(!map->page))
-				break;
-		}
-		do {
-			if (!test_and_set_bit(offset, map->page)) {
-				qpt->last = qpn;
-				ret = qpn;
-				goto bail;
-			}
-			offset += qpt->incr;
-			/*
-			 * This qpn might be bogus if offset >= BITS_PER_PAGE.
-			 * That is OK.   It gets re-assigned below
-			 */
-			qpn = mk_qpn(qpt, map, offset);
-		} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
-		/*
-		 * In order to keep the number of pages allocated to a
-		 * minimum, we scan the all existing pages before increasing
-		 * the size of the bitmap table.
-		 */
-		if (++i > max_scan) {
-			if (qpt->nmaps == QPNMAP_ENTRIES)
-				break;
-			map = &qpt->map[qpt->nmaps++];
-			/* start at incr with current bit 0 */
-			offset = qpt->incr | (offset & 1);
-		} else if (map < &qpt->map[qpt->nmaps]) {
-			++map;
-			/* start at incr with current bit 0 */
-			offset = qpt->incr | (offset & 1);
-		} else {
-			map = &qpt->map[0];
-			/* wrap to first map page, invert bit 0 */
-			offset = qpt->incr | ((offset & 1) ^ 1);
-		}
-		/* there can be no bits at shift and below */
-		WARN_ON(offset & (dd->qos_shift - 1));
-		qpn = mk_qpn(qpt, map, offset);
-	}
-
-	ret = -ENOMEM;
-
-bail:
-	return ret;
-}
-
-static void free_qpn(struct hfi1_qpn_table *qpt, u32 qpn)
-{
-	struct qpn_map *map;
-
-	map = qpt->map + qpn / BITS_PER_PAGE;
-	if (map->page)
-		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-}
-
-/*
- * Put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static void insert_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
-{
-	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	unsigned long flags;
-
-	atomic_inc(&qp->refcount);
-	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
-
-	if (qp->ibqp.qp_num <= 1) {
-		rcu_assign_pointer(ibp->qp[qp->ibqp.qp_num], qp);
-	} else {
-		u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);
-
-		qp->next = dev->qp_dev->qp_table[n];
-		rcu_assign_pointer(dev->qp_dev->qp_table[n], qp);
-		trace_hfi1_qpinsert(qp, n);
-	}
-
-	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
-}
-
-/*
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void remove_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
-{
-	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);
-	unsigned long flags;
-	int removed = 1;
-
-	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
-
-	if (rcu_dereference_protected(ibp->qp[0],
-			lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp[0], NULL);
-	} else if (rcu_dereference_protected(ibp->qp[1],
-			lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp[1], NULL);
-	} else {
-		struct hfi1_qp *q;
-		struct hfi1_qp __rcu **qpp;
-
-		removed = 0;
-		qpp = &dev->qp_dev->qp_table[n];
-		for (; (q = rcu_dereference_protected(*qpp,
-				lockdep_is_held(&dev->qp_dev->qpt_lock)))
-					!= NULL;
-				qpp = &q->next)
-			if (q == qp) {
-				RCU_INIT_POINTER(*qpp,
-				 rcu_dereference_protected(qp->next,
-				 lockdep_is_held(&dev->qp_dev->qpt_lock)));
-				removed = 1;
-				trace_hfi1_qpremove(qp, n);
-				break;
-			}
-	}
-
-	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
-	if (removed) {
-		synchronize_rcu();
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-}
-
-/**
- * free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
- */
-static unsigned free_all_qps(struct hfi1_devdata *dd)
-{
-	struct hfi1_ibdev *dev = &dd->verbs_dev;
-	unsigned long flags;
-	struct hfi1_qp *qp;
-	unsigned n, qp_inuse = 0;
-
-	for (n = 0; n < dd->num_pports; n++) {
-		struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;
-
-		if (!hfi1_mcast_tree_empty(ibp))
-			qp_inuse++;
-		rcu_read_lock();
-		if (rcu_dereference(ibp->qp[0]))
-			qp_inuse++;
-		if (rcu_dereference(ibp->qp[1]))
-			qp_inuse++;
-		rcu_read_unlock();
-	}
-
-	if (!dev->qp_dev)
-		goto bail;
-	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
-	for (n = 0; n < dev->qp_dev->qp_table_size; n++) {
-		qp = rcu_dereference_protected(dev->qp_dev->qp_table[n],
-			lockdep_is_held(&dev->qp_dev->qpt_lock));
-		RCU_INIT_POINTER(dev->qp_dev->qp_table[n], NULL);
-
-		for (; qp; qp = rcu_dereference_protected(qp->next,
-				lockdep_is_held(&dev->qp_dev->qpt_lock)))
-			qp_inuse++;
-	}
-	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
-	synchronize_rcu();
-bail:
-	return qp_inuse;
-}
-
-/**
- * reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void reset_qp(struct hfi1_qp *qp, enum ib_qp_type type)
-{
-	qp->remote_qpn = 0;
-	qp->qkey = 0;
-	qp->qp_access_flags = 0;
-	iowait_init(
-		&qp->s_iowait,
-		1,
-		hfi1_do_send,
-		iowait_sleep,
-		iowait_wakeup);
-	qp->s_flags &= HFI1_S_SIGNAL_REQ_WR;
-	qp->s_hdrwords = 0;
-	qp->s_wqe = NULL;
-	qp->s_draining = 0;
-	qp->s_next_psn = 0;
-	qp->s_last_psn = 0;
-	qp->s_sending_psn = 0;
-	qp->s_sending_hpsn = 0;
-	qp->s_psn = 0;
-	qp->r_psn = 0;
-	qp->r_msn = 0;
-	if (type == IB_QPT_RC) {
-		qp->s_state = IB_OPCODE_RC_SEND_LAST;
-		qp->r_state = IB_OPCODE_RC_SEND_LAST;
-	} else {
-		qp->s_state = IB_OPCODE_UC_SEND_LAST;
-		qp->r_state = IB_OPCODE_UC_SEND_LAST;
-	}
-	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-	qp->r_nak_state = 0;
-	qp->r_adefered = 0;
-	qp->r_aflags = 0;
-	qp->r_flags = 0;
-	qp->s_head = 0;
-	qp->s_tail = 0;
-	qp->s_cur = 0;
-	qp->s_acked = 0;
-	qp->s_last = 0;
-	qp->s_ssn = 1;
-	qp->s_lsn = 0;
-	clear_ahg(qp);
-	qp->s_mig_state = IB_MIG_MIGRATED;
-	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
-	qp->r_head_ack_queue = 0;
-	qp->s_tail_ack_queue = 0;
-	qp->s_num_rd_atomic = 0;
-	if (qp->r_rq.wq) {
-		qp->r_rq.wq->head = 0;
-		qp->r_rq.wq->tail = 0;
-	}
-	qp->r_sge.num_sge = 0;
-}
-
-static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends)
-{
-	unsigned n;
-
-	if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
-		hfi1_put_ss(&qp->s_rdma_read_sge);
-
-	hfi1_put_ss(&qp->r_sge);
-
-	if (clr_sends) {
-		while (qp->s_last != qp->s_head) {
-			struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-			unsigned i;
-
-			for (i = 0; i < wqe->wr.num_sge; i++) {
-				struct hfi1_sge *sge = &wqe->sg_list[i];
-
-				hfi1_put_mr(sge->mr);
-			}
-			if (qp->ibqp.qp_type == IB_QPT_UD ||
-			    qp->ibqp.qp_type == IB_QPT_SMI ||
-			    qp->ibqp.qp_type == IB_QPT_GSI)
-				atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
-			if (++qp->s_last >= qp->s_size)
-				qp->s_last = 0;
-		}
-		if (qp->s_rdma_mr) {
-			hfi1_put_mr(qp->s_rdma_mr);
-			qp->s_rdma_mr = NULL;
-		}
-	}
-
-	if (qp->ibqp.qp_type != IB_QPT_RC)
-		return;
-
-	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
-		struct hfi1_ack_entry *e = &qp->s_ack_queue[n];
-
-		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
-		    e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
-			e->rdma_sge.mr = NULL;
-		}
-	}
-}
-
-/**
- * hfi1_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err)
+static void flush_tx_list(struct rvt_qp *qp)
 {
-	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ib_wc wc;
-	int ret = 0;
-
-	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
-		goto bail;
-
-	qp->state = IB_QPS_ERR;
-
-	if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
-	}
-
-	if (qp->s_flags & HFI1_S_ANY_WAIT_SEND)
-		qp->s_flags &= ~HFI1_S_ANY_WAIT_SEND;
-
-	write_seqlock(&dev->iowait_lock);
-	if (!list_empty(&qp->s_iowait.list) && !(qp->s_flags & HFI1_S_BUSY)) {
-		qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
-		list_del_init(&qp->s_iowait.list);
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-	write_sequnlock(&dev->iowait_lock);
-
-	if (!(qp->s_flags & HFI1_S_BUSY)) {
-		qp->s_hdrwords = 0;
-		if (qp->s_rdma_mr) {
-			hfi1_put_mr(qp->s_rdma_mr);
-			qp->s_rdma_mr = NULL;
-		}
-		flush_tx_list(qp);
-	}
-
-	/* Schedule the sending tasklet to drain the send work queue. */
-	if (qp->s_last != qp->s_head)
-		hfi1_schedule_send(qp);
-
-	clear_mr_refs(qp, 0);
-
-	memset(&wc, 0, sizeof(wc));
-	wc.qp = &qp->ibqp;
-	wc.opcode = IB_WC_RECV;
-
-	if (test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags)) {
-		wc.wr_id = qp->r_wr_id;
-		wc.status = err;
-		hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-	}
-	wc.status = IB_WC_WR_FLUSH_ERR;
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	if (qp->r_rq.wq) {
-		struct hfi1_rwq *wq;
-		u32 head;
-		u32 tail;
-
-		spin_lock(&qp->r_rq.lock);
-
-		/* sanity check pointers before trusting them */
-		wq = qp->r_rq.wq;
-		head = wq->head;
-		if (head >= qp->r_rq.size)
-			head = 0;
-		tail = wq->tail;
-		if (tail >= qp->r_rq.size)
-			tail = 0;
-		while (tail != head) {
-			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
-			if (++tail >= qp->r_rq.size)
-				tail = 0;
-			hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-		}
-		wq->tail = tail;
-
-		spin_unlock(&qp->r_rq.lock);
-	} else if (qp->ibqp.event_handler)
-		ret = 1;
-
-bail:
-	return ret;
-}
-
-static void flush_tx_list(struct hfi1_qp *qp)
-{
-	while (!list_empty(&qp->s_iowait.tx_head)) {
+	while (!list_empty(&priv->s_iowait.tx_head)) {
 		struct sdma_txreq *tx;
 
 		tx = list_first_entry(
-			&qp->s_iowait.tx_head,
+			&priv->s_iowait.tx_head,
 			struct sdma_txreq,
 			list);
 		list_del_init(&tx->list);
@@ -557,14 +133,15 @@ static void flush_tx_list(struct hfi1_qp *qp)
 	}
 }
 
-static void flush_iowait(struct hfi1_qp *qp)
+static void flush_iowait(struct rvt_qp *qp)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
 	unsigned long flags;
 
 	write_seqlock_irqsave(&dev->iowait_lock, flags);
-	if (!list_empty(&qp->s_iowait.list)) {
-		list_del_init(&qp->s_iowait.list);
+	if (!list_empty(&priv->s_iowait.list)) {
+		list_del_init(&priv->s_iowait.list);
 		if (atomic_dec_and_test(&qp->refcount))
 			wake_up(&qp->wait);
 	}
@@ -597,362 +174,106 @@ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
 	return ib_mtu_enum_to_int(mtu);
 }
 
-
-/**
- * hfi1_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		   int attr_mask, struct ib_udata *udata)
+int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+			 int attr_mask, struct ib_udata *udata)
 {
+	struct ib_qp *ibqp = &qp->ibqp;
 	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	enum ib_qp_state cur_state, new_state;
-	struct ib_event ev;
-	int lastwqe = 0;
-	int mig = 0;
-	int ret;
-	u32 pmtu = 0; /* for gcc warning only */
 	struct hfi1_devdata *dd = dd_from_dev(dev);
-
-	spin_lock_irq(&qp->r_lock);
-	spin_lock(&qp->s_lock);
-
-	cur_state = attr_mask & IB_QP_CUR_STATE ?
-		attr->cur_qp_state : qp->state;
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
-		goto inval;
+	u8 sc;
 
 	if (attr_mask & IB_QP_AV) {
-		u8 sc;
-
-		if (attr->ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
-			goto inval;
-		if (hfi1_check_ah(qp->ibqp.device, &attr->ah_attr))
-			goto inval;
 		sc = ah_to_sc(ibqp->device, &attr->ah_attr);
+		if (sc == 0xf)
+			return -EINVAL;
+
 		if (!qp_to_sdma_engine(qp, sc) &&
 		    dd->flags & HFI1_HAS_SEND_DMA)
-			goto inval;
+			return -EINVAL;
+
+		if (!qp_to_send_context(qp, sc))
+			return -EINVAL;
 	}
 
 	if (attr_mask & IB_QP_ALT_PATH) {
-		u8 sc;
-
-		if (attr->alt_ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
-			goto inval;
-		if (hfi1_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
-			goto inval;
-		if (attr->alt_pkey_index >= hfi1_get_npkeys(dd))
-			goto inval;
 		sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
+		if (sc == 0xf)
+			return -EINVAL;
+
 		if (!qp_to_sdma_engine(qp, sc) &&
 		    dd->flags & HFI1_HAS_SEND_DMA)
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= hfi1_get_npkeys(dd))
-			goto inval;
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		if (attr->min_rnr_timer > 31)
-			goto inval;
+			return -EINVAL;
 
-	if (attr_mask & IB_QP_PORT)
-		if (qp->ibqp.qp_type == IB_QPT_SMI ||
-		    qp->ibqp.qp_type == IB_QPT_GSI ||
-		    attr->port_num == 0 ||
-		    attr->port_num > ibqp->device->phys_port_cnt)
-			goto inval;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		if (attr->dest_qp_num > HFI1_QPN_MASK)
-			goto inval;
+		if (!qp_to_send_context(qp, sc))
+			return -EINVAL;
+	}
 
-	if (attr_mask & IB_QP_RETRY_CNT)
-		if (attr->retry_cnt > 7)
-			goto inval;
+	return 0;
+}
 
-	if (attr_mask & IB_QP_RNR_RETRY)
-		if (attr->rnr_retry > 7)
-			goto inval;
+void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+		    int attr_mask, struct ib_udata *udata)
+{
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	/*
-	 * Don't allow invalid path_mtu values.  OK to set greater
-	 * than the active mtu (or even the max_cap, if we have tuned
-	 * that to a small mtu.  We'll set qp->path_mtu
-	 * to the lesser of requested attribute mtu and active,
-	 * for packetizing messages.
-	 * Note that the QP port has to be set in INIT and MTU in RTR.
-	 */
-	if (attr_mask & IB_QP_PATH_MTU) {
-		int mtu, pidx = qp->port_num - 1;
-
-		dd = dd_from_dev(dev);
-		mtu = verbs_mtu_enum_to_int(ibqp->device, attr->path_mtu);
-		if (mtu == -1)
-			goto inval;
-
-		if (mtu > dd->pport[pidx].ibmtu)
-			pmtu = mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
-		else
-			pmtu = attr->path_mtu;
+	if (attr_mask & IB_QP_AV) {
+		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
+		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
 	}
 
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		if (attr->path_mig_state == IB_MIG_REARM) {
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				goto inval;
-			if (new_state != IB_QPS_RTS)
-				goto inval;
-		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
-			if (qp->s_mig_state == IB_MIG_REARM)
-				goto inval;
-			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
-				goto inval;
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				mig = 1;
-		} else
-			goto inval;
+	if (attr_mask & IB_QP_PATH_MIG_STATE &&
+	    attr->path_mig_state == IB_MIG_MIGRATED &&
+	    qp->s_mig_state == IB_MIG_ARMED) {
+		qp->s_flags |= RVT_S_AHG_CLEAR;
+		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
+		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
 	}
+}
 
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		if (attr->max_dest_rd_atomic > HFI1_MAX_RDMA_ATOMIC)
-			goto inval;
-
-	switch (new_state) {
-	case IB_QPS_RESET:
-		if (qp->state != IB_QPS_RESET) {
-			qp->state = IB_QPS_RESET;
-			flush_iowait(qp);
-			qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
-			spin_unlock(&qp->s_lock);
-			spin_unlock_irq(&qp->r_lock);
-			/* Stop the sending work queue and retry timer */
-			cancel_work_sync(&qp->s_iowait.iowork);
-			del_timer_sync(&qp->s_timer);
-			iowait_sdma_drain(&qp->s_iowait);
-			flush_tx_list(qp);
-			remove_qp(dev, qp);
-			wait_event(qp->wait, !atomic_read(&qp->refcount));
-			spin_lock_irq(&qp->r_lock);
-			spin_lock(&qp->s_lock);
-			clear_mr_refs(qp, 1);
-			clear_ahg(qp);
-			reset_qp(qp, ibqp->qp_type);
-		}
-		break;
-
-	case IB_QPS_RTR:
-		/* Allow event to re-trigger if QP set to RTR more than once */
-		qp->r_flags &= ~HFI1_R_COMM_EST;
-		qp->state = new_state;
-		break;
-
-	case IB_QPS_SQD:
-		qp->s_draining = qp->s_last != qp->s_cur;
-		qp->state = new_state;
-		break;
+/**
+ * hfi1_check_send_wqe - validate wqe
+ * @qp - The qp
+ * @wqe - The built wqe
+ *
+ * validate wqe.  This is called
+ * prior to inserting the wqe into
+ * the ring but after the wqe has been
+ * setup.
+ *
+ * Returns 0 on success, -EINVAL on failure
+ *
+ */
+int hfi1_check_send_wqe(struct rvt_qp *qp,
+			struct rvt_swqe *wqe)
+{
+	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	struct rvt_ah *ah;
 
-	case IB_QPS_SQE:
-		if (qp->ibqp.qp_type == IB_QPT_RC)
-			goto inval;
-		qp->state = new_state;
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+		if (wqe->length > 0x80000000U)
+			return -EINVAL;
 		break;
-
-	case IB_QPS_ERR:
-		lastwqe = hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+	case IB_QPT_SMI:
+		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		if (wqe->length > (1 << ah->log_pmtu))
+			return -EINVAL;
 		break;
-
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		if (wqe->length > (1 << ah->log_pmtu))
+			return -EINVAL;
+		if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
+			return -EINVAL;
 	default:
-		qp->state = new_state;
 		break;
 	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		qp->s_pkey_index = attr->pkey_index;
-
-	if (attr_mask & IB_QP_PORT)
-		qp->port_num = attr->port_num;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		qp->remote_qpn = attr->dest_qp_num;
-
-	if (attr_mask & IB_QP_SQ_PSN) {
-		qp->s_next_psn = attr->sq_psn & PSN_MODIFY_MASK;
-		qp->s_psn = qp->s_next_psn;
-		qp->s_sending_psn = qp->s_next_psn;
-		qp->s_last_psn = qp->s_next_psn - 1;
-		qp->s_sending_hpsn = qp->s_last_psn;
-	}
-
-	if (attr_mask & IB_QP_RQ_PSN)
-		qp->r_psn = attr->rq_psn & PSN_MODIFY_MASK;
-
-	if (attr_mask & IB_QP_ACCESS_FLAGS)
-		qp->qp_access_flags = attr->qp_access_flags;
-
-	if (attr_mask & IB_QP_AV) {
-		qp->remote_ah_attr = attr->ah_attr;
-		qp->s_srate = attr->ah_attr.static_rate;
-		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
-		qp->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
-		qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
-	}
-
-	if (attr_mask & IB_QP_ALT_PATH) {
-		qp->alt_ah_attr = attr->alt_ah_attr;
-		qp->s_alt_pkey_index = attr->alt_pkey_index;
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		qp->s_mig_state = attr->path_mig_state;
-		if (mig) {
-			qp->remote_ah_attr = qp->alt_ah_attr;
-			qp->port_num = qp->alt_ah_attr.port_num;
-			qp->s_pkey_index = qp->s_alt_pkey_index;
-			qp->s_flags |= HFI1_S_AHG_CLEAR;
-			qp->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
-			qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
-		}
-	}
-
-	if (attr_mask & IB_QP_PATH_MTU) {
-		struct hfi1_ibport *ibp;
-		u8 sc, vl;
-		u32 mtu;
-
-		dd = dd_from_dev(dev);
-		ibp = &dd->pport[qp->port_num - 1].ibport_data;
-
-		sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
-		vl = sc_to_vlt(dd, sc);
-
-		mtu = verbs_mtu_enum_to_int(ibqp->device, pmtu);
-		if (vl < PER_VL_SEND_CONTEXTS)
-			mtu = min_t(u32, mtu, dd->vld[vl].mtu);
-		pmtu = mtu_to_enum(mtu, OPA_MTU_8192);
-
-		qp->path_mtu = pmtu;
-		qp->pmtu = mtu;
-	}
-
-	if (attr_mask & IB_QP_RETRY_CNT) {
-		qp->s_retry_cnt = attr->retry_cnt;
-		qp->s_retry = attr->retry_cnt;
-	}
-
-	if (attr_mask & IB_QP_RNR_RETRY) {
-		qp->s_rnr_retry_cnt = attr->rnr_retry;
-		qp->s_rnr_retry = attr->rnr_retry;
-	}
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		qp->r_min_rnr_timer = attr->min_rnr_timer;
-
-	if (attr_mask & IB_QP_TIMEOUT) {
-		qp->timeout = attr->timeout;
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-	}
-
-	if (attr_mask & IB_QP_QKEY)
-		qp->qkey = attr->qkey;
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-		qp->s_max_rd_atomic = attr->max_rd_atomic;
-
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-
-	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
-		insert_qp(dev, qp);
-
-	if (lastwqe) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	if (mig) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_PATH_MIG;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	ret = 0;
-	goto bail;
-
-inval:
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-	ret = -EINVAL;
-
-bail:
-	return ret;
-}
-
-int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_qp_init_attr *init_attr)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-
-	attr->qp_state = qp->state;
-	attr->cur_qp_state = attr->qp_state;
-	attr->path_mtu = qp->path_mtu;
-	attr->path_mig_state = qp->s_mig_state;
-	attr->qkey = qp->qkey;
-	attr->rq_psn = mask_psn(qp->r_psn);
-	attr->sq_psn = mask_psn(qp->s_next_psn);
-	attr->dest_qp_num = qp->remote_qpn;
-	attr->qp_access_flags = qp->qp_access_flags;
-	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
-	attr->cap.max_send_sge = qp->s_max_sge;
-	attr->cap.max_recv_sge = qp->r_rq.max_sge;
-	attr->cap.max_inline_data = 0;
-	attr->ah_attr = qp->remote_ah_attr;
-	attr->alt_ah_attr = qp->alt_ah_attr;
-	attr->pkey_index = qp->s_pkey_index;
-	attr->alt_pkey_index = qp->s_alt_pkey_index;
-	attr->en_sqd_async_notify = 0;
-	attr->sq_draining = qp->s_draining;
-	attr->max_rd_atomic = qp->s_max_rd_atomic;
-	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
-	attr->min_rnr_timer = qp->r_min_rnr_timer;
-	attr->port_num = qp->port_num;
-	attr->timeout = qp->timeout;
-	attr->retry_cnt = qp->s_retry_cnt;
-	attr->rnr_retry = qp->s_rnr_retry_cnt;
-	attr->alt_port_num = qp->alt_ah_attr.port_num;
-	attr->alt_timeout = qp->alt_timeout;
-
-	init_attr->event_handler = qp->ibqp.event_handler;
-	init_attr->qp_context = qp->ibqp.qp_context;
-	init_attr->send_cq = qp->ibqp.send_cq;
-	init_attr->recv_cq = qp->ibqp.recv_cq;
-	init_attr->srq = qp->ibqp.srq;
-	init_attr->cap = attr->cap;
-	if (qp->s_flags & HFI1_S_SIGNAL_REQ_WR)
-		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-	else
-		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-	init_attr->qp_type = qp->ibqp.qp_type;
-	init_attr->port_num = qp->port_num;
-	return 0;
+	return wqe->length <= piothreshold;
 }
 
 /**
@@ -961,7 +282,7 @@ int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
  *
  * Returns the AETH.
  */
-__be32 hfi1_compute_aeth(struct hfi1_qp *qp)
+__be32 hfi1_compute_aeth(struct rvt_qp *qp)
 {
 	u32 aeth = qp->r_msn & HFI1_MSN_MASK;
 
@@ -974,7 +295,7 @@ __be32 hfi1_compute_aeth(struct hfi1_qp *qp)
 	} else {
 		u32 min, max, x;
 		u32 credits;
-		struct hfi1_rwq *wq = qp->r_rq.wq;
+		struct rvt_rwq *wq = qp->r_rq.wq;
 		u32 head;
 		u32 tail;
 
@@ -1004,12 +325,13 @@ __be32 hfi1_compute_aeth(struct hfi1_qp *qp)
 			x = (min + max) / 2;
 			if (credit_table[x] == credits)
 				break;
-			if (credit_table[x] > credits)
+			if (credit_table[x] > credits) {
 				max = x;
-			else if (min == x)
-				break;
-			else
+			} else {
+				if (min == x)
+					break;
 				min = x;
+			}
 		}
 		aeth |= x << HFI1_AETH_CREDIT_SHIFT;
 	}
@@ -1017,348 +339,58 @@ __be32 hfi1_compute_aeth(struct hfi1_qp *qp)
 }
 
 /**
- * hfi1_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
-			     struct ib_qp_init_attr *init_attr,
-			     struct ib_udata *udata)
-{
-	struct hfi1_qp *qp;
-	int err;
-	struct hfi1_swqe *swq = NULL;
-	struct hfi1_ibdev *dev;
-	struct hfi1_devdata *dd;
-	size_t sz;
-	size_t sg_list_sz;
-	struct ib_qp *ret;
-
-	if (init_attr->cap.max_send_sge > hfi1_max_sges ||
-	    init_attr->cap.max_send_wr > hfi1_max_qp_wrs ||
-	    init_attr->create_flags) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	/* Check receive queue parameters if no SRQ is specified. */
-	if (!init_attr->srq) {
-		if (init_attr->cap.max_recv_sge > hfi1_max_sges ||
-		    init_attr->cap.max_recv_wr > hfi1_max_qp_wrs) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		if (init_attr->cap.max_send_sge +
-		    init_attr->cap.max_send_wr +
-		    init_attr->cap.max_recv_sge +
-		    init_attr->cap.max_recv_wr == 0) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-	}
-
-	switch (init_attr->qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		if (init_attr->port_num == 0 ||
-		    init_attr->port_num > ibpd->device->phys_port_cnt) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-	case IB_QPT_UC:
-	case IB_QPT_RC:
-	case IB_QPT_UD:
-		sz = sizeof(struct hfi1_sge) *
-			init_attr->cap.max_send_sge +
-			sizeof(struct hfi1_swqe);
-		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
-		if (swq == NULL) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail;
-		}
-		sz = sizeof(*qp);
-		sg_list_sz = 0;
-		if (init_attr->srq) {
-			struct hfi1_srq *srq = to_isrq(init_attr->srq);
-
-			if (srq->rq.max_sge > 1)
-				sg_list_sz = sizeof(*qp->r_sg_list) *
-					(srq->rq.max_sge - 1);
-		} else if (init_attr->cap.max_recv_sge > 1)
-			sg_list_sz = sizeof(*qp->r_sg_list) *
-				(init_attr->cap.max_recv_sge - 1);
-		qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
-		if (!qp) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_swq;
-		}
-		RCU_INIT_POINTER(qp->next, NULL);
-		qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
-		if (!qp->s_hdr) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_qp;
-		}
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-		if (init_attr->srq)
-			sz = 0;
-		else {
-			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
-			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
-				sizeof(struct hfi1_rwqe);
-			qp->r_rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) +
-						   qp->r_rq.size * sz);
-			if (!qp->r_rq.wq) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_qp;
-			}
-		}
-
-		/*
-		 * ib_create_qp() will initialize qp->ibqp
-		 * except for qp->ibqp.qp_num.
-		 */
-		spin_lock_init(&qp->r_lock);
-		spin_lock_init(&qp->s_lock);
-		spin_lock_init(&qp->r_rq.lock);
-		atomic_set(&qp->refcount, 0);
-		init_waitqueue_head(&qp->wait);
-		init_timer(&qp->s_timer);
-		qp->s_timer.data = (unsigned long)qp;
-		INIT_LIST_HEAD(&qp->rspwait);
-		qp->state = IB_QPS_RESET;
-		qp->s_wq = swq;
-		qp->s_size = init_attr->cap.max_send_wr + 1;
-		qp->s_max_sge = init_attr->cap.max_send_sge;
-		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-			qp->s_flags = HFI1_S_SIGNAL_REQ_WR;
-		dev = to_idev(ibpd->device);
-		dd = dd_from_dev(dev);
-		err = alloc_qpn(dd, &dev->qp_dev->qpn_table, init_attr->qp_type,
-				init_attr->port_num);
-		if (err < 0) {
-			ret = ERR_PTR(err);
-			vfree(qp->r_rq.wq);
-			goto bail_qp;
-		}
-		qp->ibqp.qp_num = err;
-		qp->port_num = init_attr->port_num;
-		reset_qp(qp, init_attr->qp_type);
-
-		break;
-
-	default:
-		/* Don't support raw QPs */
-		ret = ERR_PTR(-ENOSYS);
-		goto bail;
-	}
-
-	init_attr->cap.max_inline_data = 0;
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See hfi1_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		if (!qp->r_rq.wq) {
-			__u64 offset = 0;
-
-			err = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		} else {
-			u32 s = sizeof(struct hfi1_rwq) + qp->r_rq.size * sz;
-
-			qp->ip = hfi1_create_mmap_info(dev, s,
-						      ibpd->uobject->context,
-						      qp->r_rq.wq);
-			if (!qp->ip) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_ip;
-			}
-
-			err = ib_copy_to_udata(udata, &(qp->ip->offset),
-					       sizeof(qp->ip->offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		}
-	}
-
-	spin_lock(&dev->n_qps_lock);
-	if (dev->n_qps_allocated == hfi1_max_qps) {
-		spin_unlock(&dev->n_qps_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_qps_allocated++;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &qp->ibqp;
-
-	/*
-	 * We have our QP and its good, now keep track of what types of opcodes
-	 * can be processed on this QP. We do this by keeping track of what the
-	 * 3 high order bits of the opcode are.
-	 */
-	switch (init_attr->qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-	case IB_QPT_UD:
-		qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & OPCODE_QP_MASK;
-		break;
-	case IB_QPT_RC:
-		qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & OPCODE_QP_MASK;
-		break;
-	case IB_QPT_UC:
-		qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & OPCODE_QP_MASK;
-		break;
-	default:
-		ret = ERR_PTR(-EINVAL);
-		goto bail_ip;
-	}
-
-	goto bail;
-
-bail_ip:
-	if (qp->ip)
-		kref_put(&qp->ip->ref, hfi1_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
-bail_qp:
-	kfree(qp->s_hdr);
-	kfree(qp);
-bail_swq:
-	vfree(swq);
-bail:
-	return ret;
-}
-
-/**
- * hfi1_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
+ * _hfi1_schedule_send - schedule progress
+ * @qp: the QP
  *
- * Returns 0 on success.
+ * This schedules qp progress w/o regard to the s_flags.
  *
- * Note that this can be called while the QP is actively sending or
- * receiving!
+ * It is only used in the post send, which doesn't hold
+ * the s_lock.
  */
-int hfi1_destroy_qp(struct ib_qp *ibqp)
+void _hfi1_schedule_send(struct rvt_qp *qp)
 {
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-
-	/* Make sure HW and driver activity is stopped. */
-	spin_lock_irq(&qp->r_lock);
-	spin_lock(&qp->s_lock);
-	if (qp->state != IB_QPS_RESET) {
-		qp->state = IB_QPS_RESET;
-		flush_iowait(qp);
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
-		spin_unlock(&qp->s_lock);
-		spin_unlock_irq(&qp->r_lock);
-		cancel_work_sync(&qp->s_iowait.iowork);
-		del_timer_sync(&qp->s_timer);
-		iowait_sdma_drain(&qp->s_iowait);
-		flush_tx_list(qp);
-		remove_qp(dev, qp);
-		wait_event(qp->wait, !atomic_read(&qp->refcount));
-		spin_lock_irq(&qp->r_lock);
-		spin_lock(&qp->s_lock);
-		clear_mr_refs(qp, 1);
-		clear_ahg(qp);
-	}
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-
-	/* all user's cleaned up, mark it available */
-	free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
-	spin_lock(&dev->n_qps_lock);
-	dev->n_qps_allocated--;
-	spin_unlock(&dev->n_qps_lock);
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct hfi1_ibport *ibp =
+		to_iport(qp->ibqp.device, qp->port_num);
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 
-	if (qp->ip)
-		kref_put(&qp->ip->ref, hfi1_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	vfree(qp->s_wq);
-	kfree(qp->s_hdr);
-	kfree(qp);
-	return 0;
+	iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
+			priv->s_sde ?
+			priv->s_sde->cpu :
+			cpumask_first(cpumask_of_node(dd->node)));
 }
 
-/**
- * init_qpn_table - initialize the QP number table for a device
- * @qpt: the QPN table
- */
-static int init_qpn_table(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt)
+static void qp_pio_drain(struct rvt_qp *qp)
 {
-	u32 offset, qpn, i;
-	struct qpn_map *map;
-	int ret = 0;
+	struct hfi1_ibdev *dev;
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	spin_lock_init(&qpt->lock);
-
-	qpt->last = 0;
-	qpt->incr = 1 << dd->qos_shift;
-
-	/* insure we don't assign QPs from KDETH 64K window */
-	qpn = kdeth_qp << 16;
-	qpt->nmaps = qpn / BITS_PER_PAGE;
-	/* This should always be zero */
-	offset = qpn & BITS_PER_PAGE_MASK;
-	map = &qpt->map[qpt->nmaps];
-	dd_dev_info(dd, "Reserving QPNs for KDETH window from 0x%x to 0x%x\n",
-		qpn, qpn + 65535);
-	for (i = 0; i < 65536; i++) {
-		if (!map->page) {
-			get_map_page(qpt, map);
-			if (!map->page) {
-				ret = -ENOMEM;
-				break;
-			}
-		}
-		set_bit(offset, map->page);
-		offset++;
-		if (offset == BITS_PER_PAGE) {
-			/* next page */
-			qpt->nmaps++;
-			map++;
-			offset = 0;
-		}
+	if (!priv->s_sendcontext)
+		return;
+	dev = to_idev(qp->ibqp.device);
+	while (iowait_pio_pending(&priv->s_iowait)) {
+		write_seqlock_irq(&dev->iowait_lock);
+		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
+		write_sequnlock_irq(&dev->iowait_lock);
+		iowait_pio_drain(&priv->s_iowait);
+		write_seqlock_irq(&dev->iowait_lock);
+		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
+		write_sequnlock_irq(&dev->iowait_lock);
 	}
-	return ret;
 }
 
 /**
- * free_qpn_table - free the QP number table for a device
- * @qpt: the QPN table
+ * hfi1_schedule_send - schedule progress
+ * @qp: the QP
+ *
+ * This schedules qp progress and caller should hold
+ * the s_lock.
  */
-static void free_qpn_table(struct hfi1_qpn_table *qpt)
+void hfi1_schedule_send(struct rvt_qp *qp)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
-		free_page((unsigned long) qpt->map[i].page);
+	if (hfi1_send_ok(qp))
+		_hfi1_schedule_send(qp);
 }
 
 /**
@@ -1368,7 +400,7 @@ static void free_qpn_table(struct hfi1_qpn_table *qpt)
  *
  * The QP s_lock should be held.
  */
-void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth)
+void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
 {
 	u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
 
@@ -1378,27 +410,27 @@ void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth)
 	 * honor the credit field.
 	 */
 	if (credit == HFI1_AETH_CREDIT_INVAL) {
-		if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
-			qp->s_flags |= HFI1_S_UNLIMITED_CREDIT;
-			if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
+		if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+			qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				hfi1_schedule_send(qp);
 			}
 		}
-	} else if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
+	} else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
 		/* Compute new LSN (i.e., MSN + credit) */
 		credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK;
 		if (cmp_msn(credit, qp->s_lsn) > 0) {
 			qp->s_lsn = credit;
-			if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				hfi1_schedule_send(qp);
 			}
 		}
 	}
 }
 
-void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag)
+void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
 {
 	unsigned long flags;
 
@@ -1421,16 +453,17 @@ static int iowait_sleep(
 	unsigned seq)
 {
 	struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
+	struct hfi1_qp_priv *priv;
 	unsigned long flags;
 	int ret = 0;
 	struct hfi1_ibdev *dev;
 
 	qp = tx->qp;
+	priv = qp->priv;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
-
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		/*
 		 * If we couldn't queue the DMA request, save the info
 		 * and try again later rather than destroying the
@@ -1442,18 +475,18 @@ static int iowait_sleep(
 		write_seqlock(&dev->iowait_lock);
 		if (sdma_progress(sde, seq, stx))
 			goto eagain;
-		if (list_empty(&qp->s_iowait.list)) {
+		if (list_empty(&priv->s_iowait.list)) {
 			struct hfi1_ibport *ibp =
 				to_iport(qp->ibqp.device, qp->port_num);
 
-			ibp->n_dmawait++;
-			qp->s_flags |= HFI1_S_WAIT_DMA_DESC;
-			list_add_tail(&qp->s_iowait.list, &sde->dmawait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_DMA_DESC);
+			ibp->rvp.n_dmawait++;
+			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+			list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
 			atomic_inc(&qp->refcount);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~HFI1_S_BUSY;
+		qp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		ret = -EBUSY;
 	} else {
@@ -1470,61 +503,25 @@ eagain:
 
 static void iowait_wakeup(struct iowait *wait, int reason)
 {
-	struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait);
+	struct rvt_qp *qp = iowait_to_qp(wait);
 
 	WARN_ON(reason != SDMA_AVAIL_REASON);
-	hfi1_qp_wakeup(qp, HFI1_S_WAIT_DMA_DESC);
+	hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
 }
 
-int hfi1_qp_init(struct hfi1_ibdev *dev)
+static void iowait_sdma_drained(struct iowait *wait)
 {
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	int i;
-	int ret = -ENOMEM;
-
-	/* allocate parent object */
-	dev->qp_dev = kzalloc(sizeof(*dev->qp_dev), GFP_KERNEL);
-	if (!dev->qp_dev)
-		goto nomem;
-	/* allocate hash table */
-	dev->qp_dev->qp_table_size = hfi1_qp_table_size;
-	dev->qp_dev->qp_table_bits = ilog2(hfi1_qp_table_size);
-	dev->qp_dev->qp_table =
-		kmalloc(dev->qp_dev->qp_table_size *
-				sizeof(*dev->qp_dev->qp_table),
-			GFP_KERNEL);
-	if (!dev->qp_dev->qp_table)
-		goto nomem;
-	for (i = 0; i < dev->qp_dev->qp_table_size; i++)
-		RCU_INIT_POINTER(dev->qp_dev->qp_table[i], NULL);
-	spin_lock_init(&dev->qp_dev->qpt_lock);
-	/* initialize qpn map */
-	ret = init_qpn_table(dd, &dev->qp_dev->qpn_table);
-	if (ret)
-		goto nomem;
-	return ret;
-nomem:
-	if (dev->qp_dev) {
-		kfree(dev->qp_dev->qp_table);
-		free_qpn_table(&dev->qp_dev->qpn_table);
-		kfree(dev->qp_dev);
-	}
-	return ret;
-}
+	struct rvt_qp *qp = iowait_to_qp(wait);
 
-void hfi1_qp_exit(struct hfi1_ibdev *dev)
-{
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	u32 qps_inuse;
-
-	qps_inuse = free_all_qps(dd);
-	if (qps_inuse)
-		dd_dev_err(dd, "QP memory leak! %u still in use\n",
-			   qps_inuse);
-	if (dev->qp_dev) {
-		kfree(dev->qp_dev->qp_table);
-		free_qpn_table(&dev->qp_dev->qpn_table);
-		kfree(dev->qp_dev);
+	/*
+	 * This happens when the send engine notes
+	 * a QP in the error state and cannot
+	 * do the flush work until that QP's
+	 * sdma work has finished.
+	 */
+	if (qp->s_flags & RVT_S_WAIT_DMA) {
+		qp->s_flags &= ~RVT_S_WAIT_DMA;
+		hfi1_schedule_send(qp);
 	}
 }
 
@@ -1537,7 +534,7 @@ void hfi1_qp_exit(struct hfi1_ibdev *dev)
  * Return:
  * A send engine for the qp or NULL for SMI type qp.
  */
-struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5)
+struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	struct sdma_engine *sde;
@@ -1554,9 +551,33 @@ struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5)
 	return sde;
 }
 
+/*
+ * qp_to_send_context - map a qp to a send context
+ * @qp: the QP
+ * @sc5: the 5 bit sc
+ *
+ * Return:
+ * A send context for the qp
+ */
+struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
+{
+	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_SMI:
+		/* SMA packets to VL15 */
+		return dd->vld[15].sc;
+	default:
+		break;
+	}
+
+	return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
+					  sc5);
+}
+
 struct qp_iter {
 	struct hfi1_ibdev *dev;
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
 	int specials;
 	int n;
 };
@@ -1570,7 +591,7 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
 		return NULL;
 
 	iter->dev = dev;
-	iter->specials = dev->ibdev.phys_port_cnt * 2;
+	iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
 	if (qp_iter_next(iter)) {
 		kfree(iter);
 		return NULL;
@@ -1584,8 +605,8 @@ int qp_iter_next(struct qp_iter *iter)
 	struct hfi1_ibdev *dev = iter->dev;
 	int n = iter->n;
 	int ret = 1;
-	struct hfi1_qp *pqp = iter->qp;
-	struct hfi1_qp *qp;
+	struct rvt_qp *pqp = iter->qp;
+	struct rvt_qp *qp;
 
 	/*
 	 * The approach is to consider the special qps
@@ -1597,11 +618,11 @@ int qp_iter_next(struct qp_iter *iter)
 	 *
 	 * n = 0..iter->specials is the special qp indices
 	 *
-	 * n = iter->specials..dev->qp_dev->qp_table_size+iter->specials are
+	 * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are
 	 * the potential hash bucket entries
 	 *
 	 */
-	for (; n <  dev->qp_dev->qp_table_size + iter->specials; n++) {
+	for (; n <  dev->rdi.qp_dev->qp_table_size + iter->specials; n++) {
 		if (pqp) {
 			qp = rcu_dereference(pqp->next);
 		} else {
@@ -1610,17 +631,17 @@ int qp_iter_next(struct qp_iter *iter)
 				struct hfi1_ibport *ibp;
 				int pidx;
 
-				pidx = n % dev->ibdev.phys_port_cnt;
+				pidx = n % dev->rdi.ibdev.phys_port_cnt;
 				ppd = &dd_from_dev(dev)->pport[pidx];
 				ibp = &ppd->ibport_data;
 
 				if (!(n & 1))
-					qp = rcu_dereference(ibp->qp[0]);
+					qp = rcu_dereference(ibp->rvp.qp[0]);
 				else
-					qp = rcu_dereference(ibp->qp[1]);
+					qp = rcu_dereference(ibp->rvp.qp[1]);
 			} else {
 				qp = rcu_dereference(
-					dev->qp_dev->qp_table[
+					dev->rdi.qp_dev->qp_table[
 						(n - iter->specials)]);
 			}
 		}
@@ -1638,7 +659,7 @@ static const char * const qp_type_str[] = {
 	"SMI", "GSI", "RC", "UC", "UD",
 };
 
-static int qp_idle(struct hfi1_qp *qp)
+static int qp_idle(struct rvt_qp *qp)
 {
 	return
 		qp->s_last == qp->s_acked &&
@@ -1649,14 +670,17 @@ static int qp_idle(struct hfi1_qp *qp)
 
 void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
 {
-	struct hfi1_swqe *wqe;
-	struct hfi1_qp *qp = iter->qp;
+	struct rvt_swqe *wqe;
+	struct rvt_qp *qp = iter->qp;
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct sdma_engine *sde;
+	struct send_context *send_context;
 
-	sde = qp_to_sdma_engine(qp, qp->s_sc);
-	wqe = get_swqe_ptr(qp, qp->s_last);
+	sde = qp_to_sdma_engine(qp, priv->s_sc);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
+	send_context = qp_to_send_context(qp, priv->s_sc);
 	seq_printf(s,
-		   "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %d %u %u %u SDE %p,%u\n",
+		   "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
 		   iter->n,
 		   qp_idle(qp) ? "I" : "B",
 		   qp->ibqp.qp_num,
@@ -1666,8 +690,9 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
 		   wqe ? wqe->wr.opcode : 0,
 		   qp->s_hdrwords,
 		   qp->s_flags,
-		   atomic_read(&qp->s_iowait.sdma_busy),
-		   !list_empty(&qp->s_iowait.list),
+		   iowait_sdma_pending(&priv->s_iowait),
+		   iowait_pio_pending(&priv->s_iowait),
+		   !list_empty(&priv->s_iowait.list),
 		   qp->timeout,
 		   wqe ? wqe->ssn : 0,
 		   qp->s_lsn,
@@ -1676,20 +701,26 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
 		   qp->s_sending_psn, qp->s_sending_hpsn,
 		   qp->s_last, qp->s_acked, qp->s_cur,
 		   qp->s_tail, qp->s_head, qp->s_size,
+		   qp->s_avail,
 		   qp->remote_qpn,
 		   qp->remote_ah_attr.dlid,
 		   qp->remote_ah_attr.sl,
 		   qp->pmtu,
+		   qp->s_retry,
 		   qp->s_retry_cnt,
-		   qp->timeout,
 		   qp->s_rnr_retry_cnt,
 		   sde,
-		   sde ? sde->this_idx : 0);
+		   sde ? sde->this_idx : 0,
+		   send_context,
+		   send_context ? send_context->sw_index : 0,
+		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
+		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
+		   qp->pid);
 }
 
-void qp_comm_est(struct hfi1_qp *qp)
+void qp_comm_est(struct rvt_qp *qp)
 {
-	qp->r_flags |= HFI1_R_COMM_EST;
+	qp->r_flags |= RVT_R_COMM_EST;
 	if (qp->ibqp.event_handler) {
 		struct ib_event ev;
 
@@ -1700,24 +731,241 @@ void qp_comm_est(struct hfi1_qp *qp)
 	}
 }
 
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		    gfp_t gfp)
+{
+	struct hfi1_qp_priv *priv;
+
+	priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+
+	priv->owner = qp;
+
+	priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node);
+	if (!priv->s_hdr) {
+		kfree(priv);
+		return ERR_PTR(-ENOMEM);
+	}
+	setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
+	qp->s_timer.function = hfi1_rc_timeout;
+	return priv;
+}
+
+void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	kfree(priv->s_hdr);
+	kfree(priv);
+}
+
+unsigned free_all_qps(struct rvt_dev_info *rdi)
+{
+	struct hfi1_ibdev *verbs_dev = container_of(rdi,
+						    struct hfi1_ibdev,
+						    rdi);
+	struct hfi1_devdata *dd = container_of(verbs_dev,
+					       struct hfi1_devdata,
+					       verbs_dev);
+	int n;
+	unsigned qp_inuse = 0;
+
+	for (n = 0; n < dd->num_pports; n++) {
+		struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;
+
+		rcu_read_lock();
+		if (rcu_dereference(ibp->rvp.qp[0]))
+			qp_inuse++;
+		if (rcu_dereference(ibp->rvp.qp[1]))
+			qp_inuse++;
+		rcu_read_unlock();
+	}
+
+	return qp_inuse;
+}
+
+void flush_qp_waiters(struct rvt_qp *qp)
+{
+	flush_iowait(qp);
+	hfi1_stop_rc_timers(qp);
+}
+
+void stop_send_queue(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	cancel_work_sync(&priv->s_iowait.iowork);
+	hfi1_del_timers_sync(qp);
+}
+
+void quiesce_qp(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	iowait_sdma_drain(&priv->s_iowait);
+	qp_pio_drain(qp);
+	flush_tx_list(qp);
+}
+
+void notify_qp_reset(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	iowait_init(
+		&priv->s_iowait,
+		1,
+		_hfi1_do_send,
+		iowait_sleep,
+		iowait_wakeup,
+		iowait_sdma_drained);
+	priv->r_adefered = 0;
+	clear_ahg(qp);
+}
+
 /*
  * Switch to alternate path.
  * The QP s_lock should be held and interrupts disabled.
  */
-void hfi1_migrate_qp(struct hfi1_qp *qp)
+void hfi1_migrate_qp(struct rvt_qp *qp)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct ib_event ev;
 
 	qp->s_mig_state = IB_MIG_MIGRATED;
 	qp->remote_ah_attr = qp->alt_ah_attr;
 	qp->port_num = qp->alt_ah_attr.port_num;
 	qp->s_pkey_index = qp->s_alt_pkey_index;
-	qp->s_flags |= HFI1_S_AHG_CLEAR;
-	qp->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
-	qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
+	qp->s_flags |= RVT_S_AHG_CLEAR;
+	priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
+	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
 
 	ev.device = qp->ibqp.device;
 	ev.element.qp = &qp->ibqp;
 	ev.event = IB_EVENT_PATH_MIG;
 	qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
 }
+
+int mtu_to_path_mtu(u32 mtu)
+{
+	return mtu_to_enum(mtu, OPA_MTU_8192);
+}
+
+u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
+{
+	u32 mtu;
+	struct hfi1_ibdev *verbs_dev = container_of(rdi,
+						    struct hfi1_ibdev,
+						    rdi);
+	struct hfi1_devdata *dd = container_of(verbs_dev,
+					       struct hfi1_devdata,
+					       verbs_dev);
+	struct hfi1_ibport *ibp;
+	u8 sc, vl;
+
+	ibp = &dd->pport[qp->port_num - 1].ibport_data;
+	sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+	vl = sc_to_vlt(dd, sc);
+
+	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
+	if (vl < PER_VL_SEND_CONTEXTS)
+		mtu = min_t(u32, mtu, dd->vld[vl].mtu);
+	return mtu;
+}
+
+int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		       struct ib_qp_attr *attr)
+{
+	int mtu, pidx = qp->port_num - 1;
+	struct hfi1_ibdev *verbs_dev = container_of(rdi,
+						    struct hfi1_ibdev,
+						    rdi);
+	struct hfi1_devdata *dd = container_of(verbs_dev,
+					       struct hfi1_devdata,
+					       verbs_dev);
+	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
+	if (mtu == -1)
+		return -1; /* values less than 0 are error */
+
+	if (mtu > dd->pport[pidx].ibmtu)
+		return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
+	else
+		return attr->path_mtu;
+}
+
+void notify_error_qp(struct rvt_qp *qp)
+{
+	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	write_seqlock(&dev->iowait_lock);
+	if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
+		qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+		list_del_init(&priv->s_iowait.list);
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+	write_sequnlock(&dev->iowait_lock);
+
+	if (!(qp->s_flags & RVT_S_BUSY)) {
+		qp->s_hdrwords = 0;
+		if (qp->s_rdma_mr) {
+			rvt_put_mr(qp->s_rdma_mr);
+			qp->s_rdma_mr = NULL;
+		}
+		flush_tx_list(qp);
+	}
+}
+
+/**
+ * hfi1_error_port_qps - put a port's RC/UC qps into error state
+ * @ibp: the ibport.
+ * @sl: the service level.
+ *
+ * This function places all RC/UC qps with a given service level into error
+ * state. It is generally called to force upper lay apps to abandon stale qps
+ * after an sl->sc mapping change.
+ */
+void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
+{
+	struct rvt_qp *qp = NULL;
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+	struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;
+	int n;
+	int lastwqe;
+	struct ib_event ev;
+
+	rcu_read_lock();
+
+	/* Deal only with RC/UC qps that use the given SL. */
+	for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) {
+		for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp;
+			qp = rcu_dereference(qp->next)) {
+			if (qp->port_num == ppd->port &&
+			    (qp->ibqp.qp_type == IB_QPT_UC ||
+			     qp->ibqp.qp_type == IB_QPT_RC) &&
+			    qp->remote_ah_attr.sl == sl &&
+			    (ib_rvt_state_ops[qp->state] &
+			     RVT_POST_SEND_OK)) {
+				spin_lock_irq(&qp->r_lock);
+				spin_lock(&qp->s_hlock);
+				spin_lock(&qp->s_lock);
+				lastwqe = rvt_error_qp(qp,
+						       IB_WC_WR_FLUSH_ERR);
+				spin_unlock(&qp->s_lock);
+				spin_unlock(&qp->s_hlock);
+				spin_unlock_irq(&qp->r_lock);
+				if (lastwqe) {
+					ev.device = qp->ibqp.device;
+					ev.element.qp = &qp->ibqp;
+					ev.event =
+						IB_EVENT_QP_LAST_WQE_REACHED;
+					qp->ibqp.event_handler(&ev,
+						qp->ibqp.qp_context);
+				}
+			}
+		}
+	}
+
+	rcu_read_unlock();
+}
diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h
index 62a94c5d7dca..e7bc8d6cf681 100644
--- a/drivers/staging/rdma/hfi1/qp.h
+++ b/drivers/staging/rdma/hfi1/qp.h
@@ -1,14 +1,13 @@
 #ifndef _QP_H
 #define _QP_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -51,119 +48,33 @@
  */
 
 #include <linux/hash.h>
+#include <rdma/rdmavt_qp.h>
 #include "verbs.h"
 #include "sdma.h"
 
-#define QPN_MAX                 (1 << 24)
-#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+extern unsigned int hfi1_qp_table_size;
 
 /*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
-	void *page;
-};
-
-struct hfi1_qpn_table {
-	spinlock_t lock; /* protect changes in this struct */
-	unsigned flags;         /* flags for QP0/1 allocated for each port */
-	u32 last;               /* last QP number allocated */
-	u32 nmaps;              /* size of the map table */
-	u16 limit;
-	u8  incr;
-	/* bit map of free QP numbers other than 0/1 */
-	struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-struct hfi1_qp_ibdev {
-	u32 qp_table_size;
-	u32 qp_table_bits;
-	struct hfi1_qp __rcu **qp_table;
-	spinlock_t qpt_lock;
-	struct hfi1_qpn_table qpn_table;
-};
-
-static inline u32 qpn_hash(struct hfi1_qp_ibdev *dev, u32 qpn)
-{
-	return hash_32(qpn, dev->qp_table_bits);
-}
-
-/**
- * hfi1_lookup_qpn - return the QP with the given QPN
- * @ibp: the ibport
- * @qpn: the QP number to look up
- *
- * The caller must hold the rcu_read_lock(), and keep the lock until
- * the returned qp is no longer in use.
+ * free_ahg - clear ahg from QP
  */
-static inline struct hfi1_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp,
-				u32 qpn) __must_hold(RCU)
+static inline void clear_ahg(struct rvt_qp *qp)
 {
-	struct hfi1_qp *qp = NULL;
-
-	if (unlikely(qpn <= 1)) {
-		qp = rcu_dereference(ibp->qp[qpn]);
-	} else {
-		struct hfi1_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
-		u32 n = qpn_hash(dev->qp_dev, qpn);
-
-		for (qp = rcu_dereference(dev->qp_dev->qp_table[n]); qp;
-			qp = rcu_dereference(qp->next))
-			if (qp->ibqp.qp_num == qpn)
-				break;
-	}
-	return qp;
-}
+	struct hfi1_qp_priv *priv = qp->priv;
 
-/**
- * clear_ahg - reset ahg status in qp
- * @qp - qp pointer
- */
-static inline void clear_ahg(struct hfi1_qp *qp)
-{
-	qp->s_hdr->ahgcount = 0;
-	qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR);
-	if (qp->s_sde && qp->s_ahgidx >= 0)
-		sdma_ahg_free(qp->s_sde, qp->s_ahgidx);
+	priv->s_hdr->ahgcount = 0;
+	qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR);
+	if (priv->s_sde && qp->s_ahgidx >= 0)
+		sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
 	qp->s_ahgidx = -1;
 }
 
 /**
- * hfi1_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err);
-
-/**
- * hfi1_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		   int attr_mask, struct ib_udata *udata);
-
-int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_qp_init_attr *init_attr);
-
-/**
  * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
  * @qp: the queue pair to compute the AETH for
  *
  * Returns the AETH.
  */
-__be32 hfi1_compute_aeth(struct hfi1_qp *qp);
+__be32 hfi1_compute_aeth(struct rvt_qp *qp);
 
 /**
  * hfi1_create_qp - create a queue pair for a device
@@ -179,45 +90,23 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
 			     struct ib_qp_init_attr *init_attr,
 			     struct ib_udata *udata);
 /**
- * hfi1_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int hfi1_destroy_qp(struct ib_qp *ibqp);
-
-/**
  * hfi1_get_credit - flush the send work queue of a QP
  * @qp: the qp who's send work queue to flush
  * @aeth: the Acknowledge Extended Transport Header
  *
  * The QP s_lock should be held.
  */
-void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth);
-
-/**
- * hfi1_qp_init - allocate QP tables
- * @dev: a pointer to the hfi1_ibdev
- */
-int hfi1_qp_init(struct hfi1_ibdev *dev);
-
-/**
- * hfi1_qp_exit - free the QP related structures
- * @dev: a pointer to the hfi1_ibdev
- */
-void hfi1_qp_exit(struct hfi1_ibdev *dev);
+void hfi1_get_credit(struct rvt_qp *qp, u32 aeth);
 
 /**
  * hfi1_qp_wakeup - wake up on the indicated event
  * @qp: the QP
  * @flag: flag the qp on which the qp is stalled
  */
-void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag);
+void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag);
 
-struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5);
+struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5);
+struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
 
 struct qp_iter;
 
@@ -244,43 +133,28 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter);
  * qp_comm_est - handle trap with QP established
  * @qp: the QP
  */
-void qp_comm_est(struct hfi1_qp *qp);
+void qp_comm_est(struct rvt_qp *qp);
 
-/**
- * _hfi1_schedule_send - schedule progress
- * @qp: the QP
- *
- * This schedules qp progress w/o regard to the s_flags.
- *
- * It is only used in the post send, which doesn't hold
- * the s_lock.
- */
-static inline void _hfi1_schedule_send(struct hfi1_qp *qp)
-{
-	struct hfi1_ibport *ibp =
-		to_iport(qp->ibqp.device, qp->port_num);
-	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+void _hfi1_schedule_send(struct rvt_qp *qp);
+void hfi1_schedule_send(struct rvt_qp *qp);
 
-	iowait_schedule(&qp->s_iowait, ppd->hfi1_wq,
-			qp->s_sde ?
-			qp->s_sde->cpu :
-			cpumask_first(cpumask_of_node(dd->assigned_node_id)));
-}
-
-/**
- * hfi1_schedule_send - schedule progress
- * @qp: the QP
- *
- * This schedules qp progress and caller should hold
- * the s_lock.
- */
-static inline void hfi1_schedule_send(struct hfi1_qp *qp)
-{
-	if (hfi1_send_ok(qp))
-		_hfi1_schedule_send(qp);
-}
-
-void hfi1_migrate_qp(struct hfi1_qp *qp);
+void hfi1_migrate_qp(struct rvt_qp *qp);
 
+/*
+ * Functions provided by hfi1 driver for rdmavt to use
+ */
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		    gfp_t gfp);
+void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+unsigned free_all_qps(struct rvt_dev_info *rdi);
+void notify_qp_reset(struct rvt_qp *qp);
+int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		       struct ib_qp_attr *attr);
+void flush_qp_waiters(struct rvt_qp *qp);
+void notify_error_qp(struct rvt_qp *qp);
+void stop_send_queue(struct rvt_qp *qp);
+void quiesce_qp(struct rvt_qp *qp);
+u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
+int mtu_to_path_mtu(u32 mtu);
+void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
 #endif /* _QP_H */
diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c
index 6326a915d7fd..9ed1963010fe 100644
--- a/drivers/staging/rdma/hfi1/qsfp.c
+++ b/drivers/staging/rdma/hfi1/qsfp.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -62,7 +59,7 @@
 #define I2C_MAX_RETRY 4
 
 /*
- * Unlocked i2c write.  Must hold dd->qsfp_i2c_mutex.
+ * Raw i2c write.  No set-up or lock checking.
  */
 static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 		       int offset, void *bp, int len)
@@ -71,14 +68,6 @@ static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 	int ret, cnt;
 	u8 *buff = bp;
 
-	/* Make sure TWSI bus is in sane state. */
-	ret = hfi1_twsi_reset(dd, target);
-	if (ret) {
-		hfi1_dev_porterr(dd, ppd->port,
-				 "I2C interface Reset for write failed\n");
-		return -EIO;
-	}
-
 	cnt = 0;
 	while (cnt < len) {
 		int wlen = len - cnt;
@@ -99,48 +88,45 @@ static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 	return cnt;
 }
 
+/*
+ * Caller must hold the i2c chain resource.
+ */
 int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
 	      void *bp, int len)
 {
-	struct hfi1_devdata *dd = ppd->dd;
 	int ret;
 
-	ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex);
-	if (!ret) {
-		ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len);
-		mutex_unlock(&dd->qsfp_i2c_mutex);
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "I2C chain %d write interface reset failed\n",
+				 target);
+		return ret;
 	}
 
-	return ret;
+	return __i2c_write(ppd, target, i2c_addr, offset, bp, len);
 }
 
 /*
- * Unlocked i2c read.  Must hold dd->qsfp_i2c_mutex.
+ * Raw i2c read.  No set-up or lock checking.
  */
 static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 		      int offset, void *bp, int len)
 {
 	struct hfi1_devdata *dd = ppd->dd;
 	int ret, cnt, pass = 0;
-	int stuck = 0;
-	u8 *buff = bp;
-
-	/* Make sure TWSI bus is in sane state. */
-	ret = hfi1_twsi_reset(dd, target);
-	if (ret) {
-		hfi1_dev_porterr(dd, ppd->port,
-				 "I2C interface Reset for read failed\n");
-		ret = -EIO;
-		stuck = 1;
-		goto exit;
-	}
+	int orig_offset = offset;
 
 	cnt = 0;
 	while (cnt < len) {
 		int rlen = len - cnt;
 
 		ret = hfi1_twsi_blk_rd(dd, target, i2c_addr, offset,
-				       buff + cnt, rlen);
+				       bp + cnt, rlen);
 		/* Some QSFP's fail first try. Retry as experiment */
 		if (ret && cnt == 0 && ++pass < I2C_MAX_RETRY)
 			continue;
@@ -156,14 +142,11 @@ static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 	ret = cnt;
 
 exit:
-	if (stuck)
-		dd_dev_err(dd, "I2C interface bus stuck non-idle\n");
-
-	if (pass >= I2C_MAX_RETRY && ret)
+	if (ret < 0) {
 		hfi1_dev_porterr(dd, ppd->port,
-				 "I2C failed even retrying\n");
-	else if (pass)
-		hfi1_dev_porterr(dd, ppd->port, "I2C retries: %d\n", pass);
+				 "I2C chain %d read failed, addr 0x%x, offset 0x%x, len %d\n",
+				 target, i2c_addr, orig_offset, len);
+	}
 
 	/* Must wait min 20us between qsfp i2c transactions */
 	udelay(20);
@@ -171,21 +154,35 @@ exit:
 	return ret;
 }
 
+/*
+ * Caller must hold the i2c chain resource.
+ */
 int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
 	     void *bp, int len)
 {
-	struct hfi1_devdata *dd = ppd->dd;
 	int ret;
 
-	ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex);
-	if (!ret) {
-		ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len);
-		mutex_unlock(&dd->qsfp_i2c_mutex);
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "I2C chain %d read interface reset failed\n",
+				 target);
+		return ret;
 	}
 
-	return ret;
+	return __i2c_read(ppd, target, i2c_addr, offset, bp, len);
 }
 
+/*
+ * Write page n, offset m of QSFP memory as defined by SFF 8636
+ * by writing @addr = ((256 * n) + m)
+ *
+ * Caller must hold the i2c chain resource.
+ */
 int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	       int len)
 {
@@ -195,50 +192,81 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	int ret;
 	u8 page;
 
-	ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex);
-	if (ret)
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "QSFP chain %d write interface reset failed\n",
+				 target);
 		return ret;
+	}
 
 	while (count < len) {
 		/*
-		 * Set the qsfp page based on a zero-based addresss
+		 * Set the qsfp page based on a zero-based address
 		 * and a page size of QSFP_PAGESIZE bytes.
 		 */
 		page = (u8)(addr / QSFP_PAGESIZE);
 
-		ret = __i2c_write(ppd, target, QSFP_DEV,
-					QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
+		ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				  QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
 		if (ret != 1) {
-			hfi1_dev_porterr(
-			ppd->dd,
-			ppd->port,
-			"can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret);
+			hfi1_dev_porterr(ppd->dd, ppd->port,
+					 "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
+					 target, ret);
 			ret = -EIO;
 			break;
 		}
 
-		/* truncate write to end of page if crossing page boundary */
 		offset = addr % QSFP_PAGESIZE;
 		nwrite = len - count;
-		if ((offset + nwrite) > QSFP_PAGESIZE)
-			nwrite = QSFP_PAGESIZE - offset;
+		/* truncate write to boundary if crossing boundary */
+		if (((addr % QSFP_RW_BOUNDARY) + nwrite) > QSFP_RW_BOUNDARY)
+			nwrite = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY);
 
-		ret = __i2c_write(ppd, target, QSFP_DEV, offset, bp + count,
-					nwrite);
-		if (ret <= 0)	/* stop on error or nothing read */
+		ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				  offset, bp + count, nwrite);
+		if (ret <= 0)	/* stop on error or nothing written */
 			break;
 
 		count += ret;
 		addr += ret;
 	}
 
-	mutex_unlock(&ppd->dd->qsfp_i2c_mutex);
-
 	if (ret < 0)
 		return ret;
 	return count;
 }
 
+/*
+ * Perform a stand-alone single QSFP write.  Acquire the resource, do the
+ * read, then release the resource.
+ */
+int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		   int len)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u32 resource = qsfp_resource(dd);
+	int ret;
+
+	ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
+	if (ret)
+		return ret;
+	ret = qsfp_write(ppd, target, addr, bp, len);
+	release_chip_resource(dd, resource);
+
+	return ret;
+}
+
+/*
+ * Access page n, offset m of QSFP memory as defined by SFF 8636
+ * by reading @addr = ((256 * n) + m)
+ *
+ * Caller must hold the i2c chain resource.
+ */
 int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	      int len)
 {
@@ -248,9 +276,17 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	int ret;
 	u8 page;
 
-	ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex);
-	if (ret)
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "QSFP chain %d read interface reset failed\n",
+				 target);
 		return ret;
+	}
 
 	while (count < len) {
 		/*
@@ -258,25 +294,26 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 		 * and a page size of QSFP_PAGESIZE bytes.
 		 */
 		page = (u8)(addr / QSFP_PAGESIZE);
-		ret = __i2c_write(ppd, target, QSFP_DEV,
-					QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
+		ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				  QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
 		if (ret != 1) {
-			hfi1_dev_porterr(
-			ppd->dd,
-			ppd->port,
-			"can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret);
+			hfi1_dev_porterr(ppd->dd, ppd->port,
+					 "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
+					 target, ret);
 			ret = -EIO;
 			break;
 		}
 
-		/* truncate read to end of page if crossing page boundary */
 		offset = addr % QSFP_PAGESIZE;
 		nread = len - count;
-		if ((offset + nread) > QSFP_PAGESIZE)
-			nread = QSFP_PAGESIZE - offset;
-
-		ret = __i2c_read(ppd, target, QSFP_DEV, offset, bp + count,
-					nread);
+		/* truncate read to boundary if crossing boundary */
+		if (((addr % QSFP_RW_BOUNDARY) + nread) > QSFP_RW_BOUNDARY)
+			nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY);
+
+		/* QSFPs require a 5-10msec delay after write operations */
+		mdelay(5);
+		ret = __i2c_read(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				 offset, bp + count, nread);
 		if (ret <= 0)	/* stop on error or nothing read */
 			break;
 
@@ -284,17 +321,40 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 		addr += ret;
 	}
 
-	mutex_unlock(&ppd->dd->qsfp_i2c_mutex);
-
 	if (ret < 0)
 		return ret;
 	return count;
 }
 
 /*
+ * Perform a stand-alone single QSFP read.  Acquire the resource, do the
+ * read, then release the resource.
+ */
+int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		  int len)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u32 resource = qsfp_resource(dd);
+	int ret;
+
+	ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
+	if (ret)
+		return ret;
+	ret = qsfp_read(ppd, target, addr, bp, len);
+	release_chip_resource(dd, resource);
+
+	return ret;
+}
+
+/*
  * This function caches the QSFP memory range in 128 byte chunks.
  * As an example, the next byte after address 255 is byte 128 from
  * upper page 01H (if existing) rather than byte 0 from lower page 00H.
+ * Access page n, offset m of QSFP memory as defined by SFF 8636
+ * in the cache by reading byte ((128 * n) + m)
+ * The calls to qsfp_{read,write} in this function correctly handle the
+ * address map difference between this mapping and the mapping implemented
+ * by those functions
  */
 int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
 {
@@ -304,79 +364,84 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
 	u8 *cache = &cp->cache[0];
 
 	/* ensure sane contents on invalid reads, for cable swaps */
-	memset(cache, 0, (QSFP_MAX_NUM_PAGES*128));
-	dd_dev_info(ppd->dd, "%s: called\n", __func__);
+	memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128));
+	spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
+	ppd->qsfp_info.cache_valid = 0;
+	spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
+
 	if (!qsfp_mod_present(ppd)) {
 		ret = -ENODEV;
-		goto bail;
+		goto bail_no_release;
 	}
 
-	ret = qsfp_read(ppd, target, 0, cache, 256);
-	if (ret != 256) {
+	ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
+	if (ret)
+		goto bail_no_release;
+
+	ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE);
+	if (ret != QSFP_PAGESIZE) {
 		dd_dev_info(ppd->dd,
-			"%s: Read of pages 00H failed, expected 256, got %d\n",
-			__func__, ret);
+			    "%s: Page 0 read failed, expected %d, got %d\n",
+			    __func__, QSFP_PAGESIZE, ret);
 		goto bail;
 	}
 
-	if (cache[0] != 0x0C && cache[0] != 0x0D)
-		goto bail;
-
 	/* Is paging enabled? */
 	if (!(cache[2] & 4)) {
-
 		/* Paging enabled, page 03 required */
 		if ((cache[195] & 0xC0) == 0xC0) {
 			/* all */
 			ret = qsfp_read(ppd, target, 384, cache + 256, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 640, cache + 384, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		} else if ((cache[195] & 0x80) == 0x80) {
 			/* only page 2 and 3 */
 			ret = qsfp_read(ppd, target, 640, cache + 384, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		} else if ((cache[195] & 0x40) == 0x40) {
 			/* only page 1 and 3 */
 			ret = qsfp_read(ppd, target, 384, cache + 256, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		} else {
 			/* only page 3 */
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		}
 	}
 
+	release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+
 	spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 	ppd->qsfp_info.cache_valid = 1;
 	ppd->qsfp_info.cache_refresh_required = 0;
@@ -385,7 +450,9 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
 	return 0;
 
 bail:
-	memset(cache, 0, (QSFP_MAX_NUM_PAGES*128));
+	release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+bail_no_release:
+	memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128));
 	return ret;
 }
 
@@ -434,7 +501,7 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
 
 	if (port_num > dd->num_pports || port_num < 1) {
 		dd_dev_info(dd, "%s: Invalid port number %d\n",
-				__func__, port_num);
+			    __func__, port_num);
 		ret = -EINVAL;
 		goto set_zeroes;
 	}
@@ -485,7 +552,6 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
 	lenstr[1] = '\0';
 
 	if (ppd->qsfp_info.cache_valid) {
-
 		if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
 			sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
 
@@ -529,7 +595,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
 
 			memcpy(bin_buff, &cache[bidx], QSFP_DUMP_CHUNK);
 			for (iidx = 0; iidx < QSFP_DUMP_CHUNK; ++iidx) {
-				sofar += scnprintf(buf + sofar, len-sofar,
+				sofar += scnprintf(buf + sofar, len - sofar,
 					" %02X", bin_buff[iidx]);
 			}
 			sofar += scnprintf(buf + sofar, len - sofar, "\n");
diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h
index d30c2a6baa0b..831fe4cf1345 100644
--- a/drivers/staging/rdma/hfi1/qsfp.h
+++ b/drivers/staging/rdma/hfi1/qsfp.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -59,23 +56,28 @@
  * Below are masks for QSFP pins.  Pins are the same for HFI0 and HFI1.
  * _N means asserted low
  */
-#define QSFP_HFI0_I2CCLK    (1 << 0)
-#define QSFP_HFI0_I2CDAT    (1 << 1)
-#define QSFP_HFI0_RESET_N   (1 << 2)
-#define QSFP_HFI0_INT_N	    (1 << 3)
-#define QSFP_HFI0_MODPRST_N (1 << 4)
+#define QSFP_HFI0_I2CCLK    BIT(0)
+#define QSFP_HFI0_I2CDAT    BIT(1)
+#define QSFP_HFI0_RESET_N   BIT(2)
+#define QSFP_HFI0_INT_N	    BIT(3)
+#define QSFP_HFI0_MODPRST_N BIT(4)
 
 /* QSFP is paged at 256 bytes */
 #define QSFP_PAGESIZE 256
+/* Reads/writes cannot cross 128 byte boundaries */
+#define QSFP_RW_BOUNDARY 128
+
+/* number of bytes in i2c offset for QSFP devices */
+#define __QSFP_OFFSET_SIZE 1                           /* num address bytes */
+#define QSFP_OFFSET_SIZE (__QSFP_OFFSET_SIZE << 8)     /* shifted value */
 
 /* Defined fields that Intel requires of qualified cables */
 /* Byte 0 is Identifier, not checked */
 /* Byte 1 is reserved "status MSB" */
-/* Byte 2 is "status LSB" We only care that D2 "Flat Mem" is set. */
-/*
- * Rest of first 128 not used, although 127 is reserved for page select
- * if module is not "Flat memory".
- */
+#define QSFP_TX_CTRL_BYTE_OFFS 86
+#define QSFP_PWR_CTRL_BYTE_OFFS 93
+#define QSFP_CDR_CTRL_BYTE_OFFS 98
+
 #define QSFP_PAGE_SELECT_BYTE_OFFS 127
 /* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */
 #define QSFP_MOD_ID_OFFS 128
@@ -87,7 +89,8 @@
 /* Byte 130 is Connector type. Not Intel req'd */
 /* Bytes 131..138 are Transceiver types, bit maps for various tech, none IB */
 /* Byte 139 is encoding. code 0x01 is 8b10b. Not Intel req'd */
-/* byte 140 is nominal bit-rate, in units of 100Mbits/sec Not Intel req'd */
+/* byte 140 is nominal bit-rate, in units of 100Mbits/sec */
+#define QSFP_NOM_BIT_RATE_100_OFFS 140
 /* Byte 141 is Extended Rate Select. Not Intel req'd */
 /* Bytes 142..145 are lengths for various fiber types. Not Intel req'd */
 /* Byte 146 is length for Copper. Units of 1 meter */
@@ -135,11 +138,18 @@ extern const char *const hfi1_qsfp_devtech[16];
  */
 #define QSFP_ATTEN_OFFS 186
 #define QSFP_ATTEN_LEN 2
-/* Bytes 188,189 are Wavelength tolerance, not Intel req'd */
+/*
+ * Bytes 188,189 are Wavelength tolerance, if optical
+ * If copper, they are attenuation in dB:
+ * Byte 188 is at 12.5 Gb/s, Byte 189 at 25 Gb/s
+ */
+#define QSFP_CU_ATTEN_7G_OFFS 188
+#define QSFP_CU_ATTEN_12G_OFFS 189
 /* Byte 190 is Max Case Temp. Not Intel req'd */
 /* Byte 191 is LSB of sum of bytes 128..190. Not Intel req'd */
 #define QSFP_CC_OFFS 191
-/* Bytes 192..195 are Options implemented in qsfp. Not Intel req'd */
+#define QSFP_EQ_INFO_OFFS 193
+#define QSFP_CDR_INFO_OFFS 194
 /* Bytes 196..211 are Serial Number, String */
 #define QSFP_SN_OFFS 196
 #define QSFP_SN_LEN 16
@@ -150,6 +160,8 @@ extern const char *const hfi1_qsfp_devtech[16];
 #define QSFP_LOT_OFFS 218
 #define QSFP_LOT_LEN 2
 /* Bytes 220, 221 indicate monitoring options, Not Intel req'd */
+/* Byte 222 indicates nominal bitrate in units of 250Mbits/sec */
+#define QSFP_NOM_BIT_RATE_250_OFFS 222
 /* Byte 223 is LSB of sum of bytes 192..222 */
 #define QSFP_CC_EXT_OFFS 223
 
@@ -191,6 +203,7 @@ extern const char *const hfi1_qsfp_devtech[16];
  */
 
 #define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
+#define QSFP_HIGH_PWR(pbyte) (((pbyte) & 3) | 4)
 #define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
 #define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
 
@@ -198,10 +211,12 @@ struct qsfp_data {
 	/* Helps to find our way */
 	struct hfi1_pportdata *ppd;
 	struct work_struct qsfp_work;
-	u8 cache[QSFP_MAX_NUM_PAGES*128];
+	u8 cache[QSFP_MAX_NUM_PAGES * 128];
+	/* protect qsfp data */
 	spinlock_t qsfp_lock;
 	u8 check_interrupt_flags;
-	u8 qsfp_interrupt_functional;
+	u8 reset_needed;
+	u8 limiting_active;
 	u8 cache_valid;
 	u8 cache_refresh_required;
 };
@@ -220,3 +235,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	       int len);
 int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	      int len);
+int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		   int len);
+int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		  int len);
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c
index 6f4a155f7931..0d7e1017f3cb 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/staging/rdma/hfi1/rc.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -49,18 +46,151 @@
  */
 
 #include <linux/io.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
 
 #include "hfi.h"
 #include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 #include "trace.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_RC_##x
 
-static void rc_timeout(unsigned long arg);
+/**
+ * hfi1_add_retry_timer - add/start a retry timer
+ * @qp - the QP
+ *
+ * add a retry timer on the QP
+ */
+static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
+{
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+	qp->s_flags |= RVT_S_TIMER;
+	/* 4.096 usec. * (1 << qp->timeout) */
+	qp->s_timer.expires = jiffies + qp->timeout_jiffies +
+			      rdi->busy_jiffies;
+	add_timer(&qp->s_timer);
+}
+
+/**
+ * hfi1_add_rnr_timer - add/start an rnr timer
+ * @qp - the QP
+ * @to - timeout in usecs
+ *
+ * add an rnr timer on the QP
+ */
+void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	qp->s_flags |= RVT_S_WAIT_RNR;
+	qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
+	add_timer(&priv->s_rnr_timer);
+}
+
+/**
+ * hfi1_mod_retry_timer - mod a retry timer
+ * @qp - the QP
+ *
+ * Modify a potentially already running retry
+ * timer
+ */
+static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
+{
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+	qp->s_flags |= RVT_S_TIMER;
+	/* 4.096 usec. * (1 << qp->timeout) */
+	mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
+		  rdi->busy_jiffies);
+}
+
+/**
+ * hfi1_stop_retry_timer - stop a retry timer
+ * @qp - the QP
+ *
+ * stop a retry timer and return if the timer
+ * had been pending.
+ */
+static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
+{
+	int rval = 0;
+
+	/* Remove QP from retry */
+	if (qp->s_flags & RVT_S_TIMER) {
+		qp->s_flags &= ~RVT_S_TIMER;
+		rval = del_timer(&qp->s_timer);
+	}
+	return rval;
+}
+
+/**
+ * hfi1_stop_rc_timers - stop all timers
+ * @qp - the QP
+ *
+ * stop any pending timers
+ */
+void hfi1_stop_rc_timers(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	/* Remove QP from all timers */
+	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
+		del_timer(&qp->s_timer);
+		del_timer(&priv->s_rnr_timer);
+	}
+}
+
+/**
+ * hfi1_stop_rnr_timer - stop an rnr timer
+ * @qp - the QP
+ *
+ * stop an rnr timer and return if the timer
+ * had been pending.
+ */
+static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
+{
+	int rval = 0;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	/* Remove QP from rnr timer */
+	if (qp->s_flags & RVT_S_WAIT_RNR) {
+		qp->s_flags &= ~RVT_S_WAIT_RNR;
+		rval = del_timer(&priv->s_rnr_timer);
+	}
+	return rval;
+}
+
+/**
+ * hfi1_del_timers_sync - wait for any timeout routines to exit
+ * @qp - the QP
+ */
+void hfi1_del_timers_sync(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	del_timer_sync(&qp->s_timer);
+	del_timer_sync(&priv->s_rnr_timer);
+}
 
-static u32 restart_sge(struct hfi1_sge_state *ss, struct hfi1_swqe *wqe,
+/* only opcode mask for adaptive pio */
+const u32 rc_only_opcode =
+	BIT(OP(SEND_ONLY) & 0x1f) |
+	BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) |
+	BIT(OP(RDMA_READ_REQUEST & 0x1f)) |
+	BIT(OP(ACKNOWLEDGE & 0x1f)) |
+	BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) |
+	BIT(OP(COMPARE_SWAP & 0x1f)) |
+	BIT(OP(FETCH_ADD & 0x1f));
+
+static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
 		       u32 psn, u32 pmtu)
 {
 	u32 len;
@@ -74,38 +204,32 @@ static u32 restart_sge(struct hfi1_sge_state *ss, struct hfi1_swqe *wqe,
 	return wqe->length - len;
 }
 
-static void start_timer(struct hfi1_qp *qp)
-{
-	qp->s_flags |= HFI1_S_TIMER;
-	qp->s_timer.function = rc_timeout;
-	/* 4.096 usec. * (1 << qp->timeout) */
-	qp->s_timer.expires = jiffies + qp->timeout_jiffies;
-	add_timer(&qp->s_timer);
-}
-
 /**
  * make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
  * @dev: the device for this QP
  * @qp: a pointer to the QP
  * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
+ * @ps: the xmit packet state
  *
  * Return 1 if constructed; otherwise, return 0.
  * Note that we are in the responder's side of the QP context.
  * Note the QP s_lock must be held.
  */
-static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
-		       struct hfi1_other_headers *ohdr, u32 pmtu)
+static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
+		       struct hfi1_other_headers *ohdr,
+		       struct hfi1_pkt_state *ps)
 {
-	struct hfi1_ack_entry *e;
+	struct rvt_ack_entry *e;
 	u32 hwords;
 	u32 len;
 	u32 bth0;
 	u32 bth2;
 	int middle = 0;
+	u32 pmtu = qp->pmtu;
+	struct hfi1_qp_priv *priv = qp->priv;
 
 	/* Don't send an ACK if we aren't supposed to. */
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto bail;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -116,7 +240,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
 	case OP(RDMA_READ_RESPONSE_ONLY):
 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 		if (e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		/* FALLTHROUGH */
@@ -133,7 +257,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
 	case OP(ACKNOWLEDGE):
 		/* Check for no next entry in the queue. */
 		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
-			if (qp->s_flags & HFI1_S_ACK_PENDING)
+			if (qp->s_flags & RVT_S_ACK_PENDING)
 				goto normal;
 			goto bail;
 		}
@@ -152,9 +276,9 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
 				goto bail;
 			}
 			/* Copy SGE state in case we need to resend */
-			qp->s_rdma_mr = e->rdma_sge.mr;
-			if (qp->s_rdma_mr)
-				hfi1_get_mr(qp->s_rdma_mr);
+			ps->s_txreq->mr = e->rdma_sge.mr;
+			if (ps->s_txreq->mr)
+				rvt_get_mr(ps->s_txreq->mr);
 			qp->s_ack_rdma_sge.sge = e->rdma_sge;
 			qp->s_ack_rdma_sge.num_sge = 1;
 			qp->s_cur_sge = &qp->s_ack_rdma_sge;
@@ -191,9 +315,9 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
 		/* FALLTHROUGH */
 	case OP(RDMA_READ_RESPONSE_MIDDLE):
 		qp->s_cur_sge = &qp->s_ack_rdma_sge;
-		qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
-		if (qp->s_rdma_mr)
-			hfi1_get_mr(qp->s_rdma_mr);
+		ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
+		if (ps->s_txreq->mr)
+			rvt_get_mr(ps->s_txreq->mr);
 		len = qp->s_ack_rdma_sge.sge.sge_length;
 		if (len > pmtu) {
 			len = pmtu;
@@ -218,7 +342,7 @@ normal:
 		 * (see above).
 		 */
 		qp->s_ack_state = OP(SEND_ONLY);
-		qp->s_flags &= ~HFI1_S_ACK_PENDING;
+		qp->s_flags &= ~RVT_S_ACK_PENDING;
 		qp->s_cur_sge = NULL;
 		if (qp->s_nak_state)
 			ohdr->u.aeth =
@@ -234,20 +358,23 @@ normal:
 	}
 	qp->s_rdma_ack_cnt++;
 	qp->s_hdrwords = hwords;
+	ps->s_txreq->sde = priv->s_sde;
 	qp->s_cur_size = len;
-	hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle);
+	hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
 	return 1;
 
 bail:
 	qp->s_ack_state = OP(ACKNOWLEDGE);
 	/*
 	 * Ensure s_rdma_ack_cnt changes are committed prior to resetting
-	 * HFI1_S_RESP_PENDING
+	 * RVT_S_RESP_PENDING
 	 */
 	smp_wmb();
-	qp->s_flags &= ~(HFI1_S_RESP_PENDING
-				| HFI1_S_ACK_PENDING
-				| HFI1_S_AHG_VALID);
+	qp->s_flags &= ~(RVT_S_RESP_PENDING
+				| RVT_S_ACK_PENDING
+				| RVT_S_AHG_VALID);
 	return 0;
 }
 
@@ -255,14 +382,17 @@ bail:
  * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  * @qp: a pointer to the QP
  *
+ * Assumes s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int hfi1_make_rc_req(struct hfi1_qp *qp)
+int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
 	struct hfi1_other_headers *ohdr;
-	struct hfi1_sge_state *ss;
-	struct hfi1_swqe *wqe;
+	struct rvt_sge_state *ss;
+	struct rvt_swqe *wqe;
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	u32 hwords = 5;
 	u32 len;
@@ -270,51 +400,48 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 	u32 bth2;
 	u32 pmtu = qp->pmtu;
 	char newreq;
-	unsigned long flags;
-	int ret = 0;
 	int middle = 0;
 	int delta;
 
-	ohdr = &qp->s_hdr->ibh.u.oth;
-	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->ibh.u.l.oth;
+	ps->s_txreq = get_txreq(ps->dev, qp);
+	if (IS_ERR(ps->s_txreq))
+		goto bail_no_tx;
 
-	/*
-	 * The lock is needed to synchronize between the sending tasklet,
-	 * the receive interrupt handler, and timeout re-sends.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
+	ohdr = &ps->s_txreq->phdr.hdr.u.oth;
+	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+		ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
 
 	/* Sending responses has higher priority over sending requests. */
-	if ((qp->s_flags & HFI1_S_RESP_PENDING) &&
-	    make_rc_ack(dev, qp, ohdr, pmtu))
-		goto done;
+	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
+	    make_rc_ack(dev, qp, ohdr, ps))
+		return 1;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_iowait.sdma_busy)) {
-			qp->s_flags |= HFI1_S_WAIT_DMA;
+		if (iowait_sdma_pending(&priv->s_iowait)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
 		clear_ahg(qp);
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
 			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 		/* will get called again */
-		goto done;
+		goto done_free_tx;
 	}
 
-	if (qp->s_flags & (HFI1_S_WAIT_RNR | HFI1_S_WAIT_ACK))
+	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
 		goto bail;
 
 	if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
 		if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
-			qp->s_flags |= HFI1_S_WAIT_PSN;
+			qp->s_flags |= RVT_S_WAIT_PSN;
 			goto bail;
 		}
 		qp->s_sending_psn = qp->s_psn;
@@ -322,10 +449,10 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 	}
 
 	/* Send a request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	switch (qp->s_state) {
 	default:
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/*
 		 * Resend an old request or start a new one.
@@ -347,11 +474,11 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			 */
 			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
 			    qp->s_num_rd_atomic) {
-				qp->s_flags |= HFI1_S_WAIT_FENCE;
+				qp->s_flags |= RVT_S_WAIT_FENCE;
 				goto bail;
 			}
-			wqe->psn = qp->s_next_psn;
 			newreq = 1;
+			qp->s_psn = wqe->psn;
 		}
 		/*
 		 * Note that we have to be careful not to modify the
@@ -365,21 +492,19 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 		case IB_WR_SEND:
 		case IB_WR_SEND_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= HFI1_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(SEND_FIRST);
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_SEND)
+			if (wqe->wr.opcode == IB_WR_SEND) {
 				qp->s_state = OP(SEND_ONLY);
-			else {
+			} else {
 				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the BTH */
 				ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -393,14 +518,14 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			break;
 
 		case IB_WR_RDMA_WRITE:
-			if (newreq && !(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 				qp->s_lsn++;
 			/* FALLTHROUGH */
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= HFI1_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
 			ohdr->u.rc.reth.vaddr =
@@ -409,16 +534,14 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			hwords += sizeof(struct ib_reth) / sizeof(u32);
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(RDMA_WRITE_FIRST);
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 				qp->s_state = OP(RDMA_WRITE_ONLY);
-			else {
+			} else {
 				qp->s_state =
 					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after RETH */
@@ -440,19 +563,12 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= HFI1_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 				}
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				/*
-				 * Adjust s_next_psn to count the
-				 * expected number of responses.
-				 */
-				if (len > pmtu)
-					qp->s_next_psn += (len - 1) / pmtu;
-				wqe->lpsn = qp->s_next_psn++;
 			}
 			ohdr->u.rc.reth.vaddr =
 				cpu_to_be64(wqe->rdma_wr.remote_addr);
@@ -477,13 +593,12 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= HFI1_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 				}
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				wqe->lpsn = wqe->psn;
 			}
 			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 				qp->s_state = OP(COMPARE_SWAP);
@@ -526,11 +641,8 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 		}
 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
 			qp->s_psn = wqe->lpsn + 1;
-		else {
+		else
 			qp->s_psn++;
-			if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
-				qp->s_next_psn = qp->s_psn;
-		}
 		break;
 
 	case OP(RDMA_READ_RESPONSE_FIRST):
@@ -550,8 +662,6 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
 		bth2 = mask_psn(qp->s_psn++);
-		if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -559,9 +669,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_SEND)
+		if (wqe->wr.opcode == IB_WR_SEND) {
 			qp->s_state = OP(SEND_LAST);
-		else {
+		} else {
 			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -592,8 +702,6 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 		/* FALLTHROUGH */
 	case OP(RDMA_WRITE_MIDDLE):
 		bth2 = mask_psn(qp->s_psn++);
-		if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -601,9 +709,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 			qp->s_state = OP(RDMA_WRITE_LAST);
-		else {
+		} else {
 			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -648,13 +756,14 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 	delta = delta_psn(bth2, wqe->psn);
 	if (delta && delta % HFI1_PSN_CREDIT == 0)
 		bth2 |= IB_BTH_REQ_ACK;
-	if (qp->s_flags & HFI1_S_SEND_ONE) {
-		qp->s_flags &= ~HFI1_S_SEND_ONE;
-		qp->s_flags |= HFI1_S_WAIT_ACK;
+	if (qp->s_flags & RVT_S_SEND_ONE) {
+		qp->s_flags &= ~RVT_S_SEND_ONE;
+		qp->s_flags |= RVT_S_WAIT_ACK;
 		bth2 |= IB_BTH_REQ_ACK;
 	}
 	qp->s_len -= len;
 	qp->s_hdrwords = hwords;
+	ps->s_txreq->sde = priv->s_sde;
 	qp->s_cur_sge = ss;
 	qp->s_cur_size = len;
 	hfi1_make_ruc_header(
@@ -662,16 +771,25 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 		ohdr,
 		bth0 | (qp->s_state << 24),
 		bth2,
-		middle);
-done:
-	ret = 1;
-	goto unlock;
+		middle,
+		ps);
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+	return 1;
+
+done_free_tx:
+	hfi1_put_txreq(ps->s_txreq);
+	ps->s_txreq = NULL;
+	return 1;
 
 bail:
-	qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
+	hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+	ps->s_txreq = NULL;
+	qp->s_flags &= ~RVT_S_BUSY;
+	qp->s_hdrwords = 0;
+	return 0;
 }
 
 /**
@@ -682,7 +800,7 @@ unlock:
  * Note that RDMA reads and atomics are handled in the
  * send side QP state and tasklet.
  */
-void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
+void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
 		      int is_fecn)
 {
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -700,7 +818,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
 	unsigned long flags;
 
 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
-	if (qp->s_flags & HFI1_S_RESP_PENDING)
+	if (qp->s_flags & RVT_S_RESP_PENDING)
 		goto queue_ack;
 
 	/* Ensure s_rdma_ack_cnt changes are committed */
@@ -763,7 +881,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
 		goto queue_ack;
 	}
 
-	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
+	trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
 
 	/* write the pbc and data */
 	ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords);
@@ -771,13 +889,13 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
 	return;
 
 queue_ack:
-	this_cpu_inc(*ibp->rc_qacks);
+	this_cpu_inc(*ibp->rvp.rc_qacks);
 	spin_lock_irqsave(&qp->s_lock, flags);
-	qp->s_flags |= HFI1_S_ACK_PENDING | HFI1_S_RESP_PENDING;
+	qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
 	qp->s_nak_state = qp->r_nak_state;
 	qp->s_ack_psn = qp->r_ack_psn;
 	if (is_fecn)
-		qp->s_flags |= HFI1_S_ECN;
+		qp->s_flags |= RVT_S_ECN;
 
 	/* Schedule the send tasklet. */
 	hfi1_schedule_send(qp);
@@ -793,10 +911,10 @@ queue_ack:
  * for the given QP.
  * Called at interrupt level with the QP s_lock held.
  */
-static void reset_psn(struct hfi1_qp *qp, u32 psn)
+static void reset_psn(struct rvt_qp *qp, u32 psn)
 {
 	u32 n = qp->s_acked;
-	struct hfi1_swqe *wqe = get_swqe_ptr(qp, n);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
 	u32 opcode;
 
 	qp->s_cur = n;
@@ -819,7 +937,7 @@ static void reset_psn(struct hfi1_qp *qp, u32 psn)
 			n = 0;
 		if (n == qp->s_tail)
 			break;
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		diff = cmp_psn(psn, wqe->psn);
 		if (diff < 0)
 			break;
@@ -865,23 +983,23 @@ static void reset_psn(struct hfi1_qp *qp, u32 psn)
 done:
 	qp->s_psn = psn;
 	/*
-	 * Set HFI1_S_WAIT_PSN as rc_complete() may start the timer
+	 * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
 	 * asynchronously before the send tasklet can get scheduled.
 	 * Doing it in hfi1_make_rc_req() is too late.
 	 */
 	if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
 	    (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
-		qp->s_flags |= HFI1_S_WAIT_PSN;
-	qp->s_flags &= ~HFI1_S_AHG_VALID;
+		qp->s_flags |= RVT_S_WAIT_PSN;
+	qp->s_flags &= ~RVT_S_AHG_VALID;
 }
 
 /*
  * Back up requester to resend the last un-ACKed request.
  * The QP r_lock and s_lock should be held and interrupts disabled.
  */
-static void restart_rc(struct hfi1_qp *qp, u32 psn, int wait)
+static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 {
-	struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	struct hfi1_ibport *ibp;
 
 	if (qp->s_retry == 0) {
@@ -890,42 +1008,44 @@ static void restart_rc(struct hfi1_qp *qp, u32 psn, int wait)
 			qp->s_retry = qp->s_retry_cnt;
 		} else if (qp->s_last == qp->s_acked) {
 			hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-			hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			return;
-		} else /* need to handle delayed completion */
+		} else { /* need to handle delayed completion */
 			return;
-	} else
+		}
+	} else {
 		qp->s_retry--;
+	}
 
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
-		ibp->n_rc_resends++;
+		ibp->rvp.n_rc_resends++;
 	else
-		ibp->n_rc_resends += delta_psn(qp->s_psn, psn);
+		ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
 
-	qp->s_flags &= ~(HFI1_S_WAIT_FENCE | HFI1_S_WAIT_RDMAR |
-			 HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_PSN |
-			 HFI1_S_WAIT_ACK);
+	qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
+			 RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
+			 RVT_S_WAIT_ACK);
 	if (wait)
-		qp->s_flags |= HFI1_S_SEND_ONE;
+		qp->s_flags |= RVT_S_SEND_ONE;
 	reset_psn(qp, psn);
 }
 
 /*
  * This is called from s_timer for missing responses.
  */
-static void rc_timeout(unsigned long arg)
+void hfi1_rc_timeout(unsigned long arg)
 {
-	struct hfi1_qp *qp = (struct hfi1_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	struct hfi1_ibport *ibp;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->r_lock, flags);
 	spin_lock(&qp->s_lock);
-	if (qp->s_flags & HFI1_S_TIMER) {
+	if (qp->s_flags & RVT_S_TIMER) {
 		ibp = to_iport(qp->ibqp.device, qp->port_num);
-		ibp->n_rc_timeouts++;
-		qp->s_flags &= ~HFI1_S_TIMER;
+		ibp->rvp.n_rc_timeouts++;
+		qp->s_flags &= ~RVT_S_TIMER;
 		del_timer(&qp->s_timer);
 		trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1);
 		restart_rc(qp, qp->s_last_psn + 1, 1);
@@ -940,15 +1060,12 @@ static void rc_timeout(unsigned long arg)
  */
 void hfi1_rc_rnr_retry(unsigned long arg)
 {
-	struct hfi1_qp *qp = (struct hfi1_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (qp->s_flags & HFI1_S_WAIT_RNR) {
-		qp->s_flags &= ~HFI1_S_WAIT_RNR;
-		del_timer(&qp->s_timer);
-		hfi1_schedule_send(qp);
-	}
+	hfi1_stop_rnr_timer(qp);
+	hfi1_schedule_send(qp);
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
@@ -956,14 +1073,14 @@ void hfi1_rc_rnr_retry(unsigned long arg)
  * Set qp->s_sending_psn to the next PSN after the given one.
  * This would be psn+1 except when RDMA reads are present.
  */
-static void reset_sending_psn(struct hfi1_qp *qp, u32 psn)
+static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
 {
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	u32 n = qp->s_last;
 
 	/* Find the work request corresponding to the given PSN. */
 	for (;;) {
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		if (cmp_psn(psn, wqe->lpsn) <= 0) {
 			if (wqe->wr.opcode == IB_WR_RDMA_READ)
 				qp->s_sending_psn = wqe->lpsn + 1;
@@ -981,16 +1098,16 @@ static void reset_sending_psn(struct hfi1_qp *qp, u32 psn)
 /*
  * This should be called with the QP s_lock held and interrupts disabled.
  */
-void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr)
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr)
 {
 	struct hfi1_other_headers *ohdr;
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	struct ib_wc wc;
 	unsigned i;
 	u32 opcode;
 	u32 psn;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 
 	/* Find out where the BTH is */
@@ -1016,22 +1133,30 @@ void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr)
 	 */
 	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
 	    !(qp->s_flags &
-		(HFI1_S_TIMER | HFI1_S_WAIT_RNR | HFI1_S_WAIT_PSN)) &&
-		(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK))
-		start_timer(qp);
+		(RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
+		(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
+		hfi1_add_retry_timer(qp);
 
 	while (qp->s_last != qp->s_acked) {
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		u32 s_last;
+
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
 		    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 			break;
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct hfi1_sge *sge = &wqe->sg_list[i];
+			struct rvt_sge *sge = &wqe->sg_list[i];
 
-			hfi1_put_mr(sge->mr);
+			rvt_put_mr(sge->mr);
 		}
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
@@ -1039,26 +1164,24 @@ void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr)
 			wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
-			hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	}
 	/*
 	 * If we were waiting for sends to complete before re-sending,
 	 * and they are now complete, restart sending.
 	 */
 	trace_hfi1_rc_sendcomplete(qp, psn);
-	if (qp->s_flags & HFI1_S_WAIT_PSN &&
+	if (qp->s_flags & RVT_S_WAIT_PSN &&
 	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
-		qp->s_flags &= ~HFI1_S_WAIT_PSN;
+		qp->s_flags &= ~RVT_S_WAIT_PSN;
 		qp->s_sending_psn = qp->s_psn;
 		qp->s_sending_hpsn = qp->s_psn - 1;
 		hfi1_schedule_send(qp);
 	}
 }
 
-static inline void update_last_psn(struct hfi1_qp *qp, u32 psn)
+static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
 {
 	qp->s_last_psn = psn;
 }
@@ -1068,9 +1191,9 @@ static inline void update_last_psn(struct hfi1_qp *qp, u32 psn)
  * This is similar to hfi1_send_complete but has to check to be sure
  * that the SGEs are not being referenced if the SWQE is being resent.
  */
-static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
-					  struct hfi1_swqe *wqe,
-					  struct hfi1_ibport *ibp)
+static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+					 struct rvt_swqe *wqe,
+					 struct hfi1_ibport *ibp)
 {
 	struct ib_wc wc;
 	unsigned i;
@@ -1082,13 +1205,21 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
 	 */
 	if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
 	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+		u32 s_last;
+
 		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct hfi1_sge *sge = &wqe->sg_list[i];
+			struct rvt_sge *sge = &wqe->sg_list[i];
 
-			hfi1_put_mr(sge->mr);
+			rvt_put_mr(sge->mr);
 		}
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
@@ -1096,14 +1227,12 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
 			wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
-			hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	} else {
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
-		this_cpu_inc(*ibp->rc_delayed_comp);
+		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
 		/*
 		 * If send progress not running attempt to progress
 		 * SDMA queue.
@@ -1131,7 +1260,7 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
 		if (++qp->s_cur >= qp->s_size)
 			qp->s_cur = 0;
 		qp->s_acked = qp->s_cur;
-		wqe = get_swqe_ptr(qp, qp->s_cur);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 		if (qp->s_acked != qp->s_tail) {
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = wqe->psn;
@@ -1141,7 +1270,7 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
 			qp->s_acked = 0;
 		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
 			qp->s_draining = 0;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	}
 	return wqe;
 }
@@ -1157,21 +1286,16 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
  * May be called at interrupt level, with the QP s_lock held.
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  */
-static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
+static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 		     u64 val, struct hfi1_ctxtdata *rcd)
 {
 	struct hfi1_ibport *ibp;
 	enum ib_wc_status status;
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	int ret = 0;
 	u32 ack_psn;
 	int diff;
-
-	/* Remove QP from retry timer */
-	if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
-	}
+	unsigned long to;
 
 	/*
 	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
@@ -1182,7 +1306,7 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 	ack_psn = psn;
 	if (aeth >> 29)
 		ack_psn--;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 
 	/*
@@ -1200,7 +1324,7 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
 		    diff == 0) {
 			ret = 1;
-			goto bail;
+			goto bail_stop;
 		}
 		/*
 		 * If this request is a RDMA read or atomic, and the ACK is
@@ -1217,11 +1341,11 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
 		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
 			/* Retry this request. */
-			if (!(qp->r_flags & HFI1_R_RDMAR_SEQ)) {
-				qp->r_flags |= HFI1_R_RDMAR_SEQ;
+			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
+				qp->r_flags |= RVT_R_RDMAR_SEQ;
 				restart_rc(qp, qp->s_last_psn + 1, 0);
 				if (list_empty(&qp->rspwait)) {
-					qp->r_flags |= HFI1_R_RSP_SEND;
+					qp->r_flags |= RVT_R_RSP_SEND;
 					atomic_inc(&qp->refcount);
 					list_add_tail(&qp->rspwait,
 						      &rcd->qp_wait_list);
@@ -1231,7 +1355,7 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 			 * No need to process the ACK/NAK since we are
 			 * restarting an earlier request.
 			 */
-			goto bail;
+			goto bail_stop;
 		}
 		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
@@ -1244,14 +1368,14 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
 			qp->s_num_rd_atomic--;
 			/* Restart sending task if fence is complete */
-			if ((qp->s_flags & HFI1_S_WAIT_FENCE) &&
+			if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
 			    !qp->s_num_rd_atomic) {
-				qp->s_flags &= ~(HFI1_S_WAIT_FENCE |
-						 HFI1_S_WAIT_ACK);
+				qp->s_flags &= ~(RVT_S_WAIT_FENCE |
+						 RVT_S_WAIT_ACK);
 				hfi1_schedule_send(qp);
-			} else if (qp->s_flags & HFI1_S_WAIT_RDMAR) {
-				qp->s_flags &= ~(HFI1_S_WAIT_RDMAR |
-						 HFI1_S_WAIT_ACK);
+			} else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
+				qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
+						 RVT_S_WAIT_ACK);
 				hfi1_schedule_send(qp);
 			}
 		}
@@ -1262,40 +1386,43 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 
 	switch (aeth >> 29) {
 	case 0:         /* ACK */
-		this_cpu_inc(*ibp->rc_acks);
+		this_cpu_inc(*ibp->rvp.rc_acks);
 		if (qp->s_acked != qp->s_tail) {
 			/*
 			 * We are expecting more ACKs so
-			 * reset the re-transmit timer.
+			 * mod the retry timer.
 			 */
-			start_timer(qp);
+			hfi1_mod_retry_timer(qp);
 			/*
 			 * We can stop re-sending the earlier packets and
 			 * continue with the next packet the receiver wants.
 			 */
 			if (cmp_psn(qp->s_psn, psn) <= 0)
 				reset_psn(qp, psn + 1);
-		} else if (cmp_psn(qp->s_psn, psn) <= 0) {
-			qp->s_state = OP(SEND_LAST);
-			qp->s_psn = psn + 1;
+		} else {
+			/* No more acks - kill all timers */
+			hfi1_stop_rc_timers(qp);
+			if (cmp_psn(qp->s_psn, psn) <= 0) {
+				qp->s_state = OP(SEND_LAST);
+				qp->s_psn = psn + 1;
+			}
 		}
-		if (qp->s_flags & HFI1_S_WAIT_ACK) {
-			qp->s_flags &= ~HFI1_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			hfi1_schedule_send(qp);
 		}
 		hfi1_get_credit(qp, aeth);
 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
 		qp->s_retry = qp->s_retry_cnt;
 		update_last_psn(qp, psn);
-		ret = 1;
-		goto bail;
+		return 1;
 
 	case 1:         /* RNR NAK */
-		ibp->n_rnr_naks++;
+		ibp->rvp.n_rnr_naks++;
 		if (qp->s_acked == qp->s_tail)
-			goto bail;
-		if (qp->s_flags & HFI1_S_WAIT_RNR)
-			goto bail;
+			goto bail_stop;
+		if (qp->s_flags & RVT_S_WAIT_RNR)
+			goto bail_stop;
 		if (qp->s_rnr_retry == 0) {
 			status = IB_WC_RNR_RETRY_EXC_ERR;
 			goto class_b;
@@ -1306,28 +1433,27 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 
-		ibp->n_rc_resends += delta_psn(qp->s_psn, psn);
+		ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
 
 		reset_psn(qp, psn);
 
-		qp->s_flags &= ~(HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_ACK);
-		qp->s_flags |= HFI1_S_WAIT_RNR;
-		qp->s_timer.function = hfi1_rc_rnr_retry;
-		qp->s_timer.expires = jiffies + usecs_to_jiffies(
+		qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
+		hfi1_stop_rc_timers(qp);
+		to =
 			ib_hfi1_rnr_table[(aeth >> HFI1_AETH_CREDIT_SHIFT) &
-					   HFI1_AETH_CREDIT_MASK]);
-		add_timer(&qp->s_timer);
-		goto bail;
+					   HFI1_AETH_CREDIT_MASK];
+		hfi1_add_rnr_timer(qp, to);
+		return 0;
 
 	case 3:         /* NAK */
 		if (qp->s_acked == qp->s_tail)
-			goto bail;
+			goto bail_stop;
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 		switch ((aeth >> HFI1_AETH_CREDIT_SHIFT) &
 			HFI1_AETH_CREDIT_MASK) {
 		case 0: /* PSN sequence error */
-			ibp->n_seq_naks++;
+			ibp->rvp.n_seq_naks++;
 			/*
 			 * Back up to the responder's expected PSN.
 			 * Note that we might get a NAK in the middle of an
@@ -1340,21 +1466,21 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 
 		case 1: /* Invalid Request */
 			status = IB_WC_REM_INV_REQ_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 
 		case 2: /* Remote Access Error */
 			status = IB_WC_REM_ACCESS_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 
 		case 3: /* Remote Operation Error */
 			status = IB_WC_REM_OP_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 class_b:
 			if (qp->s_last == qp->s_acked) {
 				hfi1_send_complete(qp, wqe, status);
-				hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			}
 			break;
 
@@ -1364,15 +1490,16 @@ class_b:
 		}
 		qp->s_retry = qp->s_retry_cnt;
 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-		goto bail;
+		goto bail_stop;
 
 	default:                /* 2: reserved */
 reserved:
 		/* Ignore reserved NAK codes. */
-		goto bail;
+		goto bail_stop;
 	}
-
-bail:
+	return ret;
+bail_stop:
+	hfi1_stop_rc_timers(qp);
 	return ret;
 }
 
@@ -1380,18 +1507,15 @@ bail:
  * We have seen an out of sequence RDMA read middle or last packet.
  * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
  */
-static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn,
+static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
 			 struct hfi1_ctxtdata *rcd)
 {
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 
 	/* Remove QP from retry timer */
-	if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
-	}
+	hfi1_stop_rc_timers(qp);
 
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 
 	while (cmp_psn(psn, wqe->lpsn) > 0) {
 		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
@@ -1401,11 +1525,11 @@ static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn,
 		wqe = do_rc_completion(qp, wqe, ibp);
 	}
 
-	ibp->n_rdma_seq++;
-	qp->r_flags |= HFI1_R_RDMAR_SEQ;
+	ibp->rvp.n_rdma_seq++;
+	qp->r_flags |= RVT_R_RDMAR_SEQ;
 	restart_rc(qp, qp->s_last_psn + 1, 0);
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= HFI1_R_RSP_SEND;
+		qp->r_flags |= RVT_R_RSP_SEND;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
@@ -1429,11 +1553,11 @@ static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn,
  */
 static void rc_rcv_resp(struct hfi1_ibport *ibp,
 			struct hfi1_other_headers *ohdr,
-			void *data, u32 tlen, struct hfi1_qp *qp,
+			void *data, u32 tlen, struct rvt_qp *qp,
 			u32 opcode, u32 psn, u32 hdrsize, u32 pmtu,
 			struct hfi1_ctxtdata *rcd)
 {
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	enum ib_wc_status status;
 	unsigned long flags;
 	int diff;
@@ -1446,7 +1570,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
 	trace_hfi1_rc_ack(qp, psn);
 
 	/* Ignore invalid responses. */
-	if (cmp_psn(psn, qp->s_next_psn) >= 0)
+	smp_read_barrier_depends(); /* see post_one_send */
+	if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
 		goto ack_done;
 
 	/* Ignore duplicate responses. */
@@ -1465,15 +1590,15 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
 	 * Skip everything other than the PSN we expect, if we are waiting
 	 * for a reply to a restarted RDMA read or atomic op.
 	 */
-	if (qp->r_flags & HFI1_R_RDMAR_SEQ) {
+	if (qp->r_flags & RVT_R_RDMAR_SEQ) {
 		if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
 			goto ack_done;
-		qp->r_flags &= ~HFI1_R_RDMAR_SEQ;
+		qp->r_flags &= ~RVT_R_RDMAR_SEQ;
 	}
 
 	if (unlikely(qp->s_acked == qp->s_tail))
 		goto ack_done;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	status = IB_WC_SUCCESS;
 
 	switch (opcode) {
@@ -1484,14 +1609,15 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
 		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
 			__be32 *p = ohdr->u.at.atomic_ack_eth;
 
-			val = ((u64) be32_to_cpu(p[0]) << 32) |
+			val = ((u64)be32_to_cpu(p[0]) << 32) |
 				be32_to_cpu(p[1]);
-		} else
+		} else {
 			val = 0;
+		}
 		if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
 			goto ack_done;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
 			goto ack_op_err;
 		/*
@@ -1519,10 +1645,10 @@ read_middle:
 		 * We got a response so update the timeout.
 		 * 4.096 usec. * (1 << qp->timeout)
 		 */
-		qp->s_flags |= HFI1_S_TIMER;
+		qp->s_flags |= RVT_S_TIMER;
 		mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
-		if (qp->s_flags & HFI1_S_WAIT_ACK) {
-			qp->s_flags &= ~HFI1_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			hfi1_schedule_send(qp);
 		}
 
@@ -1536,7 +1662,7 @@ read_middle:
 		qp->s_rdma_read_len -= pmtu;
 		update_last_psn(qp, psn);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
-		hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
+		hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0, 0);
 		goto bail;
 
 	case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1556,7 +1682,7 @@ read_middle:
 		 * have to be careful to copy the data to the right
 		 * location.
 		 */
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
 						  wqe, psn, pmtu);
 		goto read_last;
@@ -1580,9 +1706,9 @@ read_last:
 		if (unlikely(tlen != qp->s_rdma_read_len))
 			goto ack_len_err;
 		aeth = be32_to_cpu(ohdr->u.aeth);
-		hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
+		hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0);
 		WARN_ON(qp->s_rdma_read_sge.num_sge);
-		(void) do_rc_ack(qp, aeth, psn,
+		(void)do_rc_ack(qp, aeth, psn,
 				 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
 		goto ack_done;
 	}
@@ -1600,7 +1726,7 @@ ack_len_err:
 ack_err:
 	if (qp->s_last == qp->s_acked) {
 		hfi1_send_complete(qp, wqe, status);
-		hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 	}
 ack_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1609,22 +1735,24 @@ bail:
 }
 
 static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
-				  struct hfi1_qp *qp)
+				  struct rvt_qp *qp)
 {
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= HFI1_R_RSP_DEFERED_ACK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
 }
 
-static inline void rc_cancel_ack(struct hfi1_qp *qp)
+static inline void rc_cancel_ack(struct rvt_qp *qp)
 {
-	qp->r_adefered = 0;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	priv->r_adefered = 0;
 	if (list_empty(&qp->rspwait))
 		return;
 	list_del_init(&qp->rspwait);
-	qp->r_flags &= ~HFI1_R_RSP_DEFERED_ACK;
+	qp->r_flags &= ~RVT_R_RSP_NAK;
 	if (atomic_dec_and_test(&qp->refcount))
 		wake_up(&qp->wait);
 }
@@ -1645,11 +1773,11 @@ static inline void rc_cancel_ack(struct hfi1_qp *qp)
  * schedule a response to be sent.
  */
 static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
-			struct hfi1_qp *qp, u32 opcode, u32 psn, int diff,
-			struct hfi1_ctxtdata *rcd)
+				 struct rvt_qp *qp, u32 opcode, u32 psn,
+				 int diff, struct hfi1_ctxtdata *rcd)
 {
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	struct hfi1_ack_entry *e;
+	struct rvt_ack_entry *e;
 	unsigned long flags;
 	u8 i, prev;
 	int old_req;
@@ -1662,7 +1790,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 		 * Don't queue the NAK if we already sent one.
 		 */
 		if (!qp->r_nak_state) {
-			ibp->n_rc_seqnak++;
+			ibp->rvp.n_rc_seqnak++;
 			qp->r_nak_state = IB_NAK_PSN_ERROR;
 			/* Use the expected PSN. */
 			qp->r_ack_psn = qp->r_psn;
@@ -1694,7 +1822,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 	 */
 	e = NULL;
 	old_req = 1;
-	ibp->n_rc_dupreq++;
+	ibp->rvp.n_rc_dupreq++;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -1747,7 +1875,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 		if (unlikely(offset + len != e->rdma_sge.sge_length))
 			goto unlock_done;
 		if (e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		if (len != 0) {
@@ -1755,8 +1883,8 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 			u64 vaddr = be64_to_cpu(reth->vaddr);
 			int ok;
 
-			ok = hfi1_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
-					  IB_ACCESS_REMOTE_READ);
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+					 IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto unlock_done;
 		} else {
@@ -1778,7 +1906,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 		 * or the send tasklet is already backed up to send an
 		 * earlier entry, we can ignore this request.
 		 */
-		if (!e || e->opcode != (u8) opcode || old_req)
+		if (!e || e->opcode != (u8)opcode || old_req)
 			goto unlock_done;
 		qp->s_tail_ack_queue = prev;
 		break;
@@ -1810,7 +1938,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 		break;
 	}
 	qp->s_ack_state = OP(ACKNOWLEDGE);
-	qp->s_flags |= HFI1_S_RESP_PENDING;
+	qp->s_flags |= RVT_S_RESP_PENDING;
 	qp->r_nak_state = 0;
 	hfi1_schedule_send(qp);
 
@@ -1823,13 +1951,13 @@ send_ack:
 	return 0;
 }
 
-void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err)
+void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
 {
 	unsigned long flags;
 	int lastwqe;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	lastwqe = hfi1_error_qp(qp, err);
+	lastwqe = rvt_error_qp(qp, err);
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
 	if (lastwqe) {
@@ -1842,7 +1970,7 @@ void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err)
 	}
 }
 
-static inline void update_ack_queue(struct hfi1_qp *qp, unsigned n)
+static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
 {
 	unsigned next;
 
@@ -1864,14 +1992,14 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
 
 	spin_lock_irqsave(&ppd->cc_log_lock, flags);
 
-	ppd->threshold_cong_event_map[sl/8] |= 1 << (sl % 8);
+	ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8);
 	ppd->threshold_event_counter++;
 
 	cc_event = &ppd->cc_events[ppd->cc_log_idx++];
 	if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS)
 		ppd->cc_log_idx = 0;
-	cc_event->lqpn = lqpn & HFI1_QPN_MASK;
-	cc_event->rqpn = rqpn & HFI1_QPN_MASK;
+	cc_event->lqpn = lqpn & RVT_QPN_MASK;
+	cc_event->rqpn = rqpn & RVT_QPN_MASK;
 	cc_event->sl = sl;
 	cc_event->svc_type = svc_type;
 	cc_event->rlid = rlid;
@@ -1897,7 +2025,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL)
+	if (!cc_state)
 		return;
 
 	/*
@@ -1957,7 +2085,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct hfi1_other_headers *ohdr = packet->ohdr;
@@ -1972,6 +2100,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
 	unsigned long flags;
 	u32 bth1;
 	int ret, is_fecn = 0;
+	int copy_last = 0;
 
 	bth0 = be32_to_cpu(ohdr->bth[0]);
 	if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
@@ -2054,13 +2183,13 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
 		break;
 	}
 
-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & HFI1_R_COMM_EST))
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
 		qp_comm_est(qp);
 
 	/* OK, process the packet. */
 	switch (opcode) {
 	case OP(SEND_FIRST):
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2076,12 +2205,12 @@ send_middle:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto nack_inv;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1);
+		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
 		break;
 
 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
 		/* consume RWQE */
-		ret = hfi1_get_rwqe(qp, 1);
+		ret = hfi1_rvt_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2090,7 +2219,7 @@ send_middle:
 
 	case OP(SEND_ONLY):
 	case OP(SEND_ONLY_WITH_IMMEDIATE):
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2104,8 +2233,10 @@ send_last_imm:
 		wc.ex.imm_data = ohdr->u.imm_data;
 		wc.wc_flags = IB_WC_WITH_IMM;
 		goto send_last;
-	case OP(SEND_LAST):
 	case OP(RDMA_WRITE_LAST):
+		copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
+		/* fall through */
+	case OP(SEND_LAST):
 no_immediate_data:
 		wc.wc_flags = 0;
 		wc.ex.imm_data = 0;
@@ -2121,10 +2252,10 @@ send_last:
 		wc.byte_len = tlen + qp->r_rcv_len;
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto nack_inv;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
-		hfi1_put_ss(&qp->r_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
+		rvt_put_ss(&qp->r_sge);
 		qp->r_msn++;
-		if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+		if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 			break;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -2154,12 +2285,14 @@ send_last:
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
-		hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-			      (bth0 & IB_BTH_SOLICITED) != 0);
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+			     (bth0 & IB_BTH_SOLICITED) != 0);
 		break;
 
-	case OP(RDMA_WRITE_FIRST):
 	case OP(RDMA_WRITE_ONLY):
+		copy_last = 1;
+		/* fall through */
+	case OP(RDMA_WRITE_FIRST):
 	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
 			goto nack_inv;
@@ -2174,8 +2307,8 @@ send_last:
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = hfi1_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
-					  rkey, IB_ACCESS_REMOTE_WRITE);
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
+					 rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 				goto nack_acc;
 			qp->r_sge.num_sge = 1;
@@ -2190,7 +2323,7 @@ send_last:
 			goto send_middle;
 		else if (opcode == OP(RDMA_WRITE_ONLY))
 			goto no_immediate_data;
-		ret = hfi1_get_rwqe(qp, 1);
+		ret = hfi1_rvt_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2200,7 +2333,7 @@ send_last:
 		goto send_last;
 
 	case OP(RDMA_READ_REQUEST): {
-		struct hfi1_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u32 len;
 		u8 next;
 
@@ -2218,7 +2351,7 @@ send_last:
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		reth = &ohdr->u.rc.reth;
@@ -2229,8 +2362,8 @@ send_last:
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = hfi1_rkey_ok(qp, &e->rdma_sge, len, vaddr,
-					  rkey, IB_ACCESS_REMOTE_READ);
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+					 rkey, IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto nack_acc_unlck;
 			/*
@@ -2261,7 +2394,7 @@ send_last:
 		qp->r_head_ack_queue = next;
 
 		/* Schedule the send tasklet. */
-		qp->s_flags |= HFI1_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		hfi1_schedule_send(qp);
 
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2273,7 +2406,7 @@ send_last:
 	case OP(COMPARE_SWAP):
 	case OP(FETCH_ADD): {
 		struct ib_atomic_eth *ateth;
-		struct hfi1_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u64 vaddr;
 		atomic64_t *maddr;
 		u64 sdata;
@@ -2293,29 +2426,29 @@ send_last:
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		ateth = &ohdr->u.atomic_eth;
-		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+		vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) |
 			be32_to_cpu(ateth->vaddr[1]);
 		if (unlikely(vaddr & (sizeof(u64) - 1)))
 			goto nack_inv_unlck;
 		rkey = be32_to_cpu(ateth->rkey);
 		/* Check rkey & NAK */
-		if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
-					   vaddr, rkey,
-					   IB_ACCESS_REMOTE_ATOMIC)))
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+					  vaddr, rkey,
+					  IB_ACCESS_REMOTE_ATOMIC)))
 			goto nack_acc_unlck;
 		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
 		sdata = be64_to_cpu(ateth->swap_data);
 		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
-			(u64) atomic64_add_return(sdata, maddr) - sdata :
-			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+			(u64)atomic64_add_return(sdata, maddr) - sdata :
+			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
 				      be64_to_cpu(ateth->compare_data),
 				      sdata);
-		hfi1_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		e->opcode = opcode;
 		e->sent = 0;
@@ -2328,7 +2461,7 @@ send_last:
 		qp->r_head_ack_queue = next;
 
 		/* Schedule the send tasklet. */
-		qp->s_flags |= HFI1_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		hfi1_schedule_send(qp);
 
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2347,11 +2480,13 @@ send_last:
 	qp->r_nak_state = 0;
 	/* Send an ACK if requested or required. */
 	if (psn & IB_BTH_REQ_ACK) {
+		struct hfi1_qp_priv *priv = qp->priv;
+
 		if (packet->numpkt == 0) {
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
-		if (qp->r_adefered >= HFI1_PSN_CREDIT) {
+		if (priv->r_adefered >= HFI1_PSN_CREDIT) {
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
@@ -2359,13 +2494,13 @@ send_last:
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
-		qp->r_adefered++;
+		priv->r_adefered++;
 		rc_defered_ack(rcd, qp);
 	}
 	return;
 
 rnr_nak:
-	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+	qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK;
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue RNR NAK for later */
 	rc_defered_ack(rcd, qp);
@@ -2403,7 +2538,7 @@ void hfi1_rc_hdrerr(
 	struct hfi1_ctxtdata *rcd,
 	struct hfi1_ib_header *hdr,
 	u32 rcv_flags,
-	struct hfi1_qp *qp)
+	struct rvt_qp *qp)
 {
 	int has_grh = rcv_flags & HFI1_HAS_GRH;
 	struct hfi1_other_headers *ohdr;
@@ -2428,7 +2563,7 @@ void hfi1_rc_hdrerr(
 	if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
 		diff = delta_psn(psn, qp->r_psn);
 		if (!qp->r_nak_state && diff >= 0) {
-			ibp->n_rc_seqnak++;
+			ibp->rvp.n_rc_seqnak++;
 			qp->r_nak_state = IB_NAK_PSN_ERROR;
 			/* Use the expected PSN. */
 			qp->r_ack_psn = qp->r_psn;
diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c
index 4a91975b68d7..08813cdbd475 100644
--- a/drivers/staging/rdma/hfi1/ruc.c
+++ b/drivers/staging/rdma/hfi1/ruc.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -53,7 +50,8 @@
 #include "hfi.h"
 #include "mad.h"
 #include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
+#include "trace.h"
 
 /*
  * Convert the AETH RNR timeout code into the number of microseconds.
@@ -97,16 +95,16 @@ const u32 ib_hfi1_rnr_table[32] = {
  * Validate a RWQE and fill in the SGE state.
  * Return 1 if OK.
  */
-static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe)
+static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
 {
 	int i, j, ret;
 	struct ib_wc wc;
-	struct hfi1_lkey_table *rkt;
-	struct hfi1_pd *pd;
-	struct hfi1_sge_state *ss;
+	struct rvt_lkey_table *rkt;
+	struct rvt_pd *pd;
+	struct rvt_sge_state *ss;
 
-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
+	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
 	ss = &qp->r_sge;
 	ss->sg_list = qp->r_sg_list;
 	qp->r_len = 0;
@@ -114,8 +112,8 @@ static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe)
 		if (wqe->sg_list[i].length == 0)
 			continue;
 		/* Check LKEY */
-		if (!hfi1_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
-				  &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+		if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
 			goto bad_lkey;
 		qp->r_len += wqe->sg_list[i].length;
 		j++;
@@ -127,9 +125,9 @@ static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe)
 
 bad_lkey:
 	while (j) {
-		struct hfi1_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
 
-		hfi1_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	ss->num_sge = 0;
 	memset(&wc, 0, sizeof(wc));
@@ -138,14 +136,14 @@ bad_lkey:
 	wc.opcode = IB_WC_RECV;
 	wc.qp = &qp->ibqp;
 	/* Signal solicited completion event. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
 	ret = 0;
 bail:
 	return ret;
 }
 
 /**
- * hfi1_get_rwqe - copy the next RWQE into the QP's RWQE
+ * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
  * @qp: the QP
  * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
  *
@@ -154,19 +152,19 @@ bail:
  *
  * Can be called from interrupt level.
  */
-int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
+int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
 {
 	unsigned long flags;
-	struct hfi1_rq *rq;
-	struct hfi1_rwq *wq;
-	struct hfi1_srq *srq;
-	struct hfi1_rwqe *wqe;
+	struct rvt_rq *rq;
+	struct rvt_rwq *wq;
+	struct rvt_srq *srq;
+	struct rvt_rwqe *wqe;
 	void (*handler)(struct ib_event *, void *);
 	u32 tail;
 	int ret;
 
 	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
+		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
 		handler = srq->ibsrq.event_handler;
 		rq = &srq->rq;
 	} else {
@@ -176,7 +174,7 @@ int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
 	}
 
 	spin_lock_irqsave(&rq->lock, flags);
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) {
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 		ret = 0;
 		goto unlock;
 	}
@@ -192,7 +190,7 @@ int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
 	}
 	/* Make sure entry is read after head index is read. */
 	smp_rmb();
-	wqe = get_rwqe_ptr(rq, tail);
+	wqe = rvt_get_rwqe_ptr(rq, tail);
 	/*
 	 * Even though we update the tail index in memory, the verbs
 	 * consumer is not supposed to post more entries until a
@@ -208,7 +206,7 @@ int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
 	qp->r_wr_id = wqe->wr_id;
 
 	ret = 1;
-	set_bit(HFI1_R_WRID_VALID, &qp->r_aflags);
+	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
 	if (handler) {
 		u32 n;
 
@@ -265,7 +263,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
  * The s_lock will be acquired around the hfi1_migrate_qp() call.
  */
 int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
-		       int has_grh, struct hfi1_qp *qp, u32 bth0)
+		       int has_grh, struct rvt_qp *qp, u32 bth0)
 {
 	__be64 guid;
 	unsigned long flags;
@@ -279,11 +277,13 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
 			if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
 				goto err;
 			guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+				    guid))
 				goto err;
-			if (!gid_ok(&hdr->u.l.grh.sgid,
-			    qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
-			    qp->alt_ah_attr.grh.dgid.global.interface_id))
+			if (!gid_ok(
+				&hdr->u.l.grh.sgid,
+				qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
+				qp->alt_ah_attr.grh.dgid.global.interface_id))
 				goto err;
 		}
 		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
@@ -312,11 +312,13 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
 				goto err;
 			guid = get_sguid(ibp,
 					 qp->remote_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+				    guid))
 				goto err;
-			if (!gid_ok(&hdr->u.l.grh.sgid,
-			    qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
-			    qp->remote_ah_attr.grh.dgid.global.interface_id))
+			if (!gid_ok(
+			     &hdr->u.l.grh.sgid,
+			     qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
+			     qp->remote_ah_attr.grh.dgid.global.interface_id))
 				goto err;
 		}
 		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
@@ -355,12 +357,12 @@ err:
  * receive interrupts since this is a connected protocol and all packets
  * will pass through here.
  */
-static void ruc_loopback(struct hfi1_qp *sqp)
+static void ruc_loopback(struct rvt_qp *sqp)
 {
 	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
-	struct hfi1_qp *qp;
-	struct hfi1_swqe *wqe;
-	struct hfi1_sge *sge;
+	struct rvt_qp *qp;
+	struct rvt_swqe *wqe;
+	struct rvt_sge *sge;
 	unsigned long flags;
 	struct ib_wc wc;
 	u64 sdata;
@@ -368,6 +370,8 @@ static void ruc_loopback(struct hfi1_qp *sqp)
 	enum ib_wc_status send_status;
 	int release;
 	int ret;
+	int copy_last = 0;
+	u32 to;
 
 	rcu_read_lock();
 
@@ -375,25 +379,27 @@ static void ruc_loopback(struct hfi1_qp *sqp)
 	 * Note that we check the responder QP state after
 	 * checking the requester's state.
 	 */
-	qp = hfi1_lookup_qpn(ibp, sqp->remote_qpn);
+	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
+			    sqp->remote_qpn);
 
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
 	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT)) ||
-	    !(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		goto unlock;
 
-	sqp->s_flags |= HFI1_S_BUSY;
+	sqp->s_flags |= RVT_S_BUSY;
 
 again:
-	if (sqp->s_last == sqp->s_head)
+	smp_read_barrier_depends(); /* see post_one_send() */
+	if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
 		goto clr_busy;
-	wqe = get_swqe_ptr(sqp, sqp->s_last);
+	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
 
 	/* Return if it is not OK to start a new work request. */
-	if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[sqp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
 			goto clr_busy;
 		/* We are in the error state, flush the work request. */
 		send_status = IB_WC_WR_FLUSH_ERR;
@@ -411,9 +417,9 @@ again:
 	}
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 
-	if (!qp || !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) ||
+	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
 	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		/*
 		 * For RC, the requester would timeout and retry so
 		 * shortcut the timeouts and just signal too many retries.
@@ -439,7 +445,7 @@ again:
 		wc.ex.imm_data = wqe->wr.ex.imm_data;
 		/* FALLTHROUGH */
 	case IB_WR_SEND:
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0)
 			goto op_err;
 		if (!ret)
@@ -451,21 +457,24 @@ again:
 			goto inv_err;
 		wc.wc_flags = IB_WC_WITH_IMM;
 		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		ret = hfi1_get_rwqe(qp, 1);
+		ret = hfi1_rvt_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto op_err;
 		if (!ret)
 			goto rnr_nak;
-		/* FALLTHROUGH */
+		/* skip copy_last set and qp_access_flags recheck */
+		goto do_write;
 	case IB_WR_RDMA_WRITE:
+		copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
 			goto inv_err;
+do_write:
 		if (wqe->length == 0)
 			break;
-		if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
-					   wqe->rdma_wr.remote_addr,
-					   wqe->rdma_wr.rkey,
-					   IB_ACCESS_REMOTE_WRITE)))
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+					  wqe->rdma_wr.remote_addr,
+					  wqe->rdma_wr.rkey,
+					  IB_ACCESS_REMOTE_WRITE)))
 			goto acc_err;
 		qp->r_sge.sg_list = NULL;
 		qp->r_sge.num_sge = 1;
@@ -475,10 +484,10 @@ again:
 	case IB_WR_RDMA_READ:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
 			goto inv_err;
-		if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
-					   wqe->rdma_wr.remote_addr,
-					   wqe->rdma_wr.rkey,
-					   IB_ACCESS_REMOTE_READ)))
+		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+					  wqe->rdma_wr.remote_addr,
+					  wqe->rdma_wr.rkey,
+					  IB_ACCESS_REMOTE_READ)))
 			goto acc_err;
 		release = 0;
 		sqp->s_sge.sg_list = NULL;
@@ -493,20 +502,20 @@ again:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 			goto inv_err;
-		if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
-					   wqe->atomic_wr.remote_addr,
-					   wqe->atomic_wr.rkey,
-					   IB_ACCESS_REMOTE_ATOMIC)))
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+					  wqe->atomic_wr.remote_addr,
+					  wqe->atomic_wr.rkey,
+					  IB_ACCESS_REMOTE_ATOMIC)))
 			goto acc_err;
 		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
 		sdata = wqe->atomic_wr.compare_add;
-		*(u64 *) sqp->s_sge.sge.vaddr =
+		*(u64 *)sqp->s_sge.sge.vaddr =
 			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
-			(u64) atomic64_add_return(sdata, maddr) - sdata :
-			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+			(u64)atomic64_add_return(sdata, maddr) - sdata :
+			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
 				      sdata, wqe->atomic_wr.swap);
-		hfi1_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		goto send_comp;
 
@@ -524,17 +533,17 @@ again:
 		if (len > sge->sge_length)
 			len = sge->sge_length;
 		WARN_ON_ONCE(len == 0);
-		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release);
+		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (!release)
-				hfi1_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--sqp->s_sge.num_sge)
 				*sge = *sqp->s_sge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -547,9 +556,9 @@ again:
 		sqp->s_len -= len;
 	}
 	if (release)
-		hfi1_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 
-	if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto send_comp;
 
 	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -565,12 +574,12 @@ again:
 	wc.sl = qp->remote_ah_attr.sl;
 	wc.port_num = 1;
 	/* Signal completion event if the solicited bit is set. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		      wqe->wr.send_flags & IB_SEND_SOLICITED);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     wqe->wr.send_flags & IB_SEND_SOLICITED);
 
 send_comp:
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	ibp->n_loop_pkts++;
+	ibp->rvp.n_loop_pkts++;
 flush_send:
 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
 	hfi1_send_complete(sqp, wqe, send_status);
@@ -580,7 +589,7 @@ rnr_nak:
 	/* Handle RNR NAK */
 	if (qp->ibqp.qp_type == IB_QPT_UC)
 		goto send_comp;
-	ibp->n_rnr_naks++;
+	ibp->rvp.n_rnr_naks++;
 	/*
 	 * Note: we don't need the s_lock held since the BUSY flag
 	 * makes this single threaded.
@@ -592,13 +601,10 @@ rnr_nak:
 	if (sqp->s_rnr_retry_cnt < 7)
 		sqp->s_rnr_retry--;
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
 		goto clr_busy;
-	sqp->s_flags |= HFI1_S_WAIT_RNR;
-	sqp->s_timer.function = hfi1_rc_rnr_retry;
-	sqp->s_timer.expires = jiffies +
-		usecs_to_jiffies(ib_hfi1_rnr_table[qp->r_min_rnr_timer]);
-	add_timer(&sqp->s_timer);
+	to = ib_hfi1_rnr_table[qp->r_min_rnr_timer];
+	hfi1_add_rnr_timer(sqp, to);
 	goto clr_busy;
 
 op_err:
@@ -622,9 +628,9 @@ serr:
 	spin_lock_irqsave(&sqp->s_lock, flags);
 	hfi1_send_complete(sqp, wqe, send_status);
 	if (sqp->ibqp.qp_type == IB_QPT_RC) {
-		int lastwqe = hfi1_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
 
-		sqp->s_flags &= ~HFI1_S_BUSY;
+		sqp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock_irqrestore(&sqp->s_lock, flags);
 		if (lastwqe) {
 			struct ib_event ev;
@@ -637,7 +643,7 @@ serr:
 		goto done;
 	}
 clr_busy:
-	sqp->s_flags &= ~HFI1_S_BUSY;
+	sqp->s_flags &= ~RVT_S_BUSY;
 unlock:
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 done:
@@ -666,7 +672,7 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
 	hdr->next_hdr = IB_GRH_NEXT_HDR;
 	hdr->hop_limit = grh->hop_limit;
 	/* The SGID is 32-bit aligned. */
-	hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
+	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
 	hdr->sgid.global.interface_id =
 		grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ?
 		ibp->guids[grh->sgid_index - 1] :
@@ -690,29 +696,31 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
  * Subsequent middles use the copied entry, editing the
  * PSN with 1 or 2 edits.
  */
-static inline void build_ahg(struct hfi1_qp *qp, u32 npsn)
+static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
 {
-	if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
 		clear_ahg(qp);
-	if (!(qp->s_flags & HFI1_S_AHG_VALID)) {
+	if (!(qp->s_flags & RVT_S_AHG_VALID)) {
 		/* first middle that needs copy  */
 		if (qp->s_ahgidx < 0)
-			qp->s_ahgidx = sdma_ahg_alloc(qp->s_sde);
+			qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
 		if (qp->s_ahgidx >= 0) {
 			qp->s_ahgpsn = npsn;
-			qp->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
+			priv->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
 			/* save to protect a change in another thread */
-			qp->s_hdr->sde = qp->s_sde;
-			qp->s_hdr->ahgidx = qp->s_ahgidx;
-			qp->s_flags |= HFI1_S_AHG_VALID;
+			priv->s_hdr->sde = priv->s_sde;
+			priv->s_hdr->ahgidx = qp->s_ahgidx;
+			qp->s_flags |= RVT_S_AHG_VALID;
 		}
 	} else {
 		/* subsequent middle after valid */
 		if (qp->s_ahgidx >= 0) {
-			qp->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG;
-			qp->s_hdr->ahgidx = qp->s_ahgidx;
-			qp->s_hdr->ahgcount++;
-			qp->s_hdr->ahgdesc[0] =
+			priv->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG;
+			priv->s_hdr->ahgidx = qp->s_ahgidx;
+			priv->s_hdr->ahgcount++;
+			priv->s_hdr->ahgdesc[0] =
 				sdma_build_ahg_descriptor(
 					(__force u16)cpu_to_be16((u16)npsn),
 					BTH2_OFFSET,
@@ -720,8 +728,8 @@ static inline void build_ahg(struct hfi1_qp *qp, u32 npsn)
 					16);
 			if ((npsn & 0xffff0000) !=
 					(qp->s_ahgpsn & 0xffff0000)) {
-				qp->s_hdr->ahgcount++;
-				qp->s_hdr->ahgdesc[1] =
+				priv->s_hdr->ahgcount++;
+				priv->s_hdr->ahgdesc[1] =
 					sdma_build_ahg_descriptor(
 						(__force u16)cpu_to_be16(
 							(u16)(npsn >> 16)),
@@ -733,10 +741,12 @@ static inline void build_ahg(struct hfi1_qp *qp, u32 npsn)
 	}
 }
 
-void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
-			  u32 bth0, u32 bth2, int middle)
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+			  u32 bth0, u32 bth2, int middle,
+			  struct hfi1_pkt_state *ps)
 {
-	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct hfi1_ibport *ibp = ps->ibp;
 	u16 lrh0;
 	u32 nwords;
 	u32 extra_bytes;
@@ -747,13 +757,14 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
 	nwords = (qp->s_cur_size + extra_bytes) >> 2;
 	lrh0 = HFI1_LRH_BTH;
 	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-		qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh,
-					       &qp->remote_ah_attr.grh,
-					       qp->s_hdrwords, nwords);
+		qp->s_hdrwords += hfi1_make_grh(ibp,
+						&ps->s_txreq->phdr.hdr.u.l.grh,
+						&qp->remote_ah_attr.grh,
+						qp->s_hdrwords, nwords);
 		lrh0 = HFI1_LRH_GRH;
 		middle = 0;
 	}
-	lrh0 |= (qp->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+	lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
 	/*
 	 * reset s_hdr/AHG fields
 	 *
@@ -765,10 +776,10 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
 	 * build_ahg() will modify as appropriate
 	 * to use the AHG feature.
 	 */
-	qp->s_hdr->tx_flags = 0;
-	qp->s_hdr->ahgcount = 0;
-	qp->s_hdr->ahgidx = 0;
-	qp->s_hdr->sde = NULL;
+	priv->s_hdr->tx_flags = 0;
+	priv->s_hdr->ahgcount = 0;
+	priv->s_hdr->ahgidx = 0;
+	priv->s_hdr->sde = NULL;
 	if (qp->s_mig_state == IB_MIG_MIGRATED)
 		bth0 |= IB_BTH_MIG_REQ;
 	else
@@ -776,19 +787,19 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
 	if (middle)
 		build_ahg(qp, bth2);
 	else
-		qp->s_flags &= ~HFI1_S_AHG_VALID;
-	qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->ibh.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	qp->s_hdr->ibh.lrh[2] =
+		qp->s_flags &= ~RVT_S_AHG_VALID;
+	ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
+	ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+	ps->s_txreq->phdr.hdr.lrh[2] =
 		cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	qp->s_hdr->ibh.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+	ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
 				       qp->remote_ah_attr.src_path_bits);
 	bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	bth1 = qp->remote_qpn;
-	if (qp->s_flags & HFI1_S_ECN) {
-		qp->s_flags &= ~HFI1_S_ECN;
+	if (qp->s_flags & RVT_S_ECN) {
+		qp->s_flags &= ~RVT_S_ECN;
 		/* we recently received a FECN, so return a BECN */
 		bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
 	}
@@ -799,6 +810,14 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
 /* when sending, force a reschedule every one of these periods */
 #define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */
 
+void _hfi1_do_send(struct work_struct *work)
+{
+	struct iowait *wait = container_of(work, struct iowait, iowork);
+	struct rvt_qp *qp = iowait_to_qp(wait);
+
+	hfi1_do_send(qp);
+}
+
 /**
  * hfi1_do_send - perform a send on a QP
  * @work: contains a pointer to the QP
@@ -807,34 +826,45 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
  * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
  * Otherwise, two threads could send packets out of order.
  */
-void hfi1_do_send(struct work_struct *work)
+void hfi1_do_send(struct rvt_qp *qp)
 {
-	struct iowait *wait = container_of(work, struct iowait, iowork);
-	struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait);
 	struct hfi1_pkt_state ps;
-	int (*make_req)(struct hfi1_qp *qp);
+	struct hfi1_qp_priv *priv = qp->priv;
+	int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 	unsigned long flags;
 	unsigned long timeout;
+	unsigned long timeout_int;
+	int cpu;
 
 	ps.dev = to_idev(qp->ibqp.device);
 	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ps.ppd = ppd_from_ibp(ps.ibp);
 
-	if ((qp->ibqp.qp_type == IB_QPT_RC ||
-	     qp->ibqp.qp_type == IB_QPT_UC) &&
-	    !loopback &&
-	    (qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc) - 1)) ==
-	    ps.ppd->lid) {
-		ruc_loopback(qp);
-		return;
-	}
-
-	if (qp->ibqp.qp_type == IB_QPT_RC)
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+		if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
+								) - 1)) ==
+				 ps.ppd->lid)) {
+			ruc_loopback(qp);
+			return;
+		}
 		make_req = hfi1_make_rc_req;
-	else if (qp->ibqp.qp_type == IB_QPT_UC)
+		timeout_int = (qp->timeout_jiffies);
+		break;
+	case IB_QPT_UC:
+		if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
+								) - 1)) ==
+				 ps.ppd->lid)) {
+			ruc_loopback(qp);
+			return;
+		}
 		make_req = hfi1_make_uc_req;
-	else
+		timeout_int = SEND_RESCHED_TIMEOUT;
+		break;
+	default:
 		make_req = hfi1_make_ud_req;
+		timeout_int = SEND_RESCHED_TIMEOUT;
+	}
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -844,57 +874,83 @@ void hfi1_do_send(struct work_struct *work)
 		return;
 	}
 
-	qp->s_flags |= HFI1_S_BUSY;
+	qp->s_flags |= RVT_S_BUSY;
 
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-	timeout = jiffies + SEND_RESCHED_TIMEOUT;
+	timeout = jiffies + (timeout_int) / 8;
+	cpu = priv->s_sde ? priv->s_sde->cpu :
+			cpumask_first(cpumask_of_node(ps.ppd->dd->node));
+	/* insure a pre-built packet is handled  */
+	ps.s_txreq = get_waiting_verbs_txreq(qp);
 	do {
 		/* Check for a constructed packet to be sent. */
 		if (qp->s_hdrwords != 0) {
+			spin_unlock_irqrestore(&qp->s_lock, flags);
 			/*
 			 * If the packet cannot be sent now, return and
 			 * the send tasklet will be woken up later.
 			 */
 			if (hfi1_verbs_send(qp, &ps))
-				break;
+				return;
 			/* Record that s_hdr is empty. */
 			qp->s_hdrwords = 0;
+			/* allow other tasks to run */
+			if (unlikely(time_after(jiffies, timeout))) {
+				if (workqueue_congested(cpu,
+							ps.ppd->hfi1_wq)) {
+					spin_lock_irqsave(&qp->s_lock, flags);
+					qp->s_flags &= ~RVT_S_BUSY;
+					hfi1_schedule_send(qp);
+					spin_unlock_irqrestore(&qp->s_lock,
+							       flags);
+					this_cpu_inc(
+						*ps.ppd->dd->send_schedule);
+					return;
+				}
+				if (!irqs_disabled()) {
+					cond_resched();
+					this_cpu_inc(
+					   *ps.ppd->dd->send_schedule);
+				}
+				timeout = jiffies + (timeout_int) / 8;
+			}
+			spin_lock_irqsave(&qp->s_lock, flags);
 		}
+	} while (make_req(qp, &ps));
 
-		/* allow other tasks to run */
-		if (unlikely(time_after(jiffies, timeout))) {
-			cond_resched();
-			ps.ppd->dd->verbs_dev.n_send_schedule++;
-			timeout = jiffies + SEND_RESCHED_TIMEOUT;
-		}
-	} while (make_req(qp));
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
 /*
  * This should be called with s_lock held.
  */
-void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
+void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 			enum ib_wc_status status)
 {
 	u32 old_last, last;
 	unsigned i;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 
+	last = qp->s_last;
+	old_last = last;
+	if (++last >= qp->s_size)
+		last = 0;
+	qp->s_last = last;
+	/* See post_send() */
+	barrier();
 	for (i = 0; i < wqe->wr.num_sge; i++) {
-		struct hfi1_sge *sge = &wqe->sg_list[i];
+		struct rvt_sge *sge = &wqe->sg_list[i];
 
-		hfi1_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	if (qp->ibqp.qp_type == IB_QPT_UD ||
 	    qp->ibqp.qp_type == IB_QPT_SMI ||
 	    qp->ibqp.qp_type == IB_QPT_GSI)
-		atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
+		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
 
 	/* See ch. 11.2.4.1 and 10.7.3.1 */
-	if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
 	    status != IB_WC_SUCCESS) {
 		struct ib_wc wc;
@@ -906,15 +962,10 @@ void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
 		wc.qp = &qp->ibqp;
 		if (status == IB_WC_SUCCESS)
 			wc.byte_len = wqe->length;
-		hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
-			      status != IB_WC_SUCCESS);
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
+			     status != IB_WC_SUCCESS);
 	}
 
-	last = qp->s_last;
-	old_last = last;
-	if (++last >= qp->s_size)
-		last = 0;
-	qp->s_last = last;
 	if (qp->s_acked == old_last)
 		qp->s_acked = last;
 	if (qp->s_cur == old_last)
diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c
index 9a15f1f32b45..abb8ebc1fcac 100644
--- a/drivers/staging/rdma/hfi1/sdma.c
+++ b/drivers/staging/rdma/hfi1/sdma.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -112,10 +109,10 @@ MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptor before interrupt");
 	| SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK))
 
 /* sdma_sendctrl operations */
-#define SDMA_SENDCTRL_OP_ENABLE    (1U << 0)
-#define SDMA_SENDCTRL_OP_INTENABLE (1U << 1)
-#define SDMA_SENDCTRL_OP_HALT      (1U << 2)
-#define SDMA_SENDCTRL_OP_CLEANUP   (1U << 3)
+#define SDMA_SENDCTRL_OP_ENABLE    BIT(0)
+#define SDMA_SENDCTRL_OP_INTENABLE BIT(1)
+#define SDMA_SENDCTRL_OP_HALT      BIT(2)
+#define SDMA_SENDCTRL_OP_CLEANUP   BIT(3)
 
 /* handle long defines */
 #define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \
@@ -325,9 +322,9 @@ static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
 		if (lcnt++ > 500) {
 			/* timed out - bounce the link */
 			dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
-				  __func__, sde->this_idx, (u32)reg);
+				   __func__, sde->this_idx, (u32)reg);
 			queue_work(dd->pport->hfi1_wq,
-				&dd->pport->link_bounce_work);
+				   &dd->pport->link_bounce_work);
 			break;
 		}
 		udelay(1);
@@ -361,6 +358,28 @@ static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
 	write_sde_csr(sde, SD(DESC_CNT), reg);
 }
 
+static inline void complete_tx(struct sdma_engine *sde,
+			       struct sdma_txreq *tx,
+			       int res)
+{
+	/* protect against complete modifying */
+	struct iowait *wait = tx->wait;
+	callback_t complete = tx->complete;
+
+#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
+	trace_hfi1_sdma_out_sn(sde, tx->sn);
+	if (WARN_ON_ONCE(sde->head_sn != tx->sn))
+		dd_dev_err(sde->dd, "expected %llu got %llu\n",
+			   sde->head_sn, tx->sn);
+	sde->head_sn++;
+#endif
+	sdma_txclean(sde->dd, tx);
+	if (complete)
+		(*complete)(tx, res);
+	if (iowait_sdma_dec(wait) && wait)
+		iowait_drain_wakeup(wait);
+}
+
 /*
  * Complete all the sdma requests with a SDMA_TXREQ_S_ABORTED status
  *
@@ -395,27 +414,8 @@ static void sdma_flush(struct sdma_engine *sde)
 	}
 	spin_unlock_irqrestore(&sde->flushlist_lock, flags);
 	/* flush from flush list */
-	list_for_each_entry_safe(txp, txp_next, &flushlist, list) {
-		int drained = 0;
-		/* protect against complete modifying */
-		struct iowait *wait = txp->wait;
-
-		list_del_init(&txp->list);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-		trace_hfi1_sdma_out_sn(sde, txp->sn);
-		if (WARN_ON_ONCE(sde->head_sn != txp->sn))
-			dd_dev_err(sde->dd, "expected %llu got %llu\n",
-				sde->head_sn, txp->sn);
-		sde->head_sn++;
-#endif
-		sdma_txclean(sde->dd, txp);
-		if (wait)
-			drained = atomic_dec_and_test(&wait->sdma_busy);
-		if (txp->complete)
-			(*txp->complete)(txp, SDMA_TXREQ_S_ABORTED, drained);
-		if (wait && drained)
-			iowait_drain_wakeup(wait);
-	}
+	list_for_each_entry_safe(txp, txp_next, &flushlist, list)
+		complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
 }
 
 /*
@@ -455,8 +455,8 @@ static void sdma_err_halt_wait(struct work_struct *work)
 			break;
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(sde->dd,
-				"SDMA engine %d - timeout waiting for engine to halt\n",
-				sde->this_idx);
+				   "SDMA engine %d - timeout waiting for engine to halt\n",
+				   sde->this_idx);
 			/*
 			 * Continue anyway.  This could happen if there was
 			 * an uncorrectable error in the wrong spot.
@@ -472,7 +472,6 @@ static void sdma_err_halt_wait(struct work_struct *work)
 static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
 {
 	if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) {
-
 		unsigned index;
 		struct hfi1_devdata *dd = sde->dd;
 
@@ -531,7 +530,7 @@ static void sdma_err_progress_check(unsigned long data)
 
 static void sdma_hw_clean_up_task(unsigned long opaque)
 {
-	struct sdma_engine *sde = (struct sdma_engine *) opaque;
+	struct sdma_engine *sde = (struct sdma_engine *)opaque;
 	u64 statuscsr;
 
 	while (1) {
@@ -577,31 +576,10 @@ static void sdma_flush_descq(struct sdma_engine *sde)
 		head = ++sde->descq_head & sde->sdma_mask;
 		/* if now past this txp's descs, do the callback */
 		if (txp && txp->next_descq_idx == head) {
-			int drained = 0;
-			/* protect against complete modifying */
-			struct iowait *wait = txp->wait;
-
 			/* remove from list */
 			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
-			if (wait)
-				drained = atomic_dec_and_test(&wait->sdma_busy);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-			trace_hfi1_sdma_out_sn(sde, txp->sn);
-			if (WARN_ON_ONCE(sde->head_sn != txp->sn))
-				dd_dev_err(sde->dd, "expected %llu got %llu\n",
-					sde->head_sn, txp->sn);
-			sde->head_sn++;
-#endif
-			sdma_txclean(sde->dd, txp);
+			complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
 			trace_hfi1_sdma_progress(sde, head, tail, txp);
-			if (txp->complete)
-				(*txp->complete)(
-					txp,
-					SDMA_TXREQ_S_ABORTED,
-					drained);
-			if (wait && drained)
-				iowait_drain_wakeup(wait);
-			/* see if there is another txp */
 			txp = get_txhead(sde);
 		}
 		progress++;
@@ -612,7 +590,7 @@ static void sdma_flush_descq(struct sdma_engine *sde)
 
 static void sdma_sw_clean_up_task(unsigned long opaque)
 {
-	struct sdma_engine *sde = (struct sdma_engine *) opaque;
+	struct sdma_engine *sde = (struct sdma_engine *)opaque;
 	unsigned long flags;
 
 	spin_lock_irqsave(&sde->tail_lock, flags);
@@ -627,7 +605,6 @@ static void sdma_sw_clean_up_task(unsigned long opaque)
 	 *   descq are ours to play with.
 	 */
 
-
 	/*
 	 * In the error clean up sequence, software clean must be called
 	 * before the hardware clean so we can use the hardware head in
@@ -676,7 +653,7 @@ static void sdma_start_hw_clean_up(struct sdma_engine *sde)
 }
 
 static void sdma_set_state(struct sdma_engine *sde,
-	enum sdma_states next_state)
+			   enum sdma_states next_state)
 {
 	struct sdma_state *ss = &sde->state;
 	const struct sdma_set_state_action *action = sdma_action_table;
@@ -692,8 +669,8 @@ static void sdma_set_state(struct sdma_engine *sde,
 	ss->previous_op = ss->current_op;
 	ss->current_state = next_state;
 
-	if (ss->previous_state != sdma_state_s99_running
-		&& next_state == sdma_state_s99_running)
+	if (ss->previous_state != sdma_state_s99_running &&
+	    next_state == sdma_state_s99_running)
 		sdma_flush(sde);
 
 	if (action[next_state].op_enable)
@@ -890,6 +867,9 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
 	newmap->actual_vls = num_vls;
 	newmap->vls = roundup_pow_of_two(num_vls);
 	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
+	/* initialize back-map */
+	for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
+		newmap->engine_to_vl[i] = -1;
 	for (i = 0; i < newmap->vls; i++) {
 		/* save for wrap around */
 		int first_engine = engine;
@@ -913,6 +893,9 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
 					/* wrap back to first engine */
 					engine = first_engine;
 			}
+			/* assign back-map */
+			for (j = 0; j < vl_engines[i]; j++)
+				newmap->engine_to_vl[first_engine + j] = i;
 		} else {
 			/* just re-use entry without allocating */
 			newmap->map[i] = newmap->map[i % num_vls];
@@ -922,7 +905,7 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
 	/* newmap in hand, save old map */
 	spin_lock_irq(&dd->sde_map_lock);
 	oldmap = rcu_dereference_protected(dd->sdma_map,
-			lockdep_is_held(&dd->sde_map_lock));
+					   lockdep_is_held(&dd->sde_map_lock));
 
 	/* publish newmap */
 	rcu_assign_pointer(dd->sdma_map, newmap);
@@ -983,7 +966,7 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
 		sde->tx_ring = NULL;
 	}
 	spin_lock_irq(&dd->sde_map_lock);
-	kfree(rcu_access_pointer(dd->sdma_map));
+	sdma_map_free(rcu_access_pointer(dd->sdma_map));
 	RCU_INIT_POINTER(dd->sdma_map, NULL);
 	spin_unlock_irq(&dd->sde_map_lock);
 	synchronize_rcu();
@@ -1020,19 +1003,19 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
 		return 0;
 	}
 	if (mod_num_sdma &&
-		/* can't exceed chip support */
-		mod_num_sdma <= dd->chip_sdma_engines &&
-		/* count must be >= vls */
-		mod_num_sdma >= num_vls)
+	    /* can't exceed chip support */
+	    mod_num_sdma <= dd->chip_sdma_engines &&
+	    /* count must be >= vls */
+	    mod_num_sdma >= num_vls)
 		num_engines = mod_num_sdma;
 
 	dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
 	dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines);
 	dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
-		dd->chip_sdma_mem_size);
+		    dd->chip_sdma_mem_size);
 
 	per_sdma_credits =
-		dd->chip_sdma_mem_size/(num_engines * SDMA_BLOCK_SIZE);
+		dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE);
 
 	/* set up freeze waitqueue */
 	init_waitqueue_head(&dd->sdma_unfreeze_wq);
@@ -1040,7 +1023,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
 
 	descq_cnt = sdma_get_descq_cnt();
 	dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n",
-		num_engines, descq_cnt);
+		    num_engines, descq_cnt);
 
 	/* alloc memory for array of send engines */
 	dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
@@ -1061,18 +1044,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
 		sde->desc_avail = sdma_descq_freecnt(sde);
 		sde->sdma_shift = ilog2(descq_cnt);
 		sde->sdma_mask = (1 << sde->sdma_shift) - 1;
-		sde->descq_full_count = 0;
-
-		/* Create a mask for all 3 chip interrupt sources */
-		sde->imask = (u64)1 << (0*TXE_NUM_SDMA_ENGINES + this_idx)
-			| (u64)1 << (1*TXE_NUM_SDMA_ENGINES + this_idx)
-			| (u64)1 << (2*TXE_NUM_SDMA_ENGINES + this_idx);
-		/* Create a mask specifically for sdma_idle */
-		sde->idle_mask =
-			(u64)1 << (2*TXE_NUM_SDMA_ENGINES + this_idx);
-		/* Create a mask specifically for sdma_progress */
-		sde->progress_mask =
-			(u64)1 << (TXE_NUM_SDMA_ENGINES + this_idx);
+
+		/* Create a mask specifically for each interrupt source */
+		sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
+					   this_idx);
+		sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
+						this_idx);
+		sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
+					    this_idx);
+		/* Create a combined mask to cover all 3 interrupt sources */
+		sde->imask = sde->int_mask | sde->progress_mask |
+			     sde->idle_mask;
+
 		spin_lock_init(&sde->tail_lock);
 		seqlock_init(&sde->head_lock);
 		spin_lock_init(&sde->senddmactrl_lock);
@@ -1100,10 +1083,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
 				SDMA_DESC1_INT_REQ_FLAG;
 
 		tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
-			(unsigned long)sde);
+			     (unsigned long)sde);
 
 		tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
-			(unsigned long)sde);
+			     (unsigned long)sde);
 		INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
 		INIT_WORK(&sde->flush_worker, sdma_field_flush);
 
@@ -1251,11 +1234,10 @@ void sdma_exit(struct hfi1_devdata *dd)
 
 	for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
 			++this_idx) {
-
 		sde = &dd->per_sdma[this_idx];
 		if (!list_empty(&sde->dmawait))
 			dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
-				sde->this_idx);
+				   sde->this_idx);
 		sdma_process_event(sde, sdma_event_e00_go_hw_down);
 
 		del_timer_sync(&sde->err_progress_check_timer);
@@ -1358,8 +1340,8 @@ retry:
 	use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) &&
 					(dd->flags & HFI1_HAS_SDMA_TIMEOUT);
 	hwhead = use_dmahead ?
-		(u16) le64_to_cpu(*sde->head_dma) :
-		(u16) read_sde_csr(sde, SD(HEAD));
+		(u16)le64_to_cpu(*sde->head_dma) :
+		(u16)read_sde_csr(sde, SD(HEAD));
 
 	if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
 		u16 cnt;
@@ -1385,9 +1367,9 @@ retry:
 
 		if (unlikely(!sane)) {
 			dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
-				sde->this_idx,
-				use_dmahead ? "dma" : "kreg",
-				hwhead, swhead, swtail, cnt);
+				   sde->this_idx,
+				   use_dmahead ? "dma" : "kreg",
+				   hwhead, swhead, swtail, cnt);
 			if (use_dmahead) {
 				/* try one more time, using csr */
 				use_dmahead = 0;
@@ -1464,7 +1446,7 @@ static void sdma_make_progress(struct sdma_engine *sde, u64 status)
 {
 	struct sdma_txreq *txp = NULL;
 	int progress = 0;
-	u16 hwhead, swhead, swtail;
+	u16 hwhead, swhead;
 	int idle_check_done = 0;
 
 	hwhead = sdma_gethead(sde);
@@ -1485,29 +1467,9 @@ retry:
 
 		/* if now past this txp's descs, do the callback */
 		if (txp && txp->next_descq_idx == swhead) {
-			int drained = 0;
-			/* protect against complete modifying */
-			struct iowait *wait = txp->wait;
-
 			/* remove from list */
 			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
-			if (wait)
-				drained = atomic_dec_and_test(&wait->sdma_busy);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-			trace_hfi1_sdma_out_sn(sde, txp->sn);
-			if (WARN_ON_ONCE(sde->head_sn != txp->sn))
-				dd_dev_err(sde->dd, "expected %llu got %llu\n",
-					sde->head_sn, txp->sn);
-			sde->head_sn++;
-#endif
-			sdma_txclean(sde->dd, txp);
-			if (txp->complete)
-				(*txp->complete)(
-					txp,
-					SDMA_TXREQ_S_OK,
-					drained);
-			if (wait && drained)
-				iowait_drain_wakeup(wait);
+			complete_tx(sde, txp, SDMA_TXREQ_S_OK);
 			/* see if there is another txp */
 			txp = get_txhead(sde);
 		}
@@ -1525,6 +1487,8 @@ retry:
 	 * of sdma_make_progress(..) which is ensured by idle_check_done flag
 	 */
 	if ((status & sde->idle_mask) && !idle_check_done) {
+		u16 swtail;
+
 		swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
 		if (swtail != hwhead) {
 			hwhead = (u16)read_sde_csr(sde, SD(HEAD));
@@ -1552,6 +1516,12 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
 	trace_hfi1_sdma_engine_interrupt(sde, status);
 	write_seqlock(&sde->head_lock);
 	sdma_set_desc_cnt(sde, sdma_desct_intr);
+	if (status & sde->idle_mask)
+		sde->idle_int_cnt++;
+	else if (status & sde->progress_mask)
+		sde->progress_int_cnt++;
+	else if (status & sde->int_mask)
+		sde->sdma_int_cnt++;
 	sdma_make_progress(sde, status);
 	write_sequnlock(&sde->head_lock);
 }
@@ -1577,10 +1547,10 @@ void sdma_engine_error(struct sdma_engine *sde, u64 status)
 		__sdma_process_event(sde, sdma_event_e60_hw_halted);
 	if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) {
 		dd_dev_err(sde->dd,
-			"SDMA (%u) engine error: 0x%llx state %s\n",
-			sde->this_idx,
-			(unsigned long long)status,
-			sdma_state_names[sde->state.current_state]);
+			   "SDMA (%u) engine error: 0x%llx state %s\n",
+			   sde->this_idx,
+			   (unsigned long long)status,
+			   sdma_state_names[sde->state.current_state]);
 		dump_sdma_state(sde);
 	}
 	write_sequnlock(&sde->head_lock);
@@ -1624,8 +1594,8 @@ static void sdma_sendctrl(struct sdma_engine *sde, unsigned op)
 
 	if (op & SDMA_SENDCTRL_OP_CLEANUP)
 		write_sde_csr(sde, SD(CTRL),
-			sde->p_senddmactrl |
-			SD(CTRL_SDMA_CLEANUP_SMASK));
+			      sde->p_senddmactrl |
+			      SD(CTRL_SDMA_CLEANUP_SMASK));
 	else
 		write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl);
 
@@ -1649,12 +1619,10 @@ static void sdma_setlengen(struct sdma_engine *sde)
 	 * generation counter.
 	 */
 	write_sde_csr(sde, SD(LEN_GEN),
-		(sde->descq_cnt/64) << SD(LEN_GEN_LENGTH_SHIFT)
-	);
+		      (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
 	write_sde_csr(sde, SD(LEN_GEN),
-		((sde->descq_cnt/64) << SD(LEN_GEN_LENGTH_SHIFT))
-		| (4ULL << SD(LEN_GEN_GENERATION_SHIFT))
-	);
+		      ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
+		      (4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
 }
 
 static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
@@ -1714,7 +1682,6 @@ static void set_sdma_integrity(struct sdma_engine *sde)
 	write_sde_csr(sde, SD(CHECK_ENABLE), reg);
 }
 
-
 static void init_sdma_regs(
 	struct sdma_engine *sde,
 	u32 credits,
@@ -1735,17 +1702,16 @@ static void init_sdma_regs(
 	write_sde_csr(sde, SD(DESC_CNT), 0);
 	write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys);
 	write_sde_csr(sde, SD(MEMORY),
-		((u64)credits <<
-			SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
-		((u64)(credits * sde->this_idx) <<
-			SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
+		      ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
+		      ((u64)(credits * sde->this_idx) <<
+		       SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
 	write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull);
 	set_sdma_integrity(sde);
 	opmask = OPCODE_CHECK_MASK_DISABLED;
 	opval = OPCODE_CHECK_VAL_DISABLED;
 	write_sde_csr(sde, SD(CHECK_OPCODE),
-		(opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
-		(opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
+		      (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
+		      (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
 }
 
 #ifdef CONFIG_SDMA_VERBOSITY
@@ -1824,12 +1790,9 @@ static void dump_sdma_state(struct sdma_engine *sde)
 	descq = sde->descq;
 
 	dd_dev_err(sde->dd,
-		"SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
-		sde->this_idx,
-		head,
-		tail,
-		cnt,
-		!list_empty(&sde->flushlist));
+		   "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
+		   sde->this_idx, head, tail, cnt,
+		   !list_empty(&sde->flushlist));
 
 	/* print info for each entry in the descriptor queue */
 	while (head != tail) {
@@ -1850,20 +1813,23 @@ static void dump_sdma_state(struct sdma_engine *sde)
 		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
 			& SDMA_DESC0_BYTE_COUNT_MASK;
 		dd_dev_err(sde->dd,
-			"SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
-			 head, flags, addr, gen, len);
+			   "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
+			   head, flags, addr, gen, len);
 		dd_dev_err(sde->dd,
-			"\tdesc0:0x%016llx desc1 0x%016llx\n",
-			 desc[0], desc[1]);
+			   "\tdesc0:0x%016llx desc1 0x%016llx\n",
+			   desc[0], desc[1]);
 		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
 			dd_dev_err(sde->dd,
-				"\taidx: %u amode: %u alen: %u\n",
-				(u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK)
-					>> SDMA_DESC1_HEADER_INDEX_SHIFT),
-				(u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK)
-					>> SDMA_DESC1_HEADER_MODE_SHIFT),
-				(u8)((desc[1] & SDMA_DESC1_HEADER_DWS_SMASK)
-					>> SDMA_DESC1_HEADER_DWS_SHIFT));
+				   "\taidx: %u amode: %u alen: %u\n",
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
+					SDMA_DESC1_HEADER_INDEX_SHIFT),
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_MODE_SMASK) >>
+					SDMA_DESC1_HEADER_MODE_SHIFT),
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_DWS_SMASK) >>
+					SDMA_DESC1_HEADER_DWS_SHIFT));
 		head++;
 		head &= sde->sdma_mask;
 	}
@@ -1890,29 +1856,26 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
 	head = sde->descq_head & sde->sdma_mask;
 	tail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
 	seq_printf(s, SDE_FMT, sde->this_idx,
-		sde->cpu,
-		sdma_state_name(sde->state.current_state),
-		(unsigned long long)read_sde_csr(sde, SD(CTRL)),
-		(unsigned long long)read_sde_csr(sde, SD(STATUS)),
-		(unsigned long long)read_sde_csr(sde,
-			SD(ENG_ERR_STATUS)),
-		(unsigned long long)read_sde_csr(sde, SD(TAIL)),
-		tail,
-		(unsigned long long)read_sde_csr(sde, SD(HEAD)),
-		head,
-		(unsigned long long)le64_to_cpu(*sde->head_dma),
-		(unsigned long long)read_sde_csr(sde, SD(MEMORY)),
-		(unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
-		(unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
-		(unsigned long long)sde->last_status,
-		(unsigned long long)sde->ahg_bits,
-		sde->tx_tail,
-		sde->tx_head,
-		sde->descq_tail,
-		sde->descq_head,
+		   sde->cpu,
+		   sdma_state_name(sde->state.current_state),
+		   (unsigned long long)read_sde_csr(sde, SD(CTRL)),
+		   (unsigned long long)read_sde_csr(sde, SD(STATUS)),
+		   (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)),
+		   (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail,
+		   (unsigned long long)read_sde_csr(sde, SD(HEAD)), head,
+		   (unsigned long long)le64_to_cpu(*sde->head_dma),
+		   (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
+		   (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
+		   (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
+		   (unsigned long long)sde->last_status,
+		   (unsigned long long)sde->ahg_bits,
+		   sde->tx_tail,
+		   sde->tx_head,
+		   sde->descq_tail,
+		   sde->descq_head,
 		   !list_empty(&sde->flushlist),
-		sde->descq_full_count,
-		(unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));
+		   sde->descq_full_count,
+		   (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));
 
 	/* print info for each entry in the descriptor queue */
 	while (head != tail) {
@@ -1933,14 +1896,16 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
 		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
 			& SDMA_DESC0_BYTE_COUNT_MASK;
 		seq_printf(s,
-			"\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
-			head, flags, addr, gen, len);
+			   "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
+			   head, flags, addr, gen, len);
 		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
 			seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n",
-				(u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK)
-					>> SDMA_DESC1_HEADER_INDEX_SHIFT),
-				(u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK)
-					>> SDMA_DESC1_HEADER_MODE_SHIFT));
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
+					SDMA_DESC1_HEADER_INDEX_SHIFT),
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_MODE_SMASK) >>
+					SDMA_DESC1_HEADER_MODE_SHIFT));
 		head = (head + 1) & sde->sdma_mask;
 	}
 }
@@ -2041,8 +2006,9 @@ static int sdma_check_progress(
 		ret = wait->sleep(sde, wait, tx, seq);
 		if (ret == -EAGAIN)
 			sde->desc_avail = sdma_descq_freecnt(sde);
-	} else
+	} else {
 		ret = -EBUSY;
+	}
 	return ret;
 }
 
@@ -2080,14 +2046,14 @@ retry:
 		goto nodesc;
 	tail = submit_tx(sde, tx);
 	if (wait)
-		atomic_inc(&wait->sdma_busy);
+		iowait_sdma_inc(wait);
 	sdma_update_tail(sde, tail);
 unlock:
 	spin_unlock_irqrestore(&sde->tail_lock, flags);
 	return ret;
 unlock_noconn:
 	if (wait)
-		atomic_inc(&wait->sdma_busy);
+		iowait_sdma_inc(wait);
 	tx->next_descq_idx = 0;
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 	tx->sn = sde->tail_sn++;
@@ -2132,13 +2098,12 @@ nodesc:
  * side locking.
  *
  * Return:
- * 0 - Success, -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring
- * (wait == NULL)
+ * > 0 - Success (value is number of sdma_txreq's submitted),
+ * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
  * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
  */
-int sdma_send_txlist(struct sdma_engine *sde,
-		    struct iowait *wait,
-		    struct list_head *tx_list)
+int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
+		     struct list_head *tx_list)
 {
 	struct sdma_txreq *tx, *tx_next;
 	int ret = 0;
@@ -2169,18 +2134,18 @@ retry:
 	}
 update_tail:
 	if (wait)
-		atomic_add(count, &wait->sdma_busy);
+		iowait_sdma_add(wait, count);
 	if (tail != INVALID_TAIL)
 		sdma_update_tail(sde, tail);
 	spin_unlock_irqrestore(&sde->tail_lock, flags);
-	return ret;
+	return ret == 0 ? count : ret;
 unlock_noconn:
 	spin_lock(&sde->flushlist_lock);
 	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
 		tx->wait = wait;
 		list_del_init(&tx->list);
 		if (wait)
-			atomic_inc(&wait->sdma_busy);
+			iowait_sdma_inc(wait);
 		tx->next_descq_idx = 0;
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 		tx->sn = sde->tail_sn++;
@@ -2206,8 +2171,7 @@ nodesc:
 	goto update_tail;
 }
 
-static void sdma_process_event(struct sdma_engine *sde,
-	enum sdma_events event)
+static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
 {
 	unsigned long flags;
 
@@ -2224,7 +2188,7 @@ static void sdma_process_event(struct sdma_engine *sde,
 }
 
 static void __sdma_process_event(struct sdma_engine *sde,
-	enum sdma_events event)
+				 enum sdma_events event)
 {
 	struct sdma_state *ss = &sde->state;
 	int need_progress = 0;
@@ -2247,14 +2211,15 @@ static void __sdma_process_event(struct sdma_engine *sde,
 			 * of link up, then we need to start up.
 			 * This can happen when hw down is requested while
 			 * bringing the link up with traffic active on
-			 * 7220, e.g. */
+			 * 7220, e.g.
+			 */
 			ss->go_s99_running = 1;
 			/* fall through and start dma engine */
 		case sdma_event_e10_go_hw_start:
 			/* This reference means the state machine is started */
 			sdma_get(&sde->state);
 			sdma_set_state(sde,
-				sdma_state_s10_hw_start_up_halt_wait);
+				       sdma_state_s10_hw_start_up_halt_wait);
 			break;
 		case sdma_event_e15_hw_halt_done:
 			break;
@@ -2292,7 +2257,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
 			break;
 		case sdma_event_e15_hw_halt_done:
 			sdma_set_state(sde,
-				sdma_state_s15_hw_start_up_clean_wait);
+				       sdma_state_s15_hw_start_up_clean_wait);
 			sdma_start_hw_clean_up(sde);
 			break;
 		case sdma_event_e25_hw_clean_up_done:
@@ -2767,7 +2732,7 @@ enomem:
  * This function calls _extend_sdma_tx_descs to extend or allocate
  * coalesce buffer. If there is a allocated coalesce buffer, it will
  * copy the input packet data into the coalesce buffer. It also adds
- * coalesce buffer descriptor once whe whole packet is received.
+ * coalesce buffer descriptor once when whole packet is received.
  *
  * Return:
  * <0 - error
@@ -3030,7 +2995,8 @@ void sdma_freeze(struct hfi1_devdata *dd)
 	 * continuing.
 	 */
 	ret = wait_event_interruptible(dd->sdma_unfreeze_wq,
-				atomic_read(&dd->sdma_unfreeze_count) <= 0);
+				       atomic_read(&dd->sdma_unfreeze_count) <=
+				       0);
 	/* interrupted or count is negative, then unloading - just exit */
 	if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0)
 		return;
@@ -3047,7 +3013,7 @@ void sdma_freeze(struct hfi1_devdata *dd)
 	 * software clean will read engine CSRs, so must be completed before
 	 * the next step, which will clear the engine CSRs.
 	 */
-	(void) wait_event_interruptible(dd->sdma_unfreeze_wq,
+	(void)wait_event_interruptible(dd->sdma_unfreeze_wq,
 				atomic_read(&dd->sdma_unfreeze_count) <= 0);
 	/* no need to check results - done no matter what */
 }
@@ -3067,7 +3033,7 @@ void sdma_unfreeze(struct hfi1_devdata *dd)
 	/* tell all engines start freeze clean up */
 	for (i = 0; i < dd->num_sdma; i++)
 		sdma_process_event(&dd->per_sdma[i],
-					sdma_event_e82_hw_unfreeze);
+				   sdma_event_e82_hw_unfreeze);
 }
 
 /**
@@ -3081,5 +3047,6 @@ void _sdma_engine_progress_schedule(
 	trace_hfi1_sdma_engine_progress(sde, sde->progress_mask);
 	/* assume we have selected a good cpu */
 	write_csr(sde->dd,
-		  CCE_INT_FORCE + (8*(IS_SDMA_START/64)), sde->progress_mask);
+		  CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)),
+		  sde->progress_mask);
 }
diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h
index da89e6458162..8f50c99fe711 100644
--- a/drivers/staging/rdma/hfi1/sdma.h
+++ b/drivers/staging/rdma/hfi1/sdma.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_SDMA_H
 #define _HFI1_SDMA_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -58,15 +55,13 @@
 
 #include "hfi.h"
 #include "verbs.h"
+#include "sdma_txreq.h"
 
-/* increased for AHG */
-#define NUM_DESC 6
 /* Hardware limit */
 #define MAX_DESC 64
 /* Hardware limit for SDMA packet size */
 #define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
 
-
 #define SDMA_TXREQ_S_OK        0
 #define SDMA_TXREQ_S_SENDERROR 1
 #define SDMA_TXREQ_S_ABORTED   2
@@ -109,8 +104,8 @@
 /*
  * Bits defined in the send DMA descriptor.
  */
-#define SDMA_DESC0_FIRST_DESC_FLAG      (1ULL << 63)
-#define SDMA_DESC0_LAST_DESC_FLAG       (1ULL << 62)
+#define SDMA_DESC0_FIRST_DESC_FLAG      BIT_ULL(63)
+#define SDMA_DESC0_LAST_DESC_FLAG       BIT_ULL(62)
 #define SDMA_DESC0_BYTE_COUNT_SHIFT     48
 #define SDMA_DESC0_BYTE_COUNT_WIDTH     14
 #define SDMA_DESC0_BYTE_COUNT_MASK \
@@ -154,8 +149,8 @@
 	((1ULL << SDMA_DESC1_GENERATION_WIDTH) - 1)
 #define SDMA_DESC1_GENERATION_SMASK \
 	(SDMA_DESC1_GENERATION_MASK << SDMA_DESC1_GENERATION_SHIFT)
-#define SDMA_DESC1_INT_REQ_FLAG         (1ULL << 1)
-#define SDMA_DESC1_HEAD_TO_HOST_FLAG    (1ULL << 0)
+#define SDMA_DESC1_INT_REQ_FLAG         BIT_ULL(1)
+#define SDMA_DESC1_HEAD_TO_HOST_FLAG    BIT_ULL(0)
 
 enum sdma_states {
 	sdma_state_s00_hw_down,
@@ -311,83 +306,6 @@ struct hw_sdma_desc {
 	__le64 qw[2];
 };
 
-/*
- * struct sdma_desc - canonical fragment descriptor
- *
- * This is the descriptor carried in the tx request
- * corresponding to each fragment.
- *
- */
-struct sdma_desc {
-	/* private:  don't use directly */
-	u64 qw[2];
-};
-
-struct sdma_txreq;
-typedef void (*callback_t)(struct sdma_txreq *, int, int);
-
-/**
- * struct sdma_txreq - the sdma_txreq structure (one per packet)
- * @list: for use by user and by queuing for wait
- *
- * This is the representation of a packet which consists of some
- * number of fragments.   Storage is provided to within the structure.
- * for all fragments.
- *
- * The storage for the descriptors are automatically extended as needed
- * when the currently allocation is exceeded.
- *
- * The user (Verbs or PSM) may overload this structure with fields
- * specific to their use by putting this struct first in their struct.
- * The method of allocation of the overloaded structure is user dependent
- *
- * The list is the only public field in the structure.
- *
- */
-
-struct sdma_txreq {
-	struct list_head list;
-	/* private: */
-	struct sdma_desc *descp;
-	/* private: */
-	void *coalesce_buf;
-	/* private: */
-	u16 coalesce_idx;
-	/* private: */
-	struct iowait *wait;
-	/* private: */
-	callback_t                  complete;
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-	u64 sn;
-#endif
-	/* private: - used in coalesce/pad processing */
-	u16                         packet_len;
-	/* private: - down-counted to trigger last */
-	u16                         tlen;
-	/* private: flags */
-	u16                         flags;
-	/* private: */
-	u16                         num_desc;
-	/* private: */
-	u16                         desc_limit;
-	/* private: */
-	u16                         next_descq_idx;
-	/* private: */
-	struct sdma_desc descs[NUM_DESC];
-};
-
-struct verbs_txreq {
-	struct hfi1_pio_header	phdr;
-	struct sdma_txreq       txreq;
-	struct hfi1_qp           *qp;
-	struct hfi1_swqe         *wqe;
-	struct hfi1_mregion	*mr;
-	struct hfi1_sge_state    *ss;
-	struct sdma_engine     *sde;
-	u16                     hdr_dwords;
-	u16                     hdr_inx;
-};
-
 /**
  * struct sdma_engine - Data pertaining to each SDMA engine.
  * @dd: a back-pointer to the device data
@@ -409,6 +327,7 @@ struct sdma_engine {
 	u64 imask;			/* clear interrupt mask */
 	u64 idle_mask;
 	u64 progress_mask;
+	u64 int_mask;
 	/* private: */
 	volatile __le64      *head_dma; /* DMA'ed by chip */
 	/* private: */
@@ -465,6 +384,12 @@ struct sdma_engine {
 	u16                   tx_head;
 	/* private: */
 	u64                   last_status;
+	/* private */
+	u64                     err_cnt;
+	/* private */
+	u64                     sdma_int_cnt;
+	u64                     idle_int_cnt;
+	u64                     progress_int_cnt;
 
 	/* private: */
 	struct list_head      dmawait;
@@ -484,12 +409,12 @@ struct sdma_engine {
 	u32                   progress_check_head;
 	/* private: */
 	struct work_struct flush_worker;
+	/* protect flush list */
 	spinlock_t flushlist_lock;
 	/* private: */
 	struct list_head flushlist;
 };
 
-
 int sdma_init(struct hfi1_devdata *dd, u8 port);
 void sdma_start(struct hfi1_devdata *dd);
 void sdma_exit(struct hfi1_devdata *dd);
@@ -535,7 +460,6 @@ static inline int __sdma_running(struct sdma_engine *engine)
 	return engine->state.current_state == sdma_state_s99_running;
 }
 
-
 /**
  * sdma_running() - state suitability test
  * @engine: sdma engine
@@ -565,7 +489,6 @@ void _sdma_txreq_ahgadd(
 	u32 *ahg,
 	u8 ahg_hlen);
 
-
 /**
  * sdma_txinit_ahg() - initialize an sdma_txreq struct with AHG
  * @tx: tx request to initialize
@@ -626,7 +549,7 @@ static inline int sdma_txinit_ahg(
 	u8 num_ahg,
 	u32 *ahg,
 	u8 ahg_hlen,
-	void (*cb)(struct sdma_txreq *, int, int))
+	void (*cb)(struct sdma_txreq *, int))
 {
 	if (tlen == 0)
 		return -ENODATA;
@@ -640,7 +563,8 @@ static inline int sdma_txinit_ahg(
 	tx->complete = cb;
 	tx->coalesce_buf = NULL;
 	tx->wait = NULL;
-	tx->tlen = tx->packet_len = tlen;
+	tx->packet_len = tlen;
+	tx->tlen = tx->packet_len;
 	tx->descs[0].qw[0] = SDMA_DESC0_FIRST_DESC_FLAG;
 	tx->descs[0].qw[1] = 0;
 	if (flags & SDMA_TXREQ_F_AHG_COPY)
@@ -689,7 +613,7 @@ static inline int sdma_txinit(
 	struct sdma_txreq *tx,
 	u16 flags,
 	u16 tlen,
-	void (*cb)(struct sdma_txreq *, int, int))
+	void (*cb)(struct sdma_txreq *, int))
 {
 	return sdma_txinit_ahg(tx, flags, tlen, 0, 0, NULL, 0, cb);
 }
@@ -753,7 +677,7 @@ static inline void _sdma_close_tx(struct hfi1_devdata *dd,
 		dd->default_desc1;
 	if (tx->flags & SDMA_TXREQ_F_URGENT)
 		tx->descp[tx->num_desc].qw[1] |=
-			(SDMA_DESC1_HEAD_TO_HOST_FLAG|
+			(SDMA_DESC1_HEAD_TO_HOST_FLAG |
 			 SDMA_DESC1_INT_REQ_FLAG);
 }
 
@@ -1080,6 +1004,7 @@ struct sdma_map_elem {
 
 /**
  * struct sdma_map_el - mapping for a vl
+ * @engine_to_vl - map of an engine to a vl
  * @list - rcu head for free callback
  * @mask - vl mask to "mod" the vl to produce an index to map array
  * @actual_vls - number of vls
@@ -1091,6 +1016,7 @@ struct sdma_map_elem {
  * in turn point to an array of sde's for that vl.
  */
 struct sdma_vl_map {
+	s8 engine_to_vl[TXE_NUM_SDMA_ENGINES];
 	struct rcu_head list;
 	u32 mask;
 	u8 actual_vls;
diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/staging/rdma/hfi1/sdma_txreq.h
new file mode 100644
index 000000000000..bf7d777d756e
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/sdma_txreq.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef HFI1_SDMA_TXREQ_H
+#define HFI1_SDMA_TXREQ_H
+
+/* increased for AHG */
+#define NUM_DESC 6
+
+/*
+ * struct sdma_desc - canonical fragment descriptor
+ *
+ * This is the descriptor carried in the tx request
+ * corresponding to each fragment.
+ *
+ */
+struct sdma_desc {
+	/* private:  don't use directly */
+	u64 qw[2];
+};
+
+/**
+ * struct sdma_txreq - the sdma_txreq structure (one per packet)
+ * @list: for use by user and by queuing for wait
+ *
+ * This is the representation of a packet which consists of some
+ * number of fragments.   Storage is provided to within the structure.
+ * for all fragments.
+ *
+ * The storage for the descriptors are automatically extended as needed
+ * when the currently allocation is exceeded.
+ *
+ * The user (Verbs or PSM) may overload this structure with fields
+ * specific to their use by putting this struct first in their struct.
+ * The method of allocation of the overloaded structure is user dependent
+ *
+ * The list is the only public field in the structure.
+ *
+ */
+
+#define SDMA_TXREQ_S_OK        0
+#define SDMA_TXREQ_S_SENDERROR 1
+#define SDMA_TXREQ_S_ABORTED   2
+#define SDMA_TXREQ_S_SHUTDOWN  3
+
+/* flags bits */
+#define SDMA_TXREQ_F_URGENT       0x0001
+#define SDMA_TXREQ_F_AHG_COPY     0x0002
+#define SDMA_TXREQ_F_USE_AHG      0x0004
+
+struct sdma_txreq;
+typedef void (*callback_t)(struct sdma_txreq *, int);
+
+struct iowait;
+struct sdma_txreq {
+	struct list_head list;
+	/* private: */
+	struct sdma_desc *descp;
+	/* private: */
+	void *coalesce_buf;
+	/* private: */
+	struct iowait *wait;
+	/* private: */
+	callback_t                  complete;
+#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
+	u64 sn;
+#endif
+	/* private: - used in coalesce/pad processing */
+	u16                         packet_len;
+	/* private: - down-counted to trigger last */
+	u16                         tlen;
+	/* private: */
+	u16                         num_desc;
+	/* private: */
+	u16                         desc_limit;
+	/* private: */
+	u16                         next_descq_idx;
+	/* private: */
+	u16 coalesce_idx;
+	/* private: flags */
+	u16                         flags;
+	/* private: */
+	struct sdma_desc descs[NUM_DESC];
+};
+
+static inline int sdma_txreq_built(struct sdma_txreq *tx)
+{
+	return tx->num_desc;
+}
+
+#endif                          /* HFI1_SDMA_TXREQ_H */
diff --git a/drivers/staging/rdma/hfi1/srq.c b/drivers/staging/rdma/hfi1/srq.c
deleted file mode 100644
index 67786d417493..000000000000
--- a/drivers/staging/rdma/hfi1/srq.c
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "verbs.h"
-
-/**
- * hfi1_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: A pointer to the first WR to cause a problem is put here
- *
- * This may be called from interrupt context.
- */
-int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr)
-{
-	struct hfi1_srq *srq = to_isrq(ibsrq);
-	struct hfi1_rwq *wq;
-	unsigned long flags;
-	int ret;
-
-	for (; wr; wr = wr->next) {
-		struct hfi1_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		wq = srq->rq.wq;
-		next = wq->head + 1;
-		if (next >= srq->rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&srq->rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * hfi1_create_srq - create a shared receive queue
- * @ibpd: the protection domain of the SRQ to create
- * @srq_init_attr: the attributes of the SRQ
- * @udata: data from libibverbs when creating a user SRQ
- */
-struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
-			       struct ib_srq_init_attr *srq_init_attr,
-			       struct ib_udata *udata)
-{
-	struct hfi1_ibdev *dev = to_idev(ibpd->device);
-	struct hfi1_srq *srq;
-	u32 sz;
-	struct ib_srq *ret;
-
-	if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-		ret = ERR_PTR(-ENOSYS);
-		goto done;
-	}
-
-	if (srq_init_attr->attr.max_sge == 0 ||
-	    srq_init_attr->attr.max_sge > hfi1_max_srq_sges ||
-	    srq_init_attr->attr.max_wr == 0 ||
-	    srq_init_attr->attr.max_wr > hfi1_max_srq_wrs) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
-	if (!srq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	srq->rq.size = srq_init_attr->attr.max_wr + 1;
-	srq->rq.max_sge = srq_init_attr->attr.max_sge;
-	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
-		sizeof(struct hfi1_rwqe);
-	srq->rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) + srq->rq.size * sz);
-	if (!srq->rq.wq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_srq;
-	}
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See hfi1_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-		u32 s = sizeof(struct hfi1_rwq) + srq->rq.size * sz;
-
-		srq->ip =
-		    hfi1_create_mmap_info(dev, s, ibpd->uobject->context,
-					  srq->rq.wq);
-		if (!srq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wq;
-		}
-
-		err = ib_copy_to_udata(udata, &srq->ip->offset,
-				       sizeof(srq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		srq->ip = NULL;
-
-	/*
-	 * ib_create_srq() will initialize srq->ibsrq.
-	 */
-	spin_lock_init(&srq->rq.lock);
-	srq->rq.wq->head = 0;
-	srq->rq.wq->tail = 0;
-	srq->limit = srq_init_attr->attr.srq_limit;
-
-	spin_lock(&dev->n_srqs_lock);
-	if (dev->n_srqs_allocated == hfi1_max_srqs) {
-		spin_unlock(&dev->n_srqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_srqs_allocated++;
-	spin_unlock(&dev->n_srqs_lock);
-
-	if (srq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &srq->ibsrq;
-	goto done;
-
-bail_ip:
-	kfree(srq->ip);
-bail_wq:
-	vfree(srq->rq.wq);
-bail_srq:
-	kfree(srq);
-done:
-	return ret;
-}
-
-/**
- * hfi1_modify_srq - modify a shared receive queue
- * @ibsrq: the SRQ to modify
- * @attr: the new attributes of the SRQ
- * @attr_mask: indicates which attributes to modify
- * @udata: user data for libibverbs.so
- */
-int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		    enum ib_srq_attr_mask attr_mask,
-		    struct ib_udata *udata)
-{
-	struct hfi1_srq *srq = to_isrq(ibsrq);
-	struct hfi1_rwq *wq;
-	int ret = 0;
-
-	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct hfi1_rwq *owq;
-		struct hfi1_rwqe *p;
-		u32 sz, size, n, head, tail;
-
-		/* Check that the requested sizes are below the limits. */
-		if ((attr->max_wr > hfi1_max_srq_wrs) ||
-		    ((attr_mask & IB_SRQ_LIMIT) ?
-		     attr->srq_limit : srq->limit) > attr->max_wr) {
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		sz = sizeof(struct hfi1_rwqe) +
-			srq->rq.max_sge * sizeof(struct ib_sge);
-		size = attr->max_wr + 1;
-		wq = vmalloc_user(sizeof(struct hfi1_rwq) + size * sz);
-		if (!wq) {
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		/* Check that we can write the offset to mmap. */
-		if (udata && udata->inlen >= sizeof(__u64)) {
-			__u64 offset_addr;
-			__u64 offset = 0;
-
-			ret = ib_copy_from_udata(&offset_addr, udata,
-						 sizeof(offset_addr));
-			if (ret)
-				goto bail_free;
-			udata->outbuf =
-				(void __user *) (unsigned long) offset_addr;
-			ret = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (ret)
-				goto bail_free;
-		}
-
-		spin_lock_irq(&srq->rq.lock);
-		/*
-		 * validate head and tail pointer values and compute
-		 * the number of remaining WQEs.
-		 */
-		owq = srq->rq.wq;
-		head = owq->head;
-		tail = owq->tail;
-		if (head >= srq->rq.size || tail >= srq->rq.size) {
-			ret = -EINVAL;
-			goto bail_unlock;
-		}
-		n = head;
-		if (n < tail)
-			n += srq->rq.size - tail;
-		else
-			n -= tail;
-		if (size <= n) {
-			ret = -EINVAL;
-			goto bail_unlock;
-		}
-		n = 0;
-		p = wq->wq;
-		while (tail != head) {
-			struct hfi1_rwqe *wqe;
-			int i;
-
-			wqe = get_rwqe_ptr(&srq->rq, tail);
-			p->wr_id = wqe->wr_id;
-			p->num_sge = wqe->num_sge;
-			for (i = 0; i < wqe->num_sge; i++)
-				p->sg_list[i] = wqe->sg_list[i];
-			n++;
-			p = (struct hfi1_rwqe *)((char *)p + sz);
-			if (++tail >= srq->rq.size)
-				tail = 0;
-		}
-		srq->rq.wq = wq;
-		srq->rq.size = size;
-		wq->head = n;
-		wq->tail = 0;
-		if (attr_mask & IB_SRQ_LIMIT)
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-
-		vfree(owq);
-
-		if (srq->ip) {
-			struct hfi1_mmap_info *ip = srq->ip;
-			struct hfi1_ibdev *dev = to_idev(srq->ibsrq.device);
-			u32 s = sizeof(struct hfi1_rwq) + size * sz;
-
-			hfi1_update_mmap_info(dev, ip, s, wq);
-
-			/*
-			 * Return the offset to mmap.
-			 * See hfi1_mmap() for details.
-			 */
-			if (udata && udata->inlen >= sizeof(__u64)) {
-				ret = ib_copy_to_udata(udata, &ip->offset,
-						       sizeof(ip->offset));
-				if (ret)
-					goto bail;
-			}
-
-			/*
-			 * Put user mapping info onto the pending list
-			 * unless it already is on the list.
-			 */
-			spin_lock_irq(&dev->pending_lock);
-			if (list_empty(&ip->pending_mmaps))
-				list_add(&ip->pending_mmaps,
-					 &dev->pending_mmaps);
-			spin_unlock_irq(&dev->pending_lock);
-		}
-	} else if (attr_mask & IB_SRQ_LIMIT) {
-		spin_lock_irq(&srq->rq.lock);
-		if (attr->srq_limit >= srq->rq.size)
-			ret = -EINVAL;
-		else
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-	}
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&srq->rq.lock);
-bail_free:
-	vfree(wq);
-bail:
-	return ret;
-}
-
-int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
-	struct hfi1_srq *srq = to_isrq(ibsrq);
-
-	attr->max_wr = srq->rq.size - 1;
-	attr->max_sge = srq->rq.max_sge;
-	attr->srq_limit = srq->limit;
-	return 0;
-}
-
-/**
- * hfi1_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
- */
-int hfi1_destroy_srq(struct ib_srq *ibsrq)
-{
-	struct hfi1_srq *srq = to_isrq(ibsrq);
-	struct hfi1_ibdev *dev = to_idev(ibsrq->device);
-
-	spin_lock(&dev->n_srqs_lock);
-	dev->n_srqs_allocated--;
-	spin_unlock(&dev->n_srqs_lock);
-	if (srq->ip)
-		kref_put(&srq->ip->ref, hfi1_release_mmap_info);
-	else
-		vfree(srq->rq.wq);
-	kfree(srq);
-
-	return 0;
-}
diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c
index 1dd6727dd5ef..c7f1271190af 100644
--- a/drivers/staging/rdma/hfi1/sysfs.c
+++ b/drivers/staging/rdma/hfi1/sysfs.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -53,7 +50,6 @@
 #include "mad.h"
 #include "trace.h"
 
-
 /*
  * Start of per-port congestion control structures and support code
  */
@@ -62,8 +58,8 @@
  * Congestion control table size followed by table entries
  */
 static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *bin_attr,
-		char *buf, loff_t pos, size_t count)
+				 struct bin_attribute *bin_attr,
+				 char *buf, loff_t pos, size_t count)
 {
 	int ret;
 	struct hfi1_pportdata *ppd =
@@ -84,7 +80,7 @@ static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
 
 	rcu_read_lock();
 	cc_state = get_cc_state(ppd);
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return -EINVAL;
 	}
@@ -99,10 +95,6 @@ static void port_release(struct kobject *kobj)
 	/* nothing to do since memory is freed by hfi1_free_devdata() */
 }
 
-static struct kobj_type port_cc_ktype = {
-	.release = port_release,
-};
-
 static struct bin_attribute cc_table_bin_attr = {
 	.attr = {.name = "cc_table_bin", .mode = 0444},
 	.read = read_cc_table_bin,
@@ -115,8 +107,8 @@ static struct bin_attribute cc_table_bin_attr = {
  * trigger threshold and the minimum injection rate delay.
  */
 static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *bin_attr,
-		char *buf, loff_t pos, size_t count)
+				   struct bin_attribute *bin_attr,
+				   char *buf, loff_t pos, size_t count)
 {
 	int ret;
 	struct hfi1_pportdata *ppd =
@@ -135,7 +127,7 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
 
 	rcu_read_lock();
 	cc_state = get_cc_state(ppd);
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return -EINVAL;
 	}
@@ -151,6 +143,68 @@ static struct bin_attribute cc_setting_bin_attr = {
 	.size = PAGE_SIZE,
 };
 
+struct hfi1_port_attr {
+	struct attribute attr;
+	ssize_t	(*show)(struct hfi1_pportdata *, char *);
+	ssize_t	(*store)(struct hfi1_pportdata *, const char *, size_t);
+};
+
+static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf)
+{
+	return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off");
+}
+
+static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf,
+				size_t count)
+{
+	if (!memcmp(buf, "on", 2))
+		ppd->cc_prescan = true;
+	else if (!memcmp(buf, "off", 3))
+		ppd->cc_prescan = false;
+
+	return count;
+}
+
+static struct hfi1_port_attr cc_prescan_attr =
+		__ATTR(cc_prescan, 0600, cc_prescan_show, cc_prescan_store);
+
+static ssize_t cc_attr_show(struct kobject *kobj, struct attribute *attr,
+			    char *buf)
+{
+	struct hfi1_port_attr *port_attr =
+		container_of(attr, struct hfi1_port_attr, attr);
+	struct hfi1_pportdata *ppd =
+		container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
+
+	return port_attr->show(ppd, buf);
+}
+
+static ssize_t cc_attr_store(struct kobject *kobj, struct attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct hfi1_port_attr *port_attr =
+		container_of(attr, struct hfi1_port_attr, attr);
+	struct hfi1_pportdata *ppd =
+		container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
+
+	return port_attr->store(ppd, buf, count);
+}
+
+static const struct sysfs_ops port_cc_sysfs_ops = {
+	.show = cc_attr_show,
+	.store = cc_attr_store
+};
+
+static struct attribute *port_cc_default_attributes[] = {
+	&cc_prescan_attr.attr
+};
+
+static struct kobj_type port_cc_ktype = {
+	.release = port_release,
+	.sysfs_ops = &port_cc_sysfs_ops,
+	.default_attrs = port_cc_default_attributes
+};
+
 /* Start sc2vl */
 #define HFI1_SC2VL_ATTR(N)				    \
 	static struct hfi1_sc2vl_attr hfi1_sc2vl_attr_##N = { \
@@ -196,7 +250,6 @@ HFI1_SC2VL_ATTR(29);
 HFI1_SC2VL_ATTR(30);
 HFI1_SC2VL_ATTR(31);
 
-
 static struct attribute *sc2vl_default_attributes[] = {
 	&hfi1_sc2vl_attr_0.attr,
 	&hfi1_sc2vl_attr_1.attr,
@@ -302,7 +355,6 @@ HFI1_SL2SC_ATTR(29);
 HFI1_SL2SC_ATTR(30);
 HFI1_SL2SC_ATTR(31);
 
-
 static struct attribute *sl2sc_default_attributes[] = {
 	&hfi1_sl2sc_attr_0.attr,
 	&hfi1_sl2sc_attr_1.attr,
@@ -435,7 +487,6 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
 	.default_attrs = vl2mtu_default_attributes
 };
 
-
 /* end of per-port file structures and support code */
 
 /*
@@ -446,7 +497,7 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
 			char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 
 	return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
 }
@@ -455,7 +506,7 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
 			char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -470,19 +521,18 @@ static ssize_t show_boardversion(struct device *device,
 				 struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/* The string printed here is already newline-terminated. */
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
 }
 
-
 static ssize_t show_nctxts(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/*
@@ -497,10 +547,10 @@ static ssize_t show_nctxts(struct device *device,
 }
 
 static ssize_t show_nfreectxts(struct device *device,
-			   struct device_attribute *attr, char *buf)
+			       struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/* Return the number of free user ports (contexts) available. */
@@ -511,11 +561,10 @@ static ssize_t show_serial(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
-
 }
 
 static ssize_t store_chip_reset(struct device *device,
@@ -523,7 +572,7 @@ static ssize_t store_chip_reset(struct device *device,
 				size_t count)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -552,7 +601,7 @@ static ssize_t show_tempsense(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	struct hfi1_temp temp;
 	int ret;
@@ -608,8 +657,8 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
 
 	if (!port_num || port_num > dd->num_pports) {
 		dd_dev_err(dd,
-			"Skipping infiniband class with invalid port %u\n",
-			port_num);
+			   "Skipping infiniband class with invalid port %u\n",
+			   port_num);
 		return -ENODEV;
 	}
 	ppd = &dd->pport[port_num - 1];
@@ -644,39 +693,36 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
 	}
 	kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD);
 
-
 	ret = kobject_init_and_add(&ppd->pport_cc_kobj, &port_cc_ktype,
 				   kobj, "CCMgtA");
 	if (ret) {
 		dd_dev_err(dd,
-		 "Skipping Congestion Control sysfs info, (err %d) port %u\n",
-		 ret, port_num);
+			   "Skipping Congestion Control sysfs info, (err %d) port %u\n",
+			   ret, port_num);
 		goto bail_vl2mtu;
 	}
 
 	kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
 
-	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
-				&cc_setting_bin_attr);
+	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
 	if (ret) {
 		dd_dev_err(dd,
-		 "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
-		 ret, port_num);
+			   "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
+			   ret, port_num);
 		goto bail_cc;
 	}
 
-	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
-				&cc_table_bin_attr);
+	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_table_bin_attr);
 	if (ret) {
 		dd_dev_err(dd,
-		 "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
-		 ret, port_num);
+			   "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
+			   ret, port_num);
 		goto bail_cc_entry_bin;
 	}
 
 	dd_dev_info(dd,
-		"IB%u: Congestion Control Agent enabled for port %d\n",
-		dd->unit, port_num);
+		    "IB%u: Congestion Control Agent enabled for port %d\n",
+		    dd->unit, port_num);
 
 	return 0;
 
@@ -700,7 +746,7 @@ bail:
  */
 int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
 {
-	struct ib_device *dev = &dd->verbs_dev.ibdev;
+	struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
 	int i, ret;
 
 	for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c
index 10122e84cb2f..8b62fefcf903 100644
--- a/drivers/staging/rdma/hfi1/trace.c
+++ b/drivers/staging/rdma/hfi1/trace.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -109,17 +106,17 @@ const char *parse_everbs_hdrs(
 	case OP(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE):
 	case OP(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE):
 		trace_seq_printf(p, IMM_PRN,
-			be32_to_cpu(eh->imm_data));
+				 be32_to_cpu(eh->imm_data));
 		break;
 	/* reth + imm */
 	case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 	case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 		trace_seq_printf(p, RETH_PRN " " IMM_PRN,
-			(unsigned long long)ib_u64_get(
-				(__be32 *)&eh->rc.reth.vaddr),
-			be32_to_cpu(eh->rc.reth.rkey),
-			be32_to_cpu(eh->rc.reth.length),
-			be32_to_cpu(eh->rc.imm_data));
+				 (unsigned long long)ib_u64_get(
+				 (__be32 *)&eh->rc.reth.vaddr),
+				 be32_to_cpu(eh->rc.reth.rkey),
+				 be32_to_cpu(eh->rc.reth.length),
+				 be32_to_cpu(eh->rc.imm_data));
 		break;
 	/* reth */
 	case OP(RC, RDMA_READ_REQUEST):
@@ -128,10 +125,10 @@ const char *parse_everbs_hdrs(
 	case OP(RC, RDMA_WRITE_ONLY):
 	case OP(UC, RDMA_WRITE_ONLY):
 		trace_seq_printf(p, RETH_PRN,
-			(unsigned long long)ib_u64_get(
-				(__be32 *)&eh->rc.reth.vaddr),
-			be32_to_cpu(eh->rc.reth.rkey),
-			be32_to_cpu(eh->rc.reth.length));
+				 (unsigned long long)ib_u64_get(
+				 (__be32 *)&eh->rc.reth.vaddr),
+				 be32_to_cpu(eh->rc.reth.rkey),
+				 be32_to_cpu(eh->rc.reth.length));
 		break;
 	case OP(RC, RDMA_READ_RESPONSE_FIRST):
 	case OP(RC, RDMA_READ_RESPONSE_LAST):
@@ -154,19 +151,20 @@ const char *parse_everbs_hdrs(
 	case OP(RC, COMPARE_SWAP):
 	case OP(RC, FETCH_ADD):
 		trace_seq_printf(p, ATOMICETH_PRN,
-			(unsigned long long)ib_u64_get(eh->atomic_eth.vaddr),
-			eh->atomic_eth.rkey,
-			(unsigned long long)ib_u64_get(
-				(__be32 *)&eh->atomic_eth.swap_data),
-			(unsigned long long) ib_u64_get(
+				 (unsigned long long)ib_u64_get(
+				 eh->atomic_eth.vaddr),
+				 eh->atomic_eth.rkey,
+				 (unsigned long long)ib_u64_get(
+				 (__be32 *)&eh->atomic_eth.swap_data),
+				 (unsigned long long)ib_u64_get(
 				 (__be32 *)&eh->atomic_eth.compare_data));
 		break;
 	/* deth */
 	case OP(UD, SEND_ONLY):
 	case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
 		trace_seq_printf(p, DETH_PRN,
-			be32_to_cpu(eh->ud.deth[0]),
-			be32_to_cpu(eh->ud.deth[1]) & HFI1_QPN_MASK);
+				 be32_to_cpu(eh->ud.deth[0]),
+				 be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK);
 		break;
 	}
 	trace_seq_putc(p, 0);
@@ -187,12 +185,12 @@ const char *parse_sdma_flags(
 	trace_seq_printf(p, "%s", flags);
 	if (desc0 & SDMA_DESC0_FIRST_DESC_FLAG)
 		trace_seq_printf(p, " amode:%u aidx:%u alen:%u",
-			(u8)((desc1 >> SDMA_DESC1_HEADER_MODE_SHIFT)
-				& SDMA_DESC1_HEADER_MODE_MASK),
-			(u8)((desc1 >> SDMA_DESC1_HEADER_INDEX_SHIFT)
-				& SDMA_DESC1_HEADER_INDEX_MASK),
-			(u8)((desc1 >> SDMA_DESC1_HEADER_DWS_SHIFT)
-				& SDMA_DESC1_HEADER_DWS_MASK));
+				 (u8)((desc1 >> SDMA_DESC1_HEADER_MODE_SHIFT) &
+				      SDMA_DESC1_HEADER_MODE_MASK),
+				 (u8)((desc1 >> SDMA_DESC1_HEADER_INDEX_SHIFT) &
+				      SDMA_DESC1_HEADER_INDEX_MASK),
+				 (u8)((desc1 >> SDMA_DESC1_HEADER_DWS_SHIFT) &
+				      SDMA_DESC1_HEADER_DWS_MASK));
 	return ret;
 }
 
@@ -234,3 +232,4 @@ __hfi1_trace_fn(DC8051);
 __hfi1_trace_fn(FIRMWARE);
 __hfi1_trace_fn(RCVCTRL);
 __hfi1_trace_fn(TID);
+__hfi1_trace_fn(MMU);
diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h
index 86c12ebfd4f0..963dc948c38a 100644
--- a/drivers/staging/rdma/hfi1/trace.h
+++ b/drivers/staging/rdma/hfi1/trace.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -76,304 +73,295 @@ __print_symbolic(etype,                         \
 #define TRACE_SYSTEM hfi1_rx
 
 TRACE_EVENT(hfi1_rcvhdr,
-	TP_PROTO(struct hfi1_devdata *dd,
-		 u64 eflags,
-		 u32 ctxt,
-		 u32 etype,
-		 u32 hlen,
-		 u32 tlen,
-		 u32 updegr,
-		 u32 etail),
-	TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__field(u64, eflags)
-		__field(u32, ctxt)
-		__field(u32, etype)
-		__field(u32, hlen)
-		__field(u32, tlen)
-		__field(u32, updegr)
-		__field(u32, etail)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		__entry->eflags = eflags;
-		__entry->ctxt = ctxt;
-		__entry->etype = etype;
-		__entry->hlen = hlen;
-		__entry->tlen = tlen;
-		__entry->updegr = updegr;
-		__entry->etail = etail;
-	),
-	TP_printk(
-"[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
-		__get_str(dev),
-		__entry->ctxt,
-		__entry->eflags,
-		__entry->etype, show_packettype(__entry->etype),
-		__entry->hlen,
-		__entry->tlen,
-		__entry->updegr,
-		__entry->etail
-	)
+	    TP_PROTO(struct hfi1_devdata *dd,
+		     u64 eflags,
+		     u32 ctxt,
+		     u32 etype,
+		     u32 hlen,
+		     u32 tlen,
+		     u32 updegr,
+		     u32 etail
+		     ),
+	    TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(u64, eflags)
+			     __field(u32, ctxt)
+			     __field(u32, etype)
+			     __field(u32, hlen)
+			     __field(u32, tlen)
+			     __field(u32, updegr)
+			     __field(u32, etail)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->eflags = eflags;
+			   __entry->ctxt = ctxt;
+			   __entry->etype = etype;
+			   __entry->hlen = hlen;
+			   __entry->tlen = tlen;
+			   __entry->updegr = updegr;
+			   __entry->etail = etail;
+			   ),
+	    TP_printk(
+		      "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->eflags,
+		      __entry->etype, show_packettype(__entry->etype),
+		      __entry->hlen,
+		      __entry->tlen,
+		      __entry->updegr,
+		      __entry->etail
+		      )
 );
 
 TRACE_EVENT(hfi1_receive_interrupt,
-	TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
-	TP_ARGS(dd, ctxt),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__field(u32, ctxt)
-		__field(u8, slow_path)
-		__field(u8, dma_rtail)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		__entry->ctxt = ctxt;
-		if (dd->rcd[ctxt]->do_interrupt ==
-		    &handle_receive_interrupt) {
-			__entry->slow_path = 1;
-			__entry->dma_rtail = 0xFF;
-		} else if (dd->rcd[ctxt]->do_interrupt ==
-			&handle_receive_interrupt_dma_rtail){
-			__entry->dma_rtail = 1;
-			__entry->slow_path = 0;
-		} else if (dd->rcd[ctxt]->do_interrupt ==
-			 &handle_receive_interrupt_nodma_rtail) {
-			__entry->dma_rtail = 0;
-			__entry->slow_path = 0;
-		}
-	),
-	TP_printk(
-		"[%s] ctxt %d SlowPath: %d DmaRtail: %d",
-		__get_str(dev),
-		__entry->ctxt,
-		__entry->slow_path,
-		__entry->dma_rtail
-	)
+	    TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
+	    TP_ARGS(dd, ctxt),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(u32, ctxt)
+			     __field(u8, slow_path)
+			     __field(u8, dma_rtail)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->ctxt = ctxt;
+			   if (dd->rcd[ctxt]->do_interrupt ==
+			       &handle_receive_interrupt) {
+				__entry->slow_path = 1;
+				__entry->dma_rtail = 0xFF;
+			   } else if (dd->rcd[ctxt]->do_interrupt ==
+				      &handle_receive_interrupt_dma_rtail){
+				__entry->dma_rtail = 1;
+				__entry->slow_path = 0;
+			   } else if (dd->rcd[ctxt]->do_interrupt ==
+				      &handle_receive_interrupt_nodma_rtail) {
+				__entry->dma_rtail = 0;
+				__entry->slow_path = 0;
+			   }
+			   ),
+	    TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->slow_path,
+		      __entry->dma_rtail
+		      )
 );
 
-const char *print_u64_array(struct trace_seq *, u64 *, int);
+TRACE_EVENT(hfi1_exp_tid_reg,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr,
+		     u32 npages, unsigned long va, unsigned long pa,
+		     dma_addr_t dma),
+	    TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
+	    TP_STRUCT__entry(
+		    __field(unsigned, ctxt)
+		    __field(u16, subctxt)
+		    __field(u32, rarr)
+		    __field(u32, npages)
+		    __field(unsigned long, va)
+		    __field(unsigned long, pa)
+		    __field(dma_addr_t, dma)
+		    ),
+	    TP_fast_assign(
+		    __entry->ctxt = ctxt;
+		    __entry->subctxt = subctxt;
+		    __entry->rarr = rarr;
+		    __entry->npages = npages;
+		    __entry->va = va;
+		    __entry->pa = pa;
+		    __entry->dma = dma;
+		    ),
+	    TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
+		      __entry->ctxt,
+		      __entry->subctxt,
+		      __entry->rarr,
+		      __entry->npages,
+		      __entry->pa,
+		      __entry->va,
+		      __entry->dma
+		    )
+	);
 
-TRACE_EVENT(hfi1_exp_tid_map,
-	    TP_PROTO(unsigned ctxt, u16 subctxt, int dir,
-		     unsigned long *maps, u16 count),
-	    TP_ARGS(ctxt, subctxt, dir, maps, count),
+TRACE_EVENT(hfi1_exp_tid_unreg,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, u32 npages,
+		     unsigned long va, unsigned long pa, dma_addr_t dma),
+	    TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
 	    TP_STRUCT__entry(
 		    __field(unsigned, ctxt)
 		    __field(u16, subctxt)
-		    __field(int, dir)
-		    __field(u16, count)
-		    __dynamic_array(unsigned long, maps, sizeof(*maps) * count)
+		    __field(u32, rarr)
+		    __field(u32, npages)
+		    __field(unsigned long, va)
+		    __field(unsigned long, pa)
+		    __field(dma_addr_t, dma)
 		    ),
 	    TP_fast_assign(
 		    __entry->ctxt = ctxt;
 		    __entry->subctxt = subctxt;
-		    __entry->dir = dir;
-		    __entry->count = count;
-		    memcpy(__get_dynamic_array(maps), maps,
-			   sizeof(*maps) * count);
+		    __entry->rarr = rarr;
+		    __entry->npages = npages;
+		    __entry->va = va;
+		    __entry->pa = pa;
+		    __entry->dma = dma;
 		    ),
-	    TP_printk("[%3u:%02u] %s tidmaps %s",
+	    TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
 		      __entry->ctxt,
 		      __entry->subctxt,
-		      (__entry->dir ? ">" : "<"),
-		      print_u64_array(p, __get_dynamic_array(maps),
-				      __entry->count)
+		      __entry->rarr,
+		      __entry->npages,
+		      __entry->pa,
+		      __entry->va,
+		      __entry->dma
 		    )
 	);
 
-TRACE_EVENT(hfi1_exp_rcv_set,
-	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid,
-		     unsigned long vaddr, u64 phys_addr, void *page),
-	    TP_ARGS(ctxt, subctxt, tid, vaddr, phys_addr, page),
+TRACE_EVENT(hfi1_exp_tid_inval,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, unsigned long va, u32 rarr,
+		     u32 npages, dma_addr_t dma),
+	    TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
 	    TP_STRUCT__entry(
 		    __field(unsigned, ctxt)
 		    __field(u16, subctxt)
-		    __field(u32, tid)
-		    __field(unsigned long, vaddr)
-		    __field(u64, phys_addr)
-		    __field(void *, page)
+		    __field(unsigned long, va)
+		    __field(u32, rarr)
+		    __field(u32, npages)
+		    __field(dma_addr_t, dma)
 		    ),
 	    TP_fast_assign(
 		    __entry->ctxt = ctxt;
 		    __entry->subctxt = subctxt;
-		    __entry->tid = tid;
-		    __entry->vaddr = vaddr;
-		    __entry->phys_addr = phys_addr;
-		    __entry->page = page;
+		    __entry->va = va;
+		    __entry->rarr = rarr;
+		    __entry->npages = npages;
+		    __entry->dma = dma;
 		    ),
-	    TP_printk("[%u:%u] TID %u, vaddrs 0x%lx, physaddr 0x%llx, pgp %p",
+	    TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
 		      __entry->ctxt,
 		      __entry->subctxt,
-		      __entry->tid,
-		      __entry->vaddr,
-		      __entry->phys_addr,
-		      __entry->page
+		      __entry->rarr,
+		      __entry->npages,
+		      __entry->va,
+		      __entry->dma
 		    )
 	);
 
-TRACE_EVENT(hfi1_exp_rcv_free,
-	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid,
-		     unsigned long phys, void *page),
-	    TP_ARGS(ctxt, subctxt, tid, phys, page),
+TRACE_EVENT(hfi1_mmu_invalidate,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, const char *type,
+		     unsigned long start, unsigned long end),
+	    TP_ARGS(ctxt, subctxt, type, start, end),
 	    TP_STRUCT__entry(
 		    __field(unsigned, ctxt)
 		    __field(u16, subctxt)
-		    __field(u32, tid)
-		    __field(unsigned long, phys)
-		    __field(void *, page)
+		    __string(type, type)
+		    __field(unsigned long, start)
+		    __field(unsigned long, end)
 		    ),
 	    TP_fast_assign(
 		    __entry->ctxt = ctxt;
 		    __entry->subctxt = subctxt;
-		    __entry->tid = tid;
-		    __entry->phys = phys;
-		    __entry->page = page;
+		    __assign_str(type, type);
+		    __entry->start = start;
+		    __entry->end = end;
 		    ),
-	    TP_printk("[%u:%u] freeing TID %u, 0x%lx, pgp %p",
+	    TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx",
 		      __entry->ctxt,
 		      __entry->subctxt,
-		      __entry->tid,
-		      __entry->phys,
-		      __entry->page
+		      __get_str(type),
+		      __entry->start,
+		      __entry->end
 		    )
 	);
+
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_tx
 
 TRACE_EVENT(hfi1_piofree,
-	TP_PROTO(struct send_context *sc, int extra),
-	TP_ARGS(sc, extra),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sc->dd)
-		__field(u32, sw_index)
-		__field(u32, hw_context)
-		__field(int, extra)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sc->dd);
-		__entry->sw_index = sc->sw_index;
-		__entry->hw_context = sc->hw_context;
-		__entry->extra = extra;
-	),
-	TP_printk(
-		"[%s] ctxt %u(%u) extra %d",
-		__get_str(dev),
-		__entry->sw_index,
-		__entry->hw_context,
-		__entry->extra
-	)
+	    TP_PROTO(struct send_context *sc, int extra),
+	    TP_ARGS(sc, extra),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+			     __field(u32, sw_index)
+			     __field(u32, hw_context)
+			     __field(int, extra)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+			   __entry->sw_index = sc->sw_index;
+			   __entry->hw_context = sc->hw_context;
+			   __entry->extra = extra;
+			   ),
+	    TP_printk("[%s] ctxt %u(%u) extra %d",
+		      __get_str(dev),
+		      __entry->sw_index,
+		      __entry->hw_context,
+		      __entry->extra
+		      )
 );
 
 TRACE_EVENT(hfi1_wantpiointr,
-	TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
-	TP_ARGS(sc, needint, credit_ctrl),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sc->dd)
-		__field(u32, sw_index)
-		__field(u32, hw_context)
-		__field(u32, needint)
-		__field(u64, credit_ctrl)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sc->dd);
-		__entry->sw_index = sc->sw_index;
-		__entry->hw_context = sc->hw_context;
-		__entry->needint = needint;
-		__entry->credit_ctrl = credit_ctrl;
-	),
-	TP_printk(
-		"[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
-		__get_str(dev),
-		__entry->sw_index,
-		__entry->hw_context,
-		__entry->needint,
-		(unsigned long long)__entry->credit_ctrl
-	)
+	    TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
+	    TP_ARGS(sc, needint, credit_ctrl),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+			     __field(u32, sw_index)
+			     __field(u32, hw_context)
+			     __field(u32, needint)
+			     __field(u64, credit_ctrl)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+			   __entry->sw_index = sc->sw_index;
+			   __entry->hw_context = sc->hw_context;
+			   __entry->needint = needint;
+			   __entry->credit_ctrl = credit_ctrl;
+			   ),
+	    TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
+		      __get_str(dev),
+		      __entry->sw_index,
+		      __entry->hw_context,
+		      __entry->needint,
+		      (unsigned long long)__entry->credit_ctrl
+		       )
 );
 
 DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
-	TP_PROTO(struct hfi1_qp *qp, u32 flags),
-	TP_ARGS(qp, flags),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
-		__field(u32, qpn)
-		__field(u32, flags)
-		__field(u32, s_flags)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
-		__entry->flags = flags;
-		__entry->qpn = qp->ibqp.qp_num;
-		__entry->s_flags = qp->s_flags;
-	),
-	TP_printk(
-		"[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
-		__get_str(dev),
-		__entry->qpn,
-		__entry->flags,
-		__entry->s_flags
-	)
+		    TP_PROTO(struct rvt_qp *qp, u32 flags),
+		    TP_ARGS(qp, flags),
+		    TP_STRUCT__entry(
+			    DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+			    __field(u32, qpn)
+			    __field(u32, flags)
+			    __field(u32, s_flags)
+			    ),
+		    TP_fast_assign(
+			    DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+			    __entry->flags = flags;
+			    __entry->qpn = qp->ibqp.qp_num;
+			    __entry->s_flags = qp->s_flags;
+			    ),
+		    TP_printk(
+			    "[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
+			    __get_str(dev),
+			    __entry->qpn,
+			    __entry->flags,
+			    __entry->s_flags
+			    )
 );
 
 DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup,
-	     TP_PROTO(struct hfi1_qp *qp, u32 flags),
+	     TP_PROTO(struct rvt_qp *qp, u32 flags),
 	     TP_ARGS(qp, flags));
 
 DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep,
-	     TP_PROTO(struct hfi1_qp *qp, u32 flags),
+	     TP_PROTO(struct rvt_qp *qp, u32 flags),
 	     TP_ARGS(qp, flags));
 
 #undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_qphash
-DECLARE_EVENT_CLASS(hfi1_qphash_template,
-	TP_PROTO(struct hfi1_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
-		__field(u32, qpn)
-		__field(u32, bucket)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
-		__entry->qpn = qp->ibqp.qp_num;
-		__entry->bucket = bucket;
-	),
-	TP_printk(
-		"[%s] qpn 0x%x bucket %u",
-		__get_str(dev),
-		__entry->qpn,
-		__entry->bucket
-	)
-);
-
-DEFINE_EVENT(hfi1_qphash_template, hfi1_qpinsert,
-	TP_PROTO(struct hfi1_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket));
-
-DEFINE_EVENT(hfi1_qphash_template, hfi1_qpremove,
-	TP_PROTO(struct hfi1_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket));
-
-#undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_ibhdrs
 
 u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
-const char *parse_everbs_hdrs(
-	struct trace_seq *p,
-	u8 opcode,
-	void *ehdrs);
+const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
 
 #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
 
-const char *parse_sdma_flags(
-	struct trace_seq *p,
-	u64 desc0, u64 desc1);
+const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1);
 
 #define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1)
 
-
 #define lrh_name(lrh) { HFI1_##lrh, #lrh }
 #define show_lnh(lrh)                    \
 __print_symbolic(lrh,                    \
@@ -420,7 +408,6 @@ __print_symbolic(opcode,                                   \
 	ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE),       \
 	ib_opcode_name(CNP))
 
-
 #define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
 #define BTH_PRN \
 	"op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
@@ -428,124 +415,130 @@ __print_symbolic(opcode,                                   \
 #define EHDR_PRN "%s"
 
 DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
-	TP_PROTO(struct hfi1_devdata *dd,
-		 struct hfi1_ib_header *hdr),
-	TP_ARGS(dd, hdr),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		/* LRH */
-		__field(u8, vl)
-		__field(u8, lver)
-		__field(u8, sl)
-		__field(u8, lnh)
-		__field(u16, dlid)
-		__field(u16, len)
-		__field(u16, slid)
-		/* BTH */
-		__field(u8, opcode)
-		__field(u8, se)
-		__field(u8, m)
-		__field(u8, pad)
-		__field(u8, tver)
-		__field(u16, pkey)
-		__field(u8, f)
-		__field(u8, b)
-		__field(u32, qpn)
-		__field(u8, a)
-		__field(u32, psn)
-		/* extended headers */
-		__dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
-	),
-	TP_fast_assign(
-		struct hfi1_other_headers *ohdr;
-
-		DD_DEV_ASSIGN(dd);
-		/* LRH */
-		__entry->vl =
-			(u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
-		__entry->lver =
-			(u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
-		__entry->sl =
-			(u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
-		__entry->lnh =
-			(u8)(be16_to_cpu(hdr->lrh[0]) & 3);
-		__entry->dlid =
-			be16_to_cpu(hdr->lrh[1]);
-		/* allow for larger len */
-		__entry->len =
-			be16_to_cpu(hdr->lrh[2]);
-		__entry->slid =
-			be16_to_cpu(hdr->lrh[3]);
-		/* BTH */
-		if (__entry->lnh == HFI1_LRH_BTH)
-			ohdr = &hdr->u.oth;
-		else
-			ohdr = &hdr->u.l.oth;
-		__entry->opcode =
-			(be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
-		__entry->se =
-			(be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
-		__entry->m =
-			 (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
-		__entry->pad =
-			(be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		__entry->tver =
-			(be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
-		__entry->pkey =
-			be32_to_cpu(ohdr->bth[0]) & 0xffff;
-		__entry->f =
-			(be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT)
-			& HFI1_FECN_MASK;
-		__entry->b =
-			(be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT)
-			& HFI1_BECN_MASK;
-		__entry->qpn =
-			be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
-		__entry->a =
-			(be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
-		/* allow for larger PSN */
-		__entry->psn =
-			be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
-		/* extended headers */
-		 memcpy(
-			__get_dynamic_array(ehdrs),
-			&ohdr->u,
-			ibhdr_exhdr_len(hdr));
-	),
-	TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
-		__get_str(dev),
-		/* LRH */
-		__entry->vl,
-		__entry->lver,
-		__entry->sl,
-		__entry->lnh, show_lnh(__entry->lnh),
-		__entry->dlid,
-		__entry->len,
-		__entry->slid,
-		/* BTH */
-		__entry->opcode, show_ib_opcode(__entry->opcode),
-		__entry->se,
-		__entry->m,
-		__entry->pad,
-		__entry->tver,
-		__entry->pkey,
-		__entry->f,
-		__entry->b,
-		__entry->qpn,
-		__entry->a,
-		__entry->psn,
-		/* extended headers */
-		__parse_ib_ehdrs(
-			__entry->opcode,
-			(void *)__get_dynamic_array(ehdrs))
-	)
+		    TP_PROTO(struct hfi1_devdata *dd,
+			     struct hfi1_ib_header *hdr),
+		    TP_ARGS(dd, hdr),
+		    TP_STRUCT__entry(
+			    DD_DEV_ENTRY(dd)
+			    /* LRH */
+			    __field(u8, vl)
+			    __field(u8, lver)
+			    __field(u8, sl)
+			    __field(u8, lnh)
+			    __field(u16, dlid)
+			    __field(u16, len)
+			    __field(u16, slid)
+			    /* BTH */
+			    __field(u8, opcode)
+			    __field(u8, se)
+			    __field(u8, m)
+			    __field(u8, pad)
+			    __field(u8, tver)
+			    __field(u16, pkey)
+			    __field(u8, f)
+			    __field(u8, b)
+			    __field(u32, qpn)
+			    __field(u8, a)
+			    __field(u32, psn)
+			    /* extended headers */
+			    __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
+			    ),
+		    TP_fast_assign(
+			   struct hfi1_other_headers *ohdr;
+
+			   DD_DEV_ASSIGN(dd);
+			   /* LRH */
+			   __entry->vl =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
+			   __entry->lver =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
+			   __entry->sl =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
+			   __entry->lnh =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
+			   __entry->dlid =
+			   be16_to_cpu(hdr->lrh[1]);
+			   /* allow for larger len */
+			   __entry->len =
+			   be16_to_cpu(hdr->lrh[2]);
+			   __entry->slid =
+			   be16_to_cpu(hdr->lrh[3]);
+			   /* BTH */
+			   if (__entry->lnh == HFI1_LRH_BTH)
+				ohdr = &hdr->u.oth;
+			   else
+				ohdr = &hdr->u.l.oth;
+			  __entry->opcode =
+			  (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+			  __entry->se =
+			  (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
+			  __entry->m =
+			  (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
+			  __entry->pad =
+			  (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+			  __entry->tver =
+			  (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
+			  __entry->pkey =
+			  be32_to_cpu(ohdr->bth[0]) & 0xffff;
+			  __entry->f =
+			  (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
+			  HFI1_FECN_MASK;
+			  __entry->b =
+			  (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
+			  HFI1_BECN_MASK;
+			  __entry->qpn =
+			  be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+			  __entry->a =
+			  (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
+			  /* allow for larger PSN */
+			  __entry->psn =
+			  be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
+			  /* extended headers */
+			  memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
+				 ibhdr_exhdr_len(hdr));
+			 ),
+		    TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
+			      __get_str(dev),
+			      /* LRH */
+			      __entry->vl,
+			      __entry->lver,
+			      __entry->sl,
+			      __entry->lnh, show_lnh(__entry->lnh),
+			      __entry->dlid,
+			      __entry->len,
+			      __entry->slid,
+			      /* BTH */
+			      __entry->opcode, show_ib_opcode(__entry->opcode),
+			      __entry->se,
+			      __entry->m,
+			      __entry->pad,
+			      __entry->tver,
+			      __entry->pkey,
+			      __entry->f,
+			      __entry->b,
+			      __entry->qpn,
+			      __entry->a,
+			      __entry->psn,
+			      /* extended headers */
+			      __parse_ib_ehdrs(
+					__entry->opcode,
+					(void *)__get_dynamic_array(ehdrs))
+			     )
 );
 
 DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
 	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
 	     TP_ARGS(dd, hdr));
 
-DEFINE_EVENT(hfi1_ibhdr_template, output_ibhdr,
+DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
+	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+	     TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
+	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+	     TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
 	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
 	     TP_ARGS(dd, hdr));
 
@@ -556,15 +549,14 @@ DEFINE_EVENT(hfi1_ibhdr_template, output_ibhdr,
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_snoop
 
-
 TRACE_EVENT(snoop_capture,
-	TP_PROTO(struct hfi1_devdata *dd,
-		 int hdr_len,
-		 struct hfi1_ib_header *hdr,
-		 int data_len,
-		 void *data),
-	TP_ARGS(dd, hdr_len, hdr, data_len, data),
-	TP_STRUCT__entry(
+	    TP_PROTO(struct hfi1_devdata *dd,
+		     int hdr_len,
+		     struct hfi1_ib_header *hdr,
+		     int data_len,
+		     void *data),
+	    TP_ARGS(dd, hdr_len, hdr, data_len, data),
+	    TP_STRUCT__entry(
 		DD_DEV_ENTRY(dd)
 		__field(u16, slid)
 		__field(u16, dlid)
@@ -577,8 +569,8 @@ TRACE_EVENT(snoop_capture,
 		__field(u8, lnh)
 		__dynamic_array(u8, raw_hdr, hdr_len)
 		__dynamic_array(u8, raw_pkt, data_len)
-	),
-	TP_fast_assign(
+		),
+	    TP_fast_assign(
 		struct hfi1_other_headers *ohdr;
 
 		__entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
@@ -589,7 +581,7 @@ TRACE_EVENT(snoop_capture,
 		DD_DEV_ASSIGN(dd);
 		__entry->slid = be16_to_cpu(hdr->lrh[3]);
 		__entry->dlid = be16_to_cpu(hdr->lrh[1]);
-		__entry->qpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
+		__entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 		__entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
 		__entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
 		__entry->pkey =	be32_to_cpu(ohdr->bth[0]) & 0xffff;
@@ -597,8 +589,9 @@ TRACE_EVENT(snoop_capture,
 		__entry->data_len = data_len;
 		memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
 		memcpy(__get_dynamic_array(raw_pkt), data, data_len);
-	),
-	TP_printk("[%s] " SNOOP_PRN,
+		),
+	    TP_printk(
+		"[%s] " SNOOP_PRN,
 		__get_str(dev),
 		__entry->slid,
 		__entry->dlid,
@@ -609,7 +602,7 @@ TRACE_EVENT(snoop_capture,
 		__entry->pkey,
 		__entry->hdr_len,
 		__entry->data_len
-	)
+		)
 );
 
 #undef TRACE_SYSTEM
@@ -621,41 +614,39 @@ TRACE_EVENT(snoop_capture,
 TRACE_EVENT(hfi1_uctxtdata,
 	    TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
 	    TP_ARGS(dd, uctxt),
-	    TP_STRUCT__entry(
-		    DD_DEV_ENTRY(dd)
-		    __field(unsigned, ctxt)
-		    __field(u32, credits)
-		    __field(u64, hw_free)
-		    __field(u64, piobase)
-		    __field(u16, rcvhdrq_cnt)
-		    __field(u64, rcvhdrq_phys)
-		    __field(u32, eager_cnt)
-		    __field(u64, rcvegr_phys)
-		    ),
-	    TP_fast_assign(
-		    DD_DEV_ASSIGN(dd);
-		    __entry->ctxt = uctxt->ctxt;
-		    __entry->credits = uctxt->sc->credits;
-		    __entry->hw_free = (u64)uctxt->sc->hw_free;
-		    __entry->piobase = (u64)uctxt->sc->base_addr;
-		    __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
-		    __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
-		    __entry->eager_cnt = uctxt->egrbufs.alloced;
-		    __entry->rcvegr_phys = uctxt->egrbufs.rcvtids[0].phys;
-		    ),
-	    TP_printk(
-		    "[%s] ctxt %u " UCTXT_FMT,
-		    __get_str(dev),
-		    __entry->ctxt,
-		    __entry->credits,
-		    __entry->hw_free,
-		    __entry->piobase,
-		    __entry->rcvhdrq_cnt,
-		    __entry->rcvhdrq_phys,
-		    __entry->eager_cnt,
-		    __entry->rcvegr_phys
-		    )
-	);
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(unsigned, ctxt)
+			     __field(u32, credits)
+			     __field(u64, hw_free)
+			     __field(u64, piobase)
+			     __field(u16, rcvhdrq_cnt)
+			     __field(u64, rcvhdrq_phys)
+			     __field(u32, eager_cnt)
+			     __field(u64, rcvegr_phys)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->ctxt = uctxt->ctxt;
+			   __entry->credits = uctxt->sc->credits;
+			   __entry->hw_free = (u64)uctxt->sc->hw_free;
+			   __entry->piobase = (u64)uctxt->sc->base_addr;
+			   __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+			   __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
+			   __entry->eager_cnt = uctxt->egrbufs.alloced;
+			   __entry->rcvegr_phys =
+			   uctxt->egrbufs.rcvtids[0].phys;
+			   ),
+	    TP_printk("[%s] ctxt %u " UCTXT_FMT,
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->credits,
+		      __entry->hw_free,
+		      __entry->piobase,
+		      __entry->rcvhdrq_cnt,
+		      __entry->rcvhdrq_phys,
+		      __entry->eager_cnt,
+		      __entry->rcvegr_phys
+		      )
+);
 
 #define CINFO_FMT \
 	"egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u"
@@ -663,38 +654,35 @@ TRACE_EVENT(hfi1_ctxt_info,
 	    TP_PROTO(struct hfi1_devdata *dd, unsigned ctxt, unsigned subctxt,
 		     struct hfi1_ctxt_info cinfo),
 	    TP_ARGS(dd, ctxt, subctxt, cinfo),
-	    TP_STRUCT__entry(
-		    DD_DEV_ENTRY(dd)
-		    __field(unsigned, ctxt)
-		    __field(unsigned, subctxt)
-		    __field(u16, egrtids)
-		    __field(u16, rcvhdrq_cnt)
-		    __field(u16, rcvhdrq_size)
-		    __field(u16, sdma_ring_size)
-		    __field(u32, rcvegr_size)
-		    ),
-	    TP_fast_assign(
-		    DD_DEV_ASSIGN(dd);
-		    __entry->ctxt = ctxt;
-		    __entry->subctxt = subctxt;
-		    __entry->egrtids = cinfo.egrtids;
-		    __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
-		    __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
-		    __entry->sdma_ring_size = cinfo.sdma_ring_size;
-		    __entry->rcvegr_size = cinfo.rcvegr_size;
-		    ),
-	    TP_printk(
-		    "[%s] ctxt %u:%u " CINFO_FMT,
-		    __get_str(dev),
-		    __entry->ctxt,
-		    __entry->subctxt,
-		    __entry->egrtids,
-		    __entry->rcvegr_size,
-		    __entry->rcvhdrq_cnt,
-		    __entry->rcvhdrq_size,
-		    __entry->sdma_ring_size
-		    )
-	);
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(unsigned, ctxt)
+			     __field(unsigned, subctxt)
+			     __field(u16, egrtids)
+			     __field(u16, rcvhdrq_cnt)
+			     __field(u16, rcvhdrq_size)
+			     __field(u16, sdma_ring_size)
+			     __field(u32, rcvegr_size)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			    __entry->ctxt = ctxt;
+			    __entry->subctxt = subctxt;
+			    __entry->egrtids = cinfo.egrtids;
+			    __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
+			    __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
+			    __entry->sdma_ring_size = cinfo.sdma_ring_size;
+			    __entry->rcvegr_size = cinfo.rcvegr_size;
+			    ),
+	    TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->subctxt,
+		      __entry->egrtids,
+		      __entry->rcvegr_size,
+		      __entry->rcvhdrq_cnt,
+		      __entry->rcvhdrq_size,
+		      __entry->sdma_ring_size
+		      )
+);
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_sma
@@ -708,52 +696,48 @@ TRACE_EVENT(hfi1_ctxt_info,
 	)
 
 DECLARE_EVENT_CLASS(hfi1_bct_template,
-	TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
-	TP_ARGS(dd, bc),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__dynamic_array(u8, bct, sizeof(*bc))
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		memcpy(
-			__get_dynamic_array(bct),
-			bc,
-			sizeof(*bc));
-	),
-	TP_printk(BCT_FORMAT,
-		BCT(overall_shared_limit),
-
-		BCT(vl[0].dedicated),
-		BCT(vl[0].shared),
-
-		BCT(vl[1].dedicated),
-		BCT(vl[1].shared),
-
-		BCT(vl[2].dedicated),
-		BCT(vl[2].shared),
-
-		BCT(vl[3].dedicated),
-		BCT(vl[3].shared),
-
-		BCT(vl[4].dedicated),
-		BCT(vl[4].shared),
-
-		BCT(vl[5].dedicated),
-		BCT(vl[5].shared),
-
-		BCT(vl[6].dedicated),
-		BCT(vl[6].shared),
-
-		BCT(vl[7].dedicated),
-		BCT(vl[7].shared),
-
-		BCT(vl[15].dedicated),
-		BCT(vl[15].shared)
-	)
+		    TP_PROTO(struct hfi1_devdata *dd,
+			     struct buffer_control *bc),
+		    TP_ARGS(dd, bc),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+				     __dynamic_array(u8, bct, sizeof(*bc))
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(dd);
+				   memcpy(__get_dynamic_array(bct), bc,
+					  sizeof(*bc));
+				   ),
+		    TP_printk(BCT_FORMAT,
+			      BCT(overall_shared_limit),
+
+			      BCT(vl[0].dedicated),
+			      BCT(vl[0].shared),
+
+			      BCT(vl[1].dedicated),
+			      BCT(vl[1].shared),
+
+			      BCT(vl[2].dedicated),
+			      BCT(vl[2].shared),
+
+			      BCT(vl[3].dedicated),
+			      BCT(vl[3].shared),
+
+			      BCT(vl[4].dedicated),
+			      BCT(vl[4].shared),
+
+			      BCT(vl[5].dedicated),
+			      BCT(vl[5].shared),
+
+			      BCT(vl[6].dedicated),
+			      BCT(vl[6].shared),
+
+			      BCT(vl[7].dedicated),
+			      BCT(vl[7].shared),
+
+			      BCT(vl[15].dedicated),
+			      BCT(vl[15].shared)
+			      )
 );
 
-
 DEFINE_EVENT(hfi1_bct_template, bct_set,
 	     TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
 	     TP_ARGS(dd, bc));
@@ -766,252 +750,209 @@ DEFINE_EVENT(hfi1_bct_template, bct_get,
 #define TRACE_SYSTEM hfi1_sdma
 
 TRACE_EVENT(hfi1_sdma_descriptor,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 desc0,
-		u64 desc1,
-		u16 e,
-		void *descp),
+	    TP_PROTO(struct sdma_engine *sde,
+		     u64 desc0,
+		     u64 desc1,
+		     u16 e,
+		     void *descp),
 	TP_ARGS(sde, desc0, desc1, e, descp),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(void *, descp)
-		__field(u64, desc0)
-		__field(u64, desc1)
-		__field(u16, e)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->desc0 = desc0;
-		__entry->desc1 = desc1;
-		__entry->idx = sde->this_idx;
-		__entry->descp = descp;
-		__entry->e = e;
-	),
+	TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			 __field(void *, descp)
+			 __field(u64, desc0)
+			 __field(u64, desc1)
+			 __field(u16, e)
+			 __field(u8, idx)
+			 ),
+	TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+		       __entry->desc0 = desc0;
+		       __entry->desc1 = desc1;
+		       __entry->idx = sde->this_idx;
+		       __entry->descp = descp;
+		       __entry->e = e;
+		       ),
 	TP_printk(
-		"[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
-		__get_str(dev),
-		__entry->idx,
-		__parse_sdma_flags(__entry->desc0, __entry->desc1),
-		(__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT)
-			& SDMA_DESC0_PHY_ADDR_MASK,
-		(u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT)
-			& SDMA_DESC1_GENERATION_MASK),
-		(u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT)
-			& SDMA_DESC0_BYTE_COUNT_MASK),
-		__entry->desc0,
-		__entry->desc1,
-		__entry->descp,
-		__entry->e
-	)
+		  "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
+		  __get_str(dev),
+		  __entry->idx,
+		  __parse_sdma_flags(__entry->desc0, __entry->desc1),
+		  (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) &
+		  SDMA_DESC0_PHY_ADDR_MASK,
+		  (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) &
+		       SDMA_DESC1_GENERATION_MASK),
+		  (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) &
+			SDMA_DESC0_BYTE_COUNT_MASK),
+		  __entry->desc0,
+		  __entry->desc1,
+		  __entry->descp,
+		  __entry->e
+		  )
 );
 
 TRACE_EVENT(hfi1_sdma_engine_select,
-	TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
-	TP_ARGS(dd, sel, vl, idx),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__field(u32, sel)
-		__field(u8, vl)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		__entry->sel = sel;
-		__entry->vl = vl;
-		__entry->idx = idx;
-	),
-	TP_printk(
-		"[%s] selecting SDE %u sel 0x%x vl %u",
-		__get_str(dev),
-		__entry->idx,
-		__entry->sel,
-		__entry->vl
-	)
+	    TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
+	    TP_ARGS(dd, sel, vl, idx),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(u32, sel)
+			     __field(u8, vl)
+			     __field(u8, idx)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->sel = sel;
+			   __entry->vl = vl;
+			   __entry->idx = idx;
+			   ),
+	    TP_printk("[%s] selecting SDE %u sel 0x%x vl %u",
+		      __get_str(dev),
+		      __entry->idx,
+		      __entry->sel,
+		      __entry->vl
+		      )
 );
 
 DECLARE_EVENT_CLASS(hfi1_sdma_engine_class,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 status
-	),
-	TP_ARGS(sde, status),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u64, status)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->status = status;
-		__entry->idx = sde->this_idx;
-	),
-	TP_printk(
-		"[%s] SDE(%u) status %llx",
-		__get_str(dev),
-		__entry->idx,
-		(unsigned long long)__entry->status
-	)
+		    TP_PROTO(struct sdma_engine *sde, u64 status),
+		    TP_ARGS(sde, status),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+				     __field(u64, status)
+				     __field(u8, idx)
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+				   __entry->status = status;
+				   __entry->idx = sde->this_idx;
+				   ),
+		    TP_printk("[%s] SDE(%u) status %llx",
+			      __get_str(dev),
+			      __entry->idx,
+			      (unsigned long long)__entry->status
+			      )
 );
 
 DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 status
-	),
-	TP_ARGS(sde, status)
+	     TP_PROTO(struct sdma_engine *sde, u64 status),
+	     TP_ARGS(sde, status)
 );
 
 DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 status
-	),
-	TP_ARGS(sde, status)
+	     TP_PROTO(struct sdma_engine *sde, u64 status),
+	     TP_ARGS(sde, status)
 );
 
 DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		int aidx
-	),
-	TP_ARGS(sde, aidx),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(int, aidx)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->idx = sde->this_idx;
-		__entry->aidx = aidx;
-	),
-	TP_printk(
-		"[%s] SDE(%u) aidx %d",
-		__get_str(dev),
-		__entry->idx,
-		__entry->aidx
-	)
+		    TP_PROTO(struct sdma_engine *sde, int aidx),
+		    TP_ARGS(sde, aidx),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+				     __field(int, aidx)
+				     __field(u8, idx)
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+				   __entry->idx = sde->this_idx;
+				   __entry->aidx = aidx;
+				   ),
+		    TP_printk("[%s] SDE(%u) aidx %d",
+			      __get_str(dev),
+			      __entry->idx,
+			      __entry->aidx
+			      )
 );
 
 DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate,
-	     TP_PROTO(
-		struct sdma_engine *sde,
-		int aidx
-	     ),
+	     TP_PROTO(struct sdma_engine *sde, int aidx),
 	     TP_ARGS(sde, aidx));
 
 DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate,
-	     TP_PROTO(
-		struct sdma_engine *sde,
-		int aidx
-	     ),
+	     TP_PROTO(struct sdma_engine *sde, int aidx),
 	     TP_ARGS(sde, aidx));
 
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 TRACE_EVENT(hfi1_sdma_progress,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u16 hwhead,
-		u16 swhead,
-		struct sdma_txreq *txp
-	),
-	TP_ARGS(sde, hwhead, swhead, txp),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u64, sn)
-		__field(u16, hwhead)
-		__field(u16, swhead)
-		__field(u16, txnext)
-		__field(u16, tx_tail)
-		__field(u16, tx_head)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->hwhead = hwhead;
-		__entry->swhead = swhead;
-		__entry->tx_tail = sde->tx_tail;
-		__entry->tx_head = sde->tx_head;
-		__entry->txnext = txp ? txp->next_descq_idx : ~0;
-		__entry->idx = sde->this_idx;
-		__entry->sn = txp ? txp->sn : ~0;
-	),
-	TP_printk(
-		"[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
-		__get_str(dev),
-		__entry->idx,
-		__entry->sn,
-		__entry->hwhead,
-		__entry->swhead,
-		__entry->txnext,
-		__entry->tx_head,
-		__entry->tx_tail
-	)
+	    TP_PROTO(struct sdma_engine *sde,
+		     u16 hwhead,
+		     u16 swhead,
+		     struct sdma_txreq *txp
+		     ),
+	    TP_ARGS(sde, hwhead, swhead, txp),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			     __field(u64, sn)
+			     __field(u16, hwhead)
+			     __field(u16, swhead)
+			     __field(u16, txnext)
+			     __field(u16, tx_tail)
+			     __field(u16, tx_head)
+			     __field(u8, idx)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+			   __entry->hwhead = hwhead;
+			   __entry->swhead = swhead;
+			   __entry->tx_tail = sde->tx_tail;
+			   __entry->tx_head = sde->tx_head;
+			   __entry->txnext = txp ? txp->next_descq_idx : ~0;
+			   __entry->idx = sde->this_idx;
+			   __entry->sn = txp ? txp->sn : ~0;
+			   ),
+	    TP_printk(
+		      "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+		      __get_str(dev),
+		      __entry->idx,
+		      __entry->sn,
+		      __entry->hwhead,
+		      __entry->swhead,
+		      __entry->txnext,
+		      __entry->tx_head,
+		      __entry->tx_tail
+		      )
 );
 #else
 TRACE_EVENT(hfi1_sdma_progress,
-	    TP_PROTO(
-		struct sdma_engine *sde,
-		u16 hwhead,
-		u16 swhead,
-		struct sdma_txreq *txp
+	    TP_PROTO(struct sdma_engine *sde,
+		     u16 hwhead, u16 swhead,
+		     struct sdma_txreq *txp
 	    ),
 	TP_ARGS(sde, hwhead, swhead, txp),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u16, hwhead)
-		__field(u16, swhead)
-		__field(u16, txnext)
-		__field(u16, tx_tail)
-		__field(u16, tx_head)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->hwhead = hwhead;
-		__entry->swhead = swhead;
-		__entry->tx_tail = sde->tx_tail;
-		__entry->tx_head = sde->tx_head;
-		__entry->txnext = txp ? txp->next_descq_idx : ~0;
-		__entry->idx = sde->this_idx;
-	),
+	TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			 __field(u16, hwhead)
+			 __field(u16, swhead)
+			 __field(u16, txnext)
+			 __field(u16, tx_tail)
+			 __field(u16, tx_head)
+			 __field(u8, idx)
+			 ),
+	TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+		       __entry->hwhead = hwhead;
+		       __entry->swhead = swhead;
+		       __entry->tx_tail = sde->tx_tail;
+		       __entry->tx_head = sde->tx_head;
+		       __entry->txnext = txp ? txp->next_descq_idx : ~0;
+		       __entry->idx = sde->this_idx;
+		       ),
 	TP_printk(
-		"[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
-		__get_str(dev),
-		__entry->idx,
-		__entry->hwhead,
-		__entry->swhead,
-		__entry->txnext,
-		__entry->tx_head,
-		__entry->tx_tail
-	)
+		  "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+		  __get_str(dev),
+		  __entry->idx,
+		  __entry->hwhead,
+		  __entry->swhead,
+		  __entry->txnext,
+		  __entry->tx_head,
+		  __entry->tx_tail
+		  )
 );
 #endif
 
 DECLARE_EVENT_CLASS(hfi1_sdma_sn,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 sn
-	),
-	TP_ARGS(sde, sn),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u64, sn)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->sn = sn;
-		__entry->idx = sde->this_idx;
-	),
-	TP_printk(
-		"[%s] SDE(%u) sn %llu",
-		__get_str(dev),
-		__entry->idx,
-		__entry->sn
-	)
+		    TP_PROTO(struct sdma_engine *sde, u64 sn),
+		    TP_ARGS(sde, sn),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+				     __field(u64, sn)
+				     __field(u8, idx)
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+				   __entry->sn = sn;
+				   __entry->idx = sde->this_idx;
+				   ),
+		    TP_printk("[%s] SDE(%u) sn %llu",
+			      __get_str(dev),
+			      __entry->idx,
+			      __entry->sn
+			      )
 );
 
 DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn,
@@ -1023,10 +964,7 @@ DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn,
 );
 
 DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn,
-	     TP_PROTO(
-		struct sdma_engine *sde,
-		u64 sn
-	     ),
+	     TP_PROTO(struct sdma_engine *sde, u64 sn),
 	     TP_ARGS(sde, sn)
 );
 
@@ -1227,88 +1165,85 @@ TRACE_EVENT(hfi1_sdma_user_header_ahg,
 	);
 
 TRACE_EVENT(hfi1_sdma_state,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		const char *cstate,
-		const char *nstate
-	),
-	TP_ARGS(sde, cstate, nstate),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__string(curstate, cstate)
-		__string(newstate, nstate)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__assign_str(curstate, cstate);
-		__assign_str(newstate, nstate);
-	),
+	    TP_PROTO(struct sdma_engine *sde,
+		     const char *cstate,
+		     const char *nstate
+		     ),
+	    TP_ARGS(sde, cstate, nstate),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			     __string(curstate, cstate)
+			     __string(newstate, nstate)
+			     ),
+	TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+		       __assign_str(curstate, cstate);
+		       __assign_str(newstate, nstate);
+		       ),
 	TP_printk("[%s] current state %s new state %s",
-		__get_str(dev),
-		__get_str(curstate),
-		__get_str(newstate)
-	)
+		  __get_str(dev),
+		  __get_str(curstate),
+		  __get_str(newstate)
+		  )
 );
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_rc
 
 DECLARE_EVENT_CLASS(hfi1_rc_template,
-	TP_PROTO(struct hfi1_qp *qp, u32 psn),
-	TP_ARGS(qp, psn),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
-		__field(u32, qpn)
-		__field(u32, s_flags)
-		__field(u32, psn)
-		__field(u32, s_psn)
-		__field(u32, s_next_psn)
-		__field(u32, s_sending_psn)
-		__field(u32, s_sending_hpsn)
-		__field(u32, r_psn)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
-		__entry->qpn = qp->ibqp.qp_num;
-		__entry->s_flags = qp->s_flags;
-		__entry->psn = psn;
-		__entry->s_psn = qp->s_psn;
-		__entry->s_next_psn = qp->s_next_psn;
-		__entry->s_sending_psn = qp->s_sending_psn;
-		__entry->s_sending_hpsn = qp->s_sending_hpsn;
-		__entry->r_psn = qp->r_psn;
-	),
-	TP_printk(
-		"[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
-		__get_str(dev),
-		__entry->qpn,
-		__entry->s_flags,
-		__entry->psn,
-		__entry->s_psn,
-		__entry->s_next_psn,
-		__entry->s_sending_psn,
-		__entry->s_sending_hpsn,
-		__entry->r_psn
-	)
+		    TP_PROTO(struct rvt_qp *qp, u32 psn),
+		    TP_ARGS(qp, psn),
+		    TP_STRUCT__entry(
+			DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+			__field(u32, qpn)
+			__field(u32, s_flags)
+			__field(u32, psn)
+			__field(u32, s_psn)
+			__field(u32, s_next_psn)
+			__field(u32, s_sending_psn)
+			__field(u32, s_sending_hpsn)
+			__field(u32, r_psn)
+			),
+		    TP_fast_assign(
+			DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+			__entry->qpn = qp->ibqp.qp_num;
+			__entry->s_flags = qp->s_flags;
+			__entry->psn = psn;
+			__entry->s_psn = qp->s_psn;
+			__entry->s_next_psn = qp->s_next_psn;
+			__entry->s_sending_psn = qp->s_sending_psn;
+			__entry->s_sending_hpsn = qp->s_sending_hpsn;
+			__entry->r_psn = qp->r_psn;
+			),
+		    TP_printk(
+			"[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
+			__get_str(dev),
+			__entry->qpn,
+			__entry->s_flags,
+			__entry->psn,
+			__entry->s_psn,
+			__entry->s_next_psn,
+			__entry->s_sending_psn,
+			__entry->s_sending_hpsn,
+			__entry->r_psn
+			)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_ack,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_timeout,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
@@ -1316,21 +1251,20 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error,
 #define TRACE_SYSTEM hfi1_misc
 
 TRACE_EVENT(hfi1_interrupt,
-	TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
-		 int src),
-	TP_ARGS(dd, is_entry, src),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__array(char, buf, 64)
-		__field(int, src)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd)
-		is_entry->is_name(__entry->buf, 64, src - is_entry->start);
-		__entry->src = src;
-	),
-	TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
-		  __entry->src)
+	    TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
+		     int src),
+	    TP_ARGS(dd, is_entry, src),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __array(char, buf, 64)
+			     __field(int, src)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd)
+			   is_entry->is_name(__entry->buf, 64,
+					     src - is_entry->start);
+			   __entry->src = src;
+			   ),
+	    TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
+		      __entry->src)
 );
 
 /*
@@ -1345,21 +1279,21 @@ TRACE_EVENT(hfi1_interrupt,
 #define MAX_MSG_LEN 512
 
 DECLARE_EVENT_CLASS(hfi1_trace_template,
-	TP_PROTO(const char *function, struct va_format *vaf),
-	TP_ARGS(function, vaf),
-	TP_STRUCT__entry(
-		__string(function, function)
-		__dynamic_array(char, msg, MAX_MSG_LEN)
-	),
-	TP_fast_assign(
-		__assign_str(function, function);
-		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
-		     MAX_MSG_LEN, vaf->fmt,
-		     *vaf->va) >= MAX_MSG_LEN);
-	),
-	TP_printk("(%s) %s",
-		  __get_str(function),
-		  __get_str(msg))
+		    TP_PROTO(const char *function, struct va_format *vaf),
+		    TP_ARGS(function, vaf),
+		    TP_STRUCT__entry(__string(function, function)
+				     __dynamic_array(char, msg, MAX_MSG_LEN)
+				     ),
+		    TP_fast_assign(__assign_str(function, function);
+				   WARN_ON_ONCE(vsnprintf
+						(__get_dynamic_array(msg),
+						 MAX_MSG_LEN, vaf->fmt,
+						 *vaf->va) >=
+						MAX_MSG_LEN);
+				   ),
+		    TP_printk("(%s) %s",
+			      __get_str(function),
+			      __get_str(msg))
 );
 
 /*
@@ -1406,6 +1340,7 @@ __hfi1_trace_def(DC8051);
 __hfi1_trace_def(FIRMWARE);
 __hfi1_trace_def(RCVCTRL);
 __hfi1_trace_def(TID);
+__hfi1_trace_def(MMU);
 
 #define hfi1_cdbg(which, fmt, ...) \
 	__hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)
diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/staging/rdma/hfi1/twsi.c
index ea54fd2700ad..e82e52a63d35 100644
--- a/drivers/staging/rdma/hfi1/twsi.c
+++ b/drivers/staging/rdma/hfi1/twsi.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -119,9 +116,9 @@ static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit)
 	 * Allow for slow slaves by simple
 	 * delay for falling edge, sampling on rise.
 	 */
-	if (!bit)
+	if (!bit) {
 		udelay(2);
-	else {
+	} else {
 		int rise_usec;
 
 		for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) {
@@ -131,11 +128,24 @@ static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit)
 		}
 		if (rise_usec <= 0)
 			dd_dev_err(dd, "SCL interface stuck low > %d uSec\n",
-				    SCL_WAIT_USEC);
+				   SCL_WAIT_USEC);
 	}
 	i2c_wait_for_writes(dd, target);
 }
 
+static u8 scl_in(struct hfi1_devdata *dd, u32 target, int wait)
+{
+	u32 read_val, mask;
+
+	mask = QSFP_HFI0_I2CCLK;
+	/* SCL is meant to be bare-drain, so never set "OUT", just DIR */
+	hfi1_gpio_mod(dd, target, 0, 0, mask);
+	read_val = hfi1_gpio_mod(dd, target, 0, 0, 0);
+	if (wait)
+		i2c_wait_for_writes(dd, target);
+	return (read_val & mask) >> GPIO_SCL_NUM;
+}
+
 static void sda_out(struct hfi1_devdata *dd, u32 target, u8 bit)
 {
 	u32 mask;
@@ -274,13 +284,12 @@ static void stop_cmd(struct hfi1_devdata *dd, u32 target)
 /**
  * hfi1_twsi_reset - reset I2C communication
  * @dd: the hfi1_ib device
+ * returns 0 if ok, -EIO on error
  */
-
 int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target)
 {
 	int clock_cycles_left = 9;
-	int was_high = 0;
-	u32 pins, mask;
+	u32 mask;
 
 	/* Both SCL and SDA should be high. If not, there
 	 * is something wrong.
@@ -294,43 +303,23 @@ int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target)
 	 */
 	hfi1_gpio_mod(dd, target, 0, 0, mask);
 
-	/*
-	 * Clock nine times to get all listeners into a sane state.
-	 * If SDA does not go high at any point, we are wedged.
-	 * One vendor recommends then issuing START followed by STOP.
-	 * we cannot use our "normal" functions to do that, because
-	 * if SCL drops between them, another vendor's part will
-	 * wedge, dropping SDA and keeping it low forever, at the end of
-	 * the next transaction (even if it was not the device addressed).
-	 * So our START and STOP take place with SCL held high.
+	/* Check if SCL is low, if it is low then we have a slave device
+	 * misbehaving and there is not much we can do.
+	 */
+	if (!scl_in(dd, target, 0))
+		return -EIO;
+
+	/* Check if SDA is low, if it is low then we have to clock SDA
+	 * up to 9 times for the device to release the bus
 	 */
 	while (clock_cycles_left--) {
+		if (sda_in(dd, target, 0))
+			return 0;
 		scl_out(dd, target, 0);
 		scl_out(dd, target, 1);
-		/* Note if SDA is high, but keep clocking to sync slave */
-		was_high |= sda_in(dd, target, 0);
-	}
-
-	if (was_high) {
-		/*
-		 * We saw a high, which we hope means the slave is sync'd.
-		 * Issue START, STOP, pause for T_BUF.
-		 */
-
-		pins = hfi1_gpio_mod(dd, target, 0, 0, 0);
-		if ((pins & mask) != mask)
-			dd_dev_err(dd, "GPIO pins not at rest: %d\n",
-				    pins & mask);
-		/* Drop SDA to issue START */
-		udelay(1); /* Guarantee .6 uSec setup */
-		sda_out(dd, target, 0);
-		udelay(1); /* Guarantee .6 uSec hold */
-		/* At this point, SCL is high, SDA low. Raise SDA for STOP */
-		sda_out(dd, target, 1);
-		udelay(TWSI_BUF_WAIT_USEC);
 	}
 
-	return !was_high;
+	return -EIO;
 }
 
 #define HFI1_TWSI_START 0x100
@@ -365,17 +354,25 @@ static int twsi_wr(struct hfi1_devdata *dd, u32 target, int data, int flags)
  * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
  * which responded to all TWSI device codes, interpreting them as
  * address within device. On all other devices found on board handled by
- * this driver, the device is followed by a one-byte "address" which selects
+ * this driver, the device is followed by a N-byte "address" which selects
  * the "register" or "offset" within the device from which data should
  * be read.
  */
 int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 		     void *buffer, int len)
 {
-	int ret;
 	u8 *bp = buffer;
+	int ret = 1;
+	int i;
+	int offset_size;
+
+	/* obtain the offset size, strip it from the device address */
+	offset_size = (dev >> 8) & 0xff;
+	dev &= 0xff;
 
-	ret = 1;
+	/* allow at most a 2 byte offset */
+	if (offset_size > 2)
+		goto bail;
 
 	if (dev == HFI1_TWSI_NO_DEV) {
 		/* legacy not-really-I2C */
@@ -383,34 +380,29 @@ int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 		ret = twsi_wr(dd, target, addr, HFI1_TWSI_START);
 	} else {
 		/* Actual I2C */
-		ret = twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START);
-		if (ret) {
-			stop_cmd(dd, target);
-			ret = 1;
-			goto bail;
-		}
-		/*
-		 * SFF spec claims we do _not_ stop after the addr
-		 * but simply issue a start with the "read" dev-addr.
-		 * Since we are implicitly waiting for ACK here,
-		 * we need t_buf (nominally 20uSec) before that start,
-		 * and cannot rely on the delay built in to the STOP
-		 */
-		ret = twsi_wr(dd, target, addr, 0);
-		udelay(TWSI_BUF_WAIT_USEC);
+		if (offset_size) {
+			ret = twsi_wr(dd, target,
+				      dev | WRITE_CMD, HFI1_TWSI_START);
+			if (ret) {
+				stop_cmd(dd, target);
+				goto bail;
+			}
 
-		if (ret) {
-			dd_dev_err(dd,
-				"Failed to write interface read addr %02X\n",
-				addr);
-			ret = 1;
-			goto bail;
+			for (i = 0; i < offset_size; i++) {
+				ret = twsi_wr(dd, target,
+					      (addr >> (i * 8)) & 0xff, 0);
+				udelay(TWSI_BUF_WAIT_USEC);
+				if (ret) {
+					dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
+						   i, addr);
+					goto bail;
+				}
+			}
 		}
 		ret = twsi_wr(dd, target, dev | READ_CMD, HFI1_TWSI_START);
 	}
 	if (ret) {
 		stop_cmd(dd, target);
-		ret = 1;
 		goto bail;
 	}
 
@@ -442,76 +434,55 @@ bail:
  * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
  * which responded to all TWSI device codes, interpreting them as
  * address within device. On all other devices found on board handled by
- * this driver, the device is followed by a one-byte "address" which selects
+ * this driver, the device is followed by a N-byte "address" which selects
  * the "register" or "offset" within the device to which data should
  * be written.
  */
 int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 		     const void *buffer, int len)
 {
-	int sub_len;
 	const u8 *bp = buffer;
-	int max_wait_time, i;
 	int ret = 1;
+	int i;
+	int offset_size;
 
-	while (len > 0) {
-		if (dev == HFI1_TWSI_NO_DEV) {
-			if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD,
-				    HFI1_TWSI_START)) {
-				goto failed_write;
-			}
-		} else {
-			/* Real I2C */
-			if (twsi_wr(dd, target,
-				    dev | WRITE_CMD, HFI1_TWSI_START))
-				goto failed_write;
-			ret = twsi_wr(dd, target, addr, 0);
-			if (ret) {
-				dd_dev_err(dd,
-					"Failed to write interface write addr %02X\n",
-					addr);
-				goto failed_write;
-			}
-		}
-
-		sub_len = min(len, 4);
-		addr += sub_len;
-		len -= sub_len;
+	/* obtain the offset size, strip it from the device address */
+	offset_size = (dev >> 8) & 0xff;
+	dev &= 0xff;
 
-		for (i = 0; i < sub_len; i++)
-			if (twsi_wr(dd, target, *bp++, 0))
-				goto failed_write;
+	/* allow at most a 2 byte offset */
+	if (offset_size > 2)
+		goto bail;
 
-		stop_cmd(dd, target);
+	if (dev == HFI1_TWSI_NO_DEV) {
+		if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD,
+			    HFI1_TWSI_START)) {
+			goto failed_write;
+		}
+	} else {
+		/* Real I2C */
+		if (twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START))
+			goto failed_write;
+	}
 
-		/*
-		 * Wait for write complete by waiting for a successful
-		 * read (the chip replies with a zero after the write
-		 * cmd completes, and before it writes to the eeprom.
-		 * The startcmd for the read will fail the ack until
-		 * the writes have completed.   We do this inline to avoid
-		 * the debug prints that are in the real read routine
-		 * if the startcmd fails.
-		 * We also use the proper device address, so it doesn't matter
-		 * whether we have real eeprom_dev. Legacy likes any address.
-		 */
-		max_wait_time = 100;
-		while (twsi_wr(dd, target,
-			       dev | READ_CMD, HFI1_TWSI_START)) {
-			stop_cmd(dd, target);
-			if (!--max_wait_time)
-				goto failed_write;
+	for (i = 0; i < offset_size; i++) {
+		ret = twsi_wr(dd, target, (addr >> (i * 8)) & 0xff, 0);
+		udelay(TWSI_BUF_WAIT_USEC);
+		if (ret) {
+			dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
+				   i, addr);
+			goto bail;
 		}
-		/* now read (and ignore) the resulting byte */
-		rd_byte(dd, target, 1);
 	}
 
+	for (i = 0; i < len; i++)
+		if (twsi_wr(dd, target, *bp++, 0))
+			goto failed_write;
+
 	ret = 0;
-	goto bail;
 
 failed_write:
 	stop_cmd(dd, target);
-	ret = 1;
 
 bail:
 	return ret;
diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/staging/rdma/hfi1/twsi.h
index 5907e029613d..5b8a5b5e7eae 100644
--- a/drivers/staging/rdma/hfi1/twsi.h
+++ b/drivers/staging/rdma/hfi1/twsi.h
@@ -1,14 +1,13 @@
 #ifndef _TWSI_H
 #define _TWSI_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -54,8 +51,9 @@
 
 struct hfi1_devdata;
 
-/* Bit position of SDA pin in ASIC_QSFP* registers  */
+/* Bit position of SDA/SCL pins in ASIC_QSFP* registers  */
 #define  GPIO_SDA_NUM 1
+#define  GPIO_SCL_NUM 0
 
 /* these functions must be called with qsfp_lock held */
 int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target);
@@ -64,5 +62,4 @@ int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 		     const void *buffer, int len);
 
-
 #endif /* _TWSI_H */
diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c
index 4f2a7889a852..df773d433297 100644
--- a/drivers/staging/rdma/hfi1/uc.c
+++ b/drivers/staging/rdma/hfi1/uc.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -49,71 +46,82 @@
  */
 
 #include "hfi.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 #include "qp.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_UC_##x
 
+/* only opcode mask for adaptive pio */
+const u32 uc_only_opcode =
+	BIT(OP(SEND_ONLY) & 0x1f) |
+	BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f));
+
 /**
  * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
  * @qp: a pointer to the QP
  *
+ * Assume s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int hfi1_make_uc_req(struct hfi1_qp *qp)
+int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_other_headers *ohdr;
-	struct hfi1_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 hwords = 5;
 	u32 bth0 = 0;
 	u32 len;
 	u32 pmtu = qp->pmtu;
-	int ret = 0;
 	int middle = 0;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
+	ps->s_txreq = get_txreq(ps->dev, qp);
+	if (IS_ERR(ps->s_txreq))
+		goto bail_no_tx;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_iowait.sdma_busy)) {
-			qp->s_flags |= HFI1_S_WAIT_DMA;
+		if (iowait_sdma_pending(&priv->s_iowait)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
 		clear_ahg(qp);
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
+		goto done_free_tx;
 	}
 
-	ohdr = &qp->s_hdr->ibh.u.oth;
+	ohdr = &ps->s_txreq->phdr.hdr.u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->ibh.u.l.oth;
+		ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
 
 	/* Get the next send request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	qp->s_wqe = NULL;
 	switch (qp->s_state) {
 	default:
-		if (!(ib_hfi1_state_ops[qp->state] &
-		    HFI1_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] &
+		    RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/* Check if send work queue is empty. */
-		if (qp->s_cur == qp->s_head) {
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_cur == ACCESS_ONCE(qp->s_head)) {
 			clear_ahg(qp);
 			goto bail;
 		}
 		/*
 		 * Start a new request.
 		 */
-		wqe->psn = qp->s_next_psn;
-		qp->s_psn = qp->s_next_psn;
+		qp->s_psn = wqe->psn;
 		qp->s_sge.sge = wqe->sg_list[0];
 		qp->s_sge.sg_list = wqe->sg_list + 1;
 		qp->s_sge.num_sge = wqe->wr.num_sge;
@@ -128,9 +136,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_SEND)
+			if (wqe->wr.opcode == IB_WR_SEND) {
 				qp->s_state = OP(SEND_ONLY);
-			else {
+			} else {
 				qp->s_state =
 					OP(SEND_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the BTH */
@@ -157,9 +165,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 				qp->s_state = OP(RDMA_WRITE_ONLY);
-			else {
+			} else {
 				qp->s_state =
 					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the RETH */
@@ -188,9 +196,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_SEND)
+		if (wqe->wr.opcode == IB_WR_SEND) {
 			qp->s_state = OP(SEND_LAST);
-		else {
+		} else {
 			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -213,9 +221,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 			qp->s_state = OP(RDMA_WRITE_LAST);
-		else {
+		} else {
 			qp->s_state =
 				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
@@ -231,19 +239,28 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
 	}
 	qp->s_len -= len;
 	qp->s_hdrwords = hwords;
+	ps->s_txreq->sde = priv->s_sde;
 	qp->s_cur_sge = &qp->s_sge;
 	qp->s_cur_size = len;
 	hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
-			     mask_psn(qp->s_next_psn++), middle);
-done:
-	ret = 1;
-	goto unlock;
+			     mask_psn(qp->s_psn++), middle, ps);
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+	return 1;
+
+done_free_tx:
+	hfi1_put_txreq(ps->s_txreq);
+	ps->s_txreq = NULL;
+	return 1;
 
 bail:
-	qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
+	hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+	ps->s_txreq = NULL;
+	qp->s_flags &= ~RVT_S_BUSY;
+	qp->s_hdrwords = 0;
+	return 0;
 }
 
 /**
@@ -266,7 +283,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	struct hfi1_other_headers *ohdr = packet->ohdr;
 	u32 bth0, opcode;
 	u32 hdrsize = packet->hlen;
@@ -291,14 +308,14 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
 			u16 rlid = be16_to_cpu(hdr->lrh[3]);
 			u8 sl, sc5;
 
-			lqpn = bth1 & HFI1_QPN_MASK;
+			lqpn = bth1 & RVT_QPN_MASK;
 			rqpn = qp->remote_qpn;
 
 			sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
 			sl = ibp->sc_to_sl[sc5];
 
 			process_becn(ppd, sl, rlid, lqpn, rqpn,
-					IB_CC_SVCTYPE_UC);
+				     IB_CC_SVCTYPE_UC);
 		}
 
 		if (bth1 & HFI1_FECN_SMASK) {
@@ -331,10 +348,11 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
 inv:
 		if (qp->r_state == OP(SEND_FIRST) ||
 		    qp->r_state == OP(SEND_MIDDLE)) {
-			set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags);
+			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 			qp->r_sge.num_sge = 0;
-		} else
-			hfi1_put_ss(&qp->r_sge);
+		} else {
+			rvt_put_ss(&qp->r_sge);
+		}
 		qp->r_state = OP(SEND_LAST);
 		switch (opcode) {
 		case OP(SEND_FIRST):
@@ -381,7 +399,7 @@ inv:
 		goto inv;
 	}
 
-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & HFI1_R_COMM_EST))
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
 		qp_comm_est(qp);
 
 	/* OK, process the packet. */
@@ -390,10 +408,10 @@ inv:
 	case OP(SEND_ONLY):
 	case OP(SEND_ONLY_WITH_IMMEDIATE):
 send_first:
-		if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
 			qp->r_sge = qp->s_rdma_read_sge;
-		else {
-			ret = hfi1_get_rwqe(qp, 0);
+		} else {
+			ret = hfi1_rvt_get_rwqe(qp, 0);
 			if (ret < 0)
 				goto op_err;
 			if (!ret)
@@ -417,7 +435,7 @@ send_first:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto rewind;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, 0);
+		hfi1_copy_sge(&qp->r_sge, data, pmtu, 0, 0);
 		break;
 
 	case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -442,8 +460,8 @@ send_last:
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto rewind;
 		wc.opcode = IB_WC_RECV;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 0);
-		hfi1_put_ss(&qp->s_rdma_read_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 0, 0);
+		rvt_put_ss(&qp->s_rdma_read_sge);
 last_imm:
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -468,9 +486,9 @@ last_imm:
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
-		hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-			      (ohdr->bth[0] &
-				cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+			     (ohdr->bth[0] &
+			      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 		break;
 
 	case OP(RDMA_WRITE_FIRST):
@@ -491,8 +509,8 @@ rdma_first:
 			int ok;
 
 			/* Check rkey */
-			ok = hfi1_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
-					  vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
+					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 				goto drop;
 			qp->r_sge.num_sge = 1;
@@ -503,9 +521,9 @@ rdma_first:
 			qp->r_sge.sge.length = 0;
 			qp->r_sge.sge.sge_length = 0;
 		}
-		if (opcode == OP(RDMA_WRITE_ONLY))
+		if (opcode == OP(RDMA_WRITE_ONLY)) {
 			goto rdma_last;
-		else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
+		} else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
 			wc.ex.imm_data = ohdr->u.rc.imm_data;
 			goto rdma_last_imm;
 		}
@@ -517,7 +535,7 @@ rdma_first:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto drop;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1);
+		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
 		break;
 
 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -535,10 +553,10 @@ rdma_last_imm:
 		tlen -= (hdrsize + pad + 4);
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
-		if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
-			hfi1_put_ss(&qp->s_rdma_read_sge);
-		else {
-			ret = hfi1_get_rwqe(qp, 1);
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
+			rvt_put_ss(&qp->s_rdma_read_sge);
+		} else {
+			ret = hfi1_rvt_get_rwqe(qp, 1);
 			if (ret < 0)
 				goto op_err;
 			if (!ret)
@@ -546,8 +564,8 @@ rdma_last_imm:
 		}
 		wc.byte_len = qp->r_len;
 		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
-		hfi1_put_ss(&qp->r_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+		rvt_put_ss(&qp->r_sge);
 		goto last_imm;
 
 	case OP(RDMA_WRITE_LAST):
@@ -562,8 +580,8 @@ rdma_last:
 		tlen -= (hdrsize + pad + 4);
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
-		hfi1_put_ss(&qp->r_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+		rvt_put_ss(&qp->r_sge);
 		break;
 
 	default:
@@ -575,14 +593,12 @@ rdma_last:
 	return;
 
 rewind:
-	set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags);
+	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 	qp->r_sge.num_sge = 0;
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 	return;
 
 op_err:
 	hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-	return;
-
 }
diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c
index bd1b402c1e14..ae8a70f703eb 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/staging/rdma/hfi1/ud.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -53,6 +50,7 @@
 
 #include "hfi.h"
 #include "mad.h"
+#include "verbs_txreq.h"
 #include "qp.h"
 
 /**
@@ -65,24 +63,25 @@
  * Note that the receive interrupt handler may be calling hfi1_ud_rcv()
  * while this is being called.
  */
-static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
+static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 {
 	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
 	struct hfi1_pportdata *ppd;
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
 	struct ib_ah_attr *ah_attr;
 	unsigned long flags;
-	struct hfi1_sge_state ssge;
-	struct hfi1_sge *sge;
+	struct rvt_sge_state ssge;
+	struct rvt_sge *sge;
 	struct ib_wc wc;
 	u32 length;
 	enum ib_qp_type sqptype, dqptype;
 
 	rcu_read_lock();
 
-	qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
+	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
+			    swqe->ud_wr.remote_qpn);
 	if (!qp) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		rcu_read_unlock();
 		return;
 	}
@@ -93,12 +92,12 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 			IB_QPT_UD : qp->ibqp.qp_type;
 
 	if (dqptype != sqptype ||
-	    !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) {
-		ibp->n_pkt_drops++;
+	    !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+		ibp->rvp.n_pkt_drops++;
 		goto drop;
 	}
 
-	ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
+	ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
 	ppd = ppd_from_ibp(ibp);
 
 	if (qp->ibqp.qp_num > 1) {
@@ -161,35 +160,36 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
-	if (qp->r_flags & HFI1_R_REUSE_SGE)
-		qp->r_flags &= ~HFI1_R_REUSE_SGE;
-	else {
+	if (qp->r_flags & RVT_R_REUSE_SGE) {
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
+	} else {
 		int ret;
 
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0) {
 			hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
 			goto bail_unlock;
 		}
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			goto bail_unlock;
 		}
 	}
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= HFI1_R_REUSE_SGE;
-		ibp->n_pkt_drops++;
+		qp->r_flags |= RVT_R_REUSE_SGE;
+		ibp->rvp.n_pkt_drops++;
 		goto bail_unlock;
 	}
 
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		hfi1_copy_sge(&qp->r_sge, &ah_attr->grh,
-			      sizeof(struct ib_grh), 1);
+			      sizeof(struct ib_grh), 1, 0);
 		wc.wc_flags |= IB_WC_GRH;
-	} else
+	} else {
 		hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+	}
 	ssge.sg_list = swqe->sg_list + 1;
 	ssge.sge = *swqe->sg_list;
 	ssge.num_sge = swqe->wr.num_sge;
@@ -202,7 +202,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 		if (len > sge->sge_length)
 			len = sge->sge_length;
 		WARN_ON_ONCE(len == 0);
-		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
+		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1, 0);
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
@@ -210,7 +210,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 			if (--ssge.num_sge)
 				*sge = *ssge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -222,8 +222,8 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 		}
 		length -= len;
 	}
-	hfi1_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto bail_unlock;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
@@ -242,14 +242,14 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 	wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
 	/* Check for loopback when the port lid is not set */
 	if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
-		wc.slid = HFI1_PERMISSIVE_LID;
+		wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
 	wc.sl = ah_attr->sl;
 	wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		      swqe->wr.send_flags & IB_SEND_SOLICITED);
-	ibp->n_loop_pkts++;
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     swqe->wr.send_flags & IB_SEND_SOLICITED);
+	ibp->rvp.n_loop_pkts++;
 bail_unlock:
 	spin_unlock_irqrestore(&qp->r_lock, flags);
 drop:
@@ -260,47 +260,53 @@ drop:
  * hfi1_make_ud_req - construct a UD request packet
  * @qp: the QP
  *
+ * Assume s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int hfi1_make_ud_req(struct hfi1_qp *qp)
+int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_other_headers *ohdr;
 	struct ib_ah_attr *ah_attr;
 	struct hfi1_pportdata *ppd;
 	struct hfi1_ibport *ibp;
-	struct hfi1_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 nwords;
 	u32 extra_bytes;
 	u32 bth0;
 	u16 lrh0;
 	u16 lid;
-	int ret = 0;
 	int next_cur;
 	u8 sc5;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
+	ps->s_txreq = get_txreq(ps->dev, qp);
+	if (IS_ERR(ps->s_txreq))
+		goto bail_no_tx;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_iowait.sdma_busy)) {
-			qp->s_flags |= HFI1_S_WAIT_DMA;
+		if (iowait_sdma_pending(&priv->s_iowait)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
+		goto done_free_tx;
 	}
 
-	if (qp->s_cur == qp->s_head)
+	/* see post_one_send() */
+	smp_read_barrier_depends();
+	if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 		goto bail;
 
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	next_cur = qp->s_cur + 1;
 	if (next_cur >= qp->s_size)
 		next_cur = 0;
@@ -308,13 +314,15 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 	/* Construct the header. */
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ppd = ppd_from_ibp(ibp);
-	ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
-	if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE ||
-	    ah_attr->dlid == HFI1_PERMISSIVE_LID) {
+	ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+	if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+	    ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
 		lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
-		if (unlikely(!loopback && (lid == ppd->lid ||
-		    (lid == HFI1_PERMISSIVE_LID &&
-		     qp->ibqp.qp_type == IB_QPT_GSI)))) {
+		if (unlikely(!loopback &&
+			     (lid == ppd->lid ||
+			      (lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
+			      qp->ibqp.qp_type == IB_QPT_GSI)))) {
+			unsigned long flags;
 			/*
 			 * If DMAs are in progress, we can't generate
 			 * a completion for the loopback packet since
@@ -322,16 +330,17 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 			 * Instead of waiting, we could queue a
 			 * zero length descriptor so we get a callback.
 			 */
-			if (atomic_read(&qp->s_iowait.sdma_busy)) {
-				qp->s_flags |= HFI1_S_WAIT_DMA;
+			if (iowait_sdma_pending(&priv->s_iowait)) {
+				qp->s_flags |= RVT_S_WAIT_DMA;
 				goto bail;
 			}
 			qp->s_cur = next_cur;
+			local_irq_save(flags);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			ud_loopback(qp, wqe);
 			spin_lock_irqsave(&qp->s_lock, flags);
 			hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
-			goto done;
+			goto done_free_tx;
 		}
 	}
 
@@ -353,11 +362,12 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		/* Header size in 32-bit words. */
-		qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh,
-					       &ah_attr->grh,
-					       qp->s_hdrwords, nwords);
+		qp->s_hdrwords += hfi1_make_grh(ibp,
+						&ps->s_txreq->phdr.hdr.u.l.grh,
+						&ah_attr->grh,
+						qp->s_hdrwords, nwords);
 		lrh0 = HFI1_LRH_GRH;
-		ohdr = &qp->s_hdr->ibh.u.l.oth;
+		ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
 		/*
 		 * Don't worry about sending to locally attached multicast
 		 * QPs.  It is unspecified by the spec. what happens.
@@ -365,37 +375,42 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 	} else {
 		/* Header size in 32-bit words. */
 		lrh0 = HFI1_LRH_BTH;
-		ohdr = &qp->s_hdr->ibh.u.oth;
+		ohdr = &ps->s_txreq->phdr.hdr.u.oth;
 	}
 	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 		qp->s_hdrwords++;
 		ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
 		bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
-	} else
+	} else {
 		bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
+	}
 	sc5 = ibp->sl_to_sc[ah_attr->sl];
 	lrh0 |= (ah_attr->sl & 0xf) << 4;
 	if (qp->ibqp.qp_type == IB_QPT_SMI) {
 		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
-		qp->s_sc = 0xf;
+		priv->s_sc = 0xf;
 	} else {
 		lrh0 |= (sc5 & 0xf) << 12;
-		qp->s_sc = sc5;
+		priv->s_sc = sc5;
 	}
-	qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
-	qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->ibh.lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
-	qp->s_hdr->ibh.lrh[2] =
+	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+	ps->s_txreq->sde = priv->s_sde;
+	priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
+	ps->s_txreq->psc = priv->s_sendcontext;
+	ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
+	ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);
+	ps->s_txreq->phdr.hdr.lrh[2] =
 		cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE))
-		qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE;
-	else {
+	if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+		ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
+	} else {
 		lid = ppd->lid;
 		if (lid) {
 			lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
-			qp->s_hdr->ibh.lrh[3] = cpu_to_be16(lid);
-		} else
-			qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE;
+			ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid);
+		} else {
+			ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
+		}
 	}
 	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 		bth0 |= IB_BTH_SOLICITED;
@@ -406,7 +421,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 		bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++));
+	ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
 	/*
 	 * Qkeys with the high order bit set mean use the
 	 * qkey from the QP context instead of the WR (see 10.2.5).
@@ -415,20 +430,28 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 					 qp->qkey : wqe->ud_wr.remote_qkey);
 	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 	/* disarm any ahg */
-	qp->s_hdr->ahgcount = 0;
-	qp->s_hdr->ahgidx = 0;
-	qp->s_hdr->tx_flags = 0;
-	qp->s_hdr->sde = NULL;
+	priv->s_hdr->ahgcount = 0;
+	priv->s_hdr->ahgidx = 0;
+	priv->s_hdr->tx_flags = 0;
+	priv->s_hdr->sde = NULL;
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+
+	return 1;
 
-done:
-	ret = 1;
-	goto unlock;
+done_free_tx:
+	hfi1_put_txreq(ps->s_txreq);
+	ps->s_txreq = NULL;
+	return 1;
 
 bail:
-	qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
+	hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+	ps->s_txreq = NULL;
+	qp->s_flags &= ~RVT_S_BUSY;
+	qp->s_hdrwords = 0;
+	return 0;
 }
 
 /*
@@ -476,7 +499,7 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey)
 	return -1;
 }
 
-void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
+void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
 		u32 pkey, u32 slid, u32 dlid, u8 sc5,
 		const struct ib_grh *old_grh)
 {
@@ -550,7 +573,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
  * opa_smp_check() returns 0 if all checks succeed, 1 otherwise.
  */
 static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
-			 struct hfi1_qp *qp, u16 slid, struct opa_smp *smp)
+			 struct rvt_qp *qp, u16 slid, struct opa_smp *smp)
 {
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
@@ -607,7 +630,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
 	case IB_MGMT_METHOD_TRAP:
 	case IB_MGMT_METHOD_GET_RESP:
 	case IB_MGMT_METHOD_REPORT_RESP:
-		if (ibp->port_cap_flags & IB_PORT_SM)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
 			return 0;
 		if (pkey == FULL_MGMT_P_KEY) {
 			smp->status |= IB_SMP_UNSUP_METHOD;
@@ -624,7 +647,6 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
 	return 0;
 }
 
-
 /**
  * hfi1_ud_rcv - receive an incoming UD packet
  * @ibp: the port the packet came in on
@@ -654,7 +676,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	bool has_grh = rcv_flags & HFI1_HAS_GRH;
 	bool sc4_bit = has_sc4_bit(packet);
 	u8 sc;
@@ -663,18 +685,18 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	struct ib_grh *grh = NULL;
 
 	qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK;
+	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
 	dlid = be16_to_cpu(hdr->lrh[1]);
-	is_mcast = (dlid > HFI1_MULTICAST_LID_BASE) &&
-			(dlid != HFI1_PERMISSIVE_LID);
+	is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+			(dlid != be16_to_cpu(IB_LID_PERMISSIVE));
 	bth1 = be32_to_cpu(ohdr->bth[1]);
 	if (unlikely(bth1 & HFI1_BECN_SMASK)) {
 		/*
 		 * In pre-B0 h/w the CNP_OPCODE is handled via an
-		 * error path (errata 291394).
+		 * error path.
 		 */
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-		u32 lqpn =  be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
+		u32 lqpn =  be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 		u8 sl, sc5;
 
 		sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
@@ -750,7 +772,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 			mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
 			if (mgmt_pkey_idx < 0)
 				goto drop;
-
 		}
 		if (unlikely(qkey != qp->qkey)) {
 			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
@@ -788,7 +809,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 		mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
 		if (mgmt_pkey_idx < 0)
 			goto drop;
-
 	}
 
 	if (qp->ibqp.qp_num > 1 &&
@@ -799,8 +819,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
 		wc.ex.imm_data = 0;
 		wc.wc_flags = 0;
-	} else
+	} else {
 		goto drop;
+	}
 
 	/*
 	 * A GRH is expected to precede the data even if not
@@ -811,36 +832,38 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
-	if (qp->r_flags & HFI1_R_REUSE_SGE)
-		qp->r_flags &= ~HFI1_R_REUSE_SGE;
-	else {
+	if (qp->r_flags & RVT_R_REUSE_SGE) {
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
+	} else {
 		int ret;
 
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0) {
 			hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
 			return;
 		}
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			return;
 		}
 	}
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= HFI1_R_REUSE_SGE;
+		qp->r_flags |= RVT_R_REUSE_SGE;
 		goto drop;
 	}
 	if (has_grh) {
 		hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
-			      sizeof(struct ib_grh), 1);
+			      sizeof(struct ib_grh), 1, 0);
 		wc.wc_flags |= IB_WC_GRH;
-	} else
+	} else {
 		hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
-	hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
-	hfi1_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+	}
+	hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+		      1, 0);
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		return;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
@@ -862,8 +885,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 			}
 		}
 		wc.pkey_index = (unsigned)mgmt_pkey_idx;
-	} else
+	} else {
 		wc.pkey_index = 0;
+	}
 
 	wc.slid = be16_to_cpu(hdr->lrh[3]);
 	sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
@@ -873,15 +897,15 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	/*
 	 * Save the LMC lower bits if the destination LID is a unicast LID.
 	 */
-	wc.dlid_path_bits = dlid >= HFI1_MULTICAST_LID_BASE ? 0 :
+	wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 :
 		dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		      (ohdr->bth[0] &
-			cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     (ohdr->bth[0] &
+		      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 	return;
 
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c
new file mode 100644
index 000000000000..0861e095df8d
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c
@@ -0,0 +1,1044 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <asm/page.h>
+
+#include "user_exp_rcv.h"
+#include "trace.h"
+#include "mmu_rb.h"
+
+struct tid_group {
+	struct list_head list;
+	unsigned base;
+	u8 size;
+	u8 used;
+	u8 map;
+};
+
+struct tid_rb_node {
+	struct mmu_rb_node mmu;
+	unsigned long phys;
+	struct tid_group *grp;
+	u32 rcventry;
+	dma_addr_t dma_addr;
+	bool freed;
+	unsigned npages;
+	struct page *pages[0];
+};
+
+struct tid_pageset {
+	u16 idx;
+	u16 count;
+};
+
+#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
+
+#define num_user_pages(vaddr, len)				       \
+	(1 + (((((unsigned long)(vaddr) +			       \
+		 (unsigned long)(len) - 1) & PAGE_MASK) -	       \
+	       ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
+
+static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
+			    struct rb_root *);
+static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
+static int set_rcvarray_entry(struct file *, unsigned long, u32,
+			      struct tid_group *, struct page **, unsigned);
+static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
+static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
+			    struct tid_pageset *, unsigned, u16, struct page **,
+			    u32 *, unsigned *, unsigned *);
+static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
+static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *);
+
+static struct mmu_rb_ops tid_rb_ops = {
+	.insert = mmu_rb_insert,
+	.remove = mmu_rb_remove,
+	.invalidate = mmu_rb_invalidate
+};
+
+static inline u32 rcventry2tidinfo(u32 rcventry)
+{
+	u32 pair = rcventry & ~0x1;
+
+	return EXP_TID_SET(IDX, pair >> 1) |
+		EXP_TID_SET(CTRL, 1 << (rcventry - pair));
+}
+
+static inline void exp_tid_group_init(struct exp_tid_set *set)
+{
+	INIT_LIST_HEAD(&set->list);
+	set->count = 0;
+}
+
+static inline void tid_group_remove(struct tid_group *grp,
+				    struct exp_tid_set *set)
+{
+	list_del_init(&grp->list);
+	set->count--;
+}
+
+static inline void tid_group_add_tail(struct tid_group *grp,
+				      struct exp_tid_set *set)
+{
+	list_add_tail(&grp->list, &set->list);
+	set->count++;
+}
+
+static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
+{
+	struct tid_group *grp =
+		list_first_entry(&set->list, struct tid_group, list);
+	list_del_init(&grp->list);
+	set->count--;
+	return grp;
+}
+
+static inline void tid_group_move(struct tid_group *group,
+				  struct exp_tid_set *s1,
+				  struct exp_tid_set *s2)
+{
+	tid_group_remove(group, s1);
+	tid_group_add_tail(group, s2);
+}
+
+/*
+ * Initialize context and file private data needed for Expected
+ * receive caching. This needs to be done after the context has
+ * been configured with the eager/expected RcvEntry counts.
+ */
+int hfi1_user_exp_rcv_init(struct file *fp)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	unsigned tidbase;
+	int i, ret = 0;
+
+	spin_lock_init(&fd->tid_lock);
+	spin_lock_init(&fd->invalid_lock);
+	fd->tid_rb_root = RB_ROOT;
+
+	if (!uctxt->subctxt_cnt || !fd->subctxt) {
+		exp_tid_group_init(&uctxt->tid_group_list);
+		exp_tid_group_init(&uctxt->tid_used_list);
+		exp_tid_group_init(&uctxt->tid_full_list);
+
+		tidbase = uctxt->expected_base;
+		for (i = 0; i < uctxt->expected_count /
+			     dd->rcv_entries.group_size; i++) {
+			struct tid_group *grp;
+
+			grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+			if (!grp) {
+				/*
+				 * If we fail here, the groups already
+				 * allocated will be freed by the close
+				 * call.
+				 */
+				ret = -ENOMEM;
+				goto done;
+			}
+			grp->size = dd->rcv_entries.group_size;
+			grp->base = tidbase;
+			tid_group_add_tail(grp, &uctxt->tid_group_list);
+			tidbase += dd->rcv_entries.group_size;
+		}
+	}
+
+	fd->entry_to_rb = kcalloc(uctxt->expected_count,
+				     sizeof(struct rb_node *),
+				     GFP_KERNEL);
+	if (!fd->entry_to_rb)
+		return -ENOMEM;
+
+	if (!HFI1_CAP_IS_USET(TID_UNMAP)) {
+		fd->invalid_tid_idx = 0;
+		fd->invalid_tids = kzalloc(uctxt->expected_count *
+					   sizeof(u32), GFP_KERNEL);
+		if (!fd->invalid_tids) {
+			ret = -ENOMEM;
+			goto done;
+		}
+
+		/*
+		 * Register MMU notifier callbacks. If the registration
+		 * fails, continue but turn off the TID caching for
+		 * all user contexts.
+		 */
+		ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops);
+		if (ret) {
+			dd_dev_info(dd,
+				    "Failed MMU notifier registration %d\n",
+				    ret);
+			HFI1_CAP_USET(TID_UNMAP);
+			ret = 0;
+		}
+	}
+
+	/*
+	 * PSM does not have a good way to separate, count, and
+	 * effectively enforce a limit on RcvArray entries used by
+	 * subctxts (when context sharing is used) when TID caching
+	 * is enabled. To help with that, we calculate a per-process
+	 * RcvArray entry share and enforce that.
+	 * If TID caching is not in use, PSM deals with usage on its
+	 * own. In that case, we allow any subctxt to take all of the
+	 * entries.
+	 *
+	 * Make sure that we set the tid counts only after successful
+	 * init.
+	 */
+	spin_lock(&fd->tid_lock);
+	if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) {
+		u16 remainder;
+
+		fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
+		remainder = uctxt->expected_count % uctxt->subctxt_cnt;
+		if (remainder && fd->subctxt < remainder)
+			fd->tid_limit++;
+	} else {
+		fd->tid_limit = uctxt->expected_count;
+	}
+	spin_unlock(&fd->tid_lock);
+done:
+	return ret;
+}
+
+int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct tid_group *grp, *gptr;
+
+	/*
+	 * The notifier would have been removed when the process'es mm
+	 * was freed.
+	 */
+	if (!HFI1_CAP_IS_USET(TID_UNMAP))
+		hfi1_mmu_rb_unregister(&fd->tid_rb_root);
+
+	kfree(fd->invalid_tids);
+
+	if (!uctxt->cnt) {
+		if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+			unlock_exp_tids(uctxt, &uctxt->tid_full_list,
+					&fd->tid_rb_root);
+		if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+			unlock_exp_tids(uctxt, &uctxt->tid_used_list,
+					&fd->tid_rb_root);
+		list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
+					 list) {
+			list_del_init(&grp->list);
+			kfree(grp);
+		}
+		hfi1_clear_tids(uctxt);
+	}
+
+	kfree(fd->entry_to_rb);
+	return 0;
+}
+
+/*
+ * Write an "empty" RcvArray entry.
+ * This function exists so the TID registaration code can use it
+ * to write to unused/unneeded entries and still take advantage
+ * of the WC performance improvements. The HFI will ignore this
+ * write to the RcvArray entry.
+ */
+static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
+{
+	/*
+	 * Doing the WC fill writes only makes sense if the device is
+	 * present and the RcvArray has been mapped as WC memory.
+	 */
+	if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
+		writeq(0, dd->rcvarray_wc + (index * 8));
+}
+
+/*
+ * RcvArray entry allocation for Expected Receives is done by the
+ * following algorithm:
+ *
+ * The context keeps 3 lists of groups of RcvArray entries:
+ *   1. List of empty groups - tid_group_list
+ *      This list is created during user context creation and
+ *      contains elements which describe sets (of 8) of empty
+ *      RcvArray entries.
+ *   2. List of partially used groups - tid_used_list
+ *      This list contains sets of RcvArray entries which are
+ *      not completely used up. Another mapping request could
+ *      use some of all of the remaining entries.
+ *   3. List of full groups - tid_full_list
+ *      This is the list where sets that are completely used
+ *      up go.
+ *
+ * An attempt to optimize the usage of RcvArray entries is
+ * made by finding all sets of physically contiguous pages in a
+ * user's buffer.
+ * These physically contiguous sets are further split into
+ * sizes supported by the receive engine of the HFI. The
+ * resulting sets of pages are stored in struct tid_pageset,
+ * which describes the sets as:
+ *    * .count - number of pages in this set
+ *    * .idx - starting index into struct page ** array
+ *                    of this set
+ *
+ * From this point on, the algorithm deals with the page sets
+ * described above. The number of pagesets is divided by the
+ * RcvArray group size to produce the number of full groups
+ * needed.
+ *
+ * Groups from the 3 lists are manipulated using the following
+ * rules:
+ *   1. For each set of 8 pagesets, a complete group from
+ *      tid_group_list is taken, programmed, and moved to
+ *      the tid_full_list list.
+ *   2. For all remaining pagesets:
+ *      2.1 If the tid_used_list is empty and the tid_group_list
+ *          is empty, stop processing pageset and return only
+ *          what has been programmed up to this point.
+ *      2.2 If the tid_used_list is empty and the tid_group_list
+ *          is not empty, move a group from tid_group_list to
+ *          tid_used_list.
+ *      2.3 For each group is tid_used_group, program as much as
+ *          can fit into the group. If the group becomes fully
+ *          used, move it to tid_full_list.
+ */
+int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+	int ret = 0, need_group = 0, pinned;
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets,
+		tididx = 0, mapped, mapped_pages = 0;
+	unsigned long vaddr = tinfo->vaddr;
+	struct page **pages = NULL;
+	u32 *tidlist = NULL;
+	struct tid_pageset *pagesets = NULL;
+
+	/* Get the number of pages the user buffer spans */
+	npages = num_user_pages(vaddr, tinfo->length);
+	if (!npages)
+		return -EINVAL;
+
+	if (npages > uctxt->expected_count) {
+		dd_dev_err(dd, "Expected buffer too big\n");
+		return -EINVAL;
+	}
+
+	/* Verify that access is OK for the user buffer */
+	if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
+		       npages * PAGE_SIZE)) {
+		dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
+			   (void *)vaddr, npages);
+		return -EFAULT;
+	}
+
+	pagesets = kcalloc(uctxt->expected_count, sizeof(*pagesets),
+			   GFP_KERNEL);
+	if (!pagesets)
+		return -ENOMEM;
+
+	/* Allocate the array of struct page pointers needed for pinning */
+	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+	if (!pages) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	/*
+	 * Pin all the pages of the user buffer. If we can't pin all the
+	 * pages, accept the amount pinned so far and program only that.
+	 * User space knows how to deal with partially programmed buffers.
+	 */
+	if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages))
+		return -ENOMEM;
+	pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages);
+	if (pinned <= 0) {
+		ret = pinned;
+		goto bail;
+	}
+	fd->tid_n_pinned += npages;
+
+	/* Find sets of physically contiguous pages */
+	npagesets = find_phys_blocks(pages, pinned, pagesets);
+
+	/*
+	 * We don't need to access this under a lock since tid_used is per
+	 * process and the same process cannot be in hfi1_user_exp_rcv_clear()
+	 * and hfi1_user_exp_rcv_setup() at the same time.
+	 */
+	spin_lock(&fd->tid_lock);
+	if (fd->tid_used + npagesets > fd->tid_limit)
+		pageset_count = fd->tid_limit - fd->tid_used;
+	else
+		pageset_count = npagesets;
+	spin_unlock(&fd->tid_lock);
+
+	if (!pageset_count)
+		goto bail;
+
+	ngroups = pageset_count / dd->rcv_entries.group_size;
+	tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
+	if (!tidlist) {
+		ret = -ENOMEM;
+		goto nomem;
+	}
+
+	tididx = 0;
+
+	/*
+	 * From this point on, we are going to be using shared (between master
+	 * and subcontexts) context resources. We need to take the lock.
+	 */
+	mutex_lock(&uctxt->exp_lock);
+	/*
+	 * The first step is to program the RcvArray entries which are complete
+	 * groups.
+	 */
+	while (ngroups && uctxt->tid_group_list.count) {
+		struct tid_group *grp =
+			tid_group_pop(&uctxt->tid_group_list);
+
+		ret = program_rcvarray(fp, vaddr, grp, pagesets,
+				       pageidx, dd->rcv_entries.group_size,
+				       pages, tidlist, &tididx, &mapped);
+		/*
+		 * If there was a failure to program the RcvArray
+		 * entries for the entire group, reset the grp fields
+		 * and add the grp back to the free group list.
+		 */
+		if (ret <= 0) {
+			tid_group_add_tail(grp, &uctxt->tid_group_list);
+			hfi1_cdbg(TID,
+				  "Failed to program RcvArray group %d", ret);
+			goto unlock;
+		}
+
+		tid_group_add_tail(grp, &uctxt->tid_full_list);
+		ngroups--;
+		pageidx += ret;
+		mapped_pages += mapped;
+	}
+
+	while (pageidx < pageset_count) {
+		struct tid_group *grp, *ptr;
+		/*
+		 * If we don't have any partially used tid groups, check
+		 * if we have empty groups. If so, take one from there and
+		 * put in the partially used list.
+		 */
+		if (!uctxt->tid_used_list.count || need_group) {
+			if (!uctxt->tid_group_list.count)
+				goto unlock;
+
+			grp = tid_group_pop(&uctxt->tid_group_list);
+			tid_group_add_tail(grp, &uctxt->tid_used_list);
+			need_group = 0;
+		}
+		/*
+		 * There is an optimization opportunity here - instead of
+		 * fitting as many page sets as we can, check for a group
+		 * later on in the list that could fit all of them.
+		 */
+		list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list,
+					 list) {
+			unsigned use = min_t(unsigned, pageset_count - pageidx,
+					     grp->size - grp->used);
+
+			ret = program_rcvarray(fp, vaddr, grp, pagesets,
+					       pageidx, use, pages, tidlist,
+					       &tididx, &mapped);
+			if (ret < 0) {
+				hfi1_cdbg(TID,
+					  "Failed to program RcvArray entries %d",
+					  ret);
+				ret = -EFAULT;
+				goto unlock;
+			} else if (ret > 0) {
+				if (grp->used == grp->size)
+					tid_group_move(grp,
+						       &uctxt->tid_used_list,
+						       &uctxt->tid_full_list);
+				pageidx += ret;
+				mapped_pages += mapped;
+				need_group = 0;
+				/* Check if we are done so we break out early */
+				if (pageidx >= pageset_count)
+					break;
+			} else if (WARN_ON(ret == 0)) {
+				/*
+				 * If ret is 0, we did not program any entries
+				 * into this group, which can only happen if
+				 * we've screwed up the accounting somewhere.
+				 * Warn and try to continue.
+				 */
+				need_group = 1;
+			}
+		}
+	}
+unlock:
+	mutex_unlock(&uctxt->exp_lock);
+nomem:
+	hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
+		  mapped_pages, ret);
+	if (tididx) {
+		spin_lock(&fd->tid_lock);
+		fd->tid_used += tididx;
+		spin_unlock(&fd->tid_lock);
+		tinfo->tidcnt = tididx;
+		tinfo->length = mapped_pages * PAGE_SIZE;
+
+		if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
+				 tidlist, sizeof(tidlist[0]) * tididx)) {
+			/*
+			 * On failure to copy to the user level, we need to undo
+			 * everything done so far so we don't leak resources.
+			 */
+			tinfo->tidlist = (unsigned long)&tidlist;
+			hfi1_user_exp_rcv_clear(fp, tinfo);
+			tinfo->tidlist = 0;
+			ret = -EFAULT;
+			goto bail;
+		}
+	}
+
+	/*
+	 * If not everything was mapped (due to insufficient RcvArray entries,
+	 * for example), unpin all unmapped pages so we can pin them nex time.
+	 */
+	if (mapped_pages != pinned) {
+		hfi1_release_user_pages(current->mm, &pages[mapped_pages],
+					pinned - mapped_pages,
+					false);
+		fd->tid_n_pinned -= pinned - mapped_pages;
+	}
+bail:
+	kfree(pagesets);
+	kfree(pages);
+	kfree(tidlist);
+	return ret > 0 ? 0 : ret;
+}
+
+int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+	int ret = 0;
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	u32 *tidinfo;
+	unsigned tididx;
+
+	tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL);
+	if (!tidinfo)
+		return -ENOMEM;
+
+	if (copy_from_user(tidinfo, (void __user *)(unsigned long)
+			   tinfo->tidlist, sizeof(tidinfo[0]) *
+			   tinfo->tidcnt)) {
+		ret = -EFAULT;
+		goto done;
+	}
+
+	mutex_lock(&uctxt->exp_lock);
+	for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
+		ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL);
+		if (ret) {
+			hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
+				  ret);
+			break;
+		}
+	}
+	spin_lock(&fd->tid_lock);
+	fd->tid_used -= tididx;
+	spin_unlock(&fd->tid_lock);
+	tinfo->tidcnt = tididx;
+	mutex_unlock(&uctxt->exp_lock);
+done:
+	kfree(tidinfo);
+	return ret;
+}
+
+int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	unsigned long *ev = uctxt->dd->events +
+		(((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+		  HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
+	u32 *array;
+	int ret = 0;
+
+	if (!fd->invalid_tids)
+		return -EINVAL;
+
+	/*
+	 * copy_to_user() can sleep, which will leave the invalid_lock
+	 * locked and cause the MMU notifier to be blocked on the lock
+	 * for a long time.
+	 * Copy the data to a local buffer so we can release the lock.
+	 */
+	array = kcalloc(uctxt->expected_count, sizeof(*array), GFP_KERNEL);
+	if (!array)
+		return -EFAULT;
+
+	spin_lock(&fd->invalid_lock);
+	if (fd->invalid_tid_idx) {
+		memcpy(array, fd->invalid_tids, sizeof(*array) *
+		       fd->invalid_tid_idx);
+		memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) *
+		       fd->invalid_tid_idx);
+		tinfo->tidcnt = fd->invalid_tid_idx;
+		fd->invalid_tid_idx = 0;
+		/*
+		 * Reset the user flag while still holding the lock.
+		 * Otherwise, PSM can miss events.
+		 */
+		clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
+	} else {
+		tinfo->tidcnt = 0;
+	}
+	spin_unlock(&fd->invalid_lock);
+
+	if (tinfo->tidcnt) {
+		if (copy_to_user((void __user *)tinfo->tidlist,
+				 array, sizeof(*array) * tinfo->tidcnt))
+			ret = -EFAULT;
+	}
+	kfree(array);
+
+	return ret;
+}
+
+static u32 find_phys_blocks(struct page **pages, unsigned npages,
+			    struct tid_pageset *list)
+{
+	unsigned pagecount, pageidx, setcount = 0, i;
+	unsigned long pfn, this_pfn;
+
+	if (!npages)
+		return 0;
+
+	/*
+	 * Look for sets of physically contiguous pages in the user buffer.
+	 * This will allow us to optimize Expected RcvArray entry usage by
+	 * using the bigger supported sizes.
+	 */
+	pfn = page_to_pfn(pages[0]);
+	for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
+		this_pfn = i < npages ? page_to_pfn(pages[i]) : 0;
+
+		/*
+		 * If the pfn's are not sequential, pages are not physically
+		 * contiguous.
+		 */
+		if (this_pfn != ++pfn) {
+			/*
+			 * At this point we have to loop over the set of
+			 * physically contiguous pages and break them down it
+			 * sizes supported by the HW.
+			 * There are two main constraints:
+			 *     1. The max buffer size is MAX_EXPECTED_BUFFER.
+			 *        If the total set size is bigger than that
+			 *        program only a MAX_EXPECTED_BUFFER chunk.
+			 *     2. The buffer size has to be a power of two. If
+			 *        it is not, round down to the closes power of
+			 *        2 and program that size.
+			 */
+			while (pagecount) {
+				int maxpages = pagecount;
+				u32 bufsize = pagecount * PAGE_SIZE;
+
+				if (bufsize > MAX_EXPECTED_BUFFER)
+					maxpages =
+						MAX_EXPECTED_BUFFER >>
+						PAGE_SHIFT;
+				else if (!is_power_of_2(bufsize))
+					maxpages =
+						rounddown_pow_of_two(bufsize) >>
+						PAGE_SHIFT;
+
+				list[setcount].idx = pageidx;
+				list[setcount].count = maxpages;
+				pagecount -= maxpages;
+				pageidx += maxpages;
+				setcount++;
+			}
+			pageidx = i;
+			pagecount = 1;
+			pfn = this_pfn;
+		} else {
+			pagecount++;
+		}
+	}
+	return setcount;
+}
+
+/**
+ * program_rcvarray() - program an RcvArray group with receive buffers
+ * @fp: file pointer
+ * @vaddr: starting user virtual address
+ * @grp: RcvArray group
+ * @sets: array of struct tid_pageset holding information on physically
+ *        contiguous chunks from the user buffer
+ * @start: starting index into sets array
+ * @count: number of struct tid_pageset's to program
+ * @pages: an array of struct page * for the user buffer
+ * @tidlist: the array of u32 elements when the information about the
+ *           programmed RcvArray entries is to be encoded.
+ * @tididx: starting offset into tidlist
+ * @pmapped: (output parameter) number of pages programmed into the RcvArray
+ *           entries.
+ *
+ * This function will program up to 'count' number of RcvArray entries from the
+ * group 'grp'. To make best use of write-combining writes, the function will
+ * perform writes to the unused RcvArray entries which will be ignored by the
+ * HW. Each RcvArray entry will be programmed with a physically contiguous
+ * buffer chunk from the user's virtual buffer.
+ *
+ * Return:
+ * -EINVAL if the requested count is larger than the size of the group,
+ * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
+ * number of RcvArray entries programmed.
+ */
+static int program_rcvarray(struct file *fp, unsigned long vaddr,
+			    struct tid_group *grp,
+			    struct tid_pageset *sets,
+			    unsigned start, u16 count, struct page **pages,
+			    u32 *tidlist, unsigned *tididx, unsigned *pmapped)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	u16 idx;
+	u32 tidinfo = 0, rcventry, useidx = 0;
+	int mapped = 0;
+
+	/* Count should never be larger than the group size */
+	if (count > grp->size)
+		return -EINVAL;
+
+	/* Find the first unused entry in the group */
+	for (idx = 0; idx < grp->size; idx++) {
+		if (!(grp->map & (1 << idx))) {
+			useidx = idx;
+			break;
+		}
+		rcv_array_wc_fill(dd, grp->base + idx);
+	}
+
+	idx = 0;
+	while (idx < count) {
+		u16 npages, pageidx, setidx = start + idx;
+		int ret = 0;
+
+		/*
+		 * If this entry in the group is used, move to the next one.
+		 * If we go past the end of the group, exit the loop.
+		 */
+		if (useidx >= grp->size) {
+			break;
+		} else if (grp->map & (1 << useidx)) {
+			rcv_array_wc_fill(dd, grp->base + useidx);
+			useidx++;
+			continue;
+		}
+
+		rcventry = grp->base + useidx;
+		npages = sets[setidx].count;
+		pageidx = sets[setidx].idx;
+
+		ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
+					 rcventry, grp, pages + pageidx,
+					 npages);
+		if (ret)
+			return ret;
+		mapped += npages;
+
+		tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) |
+			EXP_TID_SET(LEN, npages);
+		tidlist[(*tididx)++] = tidinfo;
+		grp->used++;
+		grp->map |= 1 << useidx++;
+		idx++;
+	}
+
+	/* Fill the rest of the group with "blank" writes */
+	for (; useidx < grp->size; useidx++)
+		rcv_array_wc_fill(dd, grp->base + useidx);
+	*pmapped = mapped;
+	return idx;
+}
+
+static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
+			      u32 rcventry, struct tid_group *grp,
+			      struct page **pages, unsigned npages)
+{
+	int ret;
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct tid_rb_node *node;
+	struct hfi1_devdata *dd = uctxt->dd;
+	struct rb_root *root = &fd->tid_rb_root;
+	dma_addr_t phys;
+
+	/*
+	 * Allocate the node first so we can handle a potential
+	 * failure before we've programmed anything.
+	 */
+	node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages),
+		       GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	phys = pci_map_single(dd->pcidev,
+			      __va(page_to_phys(pages[0])),
+			      npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
+	if (dma_mapping_error(&dd->pcidev->dev, phys)) {
+		dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n",
+			   phys);
+		kfree(node);
+		return -EFAULT;
+	}
+
+	node->mmu.addr = vaddr;
+	node->mmu.len = npages * PAGE_SIZE;
+	node->phys = page_to_phys(pages[0]);
+	node->npages = npages;
+	node->rcventry = rcventry;
+	node->dma_addr = phys;
+	node->grp = grp;
+	node->freed = false;
+	memcpy(node->pages, pages, sizeof(struct page *) * npages);
+
+	if (HFI1_CAP_IS_USET(TID_UNMAP))
+		ret = mmu_rb_insert(root, &node->mmu);
+	else
+		ret = hfi1_mmu_rb_insert(root, &node->mmu);
+
+	if (ret) {
+		hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+			  node->rcventry, node->mmu.addr, node->phys, ret);
+		pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+				 PCI_DMA_FROMDEVICE);
+		kfree(node);
+		return -EFAULT;
+	}
+	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
+	trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
+			       node->mmu.addr, node->phys, phys);
+	return 0;
+}
+
+static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
+			      struct tid_group **grp)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	struct tid_rb_node *node;
+	u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
+	u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;
+
+	if (tididx >= uctxt->expected_count) {
+		dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
+			   tididx, uctxt->ctxt);
+		return -EINVAL;
+	}
+
+	if (tidctrl == 0x3)
+		return -EINVAL;
+
+	rcventry = tididx + (tidctrl - 1);
+
+	node = fd->entry_to_rb[rcventry];
+	if (!node || node->rcventry != (uctxt->expected_base + rcventry))
+		return -EBADF;
+	if (HFI1_CAP_IS_USET(TID_UNMAP))
+		mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false);
+	else
+		hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
+
+	if (grp)
+		*grp = node->grp;
+	clear_tid_node(fd, fd->subctxt, node);
+	return 0;
+}
+
+static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
+			   struct tid_rb_node *node)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+
+	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
+				 node->npages, node->mmu.addr, node->phys,
+				 node->dma_addr);
+
+	hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
+	/*
+	 * Make sure device has seen the write before we unpin the
+	 * pages.
+	 */
+	flush_wc();
+
+	pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
+			 PCI_DMA_FROMDEVICE);
+	hfi1_release_user_pages(current->mm, node->pages, node->npages, true);
+	fd->tid_n_pinned -= node->npages;
+
+	node->grp->used--;
+	node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
+
+	if (node->grp->used == node->grp->size - 1)
+		tid_group_move(node->grp, &uctxt->tid_full_list,
+			       &uctxt->tid_used_list);
+	else if (!node->grp->used)
+		tid_group_move(node->grp, &uctxt->tid_used_list,
+			       &uctxt->tid_group_list);
+	kfree(node);
+}
+
+static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
+			    struct exp_tid_set *set, struct rb_root *root)
+{
+	struct tid_group *grp, *ptr;
+	struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata,
+						tid_rb_root);
+	int i;
+
+	list_for_each_entry_safe(grp, ptr, &set->list, list) {
+		list_del_init(&grp->list);
+
+		for (i = 0; i < grp->size; i++) {
+			if (grp->map & (1 << i)) {
+				u16 rcventry = grp->base + i;
+				struct tid_rb_node *node;
+
+				node = fd->entry_to_rb[rcventry -
+							  uctxt->expected_base];
+				if (!node || node->rcventry != rcventry)
+					continue;
+				if (HFI1_CAP_IS_USET(TID_UNMAP))
+					mmu_rb_remove(&fd->tid_rb_root,
+						      &node->mmu, false);
+				else
+					hfi1_mmu_rb_remove(&fd->tid_rb_root,
+							   &node->mmu);
+				clear_tid_node(fd, -1, node);
+			}
+		}
+	}
+}
+
+static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct hfi1_filedata *fdata =
+		container_of(root, struct hfi1_filedata, tid_rb_root);
+	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
+	struct tid_rb_node *node =
+		container_of(mnode, struct tid_rb_node, mmu);
+
+	if (node->freed)
+		return 0;
+
+	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+				 node->rcventry, node->npages, node->dma_addr);
+	node->freed = true;
+
+	spin_lock(&fdata->invalid_lock);
+	if (fdata->invalid_tid_idx < uctxt->expected_count) {
+		fdata->invalid_tids[fdata->invalid_tid_idx] =
+			rcventry2tidinfo(node->rcventry - uctxt->expected_base);
+		fdata->invalid_tids[fdata->invalid_tid_idx] |=
+			EXP_TID_SET(LEN, node->npages);
+		if (!fdata->invalid_tid_idx) {
+			unsigned long *ev;
+
+			/*
+			 * hfi1_set_uevent_bits() sets a user event flag
+			 * for all processes. Because calling into the
+			 * driver to process TID cache invalidations is
+			 * expensive and TID cache invalidations are
+			 * handled on a per-process basis, we can
+			 * optimize this to set the flag only for the
+			 * process in question.
+			 */
+			ev = uctxt->dd->events +
+				(((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+				  HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+			set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
+		}
+		fdata->invalid_tid_idx++;
+	}
+	spin_unlock(&fdata->invalid_lock);
+	return 0;
+}
+
+static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
+{
+	struct hfi1_filedata *fdata =
+		container_of(root, struct hfi1_filedata, tid_rb_root);
+	struct tid_rb_node *tnode =
+		container_of(node, struct tid_rb_node, mmu);
+	u32 base = fdata->uctxt->expected_base;
+
+	fdata->entry_to_rb[tnode->rcventry - base] = tnode;
+	return 0;
+}
+
+static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
+			  bool notifier)
+{
+	struct hfi1_filedata *fdata =
+		container_of(root, struct hfi1_filedata, tid_rb_root);
+	struct tid_rb_node *tnode =
+		container_of(node, struct tid_rb_node, mmu);
+	u32 base = fdata->uctxt->expected_base;
+
+	fdata->entry_to_rb[tnode->rcventry - base] = NULL;
+}
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/staging/rdma/hfi1/user_exp_rcv.h
index 4f4876e1d353..9bc8d9fba87e 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.h
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_USER_EXP_RCV_H
 #define _HFI1_USER_EXP_RCV_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -50,6 +47,8 @@
  *
  */
 
+#include "hfi.h"
+
 #define EXP_TID_TIDLEN_MASK   0x7FFULL
 #define EXP_TID_TIDLEN_SHIFT  0
 #define EXP_TID_TIDCTRL_MASK  0x3ULL
@@ -71,4 +70,10 @@
 		(tid) |= EXP_TID_SET(field, (value));			\
 	} while (0)
 
+int hfi1_user_exp_rcv_init(struct file *);
+int hfi1_user_exp_rcv_free(struct hfi1_filedata *);
+int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *);
+int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *);
+int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *);
+
 #endif /* _HFI1_USER_EXP_RCV_H */
diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/staging/rdma/hfi1/user_pages.c
index 692de658f0dc..88e10b5f55f1 100644
--- a/drivers/staging/rdma/hfi1/user_pages.c
+++ b/drivers/staging/rdma/hfi1/user_pages.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -51,36 +48,62 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/device.h>
+#include <linux/module.h>
 
 #include "hfi.h"
 
-/**
- * hfi1_map_page - a safety wrapper around pci_map_page()
+static unsigned long cache_size = 256;
+module_param(cache_size, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)");
+
+/*
+ * Determine whether the caller can pin pages.
+ *
+ * This function should be used in the implementation of buffer caches.
+ * The cache implementation should call this function prior to attempting
+ * to pin buffer pages in order to determine whether they should do so.
+ * The function computes cache limits based on the configured ulimit and
+ * cache size. Use of this function is especially important for caches
+ * which are not limited in any other way (e.g. by HW resources) and, thus,
+ * could keeping caching buffers.
  *
  */
-dma_addr_t hfi1_map_page(struct pci_dev *hwdev, struct page *page,
-			 unsigned long offset, size_t size, int direction)
+bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages)
 {
-	dma_addr_t phys;
+	unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
+		size = (cache_size * (1UL << 20)); /* convert to bytes */
+	unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt;
+	bool can_lock = capable(CAP_IPC_LOCK);
 
-	phys = pci_map_page(hwdev, page, offset, size, direction);
+	/*
+	 * Calculate per-cache size. The calculation below uses only a quarter
+	 * of the available per-context limit. This leaves space for other
+	 * pinning. Should we worry about shared ctxts?
+	 */
+	cache_limit = (ulimit / usr_ctxts) / 4;
 
-	return phys;
-}
+	/* If ulimit isn't set to "unlimited" and is smaller than cache_size. */
+	if (ulimit != (-1UL) && size > cache_limit)
+		size = cache_limit;
 
-int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
-			    struct page **pages)
-{
-	unsigned long pinned, lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	bool can_lock = capable(CAP_IPC_LOCK);
-	int ret;
+	/* Convert to number of pages */
+	size = DIV_ROUND_UP(size, PAGE_SIZE);
 
 	down_read(&current->mm->mmap_sem);
 	pinned = current->mm->pinned_vm;
 	up_read(&current->mm->mmap_sem);
 
-	if (pinned + npages > lock_limit && !can_lock)
-		return -ENOMEM;
+	/* First, check the absolute limit against all pinned pages. */
+	if (pinned + npages >= ulimit && !can_lock)
+		return false;
+
+	return ((nlocked + npages) <= size) || can_lock;
+}
+
+int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
+			    struct page **pages)
+{
+	int ret;
 
 	ret = get_user_pages_fast(vaddr, npages, writable, pages);
 	if (ret < 0)
@@ -93,7 +116,8 @@ int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
 	return ret;
 }
 
-void hfi1_release_user_pages(struct page **p, size_t npages, bool dirty)
+void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
+			     size_t npages, bool dirty)
 {
 	size_t i;
 
@@ -103,9 +127,9 @@ void hfi1_release_user_pages(struct page **p, size_t npages, bool dirty)
 		put_page(p[i]);
 	}
 
-	if (current->mm) { /* during close after signal, mm can be NULL */
-		down_write(&current->mm->mmap_sem);
-		current->mm->pinned_vm -= npages;
-		up_write(&current->mm->mmap_sem);
+	if (mm) { /* during close after signal, mm can be NULL */
+		down_write(&mm->mmap_sem);
+		mm->pinned_vm -= npages;
+		up_write(&mm->mmap_sem);
 	}
 }
diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c
index d3de771a0770..ab6b6a42000f 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/staging/rdma/hfi1/user_sdma.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -67,10 +64,10 @@
 #include "hfi.h"
 #include "sdma.h"
 #include "user_sdma.h"
-#include "sdma.h"
 #include "verbs.h"  /* for the headers */
 #include "common.h" /* for struct hfi1_tid_info */
 #include "trace.h"
+#include "mmu_rb.h"
 
 static uint hfi1_sdma_comp_ring_size = 128;
 module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
@@ -147,7 +144,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
 
 /* Last packet in the request */
 #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
-#define TXREQ_FLAGS_IOVEC_LAST_PKT BIT(0)
 
 #define SDMA_REQ_IN_USE     0
 #define SDMA_REQ_FOR_THREAD 1
@@ -171,16 +167,28 @@ static unsigned initial_pkt_count = 8;
 #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
 
 struct user_sdma_iovec {
+	struct list_head list;
 	struct iovec iov;
 	/* number of pages in this vector */
 	unsigned npages;
 	/* array of pinned pages for this vector */
 	struct page **pages;
-	/* offset into the virtual address space of the vector at
-	 * which we last left off. */
+	/*
+	 * offset into the virtual address space of the vector at
+	 * which we last left off.
+	 */
 	u64 offset;
 };
 
+struct sdma_mmu_node {
+	struct mmu_rb_node rb;
+	struct list_head list;
+	struct hfi1_user_sdma_pkt_q *pq;
+	atomic_t refcount;
+	struct page **pages;
+	unsigned npages;
+};
+
 struct user_sdma_request {
 	struct sdma_req_info info;
 	struct hfi1_user_sdma_pkt_q *pq;
@@ -214,15 +222,6 @@ struct user_sdma_request {
 	 */
 	u8 omfactor;
 	/*
-	 * pointer to the user's mm_struct. We are going to
-	 * get a reference to it so it doesn't get freed
-	 * since we might not be in process context when we
-	 * are processing the iov's.
-	 * Using this mm_struct, we can get vma based on the
-	 * iov's address (find_vma()).
-	 */
-	struct mm_struct *user_mm;
-	/*
 	 * We copy the iovs for this request (based on
 	 * info.iovcnt). These are only the data vectors
 	 */
@@ -239,13 +238,12 @@ struct user_sdma_request {
 	u16 tididx;
 	u32 sent;
 	u64 seqnum;
+	u64 seqcomp;
+	u64 seqsubmitted;
 	struct list_head txps;
-	spinlock_t txcmp_lock;  /* protect txcmp list */
-	struct list_head txcmp;
 	unsigned long flags;
 	/* status of the last txreq completed */
 	int status;
-	struct work_struct worker;
 };
 
 /*
@@ -260,11 +258,6 @@ struct user_sdma_txreq {
 	struct sdma_txreq txreq;
 	struct list_head list;
 	struct user_sdma_request *req;
-	struct {
-		struct user_sdma_iovec *vec;
-		u8 flags;
-	} iovecs[3];
-	int idx;
 	u16 flags;
 	unsigned busycount;
 	u64 seqnum;
@@ -280,21 +273,21 @@ struct user_sdma_txreq {
 
 static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
 static int num_user_pages(const struct iovec *);
-static void user_sdma_txreq_cb(struct sdma_txreq *, int, int);
-static void user_sdma_delayed_completion(struct work_struct *);
-static void user_sdma_free_request(struct user_sdma_request *);
+static void user_sdma_txreq_cb(struct sdma_txreq *, int);
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
+static void user_sdma_free_request(struct user_sdma_request *, bool);
 static int pin_vector_pages(struct user_sdma_request *,
 			    struct user_sdma_iovec *);
-static void unpin_vector_pages(struct user_sdma_request *,
-			       struct user_sdma_iovec *);
+static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned);
 static int check_header_template(struct user_sdma_request *,
 				 struct hfi1_pkt_header *, u32, u32);
 static int set_txreq_header(struct user_sdma_request *,
 			    struct user_sdma_txreq *, u32);
 static int set_txreq_header_ahg(struct user_sdma_request *,
 				struct user_sdma_txreq *, u32);
-static inline void set_comp_state(struct user_sdma_request *,
-					enum hfi1_sdma_comp_state, int);
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
+				  struct hfi1_user_sdma_comp_q *,
+				  u16, enum hfi1_sdma_comp_state, int);
 static inline u32 set_pkt_bth_psn(__be32, u8, u32);
 static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
 
@@ -304,6 +297,17 @@ static int defer_packet_queue(
 	struct sdma_txreq *,
 	unsigned seq);
 static void activate_packet_queue(struct iowait *, int);
+static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
+static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
+static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+
+static struct mmu_rb_ops sdma_rb_ops = {
+	.filter = sdma_rb_filter,
+	.insert = sdma_rb_insert,
+	.remove = sdma_rb_remove,
+	.invalidate = sdma_rb_invalidate
+};
 
 static int defer_packet_queue(
 	struct sdma_engine *sde,
@@ -346,7 +350,7 @@ static void activate_packet_queue(struct iowait *wait, int reason)
 
 static void sdma_kmem_cache_ctor(void *obj)
 {
-	struct user_sdma_txreq *tx = (struct user_sdma_txreq *)obj;
+	struct user_sdma_txreq *tx = obj;
 
 	memset(tx, 0, sizeof(*tx));
 }
@@ -381,7 +385,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 		goto pq_nomem;
 
 	memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
-	pq->reqs = kmalloc(memsize, GFP_KERNEL);
+	pq->reqs = kzalloc(memsize, GFP_KERNEL);
 	if (!pq->reqs)
 		goto pq_reqs_nomem;
 
@@ -393,9 +397,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 	pq->state = SDMA_PKT_Q_INACTIVE;
 	atomic_set(&pq->n_reqs, 0);
 	init_waitqueue_head(&pq->wait);
+	pq->sdma_rb_root = RB_ROOT;
+	INIT_LIST_HEAD(&pq->evict);
+	spin_lock_init(&pq->evict_lock);
 
 	iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
-		    activate_packet_queue);
+		    activate_packet_queue, NULL);
 	pq->reqidx = 0;
 	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
 		 fd->subctxt);
@@ -414,8 +421,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 	if (!cq)
 		goto cq_nomem;
 
-	memsize = ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size,
-			PAGE_SIZE);
+	memsize = PAGE_ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size);
 	cq->comps = vmalloc_user(memsize);
 	if (!cq->comps)
 		goto cq_comps_nomem;
@@ -423,6 +429,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 	cq->nentries = hfi1_sdma_comp_ring_size;
 	fd->cq = cq;
 
+	ret = hfi1_mmu_rb_register(&pq->sdma_rb_root, &sdma_rb_ops);
+	if (ret) {
+		dd_dev_err(dd, "Failed to register with MMU %d", ret);
+		goto done;
+	}
+
 	spin_lock_irqsave(&uctxt->sdma_qlock, flags);
 	list_add(&pq->list, &uctxt->sdma_queues);
 	spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
@@ -452,6 +464,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
 	hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
 		  uctxt->ctxt, fd->subctxt);
 	pq = fd->pq;
+	hfi1_mmu_rb_unregister(&pq->sdma_rb_root);
 	if (pq) {
 		spin_lock_irqsave(&uctxt->sdma_qlock, flags);
 		if (!list_empty(&pq->list))
@@ -468,8 +481,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
 		fd->pq = NULL;
 	}
 	if (fd->cq) {
-		if (fd->cq->comps)
-			vfree(fd->cq->comps);
+		vfree(fd->cq->comps);
 		kfree(fd->cq);
 		fd->cq = NULL;
 	}
@@ -479,7 +491,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
 int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 				   unsigned long dim, unsigned long *count)
 {
-	int ret = 0, i = 0, sent;
+	int ret = 0, i = 0;
 	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
@@ -505,9 +517,11 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 			  dd->unit, uctxt->ctxt, fd->subctxt, ret);
 		return -EFAULT;
 	}
+
 	trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
 				     (u16 *)&info);
-	if (cq->comps[info.comp_idx].status == QUEUED) {
+	if (cq->comps[info.comp_idx].status == QUEUED ||
+	    test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) {
 		hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state",
 			  dd->unit, uctxt->ctxt, fd->subctxt,
 			  info.comp_idx);
@@ -534,10 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	req->cq = cq;
 	req->status = -1;
 	INIT_LIST_HEAD(&req->txps);
-	INIT_LIST_HEAD(&req->txcmp);
-	INIT_WORK(&req->worker, user_sdma_delayed_completion);
 
-	spin_lock_init(&req->txcmp_lock);
 	memcpy(&req->info, &info, sizeof(info));
 
 	if (req_opcode(info.ctrl) == EXPECTED)
@@ -596,8 +607,10 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	}
 
 	req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]);
-	/* Calculate the initial TID offset based on the values of
-	   KDETH.OFFSET and KDETH.OM that are passed in. */
+	/*
+	 * Calculate the initial TID offset based on the values of
+	 * KDETH.OFFSET and KDETH.OM that are passed in.
+	 */
 	req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
 		(KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
 		 KDETH_OM_LARGE : KDETH_OM_SMALL);
@@ -606,8 +619,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 
 	/* Save all the IO vector structures */
 	while (i < req->data_iovs) {
+		INIT_LIST_HEAD(&req->iovs[i].list);
 		memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
-		req->iovs[i].offset = 0;
+		ret = pin_vector_pages(req, &req->iovs[i]);
+		if (ret) {
+			req->status = ret;
+			goto free_req;
+		}
 		req->data_len += req->iovs[i++].iov.iov_len;
 	}
 	SDMA_DBG(req, "total data length %u", req->data_len);
@@ -671,52 +689,59 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 		}
 	}
 
-	set_comp_state(req, QUEUED, 0);
+	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
+	atomic_inc(&pq->n_reqs);
 	/* Send the first N packets in the request to buy us some time */
-	sent = user_sdma_send_pkts(req, pcount);
-	if (unlikely(sent < 0)) {
-		if (sent != -EBUSY) {
-			req->status = sent;
-			set_comp_state(req, ERROR, req->status);
-			return sent;
-		} else
-			sent = 0;
+	ret = user_sdma_send_pkts(req, pcount);
+	if (unlikely(ret < 0 && ret != -EBUSY)) {
+		req->status = ret;
+		goto free_req;
 	}
-	atomic_inc(&pq->n_reqs);
-	xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
 
-	if (sent < req->info.npkts) {
-		/*
-		 * This is a somewhat blocking send implementation.
-		 * The driver will block the caller until all packets of the
-		 * request have been submitted to the SDMA engine. However, it
-		 * will not wait for send completions.
-		 */
-		while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
-			ret = user_sdma_send_pkts(req, pcount);
-			if (ret < 0) {
-				if (ret != -EBUSY) {
-					req->status = ret;
-					return ret;
-				}
-				wait_event_interruptible_timeout(
-					pq->busy.wait_dma,
-					(pq->state == SDMA_PKT_Q_ACTIVE),
-					msecs_to_jiffies(
-						SDMA_IOWAIT_TIMEOUT));
+	/*
+	 * It is possible that the SDMA engine would have processed all the
+	 * submitted packets by the time we get here. Therefore, only set
+	 * packet queue state to ACTIVE if there are still uncompleted
+	 * requests.
+	 */
+	if (atomic_read(&pq->n_reqs))
+		xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
+
+	/*
+	 * This is a somewhat blocking send implementation.
+	 * The driver will block the caller until all packets of the
+	 * request have been submitted to the SDMA engine. However, it
+	 * will not wait for send completions.
+	 */
+	while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+		ret = user_sdma_send_pkts(req, pcount);
+		if (ret < 0) {
+			if (ret != -EBUSY) {
+				req->status = ret;
+				set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+				if (ACCESS_ONCE(req->seqcomp) ==
+				    req->seqsubmitted - 1)
+					goto free_req;
+				return ret;
 			}
+			wait_event_interruptible_timeout(
+				pq->busy.wait_dma,
+				(pq->state == SDMA_PKT_Q_ACTIVE),
+				msecs_to_jiffies(
+					SDMA_IOWAIT_TIMEOUT));
 		}
-
 	}
 	*count += idx;
 	return 0;
 free_req:
-	user_sdma_free_request(req);
+	user_sdma_free_request(req, true);
+	pq_update(pq);
+	set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
 	return ret;
 }
 
 static inline u32 compute_data_length(struct user_sdma_request *req,
-					    struct user_sdma_txreq *tx)
+				      struct user_sdma_txreq *tx)
 {
 	/*
 	 * Determine the proper size of the packet data.
@@ -734,8 +759,10 @@ static inline u32 compute_data_length(struct user_sdma_request *req,
 	} else if (req_opcode(req->info.ctrl) == EXPECTED) {
 		u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
 			PAGE_SIZE;
-		/* Get the data length based on the remaining space in the
-		 * TID pair. */
+		/*
+		 * Get the data length based on the remaining space in the
+		 * TID pair.
+		 */
 		len = min(tidlen - req->tidoffset, (u32)req->info.fragsize);
 		/* If we've filled up the TID pair, move to the next one. */
 		if (unlikely(!len) && ++req->tididx < req->n_tids &&
@@ -745,12 +772,15 @@ static inline u32 compute_data_length(struct user_sdma_request *req,
 			req->tidoffset = 0;
 			len = min_t(u32, tidlen, req->info.fragsize);
 		}
-		/* Since the TID pairs map entire pages, make sure that we
+		/*
+		 * Since the TID pairs map entire pages, make sure that we
 		 * are not going to try to send more data that we have
-		 * remaining. */
+		 * remaining.
+		 */
 		len = min(len, req->data_len - req->sent);
-	} else
+	} else {
 		len = min(req->data_len - req->sent, (u32)req->info.fragsize);
+	}
 	SDMA_DBG(req, "Data Length = %u", len);
 	return len;
 }
@@ -813,9 +843,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 		tx->flags = 0;
 		tx->req = req;
 		tx->busycount = 0;
-		tx->idx = -1;
 		INIT_LIST_HEAD(&tx->list);
-		memset(tx->iovecs, 0, sizeof(tx->iovecs));
 
 		if (req->seqnum == req->info.npkts - 1)
 			tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT;
@@ -836,18 +864,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 				WARN_ON(iovec->offset);
 			}
 
-			/*
-			 * This request might include only a header and no user
-			 * data, so pin pages only if there is data and it the
-			 * pages have not been pinned already.
-			 */
-			if (unlikely(!iovec->pages && iovec->iov.iov_len)) {
-				ret = pin_vector_pages(req, iovec);
-				if (ret)
-					goto free_tx;
-			}
-
-			tx->iovecs[++tx->idx].vec = iovec;
 			datalen = compute_data_length(req, tx);
 			if (!datalen) {
 				SDMA_DBG(req,
@@ -926,8 +942,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 			unsigned pageidx, len;
 
 			base = (unsigned long)iovec->iov.iov_base;
-			offset = ((base + iovec->offset + iov_offset) &
-				  ~PAGE_MASK);
+			offset = offset_in_page(base + iovec->offset +
+						iov_offset);
 			pageidx = (((iovec->offset + iov_offset +
 				     base) - (base & PAGE_MASK)) >> PAGE_SHIFT);
 			len = offset + req->info.fragsize > PAGE_SIZE ?
@@ -937,16 +953,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 					      iovec->pages[pageidx],
 					      offset, len);
 			if (ret) {
-				int i;
-
 				SDMA_DBG(req, "SDMA txreq add page failed %d\n",
 					 ret);
-				/* Mark all assigned vectors as complete so they
-				 * are unpinned in the callback. */
-				for (i = tx->idx; i >= 0; i--) {
-					tx->iovecs[i].flags |=
-						TXREQ_FLAGS_IOVEC_LAST_PKT;
-				}
 				goto free_txreq;
 			}
 			iov_offset += len;
@@ -954,19 +962,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 			data_sent += len;
 			if (unlikely(queued < datalen &&
 				     pageidx == iovec->npages &&
-				     req->iov_idx < req->data_iovs - 1 &&
-				     tx->idx < ARRAY_SIZE(tx->iovecs))) {
+				     req->iov_idx < req->data_iovs - 1)) {
 				iovec->offset += iov_offset;
-				tx->iovecs[tx->idx].flags |=
-					TXREQ_FLAGS_IOVEC_LAST_PKT;
 				iovec = &req->iovs[++req->iov_idx];
-				if (!iovec->pages) {
-					ret = pin_vector_pages(req, iovec);
-					if (ret)
-						goto free_txreq;
-				}
 				iov_offset = 0;
-				tx->iovecs[++tx->idx].vec = iovec;
 			}
 		}
 		/*
@@ -977,28 +976,21 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 		if (req_opcode(req->info.ctrl) == EXPECTED)
 			req->tidoffset += datalen;
 		req->sent += data_sent;
-		if (req->data_len) {
-			tx->iovecs[tx->idx].vec->offset += iov_offset;
-			/* If we've reached the end of the io vector, mark it
-			 * so the callback can unpin the pages and free it. */
-			if (tx->iovecs[tx->idx].vec->offset ==
-			    tx->iovecs[tx->idx].vec->iov.iov_len)
-				tx->iovecs[tx->idx].flags |=
-					TXREQ_FLAGS_IOVEC_LAST_PKT;
-		}
-
+		if (req->data_len)
+			iovec->offset += iov_offset;
+		list_add_tail(&tx->txreq.list, &req->txps);
 		/*
 		 * It is important to increment this here as it is used to
 		 * generate the BTH.PSN and, therefore, can't be bulk-updated
 		 * outside of the loop.
 		 */
 		tx->seqnum = req->seqnum++;
-		list_add_tail(&tx->txreq.list, &req->txps);
 		npkts++;
 	}
 dosend:
 	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
-	if (list_empty(&req->txps))
+	if (list_empty(&req->txps)) {
+		req->seqsubmitted = req->seqnum;
 		if (req->seqnum == req->info.npkts) {
 			set_bit(SDMA_REQ_SEND_DONE, &req->flags);
 			/*
@@ -1010,6 +1002,10 @@ dosend:
 			if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
 				sdma_ahg_free(req->sde, req->ahg_idx);
 		}
+	} else if (ret > 0) {
+		req->seqsubmitted += ret;
+		ret = 0;
+	}
 	return ret;
 
 free_txreq:
@@ -1024,7 +1020,7 @@ free_tx:
  */
 static inline int num_user_pages(const struct iovec *iov)
 {
-	const unsigned long addr  = (unsigned long) iov->iov_base;
+	const unsigned long addr  = (unsigned long)iov->iov_base;
 	const unsigned long len   = iov->iov_len;
 	const unsigned long spage = addr & PAGE_MASK;
 	const unsigned long epage = (addr + len - 1) & PAGE_MASK;
@@ -1032,64 +1028,129 @@ static inline int num_user_pages(const struct iovec *iov)
 	return 1 + ((epage - spage) >> PAGE_SHIFT);
 }
 
-static int pin_vector_pages(struct user_sdma_request *req,
-			    struct user_sdma_iovec *iovec) {
-	int pinned, npages;
+/* Caller must hold pq->evict_lock */
+static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
+{
+	u32 cleared = 0;
+	struct sdma_mmu_node *node, *ptr;
 
-	npages = num_user_pages(&iovec->iov);
-	iovec->pages = kcalloc(npages, sizeof(*iovec->pages), GFP_KERNEL);
-	if (!iovec->pages) {
-		SDMA_DBG(req, "Failed page array alloc");
-		return -ENOMEM;
+	list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
+		/* Make sure that no one is still using the node. */
+		if (!atomic_read(&node->refcount)) {
+			/*
+			 * Need to use the page count now as the remove callback
+			 * will free the node.
+			 */
+			cleared += node->npages;
+			spin_unlock(&pq->evict_lock);
+			hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
+			spin_lock(&pq->evict_lock);
+			if (cleared >= npages)
+				break;
+		}
 	}
+	return cleared;
+}
 
-	/*
-	 * Get a reference to the process's mm so we can use it when
-	 * unpinning the io vectors.
-	 */
-	req->pq->user_mm = get_task_mm(current);
+static int pin_vector_pages(struct user_sdma_request *req,
+			    struct user_sdma_iovec *iovec) {
+	int ret = 0, pinned, npages, cleared;
+	struct page **pages;
+	struct hfi1_user_sdma_pkt_q *pq = req->pq;
+	struct sdma_mmu_node *node = NULL;
+	struct mmu_rb_node *rb_node;
+
+	rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
+				     (unsigned long)iovec->iov.iov_base,
+				     iovec->iov.iov_len);
+	if (rb_node)
+		node = container_of(rb_node, struct sdma_mmu_node, rb);
+
+	if (!node) {
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node)
+			return -ENOMEM;
 
-	pinned = hfi1_acquire_user_pages((unsigned long)iovec->iov.iov_base,
-					 npages, 0, iovec->pages);
+		node->rb.addr = (unsigned long)iovec->iov.iov_base;
+		node->rb.len = iovec->iov.iov_len;
+		node->pq = pq;
+		atomic_set(&node->refcount, 0);
+		INIT_LIST_HEAD(&node->list);
+	}
 
-	if (pinned < 0)
-		return pinned;
+	npages = num_user_pages(&iovec->iov);
+	if (node->npages < npages) {
+		pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+		if (!pages) {
+			SDMA_DBG(req, "Failed page array alloc");
+			ret = -ENOMEM;
+			goto bail;
+		}
+		memcpy(pages, node->pages, node->npages * sizeof(*pages));
+
+		npages -= node->npages;
+retry:
+		if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) {
+			spin_lock(&pq->evict_lock);
+			cleared = sdma_cache_evict(pq, npages);
+			spin_unlock(&pq->evict_lock);
+			if (cleared >= npages)
+				goto retry;
+		}
+		pinned = hfi1_acquire_user_pages(
+			((unsigned long)iovec->iov.iov_base +
+			 (node->npages * PAGE_SIZE)), npages, 0,
+			pages + node->npages);
+		if (pinned < 0) {
+			kfree(pages);
+			ret = pinned;
+			goto bail;
+		}
+		if (pinned != npages) {
+			unpin_vector_pages(current->mm, pages, pinned);
+			ret = -EFAULT;
+			goto bail;
+		}
+		kfree(node->pages);
+		node->pages = pages;
+		node->npages += pinned;
+		npages = node->npages;
+		spin_lock(&pq->evict_lock);
+		if (!rb_node)
+			list_add(&node->list, &pq->evict);
+		else
+			list_move(&node->list, &pq->evict);
+		pq->n_locked += pinned;
+		spin_unlock(&pq->evict_lock);
+	}
+	iovec->pages = node->pages;
+	iovec->npages = npages;
 
-	iovec->npages = pinned;
-	if (pinned != npages) {
-		SDMA_DBG(req, "Failed to pin pages (%d/%u)", pinned, npages);
-		unpin_vector_pages(req, iovec);
-		return -EFAULT;
+	if (!rb_node) {
+		ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
+		if (ret) {
+			spin_lock(&pq->evict_lock);
+			list_del(&node->list);
+			pq->n_locked -= node->npages;
+			spin_unlock(&pq->evict_lock);
+			ret = 0;
+			goto bail;
+		}
+	} else {
+		atomic_inc(&node->refcount);
 	}
 	return 0;
+bail:
+	if (!rb_node)
+		kfree(node);
+	return ret;
 }
 
-static void unpin_vector_pages(struct user_sdma_request *req,
-			       struct user_sdma_iovec *iovec)
+static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+			       unsigned npages)
 {
-	/*
-	 * Unpinning is done through the workqueue so use the
-	 * process's mm if we have a reference to it.
-	 */
-	if ((current->flags & PF_KTHREAD) && req->pq->user_mm)
-		use_mm(req->pq->user_mm);
-
-	hfi1_release_user_pages(iovec->pages, iovec->npages, 0);
-
-	/*
-	 * Unuse the user's mm (see above) and release the
-	 * reference to it.
-	 */
-	if (req->pq->user_mm) {
-		if (current->flags & PF_KTHREAD)
-			unuse_mm(req->pq->user_mm);
-		mmput(req->pq->user_mm);
-	}
-
-	kfree(iovec->pages);
-	iovec->pages = NULL;
-	iovec->npages = 0;
-	iovec->offset = 0;
+	hfi1_release_user_pages(mm, pages, npages, 0);
+	kfree(pages);
 }
 
 static int check_header_template(struct user_sdma_request *req,
@@ -1212,7 +1273,6 @@ static int set_txreq_header(struct user_sdma_request *req,
 		if (ret)
 			return ret;
 		goto done;
-
 	}
 
 	hdr->bth[2] = cpu_to_be32(
@@ -1222,7 +1282,7 @@ static int set_txreq_header(struct user_sdma_request *req,
 
 	/* Set ACK request on last packet */
 	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
-		hdr->bth[2] |= cpu_to_be32(1UL<<31);
+		hdr->bth[2] |= cpu_to_be32(1UL << 31);
 
 	/* Set the new offset */
 	hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset);
@@ -1236,8 +1296,10 @@ static int set_txreq_header(struct user_sdma_request *req,
 		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
 					 PAGE_SIZE)) {
 			req->tidoffset = 0;
-			/* Since we don't copy all the TIDs, all at once,
-			 * we have to check again. */
+			/*
+			 * Since we don't copy all the TIDs, all at once,
+			 * we have to check again.
+			 */
 			if (++req->tididx > req->n_tids - 1 ||
 			    !req->tids[req->tididx]) {
 				return -EINVAL;
@@ -1318,8 +1380,10 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
 					 PAGE_SIZE)) {
 			req->tidoffset = 0;
-			/* Since we don't copy all the TIDs, all at once,
-			 * we have to check again. */
+			/*
+			 * Since we don't copy all the TIDs, all at once,
+			 * we have to check again.
+			 */
 			if (++req->tididx > req->n_tids - 1 ||
 			    !req->tids[req->tididx]) {
 				return -EINVAL;
@@ -1343,8 +1407,9 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 								INTR) >> 16);
 			val &= cpu_to_le16(~(1U << 13));
 			AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
-		} else
+		} else {
 			AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val);
+		}
 	}
 
 	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
@@ -1359,113 +1424,62 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
  * tx request have been processed by the DMA engine. Called in
  * interrupt context.
  */
-static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status,
-			       int drain)
+static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 {
 	struct user_sdma_txreq *tx =
 		container_of(txreq, struct user_sdma_txreq, txreq);
 	struct user_sdma_request *req;
-	bool defer;
-	int i;
+	struct hfi1_user_sdma_pkt_q *pq;
+	struct hfi1_user_sdma_comp_q *cq;
+	u16 idx;
 
 	if (!tx->req)
 		return;
 
 	req = tx->req;
-	/*
-	 * If this is the callback for the last packet of the request,
-	 * queue up the request for clean up.
-	 */
-	defer = (tx->seqnum == req->info.npkts - 1);
-
-	/*
-	 * If we have any io vectors associated with this txreq,
-	 * check whether they need to be 'freed'. We can't free them
-	 * here because the unpin function needs to be able to sleep.
-	 */
-	for (i = tx->idx; i >= 0; i--) {
-		if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) {
-			defer = true;
-			break;
-		}
-	}
+	pq = req->pq;
+	cq = req->cq;
 
-	req->status = status;
 	if (status != SDMA_TXREQ_S_OK) {
 		SDMA_DBG(req, "SDMA completion with error %d",
 			 status);
 		set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
-		defer = true;
 	}
 
-	/*
-	 * Defer the clean up of the iovectors and the request until later
-	 * so it can be done outside of interrupt context.
-	 */
-	if (defer) {
-		spin_lock(&req->txcmp_lock);
-		list_add_tail(&tx->list, &req->txcmp);
-		spin_unlock(&req->txcmp_lock);
-		schedule_work(&req->worker);
+	req->seqcomp = tx->seqnum;
+	kmem_cache_free(pq->txreq_cache, tx);
+	tx = NULL;
+
+	idx = req->info.comp_idx;
+	if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
+		if (req->seqcomp == req->info.npkts - 1) {
+			req->status = 0;
+			user_sdma_free_request(req, false);
+			pq_update(pq);
+			set_comp_state(pq, cq, idx, COMPLETE, 0);
+		}
 	} else {
-		kmem_cache_free(req->pq->txreq_cache, tx);
+		if (status != SDMA_TXREQ_S_OK)
+			req->status = status;
+		if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
+		    (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
+		     test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
+			user_sdma_free_request(req, false);
+			pq_update(pq);
+			set_comp_state(pq, cq, idx, ERROR, req->status);
+		}
 	}
 }
 
-static void user_sdma_delayed_completion(struct work_struct *work)
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
 {
-	struct user_sdma_request *req =
-		container_of(work, struct user_sdma_request, worker);
-	struct hfi1_user_sdma_pkt_q *pq = req->pq;
-	struct user_sdma_txreq *tx = NULL;
-	unsigned long flags;
-	u64 seqnum;
-	int i;
-
-	while (1) {
-		spin_lock_irqsave(&req->txcmp_lock, flags);
-		if (!list_empty(&req->txcmp)) {
-			tx = list_first_entry(&req->txcmp,
-					      struct user_sdma_txreq, list);
-			list_del(&tx->list);
-		}
-		spin_unlock_irqrestore(&req->txcmp_lock, flags);
-		if (!tx)
-			break;
-
-		for (i = tx->idx; i >= 0; i--)
-			if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT)
-				unpin_vector_pages(req, tx->iovecs[i].vec);
-
-		seqnum = tx->seqnum;
-		kmem_cache_free(pq->txreq_cache, tx);
-		tx = NULL;
-
-		if (req->status != SDMA_TXREQ_S_OK) {
-			if (seqnum == ACCESS_ONCE(req->seqnum) &&
-			    test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) {
-				atomic_dec(&pq->n_reqs);
-				set_comp_state(req, ERROR, req->status);
-				user_sdma_free_request(req);
-				break;
-			}
-		} else {
-			if (seqnum == req->info.npkts - 1) {
-				atomic_dec(&pq->n_reqs);
-				set_comp_state(req, COMPLETE, 0);
-				user_sdma_free_request(req);
-				break;
-			}
-		}
-	}
-
-	if (!atomic_read(&pq->n_reqs)) {
+	if (atomic_dec_and_test(&pq->n_reqs)) {
 		xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
 		wake_up(&pq->wait);
 	}
 }
 
-static void user_sdma_free_request(struct user_sdma_request *req)
+static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
 {
 	if (!list_empty(&req->txps)) {
 		struct sdma_txreq *t, *p;
@@ -1479,25 +1493,87 @@ static void user_sdma_free_request(struct user_sdma_request *req)
 		}
 	}
 	if (req->data_iovs) {
+		struct sdma_mmu_node *node;
+		struct mmu_rb_node *mnode;
 		int i;
 
-		for (i = 0; i < req->data_iovs; i++)
-			if (req->iovs[i].npages && req->iovs[i].pages)
-				unpin_vector_pages(req, &req->iovs[i]);
+		for (i = 0; i < req->data_iovs; i++) {
+			mnode = hfi1_mmu_rb_search(
+				&req->pq->sdma_rb_root,
+				(unsigned long)req->iovs[i].iov.iov_base,
+				req->iovs[i].iov.iov_len);
+			if (!mnode)
+				continue;
+
+			node = container_of(mnode, struct sdma_mmu_node, rb);
+			if (unpin)
+				hfi1_mmu_rb_remove(&req->pq->sdma_rb_root,
+						   &node->rb);
+			else
+				atomic_dec(&node->refcount);
+		}
 	}
 	kfree(req->tids);
 	clear_bit(SDMA_REQ_IN_USE, &req->flags);
 }
 
-static inline void set_comp_state(struct user_sdma_request *req,
-					enum hfi1_sdma_comp_state state,
-					int ret)
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+				  struct hfi1_user_sdma_comp_q *cq,
+				  u16 idx, enum hfi1_sdma_comp_state state,
+				  int ret)
 {
-	SDMA_DBG(req, "Setting completion status %u %d", state, ret);
-	req->cq->comps[req->info.comp_idx].status = state;
+	hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
+		  pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
+	cq->comps[idx].status = state;
 	if (state == ERROR)
-		req->cq->comps[req->info.comp_idx].errcode = -ret;
-	trace_hfi1_sdma_user_completion(req->pq->dd, req->pq->ctxt,
-					req->pq->subctxt, req->info.comp_idx,
-					state, ret);
+		cq->comps[idx].errcode = -ret;
+	trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
+					idx, state, ret);
+}
+
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+			   unsigned long len)
+{
+	return (bool)(node->addr == addr);
+}
+
+static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct sdma_mmu_node *node =
+		container_of(mnode, struct sdma_mmu_node, rb);
+
+	atomic_inc(&node->refcount);
+	return 0;
+}
+
+static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
+			   bool notifier)
+{
+	struct sdma_mmu_node *node =
+		container_of(mnode, struct sdma_mmu_node, rb);
+
+	spin_lock(&node->pq->evict_lock);
+	list_del(&node->list);
+	node->pq->n_locked -= node->npages;
+	spin_unlock(&node->pq->evict_lock);
+
+	unpin_vector_pages(notifier ? NULL : current->mm, node->pages,
+			   node->npages);
+	/*
+	 * If called by the MMU notifier, we have to adjust the pinned
+	 * page count ourselves.
+	 */
+	if (notifier)
+		current->mm->pinned_vm -= node->npages;
+	kfree(node);
+}
+
+static int sdma_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct sdma_mmu_node *node =
+		container_of(mnode, struct sdma_mmu_node, rb);
+
+	if (!atomic_read(&node->refcount))
+		return 1;
+	return 0;
 }
diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/staging/rdma/hfi1/user_sdma.h
index 0afa28508a8a..b9240e351161 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.h
+++ b/drivers/staging/rdma/hfi1/user_sdma.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -69,7 +66,11 @@ struct hfi1_user_sdma_pkt_q {
 	struct iowait busy;
 	unsigned state;
 	wait_queue_head_t wait;
-	struct mm_struct *user_mm;
+	unsigned long unpinned;
+	struct rb_root sdma_rb_root;
+	u32 n_locked;
+	struct list_head evict;
+	spinlock_t evict_lock; /* protect evict and n_locked */
 };
 
 struct hfi1_user_sdma_comp_q {
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c
index 09b8d412ee90..89f2aad45c1b 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/staging/rdma/hfi1/verbs.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -63,9 +60,9 @@
 #include "device.h"
 #include "trace.h"
 #include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 
-unsigned int hfi1_lkey_table_size = 16;
+static unsigned int hfi1_lkey_table_size = 16;
 module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
 		   S_IRUGO);
 MODULE_PARM_DESC(lkey_table_size,
@@ -124,45 +121,181 @@ unsigned int hfi1_max_srq_wrs = 0x1FFFF;
 module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO);
 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
 
+unsigned short piothreshold = 256;
+module_param(piothreshold, ushort, S_IRUGO);
+MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
+
+#define COPY_CACHELESS 1
+#define COPY_ADAPTIVE  2
+static unsigned int sge_copy_mode;
+module_param(sge_copy_mode, uint, S_IRUGO);
+MODULE_PARM_DESC(sge_copy_mode,
+		 "Verbs copy mode: 0 use memcpy, 1 use cacheless copy, 2 adapt based on WSS");
+
 static void verbs_sdma_complete(
 	struct sdma_txreq *cookie,
-	int status,
-	int drained);
+	int status);
+
+static int pio_wait(struct rvt_qp *qp,
+		    struct send_context *sc,
+		    struct hfi1_pkt_state *ps,
+		    u32 flag);
 
 /* Length of buffer to create verbs txreq cache name */
 #define TXREQ_NAME_LEN 24
 
+static uint wss_threshold;
+module_param(wss_threshold, uint, S_IRUGO);
+MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
+static uint wss_clean_period = 256;
+module_param(wss_clean_period, uint, S_IRUGO);
+MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
+
+/* memory working set size */
+struct hfi1_wss {
+	unsigned long *entries;
+	atomic_t total_count;
+	atomic_t clean_counter;
+	atomic_t clean_entry;
+
+	int threshold;
+	int num_entries;
+	long pages_mask;
+};
+
+static struct hfi1_wss wss;
+
+int hfi1_wss_init(void)
+{
+	long llc_size;
+	long llc_bits;
+	long table_size;
+	long table_bits;
+
+	/* check for a valid percent range - default to 80 if none or invalid */
+	if (wss_threshold < 1 || wss_threshold > 100)
+		wss_threshold = 80;
+	/* reject a wildly large period */
+	if (wss_clean_period > 1000000)
+		wss_clean_period = 256;
+	/* reject a zero period */
+	if (wss_clean_period == 0)
+		wss_clean_period = 1;
+
+	/*
+	 * Calculate the table size - the next power of 2 larger than the
+	 * LLC size.  LLC size is in KiB.
+	 */
+	llc_size = wss_llc_size() * 1024;
+	table_size = roundup_pow_of_two(llc_size);
+
+	/* one bit per page in rounded up table */
+	llc_bits = llc_size / PAGE_SIZE;
+	table_bits = table_size / PAGE_SIZE;
+	wss.pages_mask = table_bits - 1;
+	wss.num_entries = table_bits / BITS_PER_LONG;
+
+	wss.threshold = (llc_bits * wss_threshold) / 100;
+	if (wss.threshold == 0)
+		wss.threshold = 1;
+
+	atomic_set(&wss.clean_counter, wss_clean_period);
+
+	wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
+			      GFP_KERNEL);
+	if (!wss.entries) {
+		hfi1_wss_exit();
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void hfi1_wss_exit(void)
+{
+	/* coded to handle partially initialized and repeat callers */
+	kfree(wss.entries);
+	wss.entries = NULL;
+}
+
 /*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; hfi1_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
+ * Advance the clean counter.  When the clean period has expired,
+ * clean an entry.
+ *
+ * This is implemented in atomics to avoid locking.  Because multiple
+ * variables are involved, it can be racy which can lead to slightly
+ * inaccurate information.  Since this is only a heuristic, this is
+ * OK.  Any innaccuracies will clean themselves out as the counter
+ * advances.  That said, it is unlikely the entry clean operation will
+ * race - the next possible racer will not start until the next clean
+ * period.
+ *
+ * The clean counter is implemented as a decrement to zero.  When zero
+ * is reached an entry is cleaned.
  */
-const int ib_hfi1_state_ops[IB_QPS_ERR + 1] = {
-	[IB_QPS_RESET] = 0,
-	[IB_QPS_INIT] = HFI1_POST_RECV_OK,
-	[IB_QPS_RTR] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK,
-	[IB_QPS_RTS] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
-	    HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK |
-	    HFI1_PROCESS_NEXT_SEND_OK,
-	[IB_QPS_SQD] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
-	    HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK,
-	[IB_QPS_SQE] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
-	    HFI1_POST_SEND_OK | HFI1_FLUSH_SEND,
-	[IB_QPS_ERR] = HFI1_POST_RECV_OK | HFI1_FLUSH_RECV |
-	    HFI1_POST_SEND_OK | HFI1_FLUSH_SEND,
-};
+static void wss_advance_clean_counter(void)
+{
+	int entry;
+	int weight;
+	unsigned long bits;
 
-struct hfi1_ucontext {
-	struct ib_ucontext ibucontext;
-};
+	/* become the cleaner if we decrement the counter to zero */
+	if (atomic_dec_and_test(&wss.clean_counter)) {
+		/*
+		 * Set, not add, the clean period.  This avoids an issue
+		 * where the counter could decrement below the clean period.
+		 * Doing a set can result in lost decrements, slowing the
+		 * clean advance.  Since this a heuristic, this possible
+		 * slowdown is OK.
+		 *
+		 * An alternative is to loop, advancing the counter by a
+		 * clean period until the result is > 0. However, this could
+		 * lead to several threads keeping another in the clean loop.
+		 * This could be mitigated by limiting the number of times
+		 * we stay in the loop.
+		 */
+		atomic_set(&wss.clean_counter, wss_clean_period);
 
-static inline struct hfi1_ucontext *to_iucontext(struct ib_ucontext
-						  *ibucontext)
+		/*
+		 * Uniquely grab the entry to clean and move to next.
+		 * The current entry is always the lower bits of
+		 * wss.clean_entry.  The table size, wss.num_entries,
+		 * is always a power-of-2.
+		 */
+		entry = (atomic_inc_return(&wss.clean_entry) - 1)
+			& (wss.num_entries - 1);
+
+		/* clear the entry and count the bits */
+		bits = xchg(&wss.entries[entry], 0);
+		weight = hweight64((u64)bits);
+		/* only adjust the contended total count if needed */
+		if (weight)
+			atomic_sub(weight, &wss.total_count);
+	}
+}
+
+/*
+ * Insert the given address into the working set array.
+ */
+static void wss_insert(void *address)
 {
-	return container_of(ibucontext, struct hfi1_ucontext, ibucontext);
+	u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
+	u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
+	u32 nr = page & (BITS_PER_LONG - 1);
+
+	if (!test_and_set_bit(nr, &wss.entries[entry]))
+		atomic_inc(&wss.total_count);
+
+	wss_advance_clean_counter();
 }
 
-static inline void _hfi1_schedule_send(struct hfi1_qp *qp);
+/*
+ * Is the working set larger than the threshold?
+ */
+static inline int wss_exceeds_threshold(void)
+{
+	return atomic_read(&wss.total_count) >= wss.threshold;
+}
 
 /*
  * Translate ib_wr_opcode into ib_wc_opcode.
@@ -274,14 +407,47 @@ __be64 ib_hfi1_sys_image_guid;
  * @ss: the SGE state
  * @data: the data to copy
  * @length: the length of the data
+ * @copy_last: do a separate copy of the last 8 bytes
  */
 void hfi1_copy_sge(
-	struct hfi1_sge_state *ss,
+	struct rvt_sge_state *ss,
 	void *data, u32 length,
-	int release)
+	int release,
+	int copy_last)
 {
-	struct hfi1_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
+	int in_last = 0;
+	int i;
+	int cacheless_copy = 0;
+
+	if (sge_copy_mode == COPY_CACHELESS) {
+		cacheless_copy = length >= PAGE_SIZE;
+	} else if (sge_copy_mode == COPY_ADAPTIVE) {
+		if (length >= PAGE_SIZE) {
+			/*
+			 * NOTE: this *assumes*:
+			 * o The first vaddr is the dest.
+			 * o If multiple pages, then vaddr is sequential.
+			 */
+			wss_insert(sge->vaddr);
+			if (length >= (2 * PAGE_SIZE))
+				wss_insert(sge->vaddr + PAGE_SIZE);
+
+			cacheless_copy = wss_exceeds_threshold();
+		} else {
+			wss_advance_clean_counter();
+		}
+	}
+	if (copy_last) {
+		if (length > 8) {
+			length -= 8;
+		} else {
+			copy_last = 0;
+			in_last = 1;
+		}
+	}
 
+again:
 	while (length) {
 		u32 len = sge->length;
 
@@ -290,17 +456,25 @@ void hfi1_copy_sge(
 		if (len > sge->sge_length)
 			len = sge->sge_length;
 		WARN_ON_ONCE(len == 0);
-		memcpy(sge->vaddr, data, len);
+		if (unlikely(in_last)) {
+			/* enforce byte transfer ordering */
+			for (i = 0; i < len; i++)
+				((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
+		} else if (cacheless_copy) {
+			cacheless_memcpy(sge->vaddr, data, len);
+		} else {
+			memcpy(sge->vaddr, data, len);
+		}
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (release)
-				hfi1_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -313,6 +487,13 @@ void hfi1_copy_sge(
 		data += len;
 		length -= len;
 	}
+
+	if (copy_last) {
+		copy_last = 0;
+		in_last = 1;
+		length = 8;
+		goto again;
+	}
 }
 
 /**
@@ -320,9 +501,9 @@ void hfi1_copy_sge(
  * @ss: the SGE state
  * @length: the number of bytes to skip
  */
-void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release)
+void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
 {
-	struct hfi1_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	while (length) {
 		u32 len = sge->length;
@@ -337,11 +518,11 @@ void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release)
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (release)
-				hfi1_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -355,231 +536,6 @@ void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release)
 	}
 }
 
-/**
- * post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
-{
-	struct hfi1_swqe *wqe;
-	u32 next;
-	int i;
-	int j;
-	int acc;
-	struct hfi1_lkey_table *rkt;
-	struct hfi1_pd *pd;
-	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
-	struct hfi1_pportdata *ppd;
-	struct hfi1_ibport *ibp;
-
-	/* IB spec says that num_sge == 0 is OK. */
-	if (unlikely(wr->num_sge > qp->s_max_sge))
-		return -EINVAL;
-
-	ppd = &dd->pport[qp->port_num - 1];
-	ibp = &ppd->ibport_data;
-
-	/*
-	 * Don't allow RDMA reads or atomic operations on UC or
-	 * undefined operations.
-	 * Make sure buffer is large enough to hold the result for atomics.
-	 */
-	if (qp->ibqp.qp_type == IB_QPT_UC) {
-		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
-			return -EINVAL;
-	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
-		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
-		if (wr->opcode != IB_WR_SEND &&
-		    wr->opcode != IB_WR_SEND_WITH_IMM)
-			return -EINVAL;
-		/* Check UD destination address PD */
-		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
-			return -EINVAL;
-	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
-		return -EINVAL;
-	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-		   (wr->num_sge == 0 ||
-		    wr->sg_list[0].length < sizeof(u64) ||
-		    wr->sg_list[0].addr & (sizeof(u64) - 1)))
-		return -EINVAL;
-	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
-		return -EINVAL;
-
-	next = qp->s_head + 1;
-	if (next >= qp->s_size)
-		next = 0;
-	if (next == qp->s_last)
-		return -ENOMEM;
-
-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.pd);
-	wqe = get_swqe_ptr(qp, qp->s_head);
-
-
-	if (qp->ibqp.qp_type != IB_QPT_UC &&
-	    qp->ibqp.qp_type != IB_QPT_RC)
-		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
-	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-		 wr->opcode == IB_WR_RDMA_WRITE ||
-		 wr->opcode == IB_WR_RDMA_READ)
-		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
-	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
-	else
-		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
-	wqe->length = 0;
-	j = 0;
-	if (wr->num_sge) {
-		acc = wr->opcode >= IB_WR_RDMA_READ ?
-			IB_ACCESS_LOCAL_WRITE : 0;
-		for (i = 0; i < wr->num_sge; i++) {
-			u32 length = wr->sg_list[i].length;
-			int ok;
-
-			if (length == 0)
-				continue;
-			ok = hfi1_lkey_ok(rkt, pd, &wqe->sg_list[j],
-					  &wr->sg_list[i], acc);
-			if (!ok)
-				goto bail_inval_free;
-			wqe->length += length;
-			j++;
-		}
-		wqe->wr.num_sge = j;
-	}
-	if (qp->ibqp.qp_type == IB_QPT_UC ||
-	    qp->ibqp.qp_type == IB_QPT_RC) {
-		if (wqe->length > 0x80000000U)
-			goto bail_inval_free;
-	} else {
-		struct hfi1_ah *ah = to_iah(ud_wr(wr)->ah);
-
-		atomic_inc(&ah->refcount);
-	}
-	wqe->ssn = qp->s_ssn++;
-	qp->s_head = next;
-
-	return 0;
-
-bail_inval_free:
-	/* release mr holds */
-	while (j) {
-		struct hfi1_sge *sge = &wqe->sg_list[--j];
-
-		hfi1_put_mr(sge->mr);
-	}
-	return -EINVAL;
-}
-
-/**
- * post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		     struct ib_send_wr **bad_wr)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	int err = 0;
-	int call_send;
-	unsigned long flags;
-	unsigned nreq = 0;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Check that state is OK to post send. */
-	if (unlikely(!(ib_hfi1_state_ops[qp->state] & HFI1_POST_SEND_OK))) {
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		return -EINVAL;
-	}
-
-	/* sq empty and not list -> call send */
-	call_send = qp->s_head == qp->s_last && !wr->next;
-
-	for (; wr; wr = wr->next) {
-		err = post_one_send(qp, wr);
-		if (unlikely(err)) {
-			*bad_wr = wr;
-			goto bail;
-		}
-		nreq++;
-	}
-bail:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	if (nreq && !call_send)
-		_hfi1_schedule_send(qp);
-	if (nreq && call_send)
-		hfi1_do_send(&qp->s_iowait.iowork);
-	return err;
-}
-
-/**
- * post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			struct ib_recv_wr **bad_wr)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_rwq *wq = qp->r_rq.wq;
-	unsigned long flags;
-	int ret;
-
-	/* Check that state is OK to post receive. */
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_POST_RECV_OK) || !wq) {
-		*bad_wr = wr;
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	for (; wr; wr = wr->next) {
-		struct hfi1_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = wq->head + 1;
-		if (next >= qp->r_rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
 /*
  * Make sure the QP is ready and able to accept the given opcode.
  */
@@ -587,18 +543,17 @@ static inline int qp_ok(int opcode, struct hfi1_packet *packet)
 {
 	struct hfi1_ibport *ibp;
 
-	if (!(ib_hfi1_state_ops[packet->qp->state] & HFI1_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
 		goto dropit;
 	if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
 	    (opcode == IB_OPCODE_CNP))
 		return 1;
 dropit:
 	ibp = &packet->rcd->ppd->ibport_data;
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 	return 0;
 }
 
-
 /**
  * hfi1_ib_rcv - process an incoming packet
  * @packet: data packet information
@@ -614,6 +569,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 	u32 tlen = packet->tlen;
 	struct hfi1_pportdata *ppd = rcd->ppd;
 	struct hfi1_ibport *ibp = &ppd->ibport_data;
+	struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
 	unsigned long flags;
 	u32 qp_num;
 	int lnh;
@@ -622,9 +578,9 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 
 	/* Check for GRH */
 	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
-	if (lnh == HFI1_LRH_BTH)
+	if (lnh == HFI1_LRH_BTH) {
 		packet->ohdr = &hdr->u.oth;
-	else if (lnh == HFI1_LRH_GRH) {
+	} else if (lnh == HFI1_LRH_GRH) {
 		u32 vtf;
 
 		packet->ohdr = &hdr->u.l.oth;
@@ -634,8 +590,9 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 		if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 			goto drop;
 		packet->rcv_flags |= HFI1_HAS_GRH;
-	} else
+	} else {
 		goto drop;
+	}
 
 	trace_input_ibhdr(rcd->dd, hdr);
 
@@ -643,17 +600,17 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 	inc_opstats(tlen, &rcd->opstats->stats[opcode]);
 
 	/* Get the destination QP number. */
-	qp_num = be32_to_cpu(packet->ohdr->bth[1]) & HFI1_QPN_MASK;
+	qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
 	lid = be16_to_cpu(hdr->lrh[1]);
-	if (unlikely((lid >= HFI1_MULTICAST_LID_BASE) &&
-	    (lid != HFI1_PERMISSIVE_LID))) {
-		struct hfi1_mcast *mcast;
-		struct hfi1_mcast_qp *p;
+	if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+		     (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
+		struct rvt_mcast *mcast;
+		struct rvt_mcast_qp *p;
 
 		if (lnh != HFI1_LRH_GRH)
 			goto drop;
-		mcast = hfi1_mcast_find(ibp, &hdr->u.l.grh.dgid);
-		if (mcast == NULL)
+		mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+		if (!mcast)
 			goto drop;
 		list_for_each_entry_rcu(p, &mcast->qp_list, list) {
 			packet->qp = p->qp;
@@ -663,14 +620,14 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 			spin_unlock_irqrestore(&packet->qp->r_lock, flags);
 		}
 		/*
-		 * Notify hfi1_multicast_detach() if it is waiting for us
+		 * Notify rvt_multicast_detach() if it is waiting for us
 		 * to finish.
 		 */
 		if (atomic_dec_return(&mcast->refcount) <= 1)
 			wake_up(&mcast->wait);
 	} else {
 		rcu_read_lock();
-		packet->qp = hfi1_lookup_qpn(ibp, qp_num);
+		packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 		if (!packet->qp) {
 			rcu_read_unlock();
 			goto drop;
@@ -684,7 +641,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 	return;
 
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }
 
 /*
@@ -695,15 +652,17 @@ static void mem_timer(unsigned long data)
 {
 	struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data;
 	struct list_head *list = &dev->memwait;
-	struct hfi1_qp *qp = NULL;
+	struct rvt_qp *qp = NULL;
 	struct iowait *wait;
 	unsigned long flags;
+	struct hfi1_qp_priv *priv;
 
 	write_seqlock_irqsave(&dev->iowait_lock, flags);
 	if (!list_empty(list)) {
 		wait = list_first_entry(list, struct iowait, list);
-		qp = container_of(wait, struct hfi1_qp, s_iowait);
-		list_del_init(&qp->s_iowait.list);
+		qp = iowait_to_qp(wait);
+		priv = qp->priv;
+		list_del_init(&priv->s_iowait.list);
 		/* refcount held until actual wake up */
 		if (!list_empty(list))
 			mod_timer(&dev->mem_timer, jiffies + 1);
@@ -711,12 +670,12 @@ static void mem_timer(unsigned long data)
 	write_sequnlock_irqrestore(&dev->iowait_lock, flags);
 
 	if (qp)
-		hfi1_qp_wakeup(qp, HFI1_S_WAIT_KMEM);
+		hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
 }
 
-void update_sge(struct hfi1_sge_state *ss, u32 length)
+void update_sge(struct rvt_sge_state *ss, u32 length)
 {
-	struct hfi1_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	sge->vaddr += length;
 	sge->length -= length;
@@ -725,7 +684,7 @@ void update_sge(struct hfi1_sge_state *ss, u32 length)
 		if (--ss->num_sge)
 			*sge = *ss->sg_list++;
 	} else if (sge->length == 0 && sge->mr->lkey) {
-		if (++sge->n >= HFI1_SEGSZ) {
+		if (++sge->n >= RVT_SEGSZ) {
 			if (++sge->m >= sge->mr->mapsz)
 				return;
 			sge->n = 0;
@@ -735,143 +694,55 @@ void update_sge(struct hfi1_sge_state *ss, u32 length)
 	}
 }
 
-static noinline struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
-						struct hfi1_qp *qp)
-{
-	struct verbs_txreq *tx;
-	unsigned long flags;
-
-	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
-	if (!tx) {
-		spin_lock_irqsave(&qp->s_lock, flags);
-		write_seqlock(&dev->iowait_lock);
-		if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK &&
-		    list_empty(&qp->s_iowait.list)) {
-			dev->n_txwait++;
-			qp->s_flags |= HFI1_S_WAIT_TX;
-			list_add_tail(&qp->s_iowait.list, &dev->txwait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TX);
-			atomic_inc(&qp->refcount);
-		}
-		qp->s_flags &= ~HFI1_S_BUSY;
-		write_sequnlock(&dev->iowait_lock);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		tx = ERR_PTR(-EBUSY);
-	}
-	return tx;
-}
-
-static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
-					    struct hfi1_qp *qp)
-{
-	struct verbs_txreq *tx;
-
-	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
-	if (!tx) {
-		/* call slow path to get the lock */
-		tx =  __get_txreq(dev, qp);
-		if (IS_ERR(tx))
-			return tx;
-	}
-	tx->qp = qp;
-	return tx;
-}
-
-void hfi1_put_txreq(struct verbs_txreq *tx)
-{
-	struct hfi1_ibdev *dev;
-	struct hfi1_qp *qp;
-	unsigned long flags;
-	unsigned int seq;
-
-	qp = tx->qp;
-	dev = to_idev(qp->ibqp.device);
-
-	if (tx->mr) {
-		hfi1_put_mr(tx->mr);
-		tx->mr = NULL;
-	}
-	sdma_txclean(dd_from_dev(dev), &tx->txreq);
-
-	/* Free verbs_txreq and return to slab cache */
-	kmem_cache_free(dev->verbs_txreq_cache, tx);
-
-	do {
-		seq = read_seqbegin(&dev->iowait_lock);
-		if (!list_empty(&dev->txwait)) {
-			struct iowait *wait;
-
-			write_seqlock_irqsave(&dev->iowait_lock, flags);
-			/* Wake up first QP wanting a free struct */
-			wait = list_first_entry(&dev->txwait, struct iowait,
-						list);
-			qp = container_of(wait, struct hfi1_qp, s_iowait);
-			list_del_init(&qp->s_iowait.list);
-			/* refcount held until actual wake up */
-			write_sequnlock_irqrestore(&dev->iowait_lock, flags);
-			hfi1_qp_wakeup(qp, HFI1_S_WAIT_TX);
-			break;
-		}
-	} while (read_seqretry(&dev->iowait_lock, seq));
-}
-
 /*
  * This is called with progress side lock held.
  */
 /* New API */
 static void verbs_sdma_complete(
 	struct sdma_txreq *cookie,
-	int status,
-	int drained)
+	int status)
 {
 	struct verbs_txreq *tx =
 		container_of(cookie, struct verbs_txreq, txreq);
-	struct hfi1_qp *qp = tx->qp;
+	struct rvt_qp *qp = tx->qp;
 
 	spin_lock(&qp->s_lock);
-	if (tx->wqe)
+	if (tx->wqe) {
 		hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
-	else if (qp->ibqp.qp_type == IB_QPT_RC) {
+	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 		struct hfi1_ib_header *hdr;
 
 		hdr = &tx->phdr.hdr;
 		hfi1_rc_send_complete(qp, hdr);
 	}
-	if (drained) {
-		/*
-		 * This happens when the send engine notes
-		 * a QP in the error state and cannot
-		 * do the flush work until that QP's
-		 * sdma work has finished.
-		 */
-		if (qp->s_flags & HFI1_S_WAIT_DMA) {
-			qp->s_flags &= ~HFI1_S_WAIT_DMA;
-			hfi1_schedule_send(qp);
-		}
-	}
 	spin_unlock(&qp->s_lock);
 
 	hfi1_put_txreq(tx);
 }
 
-static int wait_kmem(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
+static int wait_kmem(struct hfi1_ibdev *dev,
+		     struct rvt_qp *qp,
+		     struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	unsigned long flags;
 	int ret = 0;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		write_seqlock(&dev->iowait_lock);
-		if (list_empty(&qp->s_iowait.list)) {
+		list_add_tail(&ps->s_txreq->txreq.list,
+			      &priv->s_iowait.tx_head);
+		if (list_empty(&priv->s_iowait.list)) {
 			if (list_empty(&dev->memwait))
 				mod_timer(&dev->mem_timer, jiffies + 1);
-			qp->s_flags |= HFI1_S_WAIT_KMEM;
-			list_add_tail(&qp->s_iowait.list, &dev->memwait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_KMEM);
+			qp->s_flags |= RVT_S_WAIT_KMEM;
+			list_add_tail(&priv->s_iowait.list, &dev->memwait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
 			atomic_inc(&qp->refcount);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~HFI1_S_BUSY;
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -884,14 +755,14 @@ static int wait_kmem(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
  *
  * Add failures will revert the sge cursor
  */
-static int build_verbs_ulp_payload(
+static noinline int build_verbs_ulp_payload(
 	struct sdma_engine *sde,
-	struct hfi1_sge_state *ss,
+	struct rvt_sge_state *ss,
 	u32 length,
 	struct verbs_txreq *tx)
 {
-	struct hfi1_sge *sg_list = ss->sg_list;
-	struct hfi1_sge sge = ss->sge;
+	struct rvt_sge *sg_list = ss->sg_list;
+	struct rvt_sge sge = ss->sge;
 	u8 num_sge = ss->num_sge;
 	u32 len;
 	int ret = 0;
@@ -928,23 +799,21 @@ bail_txadd:
  * NOTE: DMA mapping is held in the tx until completed in the ring or
  *       the tx desc is freed without having been submitted to the ring
  *
- * This routine insures the following all the helper routine
- * calls succeed.
+ * This routine ensures all the helper routine calls succeed.
  */
 /* New API */
 static int build_verbs_tx_desc(
 	struct sdma_engine *sde,
-	struct hfi1_sge_state *ss,
+	struct rvt_sge_state *ss,
 	u32 length,
 	struct verbs_txreq *tx,
 	struct ahg_ib_header *ahdr,
 	u64 pbc)
 {
 	int ret = 0;
-	struct hfi1_pio_header *phdr;
+	struct hfi1_pio_header *phdr = &tx->phdr;
 	u16 hdrbytes = tx->hdr_dwords << 2;
 
-	phdr = &tx->phdr;
 	if (!ahdr->ahgcount) {
 		ret = sdma_txinit_ahg(
 			&tx->txreq,
@@ -958,29 +827,14 @@ static int build_verbs_tx_desc(
 		if (ret)
 			goto bail_txadd;
 		phdr->pbc = cpu_to_le64(pbc);
-		memcpy(&phdr->hdr, &ahdr->ibh, hdrbytes - sizeof(phdr->pbc));
-		/* add the header */
 		ret = sdma_txadd_kvaddr(
 			sde->dd,
 			&tx->txreq,
-			&tx->phdr,
-			tx->hdr_dwords << 2);
+			phdr,
+			hdrbytes);
 		if (ret)
 			goto bail_txadd;
 	} else {
-		struct hfi1_other_headers *sohdr = &ahdr->ibh.u.oth;
-		struct hfi1_other_headers *dohdr = &phdr->hdr.u.oth;
-
-		/* needed in rc_send_complete() */
-		phdr->hdr.lrh[0] = ahdr->ibh.lrh[0];
-		if ((be16_to_cpu(phdr->hdr.lrh[0]) & 3) == HFI1_LRH_GRH) {
-			sohdr = &ahdr->ibh.u.l.oth;
-			dohdr = &phdr->hdr.u.l.oth;
-		}
-		/* opcode */
-		dohdr->bth[0] = sohdr->bth[0];
-		/* PSN/ACK  */
-		dohdr->bth[2] = sohdr->bth[2];
 		ret = sdma_txinit_ahg(
 			&tx->txreq,
 			ahdr->tx_flags,
@@ -1001,80 +855,75 @@ bail_txadd:
 	return ret;
 }
 
-int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc)
 {
-	struct ahg_ib_header *ahdr = qp->s_hdr;
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct ahg_ib_header *ahdr = priv->s_hdr;
 	u32 hdrwords = qp->s_hdrwords;
-	struct hfi1_sge_state *ss = qp->s_cur_sge;
+	struct rvt_sge_state *ss = qp->s_cur_sge;
 	u32 len = qp->s_cur_size;
 	u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */
 	struct hfi1_ibdev *dev = ps->dev;
 	struct hfi1_pportdata *ppd = ps->ppd;
 	struct verbs_txreq *tx;
-	struct sdma_txreq *stx;
 	u64 pbc_flags = 0;
-	u8 sc5 = qp->s_sc;
+	u8 sc5 = priv->s_sc;
+
 	int ret;
 
-	if (!list_empty(&qp->s_iowait.tx_head)) {
-		stx = list_first_entry(
-			&qp->s_iowait.tx_head,
-			struct sdma_txreq,
-			list);
-		list_del_init(&stx->list);
-		tx = container_of(stx, struct verbs_txreq, txreq);
-		ret = sdma_send_txreq(tx->sde, &qp->s_iowait, stx);
-		if (unlikely(ret == -ECOMM))
+	tx = ps->s_txreq;
+	if (!sdma_txreq_built(&tx->txreq)) {
+		if (likely(pbc == 0)) {
+			u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+			/* No vl15 here */
+			/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
+			pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+
+			pbc = create_pbc(ppd,
+					 pbc_flags,
+					 qp->srate_mbps,
+					 vl,
+					 plen);
+		}
+		tx->wqe = qp->s_wqe;
+		ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
+		if (unlikely(ret))
+			goto bail_build;
+	}
+	ret =  sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq);
+	if (unlikely(ret < 0)) {
+		if (ret == -ECOMM)
 			goto bail_ecomm;
 		return ret;
 	}
-
-	tx = get_txreq(dev, qp);
-	if (IS_ERR(tx))
-		goto bail_tx;
-
-	tx->sde = qp->s_sde;
-
-	if (likely(pbc == 0)) {
-		u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
-		/* No vl15 here */
-		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
-
-		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
-	}
-	tx->wqe = qp->s_wqe;
-	tx->mr = qp->s_rdma_mr;
-	if (qp->s_rdma_mr)
-		qp->s_rdma_mr = NULL;
-	tx->hdr_dwords = hdrwords + 2;
-	ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
-	if (unlikely(ret))
-		goto bail_build;
-	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh);
-	ret =  sdma_send_txreq(tx->sde, &qp->s_iowait, &tx->txreq);
-	if (unlikely(ret == -ECOMM))
-		goto bail_ecomm;
+	trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+				&ps->s_txreq->phdr.hdr);
 	return ret;
 
 bail_ecomm:
 	/* The current one got "sent" */
 	return 0;
 bail_build:
-	/* kmalloc or mapping fail */
-	hfi1_put_txreq(tx);
-	return wait_kmem(dev, qp);
-bail_tx:
-	return PTR_ERR(tx);
+	ret = wait_kmem(dev, qp, ps);
+	if (!ret) {
+		/* free txreq - bad state */
+		hfi1_put_txreq(ps->s_txreq);
+		ps->s_txreq = NULL;
+	}
+	return ret;
 }
 
 /*
  * If we are now in the error state, return zero to flush the
  * send work request.
  */
-static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc)
+static int pio_wait(struct rvt_qp *qp,
+		    struct send_context *sc,
+		    struct hfi1_pkt_state *ps,
+		    u32 flag)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_devdata *dd = sc->dd;
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
 	unsigned long flags;
@@ -1087,74 +936,89 @@ static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc)
 	 * enabling the PIO avail interrupt.
 	 */
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		write_seqlock(&dev->iowait_lock);
-		if (list_empty(&qp->s_iowait.list)) {
+		list_add_tail(&ps->s_txreq->txreq.list,
+			      &priv->s_iowait.tx_head);
+		if (list_empty(&priv->s_iowait.list)) {
 			struct hfi1_ibdev *dev = &dd->verbs_dev;
 			int was_empty;
 
+			dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
+			dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
 			dev->n_piowait++;
-			qp->s_flags |= HFI1_S_WAIT_PIO;
+			qp->s_flags |= flag;
 			was_empty = list_empty(&sc->piowait);
-			list_add_tail(&qp->s_iowait.list, &sc->piowait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_PIO);
+			list_add_tail(&priv->s_iowait.list, &sc->piowait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
 			atomic_inc(&qp->refcount);
 			/* counting: only call wantpiobuf_intr if first user */
 			if (was_empty)
 				hfi1_sc_wantpiobuf_intr(sc, 1);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~HFI1_S_BUSY;
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
-struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5)
+static void verbs_pio_complete(void *arg, int code)
 {
-	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
-	struct hfi1_pportdata *ppd = dd->pport + (qp->port_num - 1);
-	u8 vl;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	vl = sc_to_vlt(dd, sc5);
-	if (vl >= ppd->vls_supported && vl != 15)
-		return NULL;
-	return dd->vld[vl].sc;
+	if (iowait_pio_dec(&priv->s_iowait))
+		iowait_drain_wakeup(&priv->s_iowait);
 }
 
-int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc)
 {
-	struct ahg_ib_header *ahdr = qp->s_hdr;
+	struct hfi1_qp_priv *priv = qp->priv;
 	u32 hdrwords = qp->s_hdrwords;
-	struct hfi1_sge_state *ss = qp->s_cur_sge;
+	struct rvt_sge_state *ss = qp->s_cur_sge;
 	u32 len = qp->s_cur_size;
 	u32 dwords = (len + 3) >> 2;
 	u32 plen = hdrwords + dwords + 2; /* includes pbc */
 	struct hfi1_pportdata *ppd = ps->ppd;
-	u32 *hdr = (u32 *)&ahdr->ibh;
+	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
 	u64 pbc_flags = 0;
-	u32 sc5;
+	u8 sc5;
 	unsigned long flags = 0;
 	struct send_context *sc;
 	struct pio_buf *pbuf;
 	int wc_status = IB_WC_SUCCESS;
+	int ret = 0;
+	pio_release_cb cb = NULL;
+
+	/* only RC/UC use complete */
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+		cb = verbs_pio_complete;
+		break;
+	default:
+		break;
+	}
 
 	/* vl15 special case taken care of in ud.c */
-	sc5 = qp->s_sc;
-	sc = qp_to_send_context(qp, sc5);
+	sc5 = priv->s_sc;
+	sc = ps->s_txreq->psc;
 
-	if (!sc)
-		return -EINVAL;
 	if (likely(pbc == 0)) {
-		u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+		u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
 		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
 		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
 		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
 	}
-	pbuf = sc_buffer_alloc(sc, plen, NULL, NULL);
-	if (unlikely(pbuf == NULL)) {
+	if (cb)
+		iowait_pio_inc(&priv->s_iowait);
+	pbuf = sc_buffer_alloc(sc, plen, cb, qp);
+	if (unlikely(!pbuf)) {
+		if (cb)
+			verbs_pio_complete(qp, 0);
 		if (ppd->host_link_state != HLS_UP_ACTIVE) {
 			/*
 			 * If we have filled the PIO buffers to capacity and are
@@ -1174,7 +1038,12 @@ int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 			 * so lets continue to queue the request.
 			 */
 			hfi1_cdbg(PIO, "alloc failed. state active, queuing");
-			return no_bufs_available(qp, sc);
+			ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO);
+			if (!ret)
+				/* txreq not queued - free */
+				goto bail;
+			/* tx consumed in wait */
+			return ret;
 		}
 	}
 
@@ -1182,7 +1051,7 @@ int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 		pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
 	} else {
 		if (ss) {
-			seg_pio_copy_start(pbuf, pbc, hdr, hdrwords*4);
+			seg_pio_copy_start(pbuf, pbc, hdr, hdrwords * 4);
 			while (len) {
 				void *addr = ss->sge.vaddr;
 				u32 slen = ss->sge.length;
@@ -1197,12 +1066,8 @@ int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 		}
 	}
 
-	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh);
-
-	if (qp->s_rdma_mr) {
-		hfi1_put_mr(qp->s_rdma_mr);
-		qp->s_rdma_mr = NULL;
-	}
+	trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+			       &ps->s_txreq->phdr.hdr);
 
 pio_bail:
 	if (qp->s_wqe) {
@@ -1211,10 +1076,15 @@ pio_bail:
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 		spin_lock_irqsave(&qp->s_lock, flags);
-		hfi1_rc_send_complete(qp, &ahdr->ibh);
+		hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 	}
-	return 0;
+
+	ret = 0;
+
+bail:
+	hfi1_put_txreq(ps->s_txreq);
+	return ret;
 }
 
 /*
@@ -1247,13 +1117,14 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
  */
 static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
 				    struct hfi1_ib_header *hdr,
-				    struct hfi1_qp *qp)
+				    struct rvt_qp *qp)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_other_headers *ohdr;
 	struct hfi1_devdata *dd;
 	int i = 0;
 	u16 pkey;
-	u8 lnh, sc5 = qp->s_sc;
+	u8 lnh, sc5 = priv->s_sc;
 
 	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
 		return 0;
@@ -1271,14 +1142,14 @@ static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
 	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
 		goto bad;
 
-
 	/* Is the pkey = 0x0, or 0x8000? */
 	if ((pkey & PKEY_LOW_15_MASK) == 0)
 		goto bad;
 
 	/* The most likely matching pkey has index qp->s_pkey_index */
 	if (unlikely(!egress_pkey_matches_entry(pkey,
-					ppd->pkeys[qp->s_pkey_index]))) {
+						ppd->pkeys
+						[qp->s_pkey_index]))) {
 		/* no match - try the entire table */
 		for (; i < MAX_PKEY_VALUES; i++) {
 			if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
@@ -1302,31 +1173,65 @@ bad:
 }
 
 /**
+ * get_send_routine - choose an egress routine
+ *
+ * Choose an egress routine based on QP type
+ * and size
+ */
+static inline send_routine get_send_routine(struct rvt_qp *qp,
+					    struct verbs_txreq *tx)
+{
+	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct hfi1_ib_header *h = &tx->phdr.hdr;
+
+	if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
+		return dd->process_pio_send;
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_SMI:
+		return dd->process_pio_send;
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		break;
+	case IB_QPT_RC:
+		if (piothreshold &&
+		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+		    (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
+		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
+		    !sdma_txreq_built(&tx->txreq))
+			return dd->process_pio_send;
+		break;
+	case IB_QPT_UC:
+		if (piothreshold &&
+		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+		    (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
+		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
+		    !sdma_txreq_built(&tx->txreq))
+			return dd->process_pio_send;
+		break;
+	default:
+		break;
+	}
+	return dd->process_dma_send;
+}
+
+/**
  * hfi1_verbs_send - send a packet
  * @qp: the QP to send on
  * @ps: the state of the packet to send
  *
  * Return zero if packet is sent or queued OK.
- * Return non-zero and clear qp->s_flags HFI1_S_BUSY otherwise.
+ * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
  */
-int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps)
+int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
-	struct ahg_ib_header *ahdr = qp->s_hdr;
+	struct hfi1_qp_priv *priv = qp->priv;
+	send_routine sr;
 	int ret;
-	int pio = 0;
-	unsigned long flags = 0;
-
-	/*
-	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
-	 * can defer SDMA restart until link goes ACTIVE without
-	 * worrying about just how we got there.
-	 */
-	if ((qp->ibqp.qp_type == IB_QPT_SMI) ||
-	    !(dd->flags & HFI1_HAS_SEND_DMA))
-		pio = 1;
 
-	ret = egress_pkey_check(dd->pport, &ahdr->ibh, qp);
+	sr = get_send_routine(qp, ps->s_txreq);
+	ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp);
 	if (unlikely(ret)) {
 		/*
 		 * The value we are returning here does not get propagated to
@@ -1336,7 +1241,9 @@ int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps)
 		 * mechanism for handling the errors. So for SDMA we can just
 		 * return.
 		 */
-		if (pio) {
+		if (sr == dd->process_pio_send) {
+			unsigned long flags;
+
 			hfi1_cdbg(PIO, "%s() Failed. Completing with err",
 				  __func__);
 			spin_lock_irqsave(&qp->s_lock, flags);
@@ -1345,71 +1252,57 @@ int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps)
 		}
 		return -EINVAL;
 	}
-
-	if (pio) {
-		ret = dd->process_pio_send(qp, ps, 0);
-	} else {
-#ifdef CONFIG_SDMA_VERBOSITY
-		dd_dev_err(dd, "CONFIG SDMA %s:%d %s()\n",
-			   slashstrip(__FILE__), __LINE__, __func__);
-		dd_dev_err(dd, "SDMA hdrwords = %u, len = %u\n", qp->s_hdrwords,
-			   qp->s_cur_size);
-#endif
-		ret = dd->process_dma_send(qp, ps, 0);
-	}
-
-	return ret;
+	if (sr == dd->process_dma_send && iowait_pio_pending(&priv->s_iowait))
+		return pio_wait(qp,
+				ps->s_txreq->psc,
+				ps,
+				RVT_S_WAIT_PIO_DRAIN);
+	return sr(qp, ps, 0);
 }
 
-static int query_device(struct ib_device *ibdev,
-			struct ib_device_attr *props,
-			struct ib_udata *uhw)
+/**
+ * hfi1_fill_device_attr - Fill in rvt dev info device attributes.
+ * @dd: the device data structure
+ */
+static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
 {
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct hfi1_ibdev *dev = to_idev(ibdev);
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-	memset(props, 0, sizeof(*props));
-
-	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
-		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
-		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
-		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-
-	props->page_size_cap = PAGE_SIZE;
-	props->vendor_id =
-		dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
-	props->vendor_part_id = dd->pcidev->device;
-	props->hw_ver = dd->minrev;
-	props->sys_image_guid = ib_hfi1_sys_image_guid;
-	props->max_mr_size = ~0ULL;
-	props->max_qp = hfi1_max_qps;
-	props->max_qp_wr = hfi1_max_qp_wrs;
-	props->max_sge = hfi1_max_sges;
-	props->max_sge_rd = hfi1_max_sges;
-	props->max_cq = hfi1_max_cqs;
-	props->max_ah = hfi1_max_ahs;
-	props->max_cqe = hfi1_max_cqes;
-	props->max_mr = dev->lk_table.max;
-	props->max_fmr = dev->lk_table.max;
-	props->max_map_per_fmr = 32767;
-	props->max_pd = hfi1_max_pds;
-	props->max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
-	props->max_qp_init_rd_atom = 255;
-	/* props->max_res_rd_atom */
-	props->max_srq = hfi1_max_srqs;
-	props->max_srq_wr = hfi1_max_srq_wrs;
-	props->max_srq_sge = hfi1_max_srq_sges;
-	/* props->local_ca_ack_delay */
-	props->atomic_cap = IB_ATOMIC_GLOB;
-	props->max_pkeys = hfi1_get_npkeys(dd);
-	props->max_mcast_grp = hfi1_max_mcast_grps;
-	props->max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
-	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
-		props->max_mcast_grp;
-
-	return 0;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+
+	memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
+
+	rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+			IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+			IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+			IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+	rdi->dparms.props.page_size_cap = PAGE_SIZE;
+	rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
+	rdi->dparms.props.vendor_part_id = dd->pcidev->device;
+	rdi->dparms.props.hw_ver = dd->minrev;
+	rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid;
+	rdi->dparms.props.max_mr_size = ~0ULL;
+	rdi->dparms.props.max_qp = hfi1_max_qps;
+	rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
+	rdi->dparms.props.max_sge = hfi1_max_sges;
+	rdi->dparms.props.max_sge_rd = hfi1_max_sges;
+	rdi->dparms.props.max_cq = hfi1_max_cqs;
+	rdi->dparms.props.max_ah = hfi1_max_ahs;
+	rdi->dparms.props.max_cqe = hfi1_max_cqes;
+	rdi->dparms.props.max_mr = rdi->lkey_table.max;
+	rdi->dparms.props.max_fmr = rdi->lkey_table.max;
+	rdi->dparms.props.max_map_per_fmr = 32767;
+	rdi->dparms.props.max_pd = hfi1_max_pds;
+	rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
+	rdi->dparms.props.max_qp_init_rd_atom = 255;
+	rdi->dparms.props.max_srq = hfi1_max_srqs;
+	rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs;
+	rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges;
+	rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
+	rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd);
+	rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps;
+	rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
+	rdi->dparms.props.max_total_mcast_qp_attach =
+					rdi->dparms.props.max_mcast_qp_attach *
+					rdi->dparms.props.max_mcast_grp;
 }
 
 static inline u16 opa_speed_to_ib(u16 in)
@@ -1443,33 +1336,24 @@ static inline u16 opa_width_to_ib(u16 in)
 	}
 }
 
-static int query_port(struct ib_device *ibdev, u8 port,
+static int query_port(struct rvt_dev_info *rdi, u8 port_num,
 		      struct ib_port_attr *props)
 {
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct hfi1_ibport *ibp = to_iport(ibdev, port);
-	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
 	u16 lid = ppd->lid;
 
-	memset(props, 0, sizeof(*props));
 	props->lid = lid ? lid : 0;
 	props->lmc = ppd->lmc;
-	props->sm_lid = ibp->sm_lid;
-	props->sm_sl = ibp->sm_sl;
 	/* OPA logical states match IB logical states */
 	props->state = driver_lstate(ppd);
 	props->phys_state = hfi1_ibphys_portstate(ppd);
-	props->port_cap_flags = ibp->port_cap_flags;
 	props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
-	props->max_msg_sz = 0x80000000;
-	props->pkey_tbl_len = hfi1_get_npkeys(dd);
-	props->bad_pkey_cntr = ibp->pkey_violations;
-	props->qkey_viol_cntr = ibp->qkey_violations;
 	props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
 	/* see rate_show() in ib core/sysfs.c */
 	props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
 	props->max_vl_num = ppd->vls_supported;
-	props->init_type_reply = 0;
 
 	/* Once we are a "first class" citizen and have added the OPA MTUs to
 	 * the core we can advertise the larger MTU enum to the ULPs, for now
@@ -1483,27 +1367,6 @@ static int query_port(struct ib_device *ibdev, u8 port,
 				      4096 : hfi1_max_mtu), IB_MTU_4096);
 	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
 		mtu_to_enum(ppd->ibmtu, IB_MTU_2048);
-	props->subnet_timeout = ibp->subnet_timeout;
-
-	return 0;
-}
-
-static int port_immutable(struct ib_device *ibdev, u8 port_num,
-			  struct ib_port_immutable *immutable)
-{
-	struct ib_port_attr attr;
-	int err;
-
-	err = query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
-
-	memset(immutable, 0, sizeof(*immutable));
-
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_INTEL_OPA;
-	immutable->max_mad_size = OPA_MGMT_MAD_SIZE;
 
 	return 0;
 }
@@ -1547,102 +1410,31 @@ bail:
 	return ret;
 }
 
-static int modify_port(struct ib_device *ibdev, u8 port,
-		       int port_modify_mask, struct ib_port_modify *props)
-{
-	struct hfi1_ibport *ibp = to_iport(ibdev, port);
-	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-	int ret = 0;
-
-	ibp->port_cap_flags |= props->set_port_cap_mask;
-	ibp->port_cap_flags &= ~props->clr_port_cap_mask;
-	if (props->set_port_cap_mask || props->clr_port_cap_mask)
-		hfi1_cap_mask_chg(ibp);
-	if (port_modify_mask & IB_PORT_SHUTDOWN) {
-		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
-		  OPA_LINKDOWN_REASON_UNKNOWN);
-		ret = set_link_state(ppd, HLS_DN_DOWNDEF);
-	}
-	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
-		ibp->qkey_violations = 0;
-	return ret;
-}
-
-static int query_gid(struct ib_device *ibdev, u8 port,
-		     int index, union ib_gid *gid)
+static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
 {
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	int ret = 0;
-
-	if (!port || port > dd->num_pports)
-		ret = -EINVAL;
-	else {
-		struct hfi1_ibport *ibp = to_iport(ibdev, port);
-		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-
-		gid->global.subnet_prefix = ibp->gid_prefix;
-		if (index == 0)
-			gid->global.interface_id = cpu_to_be64(ppd->guid);
-		else if (index < HFI1_GUIDS_PER_PORT)
-			gid->global.interface_id = ibp->guids[index - 1];
-		else
-			ret = -EINVAL;
-	}
-
-	return ret;
-}
-
-static struct ib_pd *alloc_pd(struct ib_device *ibdev,
-			      struct ib_ucontext *context,
-			      struct ib_udata *udata)
-{
-	struct hfi1_ibdev *dev = to_idev(ibdev);
-	struct hfi1_pd *pd;
-	struct ib_pd *ret;
-
-	/*
-	 * This is actually totally arbitrary.  Some correctness tests
-	 * assume there's a maximum number of PDs that can be allocated.
-	 * We don't actually have this limit, but we fail the test if
-	 * we allow allocations of more than we report for this value.
-	 */
-
-	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock(&dev->n_pds_lock);
-	if (dev->n_pds_allocated == hfi1_max_pds) {
-		spin_unlock(&dev->n_pds_lock);
-		kfree(pd);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_pds_allocated++;
-	spin_unlock(&dev->n_pds_lock);
-
-	/* ib_alloc_pd() will initialize pd->ibpd. */
-	pd->user = udata != NULL;
-
-	ret = &pd->ibpd;
+	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
+	int ret;
 
-bail:
+	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
+			     OPA_LINKDOWN_REASON_UNKNOWN);
+	ret = set_link_state(ppd, HLS_DN_DOWNDEF);
 	return ret;
 }
 
-static int dealloc_pd(struct ib_pd *ibpd)
+static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
+			    int guid_index, __be64 *guid)
 {
-	struct hfi1_pd *pd = to_ipd(ibpd);
-	struct hfi1_ibdev *dev = to_idev(ibpd->device);
-
-	spin_lock(&dev->n_pds_lock);
-	dev->n_pds_allocated--;
-	spin_unlock(&dev->n_pds_lock);
+	struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
-	kfree(pd);
+	if (guid_index == 0)
+		*guid = cpu_to_be64(ppd->guid);
+	else if (guid_index < HFI1_GUIDS_PER_PORT)
+		*guid = ibp->guids[guid_index - 1];
+	else
+		return -EINVAL;
 
 	return 0;
 }
@@ -1657,101 +1449,57 @@ u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah)
 	return ibp->sl_to_sc[ah->sl];
 }
 
-int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
 {
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
 	struct hfi1_devdata *dd;
 	u8 sc5;
 
-	/* A multicast address requires a GRH (see ch. 8.4.1). */
-	if (ah_attr->dlid >= HFI1_MULTICAST_LID_BASE &&
-	    ah_attr->dlid != HFI1_PERMISSIVE_LID &&
-	    !(ah_attr->ah_flags & IB_AH_GRH))
-		goto bail;
-	if ((ah_attr->ah_flags & IB_AH_GRH) &&
-	    ah_attr->grh.sgid_index >= HFI1_GUIDS_PER_PORT)
-		goto bail;
-	if (ah_attr->dlid == 0)
-		goto bail;
-	if (ah_attr->port_num < 1 ||
-	    ah_attr->port_num > ibdev->phys_port_cnt)
-		goto bail;
-	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
-	    ib_rate_to_mbps(ah_attr->static_rate) < 0)
-		goto bail;
-	if (ah_attr->sl >= OPA_MAX_SLS)
-		goto bail;
 	/* test the mapping for validity */
 	ibp = to_iport(ibdev, ah_attr->port_num);
 	ppd = ppd_from_ibp(ibp);
 	sc5 = ibp->sl_to_sc[ah_attr->sl];
 	dd = dd_from_ppd(ppd);
 	if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
-		goto bail;
+		return -EINVAL;
 	return 0;
-bail:
-	return -EINVAL;
 }
 
-/**
- * create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *create_ah(struct ib_pd *pd,
-			       struct ib_ah_attr *ah_attr)
+static void hfi1_notify_new_ah(struct ib_device *ibdev,
+			       struct ib_ah_attr *ah_attr,
+			       struct rvt_ah *ah)
 {
-	struct hfi1_ah *ah;
-	struct ib_ah *ret;
-	struct hfi1_ibdev *dev = to_idev(pd->device);
-	unsigned long flags;
-
-	if (hfi1_check_ah(pd->device, ah_attr)) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
-	if (!ah) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	if (dev->n_ahs_allocated == hfi1_max_ahs) {
-		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-		kfree(ah);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_ahs_allocated++;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	/* ib_create_ah() will initialize ah->ibah. */
-	ah->attr = *ah_attr;
-	atomic_set(&ah->refcount, 0);
+	struct hfi1_ibport *ibp;
+	struct hfi1_pportdata *ppd;
+	struct hfi1_devdata *dd;
+	u8 sc5;
 
-	ret = &ah->ibah;
+	/*
+	 * Do not trust reading anything from rvt_ah at this point as it is not
+	 * done being setup. We can however modify things which we need to set.
+	 */
 
-bail:
-	return ret;
+	ibp = to_iport(ibdev, ah_attr->port_num);
+	ppd = ppd_from_ibp(ibp);
+	sc5 = ibp->sl_to_sc[ah->attr.sl];
+	dd = dd_from_ppd(ppd);
+	ah->vl = sc_to_vlt(dd, sc5);
+	if (ah->vl < num_vls || ah->vl == 15)
+		ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu);
 }
 
 struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
 {
 	struct ib_ah_attr attr;
 	struct ib_ah *ah = ERR_PTR(-EINVAL);
-	struct hfi1_qp *qp0;
+	struct rvt_qp *qp0;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.dlid = dlid;
 	attr.port_num = ppd_from_ibp(ibp)->port;
 	rcu_read_lock();
-	qp0 = rcu_dereference(ibp->qp[0]);
+	qp0 = rcu_dereference(ibp->rvp.qp[0]);
 	if (qp0)
 		ah = ib_create_ah(qp0->ibqp.pd, &attr);
 	rcu_read_unlock();
@@ -1759,51 +1507,6 @@ struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
 }
 
 /**
- * destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int destroy_ah(struct ib_ah *ibah)
-{
-	struct hfi1_ibdev *dev = to_idev(ibah->device);
-	struct hfi1_ah *ah = to_iah(ibah);
-	unsigned long flags;
-
-	if (atomic_read(&ah->refcount) != 0)
-		return -EBUSY;
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	dev->n_ahs_allocated--;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	kfree(ah);
-
-	return 0;
-}
-
-static int modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct hfi1_ah *ah = to_iah(ibah);
-
-	if (hfi1_check_ah(ibah->device, ah_attr))
-		return -EINVAL;
-
-	ah->attr = *ah_attr;
-
-	return 0;
-}
-
-static int query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct hfi1_ah *ah = to_iah(ibah);
-
-	*ah_attr = ah->attr;
-
-	return 0;
-}
-
-/**
  * hfi1_get_npkeys - return the size of the PKEY table for context 0
  * @dd: the hfi1_ib device
  */
@@ -1812,54 +1515,6 @@ unsigned hfi1_get_npkeys(struct hfi1_devdata *dd)
 	return ARRAY_SIZE(dd->pport[0].pkeys);
 }
 
-static int query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-		      u16 *pkey)
-{
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	int ret;
-
-	if (index >= hfi1_get_npkeys(dd)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	*pkey = hfi1_get_pkey(to_iport(ibdev, port), index);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the driver
- */
-
-static struct ib_ucontext *alloc_ucontext(struct ib_device *ibdev,
-					  struct ib_udata *udata)
-{
-	struct hfi1_ucontext *context;
-	struct ib_ucontext *ret;
-
-	context = kmalloc(sizeof(*context), GFP_KERNEL);
-	if (!context) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	ret = &context->ibucontext;
-
-bail:
-	return ret;
-}
-
-static int dealloc_ucontext(struct ib_ucontext *context)
-{
-	kfree(to_iucontext(context));
-	return 0;
-}
-
 static void init_ibport(struct hfi1_pportdata *ppd)
 {
 	struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -1871,28 +1526,21 @@ static void init_ibport(struct hfi1_pportdata *ppd)
 		ibp->sc_to_sl[i] = i;
 	}
 
-	spin_lock_init(&ibp->lock);
+	spin_lock_init(&ibp->rvp.lock);
 	/* Set the prefix to the default value (see ch. 4.1.1) */
-	ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
-	ibp->sm_lid = 0;
+	ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
+	ibp->rvp.sm_lid = 0;
 	/* Below should only set bits defined in OPA PortInfo.CapabilityMask */
-	ibp->port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
+	ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
 		IB_PORT_CAP_MASK_NOTICE_SUP;
-	ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
-	ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
-	ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
-	ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-	ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
-
-	RCU_INIT_POINTER(ibp->qp[0], NULL);
-	RCU_INIT_POINTER(ibp->qp[1], NULL);
-}
-
-static void verbs_txreq_kmem_cache_ctor(void *obj)
-{
-	struct verbs_txreq *tx = obj;
-
-	memset(tx, 0, sizeof(*tx));
+	ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+	ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+	ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+	ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+	ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+
+	RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
+	RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
 }
 
 /**
@@ -1903,76 +1551,26 @@ static void verbs_txreq_kmem_cache_ctor(void *obj)
 int hfi1_register_ib_device(struct hfi1_devdata *dd)
 {
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
-	struct ib_device *ibdev = &dev->ibdev;
+	struct ib_device *ibdev = &dev->rdi.ibdev;
 	struct hfi1_pportdata *ppd = dd->pport;
-	unsigned i, lk_tab_size;
+	unsigned i;
 	int ret;
 	size_t lcpysz = IB_DEVICE_NAME_MAX;
-	u16 descq_cnt;
-	char buf[TXREQ_NAME_LEN];
-
-	ret = hfi1_qp_init(dev);
-	if (ret)
-		goto err_qp_init;
-
 
 	for (i = 0; i < dd->num_pports; i++)
 		init_ibport(ppd + i);
 
 	/* Only need to initialize non-zero fields. */
-	spin_lock_init(&dev->n_pds_lock);
-	spin_lock_init(&dev->n_ahs_lock);
-	spin_lock_init(&dev->n_cqs_lock);
-	spin_lock_init(&dev->n_qps_lock);
-	spin_lock_init(&dev->n_srqs_lock);
-	spin_lock_init(&dev->n_mcast_grps_lock);
-	init_timer(&dev->mem_timer);
-	dev->mem_timer.function = mem_timer;
-	dev->mem_timer.data = (unsigned long) dev;
 
-	/*
-	 * The top hfi1_lkey_table_size bits are used to index the
-	 * table.  The lower 8 bits can be owned by the user (copied from
-	 * the LKEY).  The remaining bits act as a generation number or tag.
-	 */
-	spin_lock_init(&dev->lk_table.lock);
-	dev->lk_table.max = 1 << hfi1_lkey_table_size;
-	/* ensure generation is at least 4 bits (keys.c) */
-	if (hfi1_lkey_table_size > MAX_LKEY_TABLE_BITS) {
-		dd_dev_warn(dd, "lkey bits %u too large, reduced to %u\n",
-			      hfi1_lkey_table_size, MAX_LKEY_TABLE_BITS);
-		hfi1_lkey_table_size = MAX_LKEY_TABLE_BITS;
-	}
-	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
-	dev->lk_table.table = (struct hfi1_mregion __rcu **)
-		vmalloc(lk_tab_size);
-	if (dev->lk_table.table == NULL) {
-		ret = -ENOMEM;
-		goto err_lk;
-	}
-	RCU_INIT_POINTER(dev->dma_mr, NULL);
-	for (i = 0; i < dev->lk_table.max; i++)
-		RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
-	INIT_LIST_HEAD(&dev->pending_mmaps);
-	spin_lock_init(&dev->pending_lock);
+	setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
+
 	seqlock_init(&dev->iowait_lock);
-	dev->mmap_offset = PAGE_SIZE;
-	spin_lock_init(&dev->mmap_offset_lock);
 	INIT_LIST_HEAD(&dev->txwait);
 	INIT_LIST_HEAD(&dev->memwait);
 
-	descq_cnt = sdma_get_descq_cnt();
-
-	snprintf(buf, sizeof(buf), "hfi1_%u_vtxreq_cache", dd->unit);
-	/* SLAB_HWCACHE_ALIGN for AHG */
-	dev->verbs_txreq_cache = kmem_cache_create(buf,
-						   sizeof(struct verbs_txreq),
-						   0, SLAB_HWCACHE_ALIGN,
-						   verbs_txreq_kmem_cache_ctor);
-	if (!dev->verbs_txreq_cache) {
-		ret = -ENOMEM;
+	ret = verbs_txreq_init(dev);
+	if (ret)
 		goto err_verbs_txreq;
-	}
 
 	/*
 	 * The system image GUID is supposed to be the same for all
@@ -1985,142 +1583,119 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
 	strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
 	ibdev->owner = THIS_MODULE;
 	ibdev->node_guid = cpu_to_be64(ppd->guid);
-	ibdev->uverbs_abi_ver = HFI1_UVERBS_ABI_VERSION;
-	ibdev->uverbs_cmd_mask =
-		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-		(1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
-		(1ull << IB_USER_VERBS_CMD_REG_MR)              |
-		(1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-		(1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
-		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
-		(1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-		(1ull << IB_USER_VERBS_CMD_POST_SEND)           |
-		(1ull << IB_USER_VERBS_CMD_POST_RECV)           |
-		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
-		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	ibdev->node_type = RDMA_NODE_IB_CA;
 	ibdev->phys_port_cnt = dd->num_pports;
-	ibdev->num_comp_vectors = 1;
 	ibdev->dma_device = &dd->pcidev->dev;
-	ibdev->query_device = query_device;
 	ibdev->modify_device = modify_device;
-	ibdev->query_port = query_port;
-	ibdev->modify_port = modify_port;
-	ibdev->query_pkey = query_pkey;
-	ibdev->query_gid = query_gid;
-	ibdev->alloc_ucontext = alloc_ucontext;
-	ibdev->dealloc_ucontext = dealloc_ucontext;
-	ibdev->alloc_pd = alloc_pd;
-	ibdev->dealloc_pd = dealloc_pd;
-	ibdev->create_ah = create_ah;
-	ibdev->destroy_ah = destroy_ah;
-	ibdev->modify_ah = modify_ah;
-	ibdev->query_ah = query_ah;
-	ibdev->create_srq = hfi1_create_srq;
-	ibdev->modify_srq = hfi1_modify_srq;
-	ibdev->query_srq = hfi1_query_srq;
-	ibdev->destroy_srq = hfi1_destroy_srq;
-	ibdev->create_qp = hfi1_create_qp;
-	ibdev->modify_qp = hfi1_modify_qp;
-	ibdev->query_qp = hfi1_query_qp;
-	ibdev->destroy_qp = hfi1_destroy_qp;
-	ibdev->post_send = post_send;
-	ibdev->post_recv = post_receive;
-	ibdev->post_srq_recv = hfi1_post_srq_receive;
-	ibdev->create_cq = hfi1_create_cq;
-	ibdev->destroy_cq = hfi1_destroy_cq;
-	ibdev->resize_cq = hfi1_resize_cq;
-	ibdev->poll_cq = hfi1_poll_cq;
-	ibdev->req_notify_cq = hfi1_req_notify_cq;
-	ibdev->get_dma_mr = hfi1_get_dma_mr;
-	ibdev->reg_user_mr = hfi1_reg_user_mr;
-	ibdev->dereg_mr = hfi1_dereg_mr;
-	ibdev->alloc_mr = hfi1_alloc_mr;
-	ibdev->alloc_fmr = hfi1_alloc_fmr;
-	ibdev->map_phys_fmr = hfi1_map_phys_fmr;
-	ibdev->unmap_fmr = hfi1_unmap_fmr;
-	ibdev->dealloc_fmr = hfi1_dealloc_fmr;
-	ibdev->attach_mcast = hfi1_multicast_attach;
-	ibdev->detach_mcast = hfi1_multicast_detach;
+
+	/* keep process mad in the driver */
 	ibdev->process_mad = hfi1_process_mad;
-	ibdev->mmap = hfi1_mmap;
-	ibdev->dma_ops = &hfi1_dma_mapping_ops;
-	ibdev->get_port_immutable = port_immutable;
 
 	strncpy(ibdev->node_desc, init_utsname()->nodename,
 		sizeof(ibdev->node_desc));
 
-	ret = ib_register_device(ibdev, hfi1_create_port_files);
-	if (ret)
-		goto err_reg;
-
-	ret = hfi1_create_agents(dev);
+	/*
+	 * Fill in rvt info object.
+	 */
+	dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
+	dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name;
+	dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
+	dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
+	dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
+	dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be;
+	dd->verbs_dev.rdi.driver_f.query_port_state = query_port;
+	dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port;
+	dd->verbs_dev.rdi.driver_f.cap_mask_chg = hfi1_cap_mask_chg;
+	/*
+	 * Fill in rvt info device attributes.
+	 */
+	hfi1_fill_device_attr(dd);
+
+	/* queue pair */
+	dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size;
+	dd->verbs_dev.rdi.dparms.qpn_start = 0;
+	dd->verbs_dev.rdi.dparms.qpn_inc = 1;
+	dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
+	dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
+	dd->verbs_dev.rdi.dparms.qpn_res_end =
+	dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
+	dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
+	dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
+	dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
+	dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK;
+	dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA;
+	dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE;
+
+	dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
+	dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
+	dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
+	dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
+	dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
+	dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
+	dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
+	dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
+	dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
+	dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters;
+	dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue;
+	dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp;
+	dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
+	dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp;
+	dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
+	dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
+	dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
+	dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+
+	/* completeion queue */
+	snprintf(dd->verbs_dev.rdi.dparms.cq_name,
+		 sizeof(dd->verbs_dev.rdi.dparms.cq_name),
+		 "hfi1_cq%d", dd->unit);
+	dd->verbs_dev.rdi.dparms.node = dd->node;
+
+	/* misc settings */
+	dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */
+	dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
+	dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
+	dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
+
+	ppd = dd->pport;
+	for (i = 0; i < dd->num_pports; i++, ppd++)
+		rvt_init_port(&dd->verbs_dev.rdi,
+			      &ppd->ibport_data.rvp,
+			      i,
+			      ppd->pkeys);
+
+	ret = rvt_register_device(&dd->verbs_dev.rdi);
 	if (ret)
-		goto err_agents;
+		goto err_verbs_txreq;
 
 	ret = hfi1_verbs_register_sysfs(dd);
 	if (ret)
 		goto err_class;
 
-	goto bail;
+	return ret;
 
 err_class:
-	hfi1_free_agents(dev);
-err_agents:
-	ib_unregister_device(ibdev);
-err_reg:
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 err_verbs_txreq:
-	kmem_cache_destroy(dev->verbs_txreq_cache);
-	vfree(dev->lk_table.table);
-err_lk:
-	hfi1_qp_exit(dev);
-err_qp_init:
+	verbs_txreq_exit(dev);
 	dd_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-bail:
 	return ret;
 }
 
 void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
 {
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
-	struct ib_device *ibdev = &dev->ibdev;
 
 	hfi1_verbs_unregister_sysfs(dd);
 
-	hfi1_free_agents(dev);
-
-	ib_unregister_device(ibdev);
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 
 	if (!list_empty(&dev->txwait))
 		dd_dev_err(dd, "txwait list not empty!\n");
 	if (!list_empty(&dev->memwait))
 		dd_dev_err(dd, "memwait list not empty!\n");
-	if (dev->dma_mr)
-		dd_dev_err(dd, "DMA MR not NULL!\n");
 
-	hfi1_qp_exit(dev);
 	del_timer_sync(&dev->mem_timer);
-	kmem_cache_destroy(dev->verbs_txreq_cache);
-	vfree(dev->lk_table.table);
+	verbs_txreq_exit(dev);
 }
 
 void hfi1_cnp_rcv(struct hfi1_packet *packet)
@@ -2128,7 +1703,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
 	struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct hfi1_ib_header *hdr = packet->hdr;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	u32 lqpn, rqpn = 0;
 	u16 rlid = 0;
 	u8 sl, sc5, sc4_bit, svc_type;
@@ -2151,7 +1726,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
 		svc_type = IB_CC_SVCTYPE_UD;
 		break;
 	default:
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		return;
 	}
 
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index 286e468b0479..6c4670fffdbb 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -59,9 +56,13 @@
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/completion.h>
+#include <linux/slab.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_mad.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
+#include <rdma/rdmavt_cq.h>
 
 struct hfi1_ctxtdata;
 struct hfi1_pportdata;
@@ -79,12 +80,6 @@ struct hfi1_packet;
  */
 #define HFI1_UVERBS_ABI_VERSION       2
 
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE      (IB_CQ_NEXT_COMP + 1)
-
 #define IB_SEQ_NAK	(3 << 29)
 
 /* AETH NAK opcode values */
@@ -95,17 +90,6 @@ struct hfi1_packet;
 #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
 #define IB_NAK_INVALID_RD_REQUEST       0x64
 
-/* Flags for checking QP state (see ib_hfi1_state_ops[]) */
-#define HFI1_POST_SEND_OK                0x01
-#define HFI1_POST_RECV_OK                0x02
-#define HFI1_PROCESS_RECV_OK             0x04
-#define HFI1_PROCESS_SEND_OK             0x08
-#define HFI1_PROCESS_NEXT_SEND_OK        0x10
-#define HFI1_FLUSH_SEND			0x20
-#define HFI1_FLUSH_RECV			0x40
-#define HFI1_PROCESS_OR_FLUSH_SEND \
-	(HFI1_PROCESS_SEND_OK | HFI1_FLUSH_SEND)
-
 /* IB Performance Manager status values */
 #define IB_PMA_SAMPLE_STATUS_DONE       0x00
 #define IB_PMA_SAMPLE_STATUS_STARTED    0x01
@@ -208,341 +192,18 @@ struct hfi1_pio_header {
 } __packed;
 
 /*
- * used for force cacheline alignment for AHG
- */
-struct tx_pio_header {
-	struct hfi1_pio_header phdr;
-} ____cacheline_aligned;
-
-/*
- * There is one struct hfi1_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct hfi1_mcast_qp.
- */
-struct hfi1_mcast_qp {
-	struct list_head list;
-	struct hfi1_qp *qp;
-};
-
-struct hfi1_mcast {
-	struct rb_node rb_node;
-	union ib_gid mgid;
-	struct list_head qp_list;
-	wait_queue_head_t wait;
-	atomic_t refcount;
-	int n_attached;
-};
-
-/* Protection domain */
-struct hfi1_pd {
-	struct ib_pd ibpd;
-	int user;               /* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct hfi1_ah {
-	struct ib_ah ibah;
-	struct ib_ah_attr attr;
-	atomic_t refcount;
-};
-
-/*
- * This structure is used by hfi1_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct hfi1_mmap_info {
-	struct list_head pending_mmaps;
-	struct ib_ucontext *context;
-	void *obj;
-	__u64 offset;
-	struct kref ref;
-	unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct hfi1_cq_wc {
-	u32 head;               /* index of next entry to fill */
-	u32 tail;               /* index of next ib_poll_cq() entry */
-	union {
-		/* these are actually size ibcq.cqe + 1 */
-		struct ib_uverbs_wc uqueue[0];
-		struct ib_wc kqueue[0];
-	};
-};
-
-/*
- * The completion queue structure.
- */
-struct hfi1_cq {
-	struct ib_cq ibcq;
-	struct kthread_work comptask;
-	struct hfi1_devdata *dd;
-	spinlock_t lock; /* protect changes in this struct */
-	u8 notify;
-	u8 triggered;
-	struct hfi1_cq_wc *queue;
-	struct hfi1_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * Used by the verbs layer.
- */
-struct hfi1_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of hfi1_segs that fit in a page. */
-#define HFI1_SEGSZ     (PAGE_SIZE / sizeof(struct hfi1_seg))
-
-struct hfi1_segarray {
-	struct hfi1_seg segs[HFI1_SEGSZ];
-};
-
-struct hfi1_mregion {
-	struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
-	u64 user_base;          /* User's address for this region */
-	u64 iova;               /* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;             /* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;           /* number of hfi1_segs in all the arrays */
-	u32 mapsz;              /* size of the map array */
-	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
-	u8  lkey_published;     /* in global table */
-	struct completion comp; /* complete when refcount goes to zero */
-	atomic_t refcount;
-	struct hfi1_segarray *map[0];    /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct hfi1_sge {
-	struct hfi1_mregion *mr;
-	void *vaddr;            /* kernel virtual address of segment */
-	u32 sge_length;         /* length of the SGE */
-	u32 length;             /* remaining length of the segment */
-	u16 m;                  /* current index: mr->map[m] */
-	u16 n;                  /* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct hfi1_mr {
-	struct ib_mr ibmr;
-	struct ib_umem *umem;
-	struct hfi1_mregion mr;  /* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct hfi1_swqe {
-	union {
-		struct ib_send_wr wr;   /* don't use wr.sg_list */
-		struct ib_rdma_wr rdma_wr;
-		struct ib_atomic_wr atomic_wr;
-		struct ib_ud_wr ud_wr;
-	};
-	u32 psn;                /* first packet sequence number */
-	u32 lpsn;               /* last packet sequence number */
-	u32 ssn;                /* send sequence number */
-	u32 length;             /* total length of data in sg_list */
-	struct hfi1_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct hfi1_rwqe {
-	u64 wr_id;
-	u8 num_sge;
-	struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
+ * hfi1 specific data structures that will be hidden from rvt after the queue
+ * pair is made common
  */
-struct hfi1_rwq {
-	u32 head;               /* new work requests posted to the head */
-	u32 tail;               /* receives pull requests from here. */
-	struct hfi1_rwqe wq[0];
-};
-
-struct hfi1_rq {
-	struct hfi1_rwq *wq;
-	u32 size;               /* size of RWQE array */
-	u8 max_sge;
-	/* protect changes in this struct */
-	spinlock_t lock ____cacheline_aligned_in_smp;
-};
-
-struct hfi1_srq {
-	struct ib_srq ibsrq;
-	struct hfi1_rq rq;
-	struct hfi1_mmap_info *ip;
-	/* send signal when number of RWQEs < limit */
-	u32 limit;
-};
-
-struct hfi1_sge_state {
-	struct hfi1_sge *sg_list;      /* next SGE to be used if any */
-	struct hfi1_sge sge;   /* progress state for the current SGE */
-	u32 total_len;
-	u8 num_sge;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct hfi1_ack_entry {
-	u8 opcode;
-	u8 sent;
-	u32 psn;
-	u32 lpsn;
-	union {
-		struct hfi1_sge rdma_sge;
-		u64 atomic_data;
-	};
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct hfi1_qp {
-	struct ib_qp ibqp;
-	/* read mostly fields above and below */
-	struct ib_ah_attr remote_ah_attr;
-	struct ib_ah_attr alt_ah_attr;
-	struct hfi1_qp __rcu *next;           /* link list for QPN hash table */
-	struct hfi1_swqe *s_wq;  /* send work queue */
-	struct hfi1_mmap_info *ip;
-	struct ahg_ib_header *s_hdr;     /* next packet header to send */
-	/* sc for UC/RC QPs - based on ah for UD */
-	u8 s_sc;
-	unsigned long timeout_jiffies;  /* computed from timeout */
-
-	enum ib_mtu path_mtu;
-	int srate_mbps;		/* s_srate (below) converted to Mbit/s */
-	u32 remote_qpn;
-	u32 pmtu;		/* decoded from path_mtu */
-	u32 qkey;               /* QKEY for this QP (for UD or RD) */
-	u32 s_size;             /* send work queue size */
-	u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
-	u32 s_ahgpsn;           /* set to the psn in the copy of the header */
-
-	u8 state;               /* QP state */
-	u8 allowed_ops;		/* high order bits of allowed opcodes */
-	u8 qp_access_flags;
-	u8 alt_timeout;         /* Alternate path timeout for this QP */
-	u8 timeout;             /* Timeout for this QP */
-	u8 s_srate;
-	u8 s_mig_state;
-	u8 port_num;
-	u8 s_pkey_index;        /* PKEY index to use */
-	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
-	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
-	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
-	u8 s_retry_cnt;         /* number of times to retry */
-	u8 s_rnr_retry_cnt;
-	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
-	u8 s_max_sge;           /* size of s_wq->sg_list */
-	u8 s_draining;
-
-	/* start of read/write fields */
-	atomic_t refcount ____cacheline_aligned_in_smp;
-	wait_queue_head_t wait;
-
-
-	struct hfi1_ack_entry s_ack_queue[HFI1_MAX_RDMA_ATOMIC + 1]
-		____cacheline_aligned_in_smp;
-	struct hfi1_sge_state s_rdma_read_sge;
-
-	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
-	unsigned long r_aflags;
-	u64 r_wr_id;            /* ID for current receive WQE */
-	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
-	u32 r_len;              /* total length of r_sge */
-	u32 r_rcv_len;          /* receive data len processed */
-	u32 r_psn;              /* expected rcv packet sequence number */
-	u32 r_msn;              /* message sequence number */
-
-	u8 r_adefered;         /* number of acks defered */
-	u8 r_state;             /* opcode of last packet received */
-	u8 r_flags;
-	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
-
-	struct list_head rspwait;       /* link for waiting to respond */
-
-	struct hfi1_sge_state r_sge;     /* current receive data */
-	struct hfi1_rq r_rq;             /* receive work queue */
-
-	spinlock_t s_lock ____cacheline_aligned_in_smp;
-	struct hfi1_sge_state *s_cur_sge;
-	u32 s_flags;
-	struct hfi1_swqe *s_wqe;
-	struct hfi1_sge_state s_sge;     /* current send request data */
-	struct hfi1_mregion *s_rdma_mr;
-	struct sdma_engine *s_sde; /* current sde */
-	u32 s_cur_size;         /* size of send packet in bytes */
-	u32 s_len;              /* total length of s_sge */
-	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
-	u32 s_next_psn;         /* PSN for next request */
-	u32 s_last_psn;         /* last response PSN processed */
-	u32 s_sending_psn;      /* lowest PSN that is being sent */
-	u32 s_sending_hpsn;     /* highest PSN that is being sent */
-	u32 s_psn;              /* current packet sequence number */
-	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
-	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
-	u32 s_head;             /* new entries added here */
-	u32 s_tail;             /* next entry to process */
-	u32 s_cur;              /* current work queue entry */
-	u32 s_acked;            /* last un-ACK'ed entry */
-	u32 s_last;             /* last completed entry */
-	u32 s_ssn;              /* SSN of tail entry */
-	u32 s_lsn;              /* limit sequence number (credit) */
-	u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
-	u16 s_rdma_ack_cnt;
-	s8 s_ahgidx;
-	u8 s_state;             /* opcode of last packet sent */
-	u8 s_ack_state;         /* opcode of packet to ACK */
-	u8 s_nak_state;         /* non-zero if NAK is pending */
-	u8 r_nak_state;         /* non-zero if NAK is pending */
-	u8 s_retry;             /* requester retry counter */
-	u8 s_rnr_retry;         /* requester RNR retry counter */
-	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
-	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
-
-	struct hfi1_sge_state s_ack_rdma_sge;
-	struct timer_list s_timer;
-
+struct hfi1_qp_priv {
+	struct ahg_ib_header *s_hdr;              /* next header to send */
+	struct sdma_engine *s_sde;                /* current sde */
+	struct send_context *s_sendcontext;       /* current sendcontext */
+	u8 s_sc;		                  /* SC[0..4] for next packet */
+	u8 r_adefered;                            /* number of acks defered */
 	struct iowait s_iowait;
-
-	struct hfi1_sge r_sg_list[0] /* verified SGEs */
-		____cacheline_aligned_in_smp;
+	struct timer_list s_rnr_timer;
+	struct rvt_qp *owner;
 };
 
 /*
@@ -553,123 +214,11 @@ struct hfi1_pkt_state {
 	struct hfi1_ibdev *dev;
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
+	struct verbs_txreq *s_txreq;
 };
 
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define HFI1_R_WRID_VALID        0
-#define HFI1_R_REWIND_SGE        1
-
-/*
- * Bit definitions for r_flags.
- */
-#define HFI1_R_REUSE_SGE       0x01
-#define HFI1_R_RDMAR_SEQ       0x02
-/* defer ack until end of interrupt session */
-#define HFI1_R_RSP_DEFERED_ACK 0x04
-/* relay ack to send engine */
-#define HFI1_R_RSP_SEND        0x08
-#define HFI1_R_COMM_EST        0x10
-
-/*
- * Bit definitions for s_flags.
- *
- * HFI1_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
- * HFI1_S_BUSY - send tasklet is processing the QP
- * HFI1_S_TIMER - the RC retry timer is active
- * HFI1_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
- * HFI1_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
- *                         before processing the next SWQE
- * HFI1_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
- *                         before processing the next SWQE
- * HFI1_S_WAIT_RNR - waiting for RNR timeout
- * HFI1_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * HFI1_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- *                  next send completion entry not via send DMA
- * HFI1_S_WAIT_PIO - waiting for a send buffer to be available
- * HFI1_S_WAIT_TX - waiting for a struct verbs_txreq to be available
- * HFI1_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
- * HFI1_S_WAIT_KMEM - waiting for kernel memory to be available
- * HFI1_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
- * HFI1_S_WAIT_ACK - waiting for an ACK packet before sending more requests
- * HFI1_S_SEND_ONE - send one packet, request ACK, then wait for ACK
- * HFI1_S_ECN - a BECN was queued to the send engine
- */
-#define HFI1_S_SIGNAL_REQ_WR	0x0001
-#define HFI1_S_BUSY		0x0002
-#define HFI1_S_TIMER		0x0004
-#define HFI1_S_RESP_PENDING	0x0008
-#define HFI1_S_ACK_PENDING	0x0010
-#define HFI1_S_WAIT_FENCE	0x0020
-#define HFI1_S_WAIT_RDMAR	0x0040
-#define HFI1_S_WAIT_RNR		0x0080
-#define HFI1_S_WAIT_SSN_CREDIT	0x0100
-#define HFI1_S_WAIT_DMA		0x0200
-#define HFI1_S_WAIT_PIO		0x0400
-#define HFI1_S_WAIT_TX		0x0800
-#define HFI1_S_WAIT_DMA_DESC	0x1000
-#define HFI1_S_WAIT_KMEM		0x2000
-#define HFI1_S_WAIT_PSN		0x4000
-#define HFI1_S_WAIT_ACK		0x8000
-#define HFI1_S_SEND_ONE		0x10000
-#define HFI1_S_UNLIMITED_CREDIT	0x20000
-#define HFI1_S_AHG_VALID		0x40000
-#define HFI1_S_AHG_CLEAR		0x80000
-#define HFI1_S_ECN		0x100000
-
-/*
- * Wait flags that would prevent any packet type from being sent.
- */
-#define HFI1_S_ANY_WAIT_IO (HFI1_S_WAIT_PIO | HFI1_S_WAIT_TX | \
-	HFI1_S_WAIT_DMA_DESC | HFI1_S_WAIT_KMEM)
-
-/*
- * Wait flags that would prevent send work requests from making progress.
- */
-#define HFI1_S_ANY_WAIT_SEND (HFI1_S_WAIT_FENCE | HFI1_S_WAIT_RDMAR | \
-	HFI1_S_WAIT_RNR | HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_DMA | \
-	HFI1_S_WAIT_PSN | HFI1_S_WAIT_ACK)
-
-#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | HFI1_S_ANY_WAIT_SEND)
-
 #define HFI1_PSN_CREDIT  16
 
-/*
- * Since struct hfi1_swqe is not a fixed size, we can't simply index into
- * struct hfi1_qp.s_wq.  This function does the array index computation.
- */
-static inline struct hfi1_swqe *get_swqe_ptr(struct hfi1_qp *qp,
-					     unsigned n)
-{
-	return (struct hfi1_swqe *)((char *)qp->s_wq +
-				     (sizeof(struct hfi1_swqe) +
-				      qp->s_max_sge *
-				      sizeof(struct hfi1_sge)) * n);
-}
-
-/*
- * Since struct hfi1_rwqe is not a fixed size, we can't simply index into
- * struct hfi1_rwq.wq.  This function does the array index computation.
- */
-static inline struct hfi1_rwqe *get_rwqe_ptr(struct hfi1_rq *rq, unsigned n)
-{
-	return (struct hfi1_rwqe *)
-		((char *) rq->wq->wq +
-		 (sizeof(struct hfi1_rwqe) +
-		  rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-#define MAX_LKEY_TABLE_BITS 23
-
-struct hfi1_lkey_table {
-	spinlock_t lock; /* protect changes in this struct */
-	u32 next;               /* next unused index (speeds search) */
-	u32 gen;                /* generation count */
-	u32 max;                /* size of the table */
-	struct hfi1_mregion __rcu **table;
-};
-
 struct hfi1_opcode_stats {
 	u64 n_packets;          /* number of packets */
 	u64 n_bytes;            /* total number of bytes */
@@ -690,75 +239,20 @@ static inline void inc_opstats(
 }
 
 struct hfi1_ibport {
-	struct hfi1_qp __rcu *qp[2];
-	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
-	struct hfi1_ah *sm_ah;
-	struct hfi1_ah *smi_ah;
-	struct rb_root mcast_tree;
-	spinlock_t lock;		/* protect changes in this struct */
-
-	/* non-zero when timer is set */
-	unsigned long mkey_lease_timeout;
-	unsigned long trap_timeout;
-	__be64 gid_prefix;      /* in network order */
-	__be64 mkey;
+	struct rvt_qp __rcu *qp[2];
+	struct rvt_ibport rvp;
+
 	__be64 guids[HFI1_GUIDS_PER_PORT	- 1];	/* writable GUIDs */
-	u64 tid;		/* TID for traps */
-	u64 n_rc_resends;
-	u64 n_seq_naks;
-	u64 n_rdma_seq;
-	u64 n_rnr_naks;
-	u64 n_other_naks;
-	u64 n_loop_pkts;
-	u64 n_pkt_drops;
-	u64 n_vl15_dropped;
-	u64 n_rc_timeouts;
-	u64 n_dmawait;
-	u64 n_unaligned;
-	u64 n_rc_dupreq;
-	u64 n_rc_seqnak;
-
-	/* Hot-path per CPU counters to avoid cacheline trading to update */
-	u64 z_rc_acks;
-	u64 z_rc_qacks;
-	u64 z_rc_delayed_comp;
-	u64 __percpu *rc_acks;
-	u64 __percpu *rc_qacks;
-	u64 __percpu *rc_delayed_comp;
-
-	u32 port_cap_flags;
-	u32 pma_sample_start;
-	u32 pma_sample_interval;
-	__be16 pma_counter_select[5];
-	u16 pma_tag;
-	u16 pkey_violations;
-	u16 qkey_violations;
-	u16 mkey_violations;
-	u16 mkey_lease_period;
-	u16 sm_lid;
-	u16 repress_traps;
-	u8 sm_sl;
-	u8 mkeyprot;
-	u8 subnet_timeout;
-	u8 vl_high_limit;
+
 	/* the first 16 entries are sl_to_vl for !OPA */
 	u8 sl_to_sc[32];
 	u8 sc_to_sl[32];
 };
 
-
-struct hfi1_qp_ibdev;
 struct hfi1_ibdev {
-	struct ib_device ibdev;
-	struct list_head pending_mmaps;
-	spinlock_t mmap_offset_lock; /* protect mmap_offset */
-	u32 mmap_offset;
-	struct hfi1_mregion __rcu *dma_mr;
-
-	struct hfi1_qp_ibdev *qp_dev;
+	struct rvt_dev_info rdi; /* Must be first */
 
 	/* QP numbers are shared by all IB ports */
-	struct hfi1_lkey_table lk_table;
 	/* protect wait lists */
 	seqlock_t iowait_lock;
 	struct list_head txwait;        /* list for wait verbs_txreq */
@@ -767,26 +261,11 @@ struct hfi1_ibdev {
 	struct kmem_cache *verbs_txreq_cache;
 	struct timer_list mem_timer;
 
-	/* other waiters */
-	spinlock_t pending_lock;
-
 	u64 n_piowait;
+	u64 n_piodrain;
 	u64 n_txwait;
 	u64 n_kmem_wait;
-	u64 n_send_schedule;
-
-	u32 n_pds_allocated;    /* number of PDs allocated for device */
-	spinlock_t n_pds_lock;
-	u32 n_ahs_allocated;    /* number of AHs allocated for device */
-	spinlock_t n_ahs_lock;
-	u32 n_cqs_allocated;    /* number of CQs allocated for device */
-	spinlock_t n_cqs_lock;
-	u32 n_qps_allocated;    /* number of QPs allocated for device */
-	spinlock_t n_qps_lock;
-	u32 n_srqs_allocated;   /* number of SRQs allocated for device */
-	spinlock_t n_srqs_lock;
-	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-	spinlock_t n_mcast_grps_lock;
+
 #ifdef CONFIG_DEBUG_FS
 	/* per HFI debugfs */
 	struct dentry *hfi1_ibdev_dbg;
@@ -795,66 +274,31 @@ struct hfi1_ibdev {
 #endif
 };
 
-struct hfi1_verbs_counters {
-	u64 symbol_error_counter;
-	u64 link_error_recovery_counter;
-	u64 link_downed_counter;
-	u64 port_rcv_errors;
-	u64 port_rcv_remphys_errors;
-	u64 port_xmit_discards;
-	u64 port_xmit_data;
-	u64 port_rcv_data;
-	u64 port_xmit_packets;
-	u64 port_rcv_packets;
-	u32 local_link_integrity_errors;
-	u32 excessive_buffer_overrun_errors;
-	u32 vl15_dropped;
-};
-
-static inline struct hfi1_mr *to_imr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct hfi1_mr, ibmr);
-}
-
-static inline struct hfi1_pd *to_ipd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct hfi1_pd, ibpd);
-}
-
-static inline struct hfi1_ah *to_iah(struct ib_ah *ibah)
-{
-	return container_of(ibah, struct hfi1_ah, ibah);
-}
-
-static inline struct hfi1_cq *to_icq(struct ib_cq *ibcq)
+static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
 {
-	return container_of(ibcq, struct hfi1_cq, ibcq);
-}
+	struct rvt_dev_info *rdi;
 
-static inline struct hfi1_srq *to_isrq(struct ib_srq *ibsrq)
-{
-	return container_of(ibsrq, struct hfi1_srq, ibsrq);
+	rdi = container_of(ibdev, struct rvt_dev_info, ibdev);
+	return container_of(rdi, struct hfi1_ibdev, rdi);
 }
 
-static inline struct hfi1_qp *to_iqp(struct ib_qp *ibqp)
+static inline struct rvt_qp *iowait_to_qp(struct  iowait *s_iowait)
 {
-	return container_of(ibqp, struct hfi1_qp, ibqp);
-}
+	struct hfi1_qp_priv *priv;
 
-static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
-{
-	return container_of(ibdev, struct hfi1_ibdev, ibdev);
+	priv = container_of(s_iowait, struct hfi1_qp_priv, s_iowait);
+	return priv->owner;
 }
 
 /*
  * Send if not busy or waiting for I/O and either
  * a RC response is pending or we can process send work requests.
  */
-static inline int hfi1_send_ok(struct hfi1_qp *qp)
+static inline int hfi1_send_ok(struct rvt_qp *qp)
 {
-	return !(qp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
-		(qp->s_hdrwords || (qp->s_flags & HFI1_S_RESP_PENDING) ||
-		 !(qp->s_flags & HFI1_S_ANY_WAIT_SEND));
+	return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
+		(qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) ||
+		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
 }
 
 /*
@@ -862,7 +306,7 @@ static inline int hfi1_send_ok(struct hfi1_qp *qp)
  */
 void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
 		    u32 qp1, u32 qp2, u16 lid1, u16 lid2);
-void hfi1_cap_mask_chg(struct hfi1_ibport *ibp);
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
 void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
@@ -870,8 +314,6 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
 		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
 		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
 		     u16 *out_mad_pkey_index);
-int hfi1_create_agents(struct hfi1_ibdev *dev);
-void hfi1_free_agents(struct hfi1_ibdev *dev);
 
 /*
  * The PSN_MASK and PSN_SHIFT allow for
@@ -901,7 +343,7 @@ void hfi1_free_agents(struct hfi1_ibdev *dev);
  */
 static inline int cmp_msn(u32 a, u32 b)
 {
-	return (((int) a) - ((int) b)) << 8;
+	return (((int)a) - ((int)b)) << 8;
 }
 
 /*
@@ -910,7 +352,7 @@ static inline int cmp_msn(u32 a, u32 b)
  */
 static inline int cmp_psn(u32 a, u32 b)
 {
-	return (((int) a) - ((int) b)) << PSN_SHIFT;
+	return (((int)a) - ((int)b)) << PSN_SHIFT;
 }
 
 /*
@@ -929,23 +371,15 @@ static inline u32 delta_psn(u32 a, u32 b)
 	return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
 }
 
-struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid);
-
-int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp);
-
 struct verbs_txreq;
 void hfi1_put_txreq(struct verbs_txreq *tx);
 
-int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps);
+int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
-void hfi1_copy_sge(struct hfi1_sge_state *ss, void *data, u32 length,
-		   int release);
+void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
+		   int release, int copy_last);
 
-void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release);
+void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
 
 void hfi1_cnp_rcv(struct hfi1_packet *packet);
 
@@ -957,147 +391,75 @@ void hfi1_rc_hdrerr(
 	struct hfi1_ctxtdata *rcd,
 	struct hfi1_ib_header *hdr,
 	u32 rcv_flags,
-	struct hfi1_qp *qp);
+	struct rvt_qp *qp);
 
 u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
 
-int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
-
 struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);
 
 void hfi1_rc_rnr_retry(unsigned long arg);
+void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to);
+void hfi1_rc_timeout(unsigned long arg);
+void hfi1_del_timers_sync(struct rvt_qp *qp);
+void hfi1_stop_rc_timers(struct rvt_qp *qp);
 
-void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr);
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr);
 
-void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err);
+void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
 
 void hfi1_ud_rcv(struct hfi1_packet *packet);
 
 int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey);
 
-int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region);
-
-void hfi1_free_lkey(struct hfi1_mregion *mr);
-
-int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd,
-		 struct hfi1_sge *isge, struct ib_sge *sge, int acc);
-
-int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge,
-		 u32 len, u64 vaddr, u32 rkey, int acc);
-
-int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr);
-
-struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
-			       struct ib_srq_init_attr *srq_init_attr,
-			       struct ib_udata *udata);
-
-int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		    enum ib_srq_attr_mask attr_mask,
-		    struct ib_udata *udata);
-
-int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int hfi1_destroy_srq(struct ib_srq *ibsrq);
-
-int hfi1_cq_init(struct hfi1_devdata *dd);
-
-void hfi1_cq_exit(struct hfi1_devdata *dd);
-
-void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int sig);
-
-int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *hfi1_create_cq(
-	struct ib_device *ibdev,
-	const struct ib_cq_init_attr *attr,
-	struct ib_ucontext *context,
-	struct ib_udata *udata);
-
-int hfi1_destroy_cq(struct ib_cq *ibcq);
-
-int hfi1_req_notify_cq(
-	struct ib_cq *ibcq,
-	enum ib_cq_notify_flags notify_flags);
-
-int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			       u64 virt_addr, int mr_access_flags,
-			       struct ib_udata *udata);
+int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only);
 
-int hfi1_dereg_mr(struct ib_mr *ibmr);
+void hfi1_migrate_qp(struct rvt_qp *qp);
 
-struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
-			    enum ib_mr_type mr_type,
-			    u32 max_entries);
+int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+			 int attr_mask, struct ib_udata *udata);
 
-struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			      struct ib_fmr_attr *fmr_attr);
+void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+		    int attr_mask, struct ib_udata *udata);
 
-int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		      int list_len, u64 iova);
+int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
 
-int hfi1_unmap_fmr(struct list_head *fmr_list);
+extern const u32 rc_only_opcode;
+extern const u32 uc_only_opcode;
 
-int hfi1_dealloc_fmr(struct ib_fmr *ibfmr);
-
-static inline void hfi1_get_mr(struct hfi1_mregion *mr)
-{
-	atomic_inc(&mr->refcount);
-}
-
-static inline void hfi1_put_mr(struct hfi1_mregion *mr)
+static inline u8 get_opcode(struct hfi1_ib_header *h)
 {
-	if (unlikely(atomic_dec_and_test(&mr->refcount)))
-		complete(&mr->comp);
-}
+	u16 lnh = be16_to_cpu(h->lrh[0]) & 3;
 
-static inline void hfi1_put_ss(struct hfi1_sge_state *ss)
-{
-	while (ss->num_sge) {
-		hfi1_put_mr(ss->sge.mr);
-		if (--ss->num_sge)
-			ss->sge = *ss->sg_list++;
-	}
+	if (lnh == IB_LNH_IBA_LOCAL)
+		return be32_to_cpu(h->u.oth.bth[0]) >> 24;
+	else
+		return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
 }
 
-void hfi1_release_mmap_info(struct kref *ref);
-
-struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev, u32 size,
-					     struct ib_ucontext *context,
-					     void *obj);
-
-void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip,
-			   u32 size, void *obj);
-
-int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only);
-
 int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
-		       int has_grh, struct hfi1_qp *qp, u32 bth0);
+		       int has_grh, struct rvt_qp *qp, u32 bth0);
 
 u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
 		  struct ib_global_route *grh, u32 hwords, u32 nwords);
 
-void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
-			  u32 bth0, u32 bth2, int middle);
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+			  u32 bth0, u32 bth2, int middle,
+			  struct hfi1_pkt_state *ps);
 
-void hfi1_do_send(struct work_struct *work);
+void _hfi1_do_send(struct work_struct *work);
 
-void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
+void hfi1_do_send(struct rvt_qp *qp);
+
+void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 			enum ib_wc_status status);
 
-void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct hfi1_qp *qp, int is_fecn);
+void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct rvt_qp *qp, int is_fecn);
 
-int hfi1_make_rc_req(struct hfi1_qp *qp);
+int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
-int hfi1_make_uc_req(struct hfi1_qp *qp);
+int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
-int hfi1_make_ud_req(struct hfi1_qp *qp);
+int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
 int hfi1_register_ib_device(struct hfi1_devdata *);
 
@@ -1107,24 +469,42 @@ void hfi1_ib_rcv(struct hfi1_packet *packet);
 
 unsigned hfi1_get_npkeys(struct hfi1_devdata *);
 
-int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc);
 
-int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc);
 
-struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5);
+int hfi1_wss_init(void);
+void hfi1_wss_exit(void);
+
+/* platform specific: return the lowest level cache (llc) size, in KiB */
+static inline int wss_llc_size(void)
+{
+	/* assume that the boot CPU value is universal for all CPUs */
+	return boot_cpu_data.x86_cache_size;
+}
+
+/* platform specific: cacheless copy */
+static inline void cacheless_memcpy(void *dst, void *src, size_t n)
+{
+	/*
+	 * Use the only available X64 cacheless copy.  Add a __user cast
+	 * to quiet sparse.  The src agument is already in the kernel so
+	 * there are no security issues.  The extra fault recovery machinery
+	 * is not invoked.
+	 */
+	__copy_user_nocache(dst, (void __user *)src, n, 0);
+}
 
 extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
 
 extern const u8 hdr_len_by_opcode[];
 
-extern const int ib_hfi1_state_ops[];
+extern const int ib_rvt_state_ops[];
 
 extern __be64 ib_hfi1_sys_image_guid;    /* in network order */
 
-extern unsigned int hfi1_lkey_table_size;
-
 extern unsigned int hfi1_max_cqes;
 
 extern unsigned int hfi1_max_cqs;
@@ -1145,8 +525,8 @@ extern unsigned int hfi1_max_srq_sges;
 
 extern unsigned int hfi1_max_srq_wrs;
 
-extern const u32 ib_hfi1_rnr_table[];
+extern unsigned short piothreshold;
 
-extern struct ib_dma_mapping_ops hfi1_dma_mapping_ops;
+extern const u32 ib_hfi1_rnr_table[];
 
 #endif                          /* HFI1_VERBS_H */
diff --git a/drivers/staging/rdma/hfi1/verbs_mcast.c b/drivers/staging/rdma/hfi1/verbs_mcast.c
deleted file mode 100644
index afc6b4c61a1d..000000000000
--- a/drivers/staging/rdma/hfi1/verbs_mcast.c
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/rculist.h>
-
-#include "hfi.h"
-
-/**
- * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
- * @qp: the QP to link
- */
-static struct hfi1_mcast_qp *mcast_qp_alloc(struct hfi1_qp *qp)
-{
-	struct hfi1_mcast_qp *mqp;
-
-	mqp = kmalloc(sizeof(*mqp), GFP_KERNEL);
-	if (!mqp)
-		goto bail;
-
-	mqp->qp = qp;
-	atomic_inc(&qp->refcount);
-
-bail:
-	return mqp;
-}
-
-static void mcast_qp_free(struct hfi1_mcast_qp *mqp)
-{
-	struct hfi1_qp *qp = mqp->qp;
-
-	/* Notify hfi1_destroy_qp() if it is waiting. */
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-
-	kfree(mqp);
-}
-
-/**
- * mcast_alloc - allocate the multicast GID structure
- * @mgid: the multicast GID
- *
- * A list of QPs will be attached to this structure.
- */
-static struct hfi1_mcast *mcast_alloc(union ib_gid *mgid)
-{
-	struct hfi1_mcast *mcast;
-
-	mcast = kmalloc(sizeof(*mcast), GFP_KERNEL);
-	if (!mcast)
-		goto bail;
-
-	mcast->mgid = *mgid;
-	INIT_LIST_HEAD(&mcast->qp_list);
-	init_waitqueue_head(&mcast->wait);
-	atomic_set(&mcast->refcount, 0);
-	mcast->n_attached = 0;
-
-bail:
-	return mcast;
-}
-
-static void mcast_free(struct hfi1_mcast *mcast)
-{
-	struct hfi1_mcast_qp *p, *tmp;
-
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-		mcast_qp_free(p);
-
-	kfree(mcast);
-}
-
-/**
- * hfi1_mcast_find - search the global table for the given multicast GID
- * @ibp: the IB port structure
- * @mgid: the multicast GID to search for
- *
- * Returns NULL if not found.
- *
- * The caller is responsible for decrementing the reference count if found.
- */
-struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid)
-{
-	struct rb_node *n;
-	unsigned long flags;
-	struct hfi1_mcast *mcast;
-
-	spin_lock_irqsave(&ibp->lock, flags);
-	n = ibp->mcast_tree.rb_node;
-	while (n) {
-		int ret;
-
-		mcast = rb_entry(n, struct hfi1_mcast, rb_node);
-
-		ret = memcmp(mgid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else {
-			atomic_inc(&mcast->refcount);
-			spin_unlock_irqrestore(&ibp->lock, flags);
-			goto bail;
-		}
-	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
-
-	mcast = NULL;
-
-bail:
-	return mcast;
-}
-
-/**
- * mcast_add - insert mcast GID into table and attach QP struct
- * @mcast: the mcast GID table
- * @mqp: the QP to attach
- *
- * Return zero if both were added.  Return EEXIST if the GID was already in
- * the table but the QP was added.  Return ESRCH if the QP was already
- * attached and neither structure was added.
- */
-static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
-		     struct hfi1_mcast *mcast, struct hfi1_mcast_qp *mqp)
-{
-	struct rb_node **n = &ibp->mcast_tree.rb_node;
-	struct rb_node *pn = NULL;
-	int ret;
-
-	spin_lock_irq(&ibp->lock);
-
-	while (*n) {
-		struct hfi1_mcast *tmcast;
-		struct hfi1_mcast_qp *p;
-
-		pn = *n;
-		tmcast = rb_entry(pn, struct hfi1_mcast, rb_node);
-
-		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0) {
-			n = &pn->rb_left;
-			continue;
-		}
-		if (ret > 0) {
-			n = &pn->rb_right;
-			continue;
-		}
-
-		/* Search the QP list to see if this is already there. */
-		list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
-			if (p->qp == mqp->qp) {
-				ret = ESRCH;
-				goto bail;
-			}
-		}
-		if (tmcast->n_attached == hfi1_max_mcast_qp_attached) {
-			ret = ENOMEM;
-			goto bail;
-		}
-
-		tmcast->n_attached++;
-
-		list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
-		ret = EEXIST;
-		goto bail;
-	}
-
-	spin_lock(&dev->n_mcast_grps_lock);
-	if (dev->n_mcast_grps_allocated == hfi1_max_mcast_grps) {
-		spin_unlock(&dev->n_mcast_grps_lock);
-		ret = ENOMEM;
-		goto bail;
-	}
-
-	dev->n_mcast_grps_allocated++;
-	spin_unlock(&dev->n_mcast_grps_lock);
-
-	mcast->n_attached++;
-
-	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
-
-	atomic_inc(&mcast->refcount);
-	rb_link_node(&mcast->rb_node, pn, n);
-	rb_insert_color(&mcast->rb_node, &ibp->mcast_tree);
-
-	ret = 0;
-
-bail:
-	spin_unlock_irq(&ibp->lock);
-
-	return ret;
-}
-
-int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-	struct hfi1_ibport *ibp;
-	struct hfi1_mcast *mcast;
-	struct hfi1_mcast_qp *mqp;
-	int ret;
-
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Allocate data structures since its better to do this outside of
-	 * spin locks and it will most likely be needed.
-	 */
-	mcast = mcast_alloc(gid);
-	if (mcast == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	mqp = mcast_qp_alloc(qp);
-	if (mqp == NULL) {
-		mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-	}
-	ibp = to_iport(ibqp->device, qp->port_num);
-	switch (mcast_add(dev, ibp, mcast, mqp)) {
-	case ESRCH:
-		/* Neither was used: OK to attach the same QP twice. */
-		mcast_qp_free(mqp);
-		mcast_free(mcast);
-		break;
-
-	case EEXIST:            /* The mcast wasn't used */
-		mcast_free(mcast);
-		break;
-
-	case ENOMEM:
-		/* Exceeded the maximum number of mcast groups. */
-		mcast_qp_free(mqp);
-		mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-
-	default:
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-	struct hfi1_ibport *ibp = to_iport(ibqp->device, qp->port_num);
-	struct hfi1_mcast *mcast = NULL;
-	struct hfi1_mcast_qp *p, *tmp;
-	struct rb_node *n;
-	int last = 0;
-	int ret;
-
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	spin_lock_irq(&ibp->lock);
-
-	/* Find the GID in the mcast table. */
-	n = ibp->mcast_tree.rb_node;
-	while (1) {
-		if (n == NULL) {
-			spin_unlock_irq(&ibp->lock);
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		mcast = rb_entry(n, struct hfi1_mcast, rb_node);
-		ret = memcmp(gid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else
-			break;
-	}
-
-	/* Search the QP list. */
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
-		if (p->qp != qp)
-			continue;
-		/*
-		 * We found it, so remove it, but don't poison the forward
-		 * link until we are sure there are no list walkers.
-		 */
-		list_del_rcu(&p->list);
-		mcast->n_attached--;
-
-		/* If this was the last attached QP, remove the GID too. */
-		if (list_empty(&mcast->qp_list)) {
-			rb_erase(&mcast->rb_node, &ibp->mcast_tree);
-			last = 1;
-		}
-		break;
-	}
-
-	spin_unlock_irq(&ibp->lock);
-
-	if (p) {
-		/*
-		 * Wait for any list walkers to finish before freeing the
-		 * list element.
-		 */
-		wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-		mcast_qp_free(p);
-	}
-	if (last) {
-		atomic_dec(&mcast->refcount);
-		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-		mcast_free(mcast);
-		spin_lock_irq(&dev->n_mcast_grps_lock);
-		dev->n_mcast_grps_allocated--;
-		spin_unlock_irq(&dev->n_mcast_grps_lock);
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp)
-{
-	return ibp->mcast_tree.rb_node == NULL;
-}
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/staging/rdma/hfi1/verbs_txreq.c
new file mode 100644
index 000000000000..bc95c4112c61
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/verbs_txreq.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "verbs_txreq.h"
+#include "qp.h"
+#include "trace.h"
+
+#define TXREQ_LEN 24
+
+void hfi1_put_txreq(struct verbs_txreq *tx)
+{
+	struct hfi1_ibdev *dev;
+	struct rvt_qp *qp;
+	unsigned long flags;
+	unsigned int seq;
+	struct hfi1_qp_priv *priv;
+
+	qp = tx->qp;
+	dev = to_idev(qp->ibqp.device);
+
+	if (tx->mr)
+		rvt_put_mr(tx->mr);
+
+	sdma_txclean(dd_from_dev(dev), &tx->txreq);
+
+	/* Free verbs_txreq and return to slab cache */
+	kmem_cache_free(dev->verbs_txreq_cache, tx);
+
+	do {
+		seq = read_seqbegin(&dev->iowait_lock);
+		if (!list_empty(&dev->txwait)) {
+			struct iowait *wait;
+
+			write_seqlock_irqsave(&dev->iowait_lock, flags);
+			wait = list_first_entry(&dev->txwait, struct iowait,
+						list);
+			qp = iowait_to_qp(wait);
+			priv = qp->priv;
+			list_del_init(&priv->s_iowait.list);
+			/* refcount held until actual wake up */
+			write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+			hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
+			break;
+		}
+	} while (read_seqretry(&dev->iowait_lock, seq));
+}
+
+struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
+				struct rvt_qp *qp)
+{
+	struct verbs_txreq *tx = ERR_PTR(-EBUSY);
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+	write_seqlock(&dev->iowait_lock);
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		struct hfi1_qp_priv *priv;
+
+		tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+		if (tx)
+			goto out;
+		priv = qp->priv;
+		if (list_empty(&priv->s_iowait.list)) {
+			dev->n_txwait++;
+			qp->s_flags |= RVT_S_WAIT_TX;
+			list_add_tail(&priv->s_iowait.list, &dev->txwait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
+			atomic_inc(&qp->refcount);
+		}
+		qp->s_flags &= ~RVT_S_BUSY;
+	}
+out:
+	write_sequnlock(&dev->iowait_lock);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	return tx;
+}
+
+static void verbs_txreq_kmem_cache_ctor(void *obj)
+{
+	struct verbs_txreq *tx = (struct verbs_txreq *)obj;
+
+	memset(tx, 0, sizeof(*tx));
+}
+
+int verbs_txreq_init(struct hfi1_ibdev *dev)
+{
+	char buf[TXREQ_LEN];
+	struct hfi1_devdata *dd = dd_from_dev(dev);
+
+	snprintf(buf, sizeof(buf), "hfi1_%u_vtxreq_cache", dd->unit);
+	dev->verbs_txreq_cache = kmem_cache_create(buf,
+						   sizeof(struct verbs_txreq),
+						   0, SLAB_HWCACHE_ALIGN,
+						   verbs_txreq_kmem_cache_ctor);
+	if (!dev->verbs_txreq_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void verbs_txreq_exit(struct hfi1_ibdev *dev)
+{
+	kmem_cache_destroy(dev->verbs_txreq_cache);
+	dev->verbs_txreq_cache = NULL;
+}
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h
new file mode 100644
index 000000000000..1cf69b2fe4a5
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/verbs_txreq.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef HFI1_VERBS_TXREQ_H
+#define HFI1_VERBS_TXREQ_H
+
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include "verbs.h"
+#include "sdma_txreq.h"
+#include "iowait.h"
+
+struct verbs_txreq {
+	struct hfi1_pio_header	phdr;
+	struct sdma_txreq       txreq;
+	struct rvt_qp           *qp;
+	struct rvt_swqe         *wqe;
+	struct rvt_mregion	*mr;
+	struct rvt_sge_state    *ss;
+	struct sdma_engine     *sde;
+	struct send_context     *psc;
+	u16                     hdr_dwords;
+};
+
+struct hfi1_ibdev;
+struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
+				struct rvt_qp *qp);
+
+static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
+					    struct rvt_qp *qp)
+{
+	struct verbs_txreq *tx;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+	if (unlikely(!tx)) {
+		/* call slow path to get the lock */
+		tx = __get_txreq(dev, qp);
+		if (IS_ERR(tx))
+			return tx;
+	}
+	tx->qp = qp;
+	tx->mr = NULL;
+	tx->sde = priv->s_sde;
+	tx->psc = priv->s_sendcontext;
+	/* so that we can test if the sdma decriptors are there */
+	tx->txreq.num_desc = 0;
+	return tx;
+}
+
+static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
+{
+	return &tx->txreq;
+}
+
+static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
+{
+	struct sdma_txreq *stx;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	stx = iowait_get_txhead(&priv->s_iowait);
+	if (stx)
+		return container_of(stx, struct verbs_txreq, txreq);
+	return NULL;
+}
+
+void hfi1_put_txreq(struct verbs_txreq *tx);
+int verbs_txreq_init(struct hfi1_ibdev *dev);
+void verbs_txreq_exit(struct hfi1_ibdev *dev);
+
+#endif                         /* HFI1_VERBS_TXREQ_H */
diff --git a/drivers/staging/rdma/ipath/Kconfig b/drivers/staging/rdma/ipath/Kconfig
deleted file mode 100644
index 041ce0634968..000000000000
--- a/drivers/staging/rdma/ipath/Kconfig
+++ /dev/null
@@ -1,16 +0,0 @@
-config INFINIBAND_IPATH
-	tristate "QLogic HTX HCA support"
-	depends on 64BIT && NET && HT_IRQ
-	---help---
-	This is a driver for the deprecated QLogic Hyper-Transport
-	IB host channel adapter (model QHT7140),
-	including InfiniBand verbs support.  This driver allows these
-	devices to be used with both kernel upper level protocols such
-	as IP-over-InfiniBand as well as with userspace applications
-	(in conjunction with InfiniBand userspace access).
-	For QLogic PCIe QLE based cards, use the QIB driver instead.
-
-	If you have this hardware you will need to boot with PAT disabled
-	on your x86-64 systems, use the nopat kernel parameter.
-
-	Note that this driver will soon be removed entirely from the kernel.
diff --git a/drivers/staging/rdma/ipath/Makefile b/drivers/staging/rdma/ipath/Makefile
deleted file mode 100644
index 4496f2820c92..000000000000
--- a/drivers/staging/rdma/ipath/Makefile
+++ /dev/null
@@ -1,37 +0,0 @@
-ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \
-	-DIPATH_KERN_TYPE=0
-
-obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
-
-ib_ipath-y := \
-	ipath_cq.o \
-	ipath_diag.o \
-	ipath_dma.o \
-	ipath_driver.o \
-	ipath_eeprom.o \
-	ipath_file_ops.o \
-	ipath_fs.o \
-	ipath_init_chip.o \
-	ipath_intr.o \
-	ipath_keys.o \
-	ipath_mad.o \
-	ipath_mmap.o \
-	ipath_mr.o \
-	ipath_qp.o \
-	ipath_rc.o \
-	ipath_ruc.o \
-	ipath_sdma.o \
-	ipath_srq.o \
-	ipath_stats.o \
-	ipath_sysfs.o \
-	ipath_uc.o \
-	ipath_ud.o \
-	ipath_user_pages.o \
-	ipath_user_sdma.o \
-	ipath_verbs_mcast.o \
-	ipath_verbs.o
-
-ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
-
-ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/staging/rdma/ipath/TODO b/drivers/staging/rdma/ipath/TODO
deleted file mode 100644
index cb00158d64c8..000000000000
--- a/drivers/staging/rdma/ipath/TODO
+++ /dev/null
@@ -1,5 +0,0 @@
-The ipath driver has been moved to staging in preparation for its removal in a
-few releases. The driver will be deleted during the 4.6 merge window.
-
-Contact Dennis Dalessandro <dennis.dalessandro@intel.com> and
-Cc: linux-rdma@vger.kernel.org
diff --git a/drivers/staging/rdma/ipath/ipath_common.h b/drivers/staging/rdma/ipath/ipath_common.h
deleted file mode 100644
index 28cfe97cf1e9..000000000000
--- a/drivers/staging/rdma/ipath/ipath_common.h
+++ /dev/null
@@ -1,851 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_COMMON_H
-#define _IPATH_COMMON_H
-
-/*
- * This file contains defines, structures, etc. that are used
- * to communicate between kernel and user code.
- */
-
-
-/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */
-#define IPATH_SRC_OUI_1 0x00
-#define IPATH_SRC_OUI_2 0x11
-#define IPATH_SRC_OUI_3 0x75
-
-/* version of protocol header (known to chip also). In the long run,
- * we should be able to generate and accept a range of version numbers;
- * for now we only accept one, and it's compiled in.
- */
-#define IPS_PROTO_VERSION 2
-
-/*
- * These are compile time constants that you may want to enable or disable
- * if you are trying to debug problems with code or performance.
- * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in
- * fastpath code
- * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be
- * traced in faspath code
- * _IPATH_TRACING define as 0 if you want to remove all tracing in a
- * compilation unit
- * _IPATH_DEBUGGING define as 0 if you want to remove debug prints
- */
-
-/*
- * The value in the BTH QP field that InfiniPath uses to differentiate
- * an infinipath protocol IB packet vs standard IB transport
- */
-#define IPATH_KD_QP 0x656b79
-
-/*
- * valid states passed to ipath_set_linkstate() user call
- */
-#define IPATH_IB_LINKDOWN		0
-#define IPATH_IB_LINKARM		1
-#define IPATH_IB_LINKACTIVE		2
-#define IPATH_IB_LINKDOWN_ONLY		3
-#define IPATH_IB_LINKDOWN_SLEEP		4
-#define IPATH_IB_LINKDOWN_DISABLE	5
-#define IPATH_IB_LINK_LOOPBACK	6 /* enable local loopback */
-#define IPATH_IB_LINK_EXTERNAL	7 /* normal, disable local loopback */
-#define IPATH_IB_LINK_NO_HRTBT	8 /* disable Heartbeat, e.g. for loopback */
-#define IPATH_IB_LINK_HRTBT	9 /* enable heartbeat, normal, non-loopback */
-
-/*
- * These 3 values (SDR and DDR may be ORed for auto-speed
- * negotiation) are used for the 3rd argument to path_f_set_ib_cfg
- * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs.  They
- * are also the the possible values for ipath_link_speed_enabled and active
- * The values were chosen to match values used within the IB spec.
- */
-#define IPATH_IB_SDR 1
-#define IPATH_IB_DDR 2
-
-/*
- * stats maintained by the driver.  For now, at least, this is global
- * to all minor devices.
- */
-struct infinipath_stats {
-	/* number of interrupts taken */
-	__u64 sps_ints;
-	/* number of interrupts for errors */
-	__u64 sps_errints;
-	/* number of errors from chip (not incl. packet errors or CRC) */
-	__u64 sps_errs;
-	/* number of packet errors from chip other than CRC */
-	__u64 sps_pkterrs;
-	/* number of packets with CRC errors (ICRC and VCRC) */
-	__u64 sps_crcerrs;
-	/* number of hardware errors reported (parity, etc.) */
-	__u64 sps_hwerrs;
-	/* number of times IB link changed state unexpectedly */
-	__u64 sps_iblink;
-	__u64 sps_unused; /* was fastrcvint, no longer implemented */
-	/* number of kernel (port0) packets received */
-	__u64 sps_port0pkts;
-	/* number of "ethernet" packets sent by driver */
-	__u64 sps_ether_spkts;
-	/* number of "ethernet" packets received by driver */
-	__u64 sps_ether_rpkts;
-	/* number of SMA packets sent by driver. Obsolete. */
-	__u64 sps_sma_spkts;
-	/* number of SMA packets received by driver. Obsolete. */
-	__u64 sps_sma_rpkts;
-	/* number of times all ports rcvhdrq was full and packet dropped */
-	__u64 sps_hdrqfull;
-	/* number of times all ports egrtid was full and packet dropped */
-	__u64 sps_etidfull;
-	/*
-	 * number of times we tried to send from driver, but no pio buffers
-	 * avail
-	 */
-	__u64 sps_nopiobufs;
-	/* number of ports currently open */
-	__u64 sps_ports;
-	/* list of pkeys (other than default) accepted (0 means not set) */
-	__u16 sps_pkeys[4];
-	__u16 sps_unused16[4]; /* available; maintaining compatible layout */
-	/* number of user ports per chip (not IB ports) */
-	__u32 sps_nports;
-	/* not our interrupt, or already handled */
-	__u32 sps_nullintr;
-	/* max number of packets handled per receive call */
-	__u32 sps_maxpkts_call;
-	/* avg number of packets handled per receive call */
-	__u32 sps_avgpkts_call;
-	/* total number of pages locked */
-	__u64 sps_pagelocks;
-	/* total number of pages unlocked */
-	__u64 sps_pageunlocks;
-	/*
-	 * Number of packets dropped in kernel other than errors (ether
-	 * packets if ipath not configured, etc.)
-	 */
-	__u64 sps_krdrops;
-	__u64 sps_txeparity; /* PIO buffer parity error, recovered */
-	/* pad for future growth */
-	__u64 __sps_pad[45];
-};
-
-/*
- * These are the status bits readable (in ascii form, 64bit value)
- * from the "status" sysfs file.
- */
-#define IPATH_STATUS_INITTED       0x1	/* basic initialization done */
-#define IPATH_STATUS_DISABLED      0x2	/* hardware disabled */
-/* Device has been disabled via admin request */
-#define IPATH_STATUS_ADMIN_DISABLED    0x4
-/* Chip has been found and initted */
-#define IPATH_STATUS_CHIP_PRESENT 0x20
-/* IB link is at ACTIVE, usable for data traffic */
-#define IPATH_STATUS_IB_READY     0x40
-/* link is configured, LID, MTU, etc. have been set */
-#define IPATH_STATUS_IB_CONF      0x80
-/* no link established, probably no cable */
-#define IPATH_STATUS_IB_NOCABLE  0x100
-/* A Fatal hardware error has occurred. */
-#define IPATH_STATUS_HWERROR     0x200
-
-/*
- * The list of usermode accessible registers.  Also see Reg_* later in file.
- */
-typedef enum _ipath_ureg {
-	/* (RO)  DMA RcvHdr to be used next. */
-	ur_rcvhdrtail = 0,
-	/* (RW)  RcvHdr entry to be processed next by host. */
-	ur_rcvhdrhead = 1,
-	/* (RO)  Index of next Eager index to use. */
-	ur_rcvegrindextail = 2,
-	/* (RW)  Eager TID to be processed next */
-	ur_rcvegrindexhead = 3,
-	/* For internal use only; max register number. */
-	_IPATH_UregMax
-} ipath_ureg;
-
-/* bit values for spi_runtime_flags */
-#define IPATH_RUNTIME_HT	0x1
-#define IPATH_RUNTIME_PCIE	0x2
-#define IPATH_RUNTIME_FORCE_WC_ORDER	0x4
-#define IPATH_RUNTIME_RCVHDR_COPY	0x8
-#define IPATH_RUNTIME_MASTER	0x10
-#define IPATH_RUNTIME_NODMA_RTAIL 0x80
-#define IPATH_RUNTIME_SDMA	      0x200
-#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
-#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
-
-/*
- * This structure is returned by ipath_userinit() immediately after
- * open to get implementation-specific info, and info specific to this
- * instance.
- *
- * This struct must have explict pad fields where type sizes
- * may result in different alignments between 32 and 64 bit
- * programs, since the 64 bit * bit kernel requires the user code
- * to have matching offsets
- */
-struct ipath_base_info {
-	/* version of hardware, for feature checking. */
-	__u32 spi_hw_version;
-	/* version of software, for feature checking. */
-	__u32 spi_sw_version;
-	/* InfiniPath port assigned, goes into sent packets */
-	__u16 spi_port;
-	__u16 spi_subport;
-	/*
-	 * IB MTU, packets IB data must be less than this.
-	 * The MTU is in bytes, and will be a multiple of 4 bytes.
-	 */
-	__u32 spi_mtu;
-	/*
-	 * Size of a PIO buffer.  Any given packet's total size must be less
-	 * than this (in words).  Included is the starting control word, so
-	 * if 513 is returned, then total pkt size is 512 words or less.
-	 */
-	__u32 spi_piosize;
-	/* size of the TID cache in infinipath, in entries */
-	__u32 spi_tidcnt;
-	/* size of the TID Eager list in infinipath, in entries */
-	__u32 spi_tidegrcnt;
-	/* size of a single receive header queue entry in words. */
-	__u32 spi_rcvhdrent_size;
-	/*
-	 * Count of receive header queue entries allocated.
-	 * This may be less than the spu_rcvhdrcnt passed in!.
-	 */
-	__u32 spi_rcvhdr_cnt;
-
-	/* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */
-	__u32 spi_runtime_flags;
-
-	/* address where receive buffer queue is mapped into */
-	__u64 spi_rcvhdr_base;
-
-	/* user program. */
-
-	/* base address of eager TID receive buffers. */
-	__u64 spi_rcv_egrbufs;
-
-	/* Allocated by initialization code, not by protocol. */
-
-	/*
-	 * Size of each TID buffer in host memory, starting at
-	 * spi_rcv_egrbufs.  The buffers are virtually contiguous.
-	 */
-	__u32 spi_rcv_egrbufsize;
-	/*
-	 * The special QP (queue pair) value that identifies an infinipath
-	 * protocol packet from standard IB packets.  More, probably much
-	 * more, to be added.
-	 */
-	__u32 spi_qpair;
-
-	/*
-	 * User register base for init code, not to be used directly by
-	 * protocol or applications.
-	 */
-	__u64 __spi_uregbase;
-	/*
-	 * Maximum buffer size in bytes that can be used in a single TID
-	 * entry (assuming the buffer is aligned to this boundary).  This is
-	 * the minimum of what the hardware and software support Guaranteed
-	 * to be a power of 2.
-	 */
-	__u32 spi_tid_maxsize;
-	/*
-	 * alignment of each pio send buffer (byte count
-	 * to add to spi_piobufbase to get to second buffer)
-	 */
-	__u32 spi_pioalign;
-	/*
-	 * The index of the first pio buffer available to this process;
-	 * needed to do lookup in spi_pioavailaddr; not added to
-	 * spi_piobufbase.
-	 */
-	__u32 spi_pioindex;
-	 /* number of buffers mapped for this process */
-	__u32 spi_piocnt;
-
-	/*
-	 * Base address of writeonly pio buffers for this process.
-	 * Each buffer has spi_piosize words, and is aligned on spi_pioalign
-	 * boundaries.  spi_piocnt buffers are mapped from this address
-	 */
-	__u64 spi_piobufbase;
-
-	/*
-	 * Base address of readonly memory copy of the pioavail registers.
-	 * There are 2 bits for each buffer.
-	 */
-	__u64 spi_pioavailaddr;
-
-	/*
-	 * Address where driver updates a copy of the interface and driver
-	 * status (IPATH_STATUS_*) as a 64 bit value.  It's followed by a
-	 * string indicating hardware error, if there was one.
-	 */
-	__u64 spi_status;
-
-	/* number of chip ports available to user processes */
-	__u32 spi_nports;
-	/* unit number of chip we are using */
-	__u32 spi_unit;
-	/* num bufs in each contiguous set */
-	__u32 spi_rcv_egrperchunk;
-	/* size in bytes of each contiguous set */
-	__u32 spi_rcv_egrchunksize;
-	/* total size of mmap to cover full rcvegrbuffers */
-	__u32 spi_rcv_egrbuftotlen;
-	__u32 spi_filler_for_align;
-	/* address of readonly memory copy of the rcvhdrq tail register. */
-	__u64 spi_rcvhdr_tailaddr;
-
-	/* shared memory pages for subports if port is shared */
-	__u64 spi_subport_uregbase;
-	__u64 spi_subport_rcvegrbuf;
-	__u64 spi_subport_rcvhdr_base;
-
-	/* shared memory page for hardware port if it is shared */
-	__u64 spi_port_uregbase;
-	__u64 spi_port_rcvegrbuf;
-	__u64 spi_port_rcvhdr_base;
-	__u64 spi_port_rcvhdr_tailaddr;
-
-} __attribute__ ((aligned(8)));
-
-
-/*
- * This version number is given to the driver by the user code during
- * initialization in the spu_userversion field of ipath_user_info, so
- * the driver can check for compatibility with user code.
- *
- * The major version changes when data structures
- * change in an incompatible way.  The driver must be the same or higher
- * for initialization to succeed.  In some cases, a higher version
- * driver will not interoperate with older software, and initialization
- * will return an error.
- */
-#define IPATH_USER_SWMAJOR 1
-
-/*
- * Minor version differences are always compatible
- * a within a major version, however if user software is larger
- * than driver software, some new features and/or structure fields
- * may not be implemented; the user code must deal with this if it
- * cares, or it must abort after initialization reports the difference.
- */
-#define IPATH_USER_SWMINOR 6
-
-#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
-
-#define IPATH_KERN_TYPE 0
-
-/*
- * Similarly, this is the kernel version going back to the user.  It's
- * slightly different, in that we want to tell if the driver was built as
- * part of a QLogic release, or from the driver from openfabrics.org,
- * kernel.org, or a standard distribution, for support reasons.
- * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied.
- *
- * It's returned by the driver to the user code during initialization in the
- * spi_sw_version field of ipath_base_info, so the user code can in turn
- * check for compatibility with the kernel.
-*/
-#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION)
-
-/*
- * This structure is passed to ipath_userinit() to tell the driver where
- * user code buffers are, sizes, etc.   The offsets and sizes of the
- * fields must remain unchanged, for binary compatibility.  It can
- * be extended, if userversion is changed so user code can tell, if needed
- */
-struct ipath_user_info {
-	/*
-	 * version of user software, to detect compatibility issues.
-	 * Should be set to IPATH_USER_SWVERSION.
-	 */
-	__u32 spu_userversion;
-
-	/* desired number of receive header queue entries */
-	__u32 spu_rcvhdrcnt;
-
-	/* size of struct base_info to write to */
-	__u32 spu_base_info_size;
-
-	/*
-	 * number of words in KD protocol header
-	 * This tells InfiniPath how many words to copy to rcvhdrq.  If 0,
-	 * kernel uses a default.  Once set, attempts to set any other value
-	 * are an error (EAGAIN) until driver is reloaded.
-	 */
-	__u32 spu_rcvhdrsize;
-
-	/*
-	 * If two or more processes wish to share a port, each process
-	 * must set the spu_subport_cnt and spu_subport_id to the same
-	 * values.  The only restriction on the spu_subport_id is that
-	 * it be unique for a given node.
-	 */
-	__u16 spu_subport_cnt;
-	__u16 spu_subport_id;
-
-	__u32 spu_unused; /* kept for compatible layout */
-
-	/*
-	 * address of struct base_info to write to
-	 */
-	__u64 spu_base_info;
-
-} __attribute__ ((aligned(8)));
-
-/* User commands. */
-
-#define IPATH_CMD_MIN		16
-
-#define __IPATH_CMD_USER_INIT	16	/* old set up userspace (for old user code) */
-#define IPATH_CMD_PORT_INFO	17	/* find out what resources we got */
-#define IPATH_CMD_RECV_CTRL	18	/* control receipt of packets */
-#define IPATH_CMD_TID_UPDATE	19	/* update expected TID entries */
-#define IPATH_CMD_TID_FREE	20	/* free expected TID entries */
-#define IPATH_CMD_SET_PART_KEY	21	/* add partition key */
-#define __IPATH_CMD_SLAVE_INFO	22	/* return info on slave processes (for old user code) */
-#define IPATH_CMD_ASSIGN_PORT	23	/* allocate HCA and port */
-#define IPATH_CMD_USER_INIT 	24	/* set up userspace */
-#define IPATH_CMD_UNUSED_1	25
-#define IPATH_CMD_UNUSED_2	26
-#define IPATH_CMD_PIOAVAILUPD	27	/* force an update of PIOAvail reg */
-#define IPATH_CMD_POLL_TYPE	28	/* set the kind of polling we want */
-#define IPATH_CMD_ARMLAUNCH_CTRL	29 /* armlaunch detection control */
-/* 30 is unused */
-#define IPATH_CMD_SDMA_INFLIGHT 31	/* sdma inflight counter request */
-#define IPATH_CMD_SDMA_COMPLETE 32	/* sdma completion counter request */
-
-/*
- * Poll types
- */
-#define IPATH_POLL_TYPE_URGENT	 0x01
-#define IPATH_POLL_TYPE_OVERFLOW 0x02
-
-struct ipath_port_info {
-	__u32 num_active;	/* number of active units */
-	__u32 unit;		/* unit (chip) assigned to caller */
-	__u16 port;		/* port on unit assigned to caller */
-	__u16 subport;		/* subport on unit assigned to caller */
-	__u16 num_ports;	/* number of ports available on unit */
-	__u16 num_subports;	/* number of subports opened on port */
-};
-
-struct ipath_tid_info {
-	__u32 tidcnt;
-	/* make structure same size in 32 and 64 bit */
-	__u32 tid__unused;
-	/* virtual address of first page in transfer */
-	__u64 tidvaddr;
-	/* pointer (same size 32/64 bit) to __u16 tid array */
-	__u64 tidlist;
-
-	/*
-	 * pointer (same size 32/64 bit) to bitmap of TIDs used
-	 * for this call; checked for being large enough at open
-	 */
-	__u64 tidmap;
-};
-
-struct ipath_cmd {
-	__u32 type;			/* command type */
-	union {
-		struct ipath_tid_info tid_info;
-		struct ipath_user_info user_info;
-
-		/*
-		 * address in userspace where we should put the sdma
-		 * inflight counter
-		 */
-		__u64 sdma_inflight;
-		/*
-		 * address in userspace where we should put the sdma
-		 * completion counter
-		 */
-		__u64 sdma_complete;
-		/* address in userspace of struct ipath_port_info to
-		   write result to */
-		__u64 port_info;
-		/* enable/disable receipt of packets */
-		__u32 recv_ctrl;
-		/* enable/disable armlaunch errors (non-zero to enable) */
-		__u32 armlaunch_ctrl;
-		/* partition key to set */
-		__u16 part_key;
-		/* user address of __u32 bitmask of active slaves */
-		__u64 slave_mask_addr;
-		/* type of polling we want */
-		__u16 poll_type;
-	} cmd;
-};
-
-struct ipath_iovec {
-	/* Pointer to data, but same size 32 and 64 bit */
-	__u64 iov_base;
-
-	/*
-	 * Length of data; don't need 64 bits, but want
-	 * ipath_sendpkt to remain same size as before 32 bit changes, so...
-	 */
-	__u64 iov_len;
-};
-
-/*
- * Describes a single packet for send.  Each packet can have one or more
- * buffers, but the total length (exclusive of IB headers) must be less
- * than the MTU, and if using the PIO method, entire packet length,
- * including IB headers, must be less than the ipath_piosize value (words).
- * Use of this necessitates including sys/uio.h
- */
-struct __ipath_sendpkt {
-	__u32 sps_flags;	/* flags for packet (TBD) */
-	__u32 sps_cnt;		/* number of entries to use in sps_iov */
-	/* array of iov's describing packet. TEMPORARY */
-	struct ipath_iovec sps_iov[4];
-};
-
-/*
- * diagnostics can send a packet by "writing" one of the following
- * two structs to diag data special file
- * The first is the legacy version for backward compatibility
- */
-struct ipath_diag_pkt {
-	__u32 unit;
-	__u64 data;
-	__u32 len;
-};
-
-/* The second diag_pkt struct is the expanded version that allows
- * more control over the packet, specifically, by allowing a custom
- * pbc (+ static rate) qword, so that special modes and deliberate
- * changes to CRCs can be used. The elements were also re-ordered
- * for better alignment and to avoid padding issues.
- */
-struct ipath_diag_xpkt {
-	__u64 data;
-	__u64 pbc_wd;
-	__u32 unit;
-	__u32 len;
-};
-
-/*
- * Data layout in I2C flash (for GUID, etc.)
- * All fields are little-endian binary unless otherwise stated
- */
-#define IPATH_FLASH_VERSION 2
-struct ipath_flash {
-	/* flash layout version (IPATH_FLASH_VERSION) */
-	__u8 if_fversion;
-	/* checksum protecting if_length bytes */
-	__u8 if_csum;
-	/*
-	 * valid length (in use, protected by if_csum), including
-	 * if_fversion and if_csum themselves)
-	 */
-	__u8 if_length;
-	/* the GUID, in network order */
-	__u8 if_guid[8];
-	/* number of GUIDs to use, starting from if_guid */
-	__u8 if_numguid;
-	/* the (last 10 characters of) board serial number, in ASCII */
-	char if_serial[12];
-	/* board mfg date (YYYYMMDD ASCII) */
-	char if_mfgdate[8];
-	/* last board rework/test date (YYYYMMDD ASCII) */
-	char if_testdate[8];
-	/* logging of error counts, TBD */
-	__u8 if_errcntp[4];
-	/* powered on hours, updated at driver unload */
-	__u8 if_powerhour[2];
-	/* ASCII free-form comment field */
-	char if_comment[32];
-	/* Backwards compatible prefix for longer QLogic Serial Numbers */
-	char if_sprefix[4];
-	/* 82 bytes used, min flash size is 128 bytes */
-	__u8 if_future[46];
-};
-
-/*
- * These are the counters implemented in the chip, and are listed in order.
- * The InterCaps naming is taken straight from the chip spec.
- */
-struct infinipath_counters {
-	__u64 LBIntCnt;
-	__u64 LBFlowStallCnt;
-	__u64 TxSDmaDescCnt;	/* was Reserved1 */
-	__u64 TxUnsupVLErrCnt;
-	__u64 TxDataPktCnt;
-	__u64 TxFlowPktCnt;
-	__u64 TxDwordCnt;
-	__u64 TxLenErrCnt;
-	__u64 TxMaxMinLenErrCnt;
-	__u64 TxUnderrunCnt;
-	__u64 TxFlowStallCnt;
-	__u64 TxDroppedPktCnt;
-	__u64 RxDroppedPktCnt;
-	__u64 RxDataPktCnt;
-	__u64 RxFlowPktCnt;
-	__u64 RxDwordCnt;
-	__u64 RxLenErrCnt;
-	__u64 RxMaxMinLenErrCnt;
-	__u64 RxICRCErrCnt;
-	__u64 RxVCRCErrCnt;
-	__u64 RxFlowCtrlErrCnt;
-	__u64 RxBadFormatCnt;
-	__u64 RxLinkProblemCnt;
-	__u64 RxEBPCnt;
-	__u64 RxLPCRCErrCnt;
-	__u64 RxBufOvflCnt;
-	__u64 RxTIDFullErrCnt;
-	__u64 RxTIDValidErrCnt;
-	__u64 RxPKeyMismatchCnt;
-	__u64 RxP0HdrEgrOvflCnt;
-	__u64 RxP1HdrEgrOvflCnt;
-	__u64 RxP2HdrEgrOvflCnt;
-	__u64 RxP3HdrEgrOvflCnt;
-	__u64 RxP4HdrEgrOvflCnt;
-	__u64 RxP5HdrEgrOvflCnt;
-	__u64 RxP6HdrEgrOvflCnt;
-	__u64 RxP7HdrEgrOvflCnt;
-	__u64 RxP8HdrEgrOvflCnt;
-	__u64 RxP9HdrEgrOvflCnt;	/* was Reserved6 */
-	__u64 RxP10HdrEgrOvflCnt;	/* was Reserved7 */
-	__u64 RxP11HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 RxP12HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 RxP13HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 RxP14HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 RxP15HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 RxP16HdrEgrOvflCnt;	/* new for IBA7220 */
-	__u64 IBStatusChangeCnt;
-	__u64 IBLinkErrRecoveryCnt;
-	__u64 IBLinkDownedCnt;
-	__u64 IBSymbolErrCnt;
-	/* The following are new for IBA7220 */
-	__u64 RxVL15DroppedPktCnt;
-	__u64 RxOtherLocalPhyErrCnt;
-	__u64 PcieRetryBufDiagQwordCnt;
-	__u64 ExcessBufferOvflCnt;
-	__u64 LocalLinkIntegrityErrCnt;
-	__u64 RxVlErrCnt;
-	__u64 RxDlidFltrCnt;
-};
-
-/*
- * The next set of defines are for packet headers, and chip register
- * and memory bits that are visible to and/or used by user-mode software
- * The other bits that are used only by the driver or diags are in
- * ipath_registers.h
- */
-
-/* RcvHdrFlags bits */
-#define INFINIPATH_RHF_LENGTH_MASK 0x7FF
-#define INFINIPATH_RHF_LENGTH_SHIFT 0
-#define INFINIPATH_RHF_RCVTYPE_MASK 0x7
-#define INFINIPATH_RHF_RCVTYPE_SHIFT 11
-#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF
-#define INFINIPATH_RHF_EGRINDEX_SHIFT 16
-#define INFINIPATH_RHF_SEQ_MASK 0xF
-#define INFINIPATH_RHF_SEQ_SHIFT 0
-#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF
-#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4
-#define INFINIPATH_RHF_H_ICRCERR   0x80000000
-#define INFINIPATH_RHF_H_VCRCERR   0x40000000
-#define INFINIPATH_RHF_H_PARITYERR 0x20000000
-#define INFINIPATH_RHF_H_LENERR    0x10000000
-#define INFINIPATH_RHF_H_MTUERR    0x08000000
-#define INFINIPATH_RHF_H_IHDRERR   0x04000000
-#define INFINIPATH_RHF_H_TIDERR    0x02000000
-#define INFINIPATH_RHF_H_MKERR     0x01000000
-#define INFINIPATH_RHF_H_IBERR     0x00800000
-#define INFINIPATH_RHF_H_ERR_MASK  0xFF800000
-#define INFINIPATH_RHF_L_USE_EGR   0x80000000
-#define INFINIPATH_RHF_L_SWA       0x00008000
-#define INFINIPATH_RHF_L_SWB       0x00004000
-
-/* infinipath header fields */
-#define INFINIPATH_I_VERS_MASK 0xF
-#define INFINIPATH_I_VERS_SHIFT 28
-#define INFINIPATH_I_PORT_MASK 0xF
-#define INFINIPATH_I_PORT_SHIFT 24
-#define INFINIPATH_I_TID_MASK 0x7FF
-#define INFINIPATH_I_TID_SHIFT 13
-#define INFINIPATH_I_OFFSET_MASK 0x1FFF
-#define INFINIPATH_I_OFFSET_SHIFT 0
-
-/* K_PktFlags bits */
-#define INFINIPATH_KPF_INTR 0x1
-#define INFINIPATH_KPF_SUBPORT_MASK 0x3
-#define INFINIPATH_KPF_SUBPORT_SHIFT 1
-
-#define INFINIPATH_MAX_SUBPORT	4
-
-/* SendPIO per-buffer control */
-#define INFINIPATH_SP_TEST    0x40
-#define INFINIPATH_SP_TESTEBP 0x20
-#define INFINIPATH_SP_TRIGGER_SHIFT  15
-
-/* SendPIOAvail bits */
-#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
-#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0
-
-/* infinipath header format */
-struct ipath_header {
-	/*
-	 * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
-	 * 14 bits before ECO change ~28 Dec 03.  After that, Vers 4,
-	 * Port 4, TID 11, offset 13.
-	 */
-	__le32 ver_port_tid_offset;
-	__le16 chksum;
-	__le16 pkt_flags;
-};
-
-/* infinipath user message header format.
- * This structure contains the first 4 fields common to all protocols
- * that employ infinipath.
- */
-struct ipath_message_header {
-	__be16 lrh[4];
-	__be32 bth[3];
-	/* fields below this point are in host byte order */
-	struct ipath_header iph;
-	__u8 sub_opcode;
-};
-
-/* infinipath ethernet header format */
-struct ether_header {
-	__be16 lrh[4];
-	__be32 bth[3];
-	struct ipath_header iph;
-	__u8 sub_opcode;
-	__u8 cmd;
-	__be16 lid;
-	__u16 mac[3];
-	__u8 frag_num;
-	__u8 seq_num;
-	__le32 len;
-	/* MUST be of word size due to PIO write requirements */
-	__le32 csum;
-	__le16 csum_offset;
-	__le16 flags;
-	__u16 first_2_bytes;
-	__u8 unused[2];		/* currently unused */
-};
-
-
-/* IB - LRH header consts */
-#define IPATH_LRH_GRH 0x0003	/* 1. word of IB LRH - next header: GRH */
-#define IPATH_LRH_BTH 0x0002	/* 1. word of IB LRH - next header: BTH */
-
-/* misc. */
-#define SIZE_OF_CRC 1
-
-#define IPATH_DEFAULT_P_KEY 0xFFFF
-#define IPATH_PERMISSIVE_LID 0xFFFF
-#define IPATH_AETH_CREDIT_SHIFT 24
-#define IPATH_AETH_CREDIT_MASK 0x1F
-#define IPATH_AETH_CREDIT_INVAL 0x1F
-#define IPATH_PSN_MASK 0xFFFFFF
-#define IPATH_MSN_MASK 0xFFFFFF
-#define IPATH_QPN_MASK 0xFFFFFF
-#define IPATH_MULTICAST_LID_BASE 0xC000
-#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK
-#define IPATH_MULTICAST_QPN 0xFFFFFF
-
-/* Receive Header Queue: receive type (from infinipath) */
-#define RCVHQ_RCV_TYPE_EXPECTED  0
-#define RCVHQ_RCV_TYPE_EAGER     1
-#define RCVHQ_RCV_TYPE_NON_KD    2
-#define RCVHQ_RCV_TYPE_ERROR     3
-
-
-/* sub OpCodes - ith4x  */
-#define IPATH_ITH4X_OPCODE_ENCAP 0x81
-#define IPATH_ITH4X_OPCODE_LID_ARP 0x82
-
-#define IPATH_HEADER_QUEUE_WORDS 9
-
-/* functions for extracting fields from rcvhdrq entries for the driver.
- */
-static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf)
-{
-	return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK;
-}
-
-static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf)
-{
-	return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
-	    & INFINIPATH_RHF_RCVTYPE_MASK;
-}
-
-static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf)
-{
-	return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
-		& INFINIPATH_RHF_LENGTH_MASK) << 2;
-}
-
-static inline __u32 ipath_hdrget_index(const __le32 * rbuf)
-{
-	return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
-	    & INFINIPATH_RHF_EGRINDEX_MASK;
-}
-
-static inline __u32 ipath_hdrget_seq(const __le32 *rbuf)
-{
-	return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT)
-		& INFINIPATH_RHF_SEQ_MASK;
-}
-
-static inline __u32 ipath_hdrget_offset(const __le32 *rbuf)
-{
-	return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT)
-		& INFINIPATH_RHF_HDRQ_OFFSET_MASK;
-}
-
-static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf)
-{
-	return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR;
-}
-
-static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword)
-{
-	return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
-	    & INFINIPATH_I_VERS_MASK;
-}
-
-#endif				/* _IPATH_COMMON_H */
diff --git a/drivers/staging/rdma/ipath/ipath_cq.c b/drivers/staging/rdma/ipath/ipath_cq.c
deleted file mode 100644
index e9dd9112e718..000000000000
--- a/drivers/staging/rdma/ipath/ipath_cq.c
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_cq_enter - add a new entry to the completion queue
- * @cq: completion queue
- * @entry: work completion entry to add
- * @sig: true if @entry is a solicitated entry
- *
- * This may be called with qp->s_lock held.
- */
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
-{
-	struct ipath_cq_wc *wc;
-	unsigned long flags;
-	u32 head;
-	u32 next;
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	/*
-	 * Note that the head pointer might be writable by user processes.
-	 * Take care to verify it is a sane value.
-	 */
-	wc = cq->queue;
-	head = wc->head;
-	if (head >= (unsigned) cq->ibcq.cqe) {
-		head = cq->ibcq.cqe;
-		next = 0;
-	} else
-		next = head + 1;
-	if (unlikely(next == wc->tail)) {
-		spin_unlock_irqrestore(&cq->lock, flags);
-		if (cq->ibcq.event_handler) {
-			struct ib_event ev;
-
-			ev.device = cq->ibcq.device;
-			ev.element.cq = &cq->ibcq;
-			ev.event = IB_EVENT_CQ_ERR;
-			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
-		}
-		return;
-	}
-	if (cq->ip) {
-		wc->uqueue[head].wr_id = entry->wr_id;
-		wc->uqueue[head].status = entry->status;
-		wc->uqueue[head].opcode = entry->opcode;
-		wc->uqueue[head].vendor_err = entry->vendor_err;
-		wc->uqueue[head].byte_len = entry->byte_len;
-		wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
-		wc->uqueue[head].qp_num = entry->qp->qp_num;
-		wc->uqueue[head].src_qp = entry->src_qp;
-		wc->uqueue[head].wc_flags = entry->wc_flags;
-		wc->uqueue[head].pkey_index = entry->pkey_index;
-		wc->uqueue[head].slid = entry->slid;
-		wc->uqueue[head].sl = entry->sl;
-		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
-		wc->uqueue[head].port_num = entry->port_num;
-		/* Make sure entry is written before the head index. */
-		smp_wmb();
-	} else
-		wc->kqueue[head] = *entry;
-	wc->head = next;
-
-	if (cq->notify == IB_CQ_NEXT_COMP ||
-	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
-		cq->notify = IB_CQ_NONE;
-		cq->triggered++;
-		/*
-		 * This will cause send_complete() to be called in
-		 * another thread.
-		 */
-		tasklet_hi_schedule(&cq->comptask);
-	}
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	if (entry->status != IB_WC_SUCCESS)
-		to_idev(cq->ibcq.device)->n_wqe_errs++;
-}
-
-/**
- * ipath_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-	struct ipath_cq *cq = to_icq(ibcq);
-	struct ipath_cq_wc *wc;
-	unsigned long flags;
-	int npolled;
-	u32 tail;
-
-	/* The kernel can only poll a kernel completion queue */
-	if (cq->ip) {
-		npolled = -EINVAL;
-		goto bail;
-	}
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	wc = cq->queue;
-	tail = wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (tail == wc->head)
-			break;
-		/* The kernel doesn't need a RMB since it has the lock. */
-		*entry = wc->kqueue[tail];
-		if (tail >= cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	wc->tail = tail;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-	return npolled;
-}
-
-static void send_complete(unsigned long data)
-{
-	struct ipath_cq *cq = (struct ipath_cq *)data;
-
-	/*
-	 * The completion handler will most likely rearm the notification
-	 * and poll for all pending entries.  If a new completion entry
-	 * is added while we are in this routine, tasklet_hi_schedule()
-	 * won't call us again until we return so we check triggered to
-	 * see if we need to call the handler again.
-	 */
-	for (;;) {
-		u8 triggered = cq->triggered;
-
-		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
-
-		if (cq->triggered == triggered)
-			return;
-	}
-}
-
-/**
- * ipath_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
- * @attr: creation attributes
- * @context: unused by the InfiniPath driver
- * @udata: unused by the InfiniPath driver
- *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
- * Called by ib_create_cq() in the generic verbs code.
- */
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
-			      const struct ib_cq_init_attr *attr,
-			      struct ib_ucontext *context,
-			      struct ib_udata *udata)
-{
-	int entries = attr->cqe;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_cq *cq;
-	struct ipath_cq_wc *wc;
-	struct ib_cq *ret;
-	u32 sz;
-
-	if (attr->flags)
-		return ERR_PTR(-EINVAL);
-
-	if (entries < 1 || entries > ib_ipath_max_cqes) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	/* Allocate the completion queue structure. */
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-	if (!cq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Allocate the completion queue entries and head/tail pointers.
-	 * This is allocated separately so that it can be resized and
-	 * also mapped into user space.
-	 * We need to use vmalloc() in order to support mmap and large
-	 * numbers of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
-	else
-		sz += sizeof(struct ib_wc) * (entries + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_cq;
-	}
-
-	/*
-	 * Return the address of the WC as the offset to mmap.
-	 * See ipath_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-
-		cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
-		if (!cq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wc;
-		}
-
-		err = ib_copy_to_udata(udata, &cq->ip->offset,
-				       sizeof(cq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		cq->ip = NULL;
-
-	spin_lock(&dev->n_cqs_lock);
-	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
-		spin_unlock(&dev->n_cqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_cqs_allocated++;
-	spin_unlock(&dev->n_cqs_lock);
-
-	if (cq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	/*
-	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
-	 * The number of entries should be >= the number requested or return
-	 * an error.
-	 */
-	cq->ibcq.cqe = entries;
-	cq->notify = IB_CQ_NONE;
-	cq->triggered = 0;
-	spin_lock_init(&cq->lock);
-	tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-	wc->head = 0;
-	wc->tail = 0;
-	cq->queue = wc;
-
-	ret = &cq->ibcq;
-
-	goto done;
-
-bail_ip:
-	kfree(cq->ip);
-bail_wc:
-	vfree(wc);
-bail_cq:
-	kfree(cq);
-done:
-	return ret;
-}
-
-/**
- * ipath_destroy_cq - destroy a completion queue
- * @ibcq: the completion queue to destroy.
- *
- * Returns 0 for success.
- *
- * Called by ib_destroy_cq() in the generic verbs code.
- */
-int ipath_destroy_cq(struct ib_cq *ibcq)
-{
-	struct ipath_ibdev *dev = to_idev(ibcq->device);
-	struct ipath_cq *cq = to_icq(ibcq);
-
-	tasklet_kill(&cq->comptask);
-	spin_lock(&dev->n_cqs_lock);
-	dev->n_cqs_allocated--;
-	spin_unlock(&dev->n_cqs_lock);
-	if (cq->ip)
-		kref_put(&cq->ip->ref, ipath_release_mmap_info);
-	else
-		vfree(cq->queue);
-	kfree(cq);
-
-	return 0;
-}
-
-/**
- * ipath_req_notify_cq - change the notification type for a completion queue
- * @ibcq: the completion queue
- * @notify_flags: the type of notification to request
- *
- * Returns 0 for success.
- *
- * This may be called from interrupt context.  Also called by
- * ib_req_notify_cq() in the generic verbs code.
- */
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-	struct ipath_cq *cq = to_icq(ibcq);
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&cq->lock, flags);
-	/*
-	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
-	 */
-	if (cq->notify != IB_CQ_NEXT_COMP)
-		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
-
-	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
-	    cq->queue->head != cq->queue->tail)
-		ret = 1;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	return ret;
-}
-
-/**
- * ipath_resize_cq - change the size of the CQ
- * @ibcq: the completion queue
- *
- * Returns 0 for success.
- */
-int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
-{
-	struct ipath_cq *cq = to_icq(ibcq);
-	struct ipath_cq_wc *old_wc;
-	struct ipath_cq_wc *wc;
-	u32 head, tail, n;
-	int ret;
-	u32 sz;
-
-	if (cqe < 1 || cqe > ib_ipath_max_cqes) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
-	else
-		sz += sizeof(struct ib_wc) * (cqe + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	/* Check that we can write the offset to mmap. */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		__u64 offset = 0;
-
-		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
-		if (ret)
-			goto bail_free;
-	}
-
-	spin_lock_irq(&cq->lock);
-	/*
-	 * Make sure head and tail are sane since they
-	 * might be user writable.
-	 */
-	old_wc = cq->queue;
-	head = old_wc->head;
-	if (head > (u32) cq->ibcq.cqe)
-		head = (u32) cq->ibcq.cqe;
-	tail = old_wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	if (head < tail)
-		n = cq->ibcq.cqe + 1 + head - tail;
-	else
-		n = head - tail;
-	if (unlikely((u32)cqe < n)) {
-		ret = -EINVAL;
-		goto bail_unlock;
-	}
-	for (n = 0; tail != head; n++) {
-		if (cq->ip)
-			wc->uqueue[n] = old_wc->uqueue[tail];
-		else
-			wc->kqueue[n] = old_wc->kqueue[tail];
-		if (tail == (u32) cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	cq->ibcq.cqe = cqe;
-	wc->head = n;
-	wc->tail = 0;
-	cq->queue = wc;
-	spin_unlock_irq(&cq->lock);
-
-	vfree(old_wc);
-
-	if (cq->ip) {
-		struct ipath_ibdev *dev = to_idev(ibcq->device);
-		struct ipath_mmap_info *ip = cq->ip;
-
-		ipath_update_mmap_info(dev, ip, sz, wc);
-
-		/*
-		 * Return the offset to mmap.
-		 * See ipath_mmap() for details.
-		 */
-		if (udata && udata->outlen >= sizeof(__u64)) {
-			ret = ib_copy_to_udata(udata, &ip->offset,
-					       sizeof(ip->offset));
-			if (ret)
-				goto bail;
-		}
-
-		spin_lock_irq(&dev->pending_lock);
-		if (list_empty(&ip->pending_mmaps))
-			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = 0;
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&cq->lock);
-bail_free:
-	vfree(wc);
-bail:
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_debug.h b/drivers/staging/rdma/ipath/ipath_debug.h
deleted file mode 100644
index 65926cd35759..000000000000
--- a/drivers/staging/rdma/ipath/ipath_debug.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_DEBUG_H
-#define _IPATH_DEBUG_H
-
-#ifndef _IPATH_DEBUGGING	/* debugging enabled or not */
-#define _IPATH_DEBUGGING 1
-#endif
-
-#if _IPATH_DEBUGGING
-
-/*
- * Mask values for debugging.  The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or disable
- * dynamically.  This can be set at modprobe time also:
- *      modprobe infinipath.ko infinipath_debug=7
- */
-
-#define __IPATH_INFO        0x1	/* generic low verbosity stuff */
-#define __IPATH_DBG         0x2	/* generic debug */
-#define __IPATH_TRSAMPLE    0x8	/* generate trace buffer sample entries */
-/* leave some low verbosity spots open */
-#define __IPATH_VERBDBG     0x40	/* very verbose debug */
-#define __IPATH_PKTDBG      0x80	/* print packet data */
-/* print process startup (init)/exit messages */
-#define __IPATH_PROCDBG     0x100
-/* print mmap/fault stuff, not using VDBG any more */
-#define __IPATH_MMDBG       0x200
-#define __IPATH_ERRPKTDBG   0x400
-#define __IPATH_USER_SEND   0x1000	/* use user mode send */
-#define __IPATH_KERNEL_SEND 0x2000	/* use kernel mode send */
-#define __IPATH_EPKTDBG     0x4000	/* print ethernet packet data */
-#define __IPATH_IPATHDBG    0x10000	/* Ethernet (IPATH) gen debug */
-#define __IPATH_IPATHWARN   0x20000	/* Ethernet (IPATH) warnings */
-#define __IPATH_IPATHERR    0x40000	/* Ethernet (IPATH) errors */
-#define __IPATH_IPATHPD     0x80000	/* Ethernet (IPATH) packet dump */
-#define __IPATH_IPATHTABLE  0x100000	/* Ethernet (IPATH) table dump */
-#define __IPATH_LINKVERBDBG 0x200000	/* very verbose linkchange debug */
-
-#else				/* _IPATH_DEBUGGING */
-
-/*
- * define all of these even with debugging off, for the few places that do
- * if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the
- * compiler eliminate the code
- */
-
-#define __IPATH_INFO      0x0	/* generic low verbosity stuff */
-#define __IPATH_DBG       0x0	/* generic debug */
-#define __IPATH_TRSAMPLE  0x0	/* generate trace buffer sample entries */
-#define __IPATH_VERBDBG   0x0	/* very verbose debug */
-#define __IPATH_PKTDBG    0x0	/* print packet data */
-#define __IPATH_PROCDBG   0x0	/* process startup (init)/exit messages */
-/* print mmap/fault stuff, not using VDBG any more */
-#define __IPATH_MMDBG     0x0
-#define __IPATH_EPKTDBG   0x0	/* print ethernet packet data */
-#define __IPATH_IPATHDBG  0x0	/* Ethernet (IPATH) table dump on */
-#define __IPATH_IPATHWARN 0x0	/* Ethernet (IPATH) warnings on   */
-#define __IPATH_IPATHERR  0x0	/* Ethernet (IPATH) errors on   */
-#define __IPATH_IPATHPD   0x0	/* Ethernet (IPATH) packet dump on   */
-#define __IPATH_IPATHTABLE 0x0	/* Ethernet (IPATH) packet dump on   */
-#define __IPATH_LINKVERBDBG 0x0	/* very verbose linkchange debug */
-
-#endif				/* _IPATH_DEBUGGING */
-
-#define __IPATH_VERBOSEDBG __IPATH_VERBDBG
-
-#endif				/* _IPATH_DEBUG_H */
diff --git a/drivers/staging/rdma/ipath/ipath_diag.c b/drivers/staging/rdma/ipath/ipath_diag.c
deleted file mode 100644
index 45802e97332e..000000000000
--- a/drivers/staging/rdma/ipath/ipath_diag.c
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains support for diagnostic functions.  It is accessed by
- * opening the ipath_diag device, normally minor number 129.  Diagnostic use
- * of the InfiniPath chip may render the chip or board unusable until the
- * driver is unloaded, or in some cases, until the system is rebooted.
- *
- * Accesses to the chip through this interface are not similar to going
- * through the /sys/bus/pci resource mmap interface.
- */
-
-#include <linux/io.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/export.h>
-#include <asm/uaccess.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-
-int ipath_diag_inuse;
-static int diag_set_link;
-
-static int ipath_diag_open(struct inode *in, struct file *fp);
-static int ipath_diag_release(struct inode *in, struct file *fp);
-static ssize_t ipath_diag_read(struct file *fp, char __user *data,
-			       size_t count, loff_t *off);
-static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
-				size_t count, loff_t *off);
-
-static const struct file_operations diag_file_ops = {
-	.owner = THIS_MODULE,
-	.write = ipath_diag_write,
-	.read = ipath_diag_read,
-	.open = ipath_diag_open,
-	.release = ipath_diag_release,
-	.llseek = default_llseek,
-};
-
-static ssize_t ipath_diagpkt_write(struct file *fp,
-				   const char __user *data,
-				   size_t count, loff_t *off);
-
-static const struct file_operations diagpkt_file_ops = {
-	.owner = THIS_MODULE,
-	.write = ipath_diagpkt_write,
-	.llseek = noop_llseek,
-};
-
-static atomic_t diagpkt_count = ATOMIC_INIT(0);
-static struct cdev *diagpkt_cdev;
-static struct device *diagpkt_dev;
-
-int ipath_diag_add(struct ipath_devdata *dd)
-{
-	char name[16];
-	int ret = 0;
-
-	if (atomic_inc_return(&diagpkt_count) == 1) {
-		ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR,
-				      "ipath_diagpkt", &diagpkt_file_ops,
-				      &diagpkt_cdev, &diagpkt_dev);
-
-		if (ret) {
-			ipath_dev_err(dd, "Couldn't create ipath_diagpkt "
-				      "device: %d", ret);
-			goto done;
-		}
-	}
-
-	snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
-
-	ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
-			      &diag_file_ops, &dd->diag_cdev,
-			      &dd->diag_dev);
-	if (ret)
-		ipath_dev_err(dd, "Couldn't create %s device: %d",
-			      name, ret);
-
-done:
-	return ret;
-}
-
-void ipath_diag_remove(struct ipath_devdata *dd)
-{
-	if (atomic_dec_and_test(&diagpkt_count))
-		ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev);
-
-	ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev);
-}
-
-/**
- * ipath_read_umem64 - read a 64-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy (multiple of 32 bits)
- *
- * This function also localizes all chip memory accesses.
- * The copy should be written such that we read full cacheline packets
- * from the chip.  This is usually used for a single qword
- *
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
-			     const void __iomem *caddr, size_t count)
-{
-	const u64 __iomem *reg_addr = caddr;
-	const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
-	int ret;
-
-	/* not very efficient, but it works for now */
-	if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	while (reg_addr < reg_end) {
-		u64 data = readq(reg_addr);
-		if (copy_to_user(uaddr, &data, sizeof(u64))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-		reg_addr++;
-		uaddr += sizeof(u64);
-	}
-	ret = 0;
-bail:
-	return ret;
-}
-
-/**
- * ipath_write_umem64 - write a 64-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: the number of bytes to copy (multiple of 32 bits)
- *
- * This is usually used for a single qword
- * NOTE:  This assumes the chip address is 64-bit aligned.
- */
-
-static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
-			      const void __user *uaddr, size_t count)
-{
-	u64 __iomem *reg_addr = caddr;
-	const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
-	int ret;
-
-	/* not very efficient, but it works for now */
-	if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	while (reg_addr < reg_end) {
-		u64 data;
-		if (copy_from_user(&data, uaddr, sizeof(data))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-		writeq(data, reg_addr);
-
-		reg_addr++;
-		uaddr += sizeof(u64);
-	}
-	ret = 0;
-bail:
-	return ret;
-}
-
-/**
- * ipath_read_umem32 - read a 32-bit quantity from the chip into user space
- * @dd: the infinipath device
- * @uaddr: the location to store the data in user memory
- * @caddr: the source chip address (full pointer, not offset)
- * @count: number of bytes to copy
- *
- * read 32 bit values, not 64 bit; for memories that only
- * support 32 bit reads; usually a single dword.
- */
-static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
-			     const void __iomem *caddr, size_t count)
-{
-	const u32 __iomem *reg_addr = caddr;
-	const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
-	int ret;
-
-	if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
-	    reg_end > (u32 __iomem *) dd->ipath_kregend) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	/* not very efficient, but it works for now */
-	while (reg_addr < reg_end) {
-		u32 data = readl(reg_addr);
-		if (copy_to_user(uaddr, &data, sizeof(data))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-
-		reg_addr++;
-		uaddr += sizeof(u32);
-
-	}
-	ret = 0;
-bail:
-	return ret;
-}
-
-/**
- * ipath_write_umem32 - write a 32-bit quantity to the chip from user space
- * @dd: the infinipath device
- * @caddr: the destination chip address (full pointer, not offset)
- * @uaddr: the source of the data in user memory
- * @count: number of bytes to copy
- *
- * write 32 bit values, not 64 bit; for memories that only
- * support 32 bit write; usually a single dword.
- */
-
-static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr,
-			      const void __user *uaddr, size_t count)
-{
-	u32 __iomem *reg_addr = caddr;
-	const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
-	int ret;
-
-	if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
-	    reg_end > (u32 __iomem *) dd->ipath_kregend) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	while (reg_addr < reg_end) {
-		u32 data;
-		if (copy_from_user(&data, uaddr, sizeof(data))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-		writel(data, reg_addr);
-
-		reg_addr++;
-		uaddr += sizeof(u32);
-	}
-	ret = 0;
-bail:
-	return ret;
-}
-
-static int ipath_diag_open(struct inode *in, struct file *fp)
-{
-	int unit = iminor(in) - IPATH_DIAG_MINOR_BASE;
-	struct ipath_devdata *dd;
-	int ret;
-
-	mutex_lock(&ipath_mutex);
-
-	if (ipath_diag_inuse) {
-		ret = -EBUSY;
-		goto bail;
-	}
-
-	dd = ipath_lookup(unit);
-
-	if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) ||
-	    !dd->ipath_kregbase) {
-		ret = -ENODEV;
-		goto bail;
-	}
-
-	fp->private_data = dd;
-	ipath_diag_inuse = -2;
-	diag_set_link = 0;
-	ret = 0;
-
-	/* Only expose a way to reset the device if we
-	   make it into diag mode. */
-	ipath_expose_reset(&dd->pcidev->dev);
-
-bail:
-	mutex_unlock(&ipath_mutex);
-
-	return ret;
-}
-
-/**
- * ipath_diagpkt_write - write an IB packet
- * @fp: the diag data device file pointer
- * @data: ipath_diag_pkt structure saying where to get the packet
- * @count: size of data to write
- * @off: unused by this code
- */
-static ssize_t ipath_diagpkt_write(struct file *fp,
-				   const char __user *data,
-				   size_t count, loff_t *off)
-{
-	u32 __iomem *piobuf;
-	u32 plen, pbufn, maxlen_reserve;
-	struct ipath_diag_pkt odp;
-	struct ipath_diag_xpkt dp;
-	u32 *tmpbuf = NULL;
-	struct ipath_devdata *dd;
-	ssize_t ret = 0;
-	u64 val;
-	u32 l_state, lt_state; /* LinkState, LinkTrainingState */
-
-
-	if (count == sizeof(dp)) {
-		if (copy_from_user(&dp, data, sizeof(dp))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-	} else if (count == sizeof(odp)) {
-		if (copy_from_user(&odp, data, sizeof(odp))) {
-			ret = -EFAULT;
-			goto bail;
-		}
-		dp.len = odp.len;
-		dp.unit = odp.unit;
-		dp.data = odp.data;
-		dp.pbc_wd = 0;
-	} else {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/* send count must be an exact number of dwords */
-	if (dp.len & 3) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	plen = dp.len >> 2;
-
-	dd = ipath_lookup(dp.unit);
-	if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
-	    !dd->ipath_kregbase) {
-		ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
-			   dp.unit);
-		ret = -ENODEV;
-		goto bail;
-	}
-
-	if (ipath_diag_inuse && !diag_set_link &&
-	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-		diag_set_link = 1;
-		ipath_cdbg(VERBOSE, "Trying to set to set link active for "
-			   "diag pkt\n");
-		ipath_set_linkstate(dd, IPATH_IB_LINKARM);
-		ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
-	}
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
-		ret = -ENODEV;
-		goto bail;
-	}
-	/*
-	 * Want to skip check for l_state if using custom PBC,
-	 * because we might be trying to force an SM packet out.
-	 * first-cut, skip _all_ state checking in that case.
-	 */
-	val = ipath_ib_state(dd, dd->ipath_lastibcstat);
-	lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
-	l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
-	if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
-	    (val != dd->ib_init && val != dd->ib_arm &&
-	    val != dd->ib_active))) {
-		ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
-			   dd->ipath_unit, (unsigned long long) val);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * need total length before first word written, plus 2 Dwords. One Dword
-	 * is for padding so we get the full user data when not aligned on
-	 * a word boundary. The other Dword is to make sure we have room for the
-	 * ICRC which gets tacked on later.
-	 */
-	maxlen_reserve = 2 * sizeof(u32);
-	if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
-		ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
-			  dp.len, dd->ipath_ibmaxlen);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	plen = sizeof(u32) + dp.len;
-
-	tmpbuf = vmalloc(plen);
-	if (!tmpbuf) {
-		dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
-			 "failing\n");
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	if (copy_from_user(tmpbuf,
-			   (const void __user *) (unsigned long) dp.data,
-			   dp.len)) {
-		ret = -EFAULT;
-		goto bail;
-	}
-
-	plen >>= 2;		/* in dwords */
-
-	piobuf = ipath_getpiobuf(dd, plen, &pbufn);
-	if (!piobuf) {
-		ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
-			   dd->ipath_unit);
-		ret = -EBUSY;
-		goto bail;
-	}
-	/* disarm it just to be extra sure */
-	ipath_disarm_piobufs(dd, pbufn, 1);
-
-	if (ipath_debug & __IPATH_PKTDBG)
-		ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
-			   dd->ipath_unit, plen - 1, pbufn);
-
-	if (dp.pbc_wd == 0)
-		dp.pbc_wd = plen;
-	writeq(dp.pbc_wd, piobuf);
-	/*
-	 * Copy all by the trigger word, then flush, so it's written
-	 * to chip before trigger word, then write trigger word, then
-	 * flush again, so packet is sent.
-	 */
-	if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
-		ipath_flush_wc();
-		__iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
-		ipath_flush_wc();
-		__raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
-	} else
-		__iowrite32_copy(piobuf + 2, tmpbuf, plen);
-
-	ipath_flush_wc();
-
-	ret = sizeof(dp);
-
-bail:
-	vfree(tmpbuf);
-	return ret;
-}
-
-static int ipath_diag_release(struct inode *in, struct file *fp)
-{
-	mutex_lock(&ipath_mutex);
-	ipath_diag_inuse = 0;
-	fp->private_data = NULL;
-	mutex_unlock(&ipath_mutex);
-	return 0;
-}
-
-static ssize_t ipath_diag_read(struct file *fp, char __user *data,
-			       size_t count, loff_t *off)
-{
-	struct ipath_devdata *dd = fp->private_data;
-	void __iomem *kreg_base;
-	ssize_t ret;
-
-	kreg_base = dd->ipath_kregbase;
-
-	if (count == 0)
-		ret = 0;
-	else if ((count % 4) || (*off % 4))
-		/* address or length is not 32-bit aligned, hence invalid */
-		ret = -EINVAL;
-	else if (ipath_diag_inuse < 1 && (*off || count != 8))
-		ret = -EINVAL;  /* prevent cat /dev/ipath_diag* */
-	else if ((count % 8) || (*off % 8))
-		/* address or length not 64-bit aligned; do 32-bit reads */
-		ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
-	else
-		ret = ipath_read_umem64(dd, data, kreg_base + *off, count);
-
-	if (ret >= 0) {
-		*off += count;
-		ret = count;
-		if (ipath_diag_inuse == -2)
-			ipath_diag_inuse++;
-	}
-
-	return ret;
-}
-
-static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
-				size_t count, loff_t *off)
-{
-	struct ipath_devdata *dd = fp->private_data;
-	void __iomem *kreg_base;
-	ssize_t ret;
-
-	kreg_base = dd->ipath_kregbase;
-
-	if (count == 0)
-		ret = 0;
-	else if ((count % 4) || (*off % 4))
-		/* address or length is not 32-bit aligned, hence invalid */
-		ret = -EINVAL;
-	else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
-		 ipath_diag_inuse == -2)  /* read qw off 0, write qw off 0 */
-		ret = -EINVAL;  /* before any other write allowed */
-	else if ((count % 8) || (*off % 8))
-		/* address or length not 64-bit aligned; do 32-bit writes */
-		ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
-	else
-		ret = ipath_write_umem64(dd, kreg_base + *off, data, count);
-
-	if (ret >= 0) {
-		*off += count;
-		ret = count;
-		if (ipath_diag_inuse == -1)
-			ipath_diag_inuse = 1; /* all read/write OK now */
-	}
-
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_dma.c b/drivers/staging/rdma/ipath/ipath_dma.c
deleted file mode 100644
index 123a8c053539..000000000000
--- a/drivers/staging/rdma/ipath/ipath_dma.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/scatterlist.h>
-#include <linux/gfp.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_verbs.h"
-
-#define BAD_DMA_ADDRESS ((u64) 0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
-	return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 ipath_dma_map_single(struct ib_device *dev,
-			        void *cpu_addr, size_t size,
-			        enum dma_data_direction direction)
-{
-	BUG_ON(!valid_dma_direction(direction));
-	return (u64) cpu_addr;
-}
-
-static void ipath_dma_unmap_single(struct ib_device *dev,
-				   u64 addr, size_t size,
-				   enum dma_data_direction direction)
-{
-	BUG_ON(!valid_dma_direction(direction));
-}
-
-static u64 ipath_dma_map_page(struct ib_device *dev,
-			      struct page *page,
-			      unsigned long offset,
-			      size_t size,
-			      enum dma_data_direction direction)
-{
-	u64 addr;
-
-	BUG_ON(!valid_dma_direction(direction));
-
-	if (offset + size > PAGE_SIZE) {
-		addr = BAD_DMA_ADDRESS;
-		goto done;
-	}
-
-	addr = (u64) page_address(page);
-	if (addr)
-		addr += offset;
-	/* TODO: handle highmem pages */
-
-done:
-	return addr;
-}
-
-static void ipath_dma_unmap_page(struct ib_device *dev,
-				 u64 addr, size_t size,
-				 enum dma_data_direction direction)
-{
-	BUG_ON(!valid_dma_direction(direction));
-}
-
-static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
-			int nents, enum dma_data_direction direction)
-{
-	struct scatterlist *sg;
-	u64 addr;
-	int i;
-	int ret = nents;
-
-	BUG_ON(!valid_dma_direction(direction));
-
-	for_each_sg(sgl, sg, nents, i) {
-		addr = (u64) page_address(sg_page(sg));
-		/* TODO: handle highmem pages */
-		if (!addr) {
-			ret = 0;
-			break;
-		}
-		sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-		sg->dma_length = sg->length;
-#endif
-	}
-	return ret;
-}
-
-static void ipath_unmap_sg(struct ib_device *dev,
-			   struct scatterlist *sg, int nents,
-			   enum dma_data_direction direction)
-{
-	BUG_ON(!valid_dma_direction(direction));
-}
-
-static void ipath_sync_single_for_cpu(struct ib_device *dev,
-				      u64 addr,
-				      size_t size,
-				      enum dma_data_direction dir)
-{
-}
-
-static void ipath_sync_single_for_device(struct ib_device *dev,
-					 u64 addr,
-					 size_t size,
-					 enum dma_data_direction dir)
-{
-}
-
-static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
-				      u64 *dma_handle, gfp_t flag)
-{
-	struct page *p;
-	void *addr = NULL;
-
-	p = alloc_pages(flag, get_order(size));
-	if (p)
-		addr = page_address(p);
-	if (dma_handle)
-		*dma_handle = (u64) addr;
-	return addr;
-}
-
-static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
-				    void *cpu_addr, u64 dma_handle)
-{
-	free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
-	.mapping_error = ipath_mapping_error,
-	.map_single = ipath_dma_map_single,
-	.unmap_single = ipath_dma_unmap_single,
-	.map_page = ipath_dma_map_page,
-	.unmap_page = ipath_dma_unmap_page,
-	.map_sg = ipath_map_sg,
-	.unmap_sg = ipath_unmap_sg,
-	.sync_single_for_cpu = ipath_sync_single_for_cpu,
-	.sync_single_for_device = ipath_sync_single_for_device,
-	.alloc_coherent = ipath_dma_alloc_coherent,
-	.free_coherent = ipath_dma_free_coherent
-};
diff --git a/drivers/staging/rdma/ipath/ipath_driver.c b/drivers/staging/rdma/ipath/ipath_driver.c
deleted file mode 100644
index 2ab22f98e3ba..000000000000
--- a/drivers/staging/rdma/ipath/ipath_driver.c
+++ /dev/null
@@ -1,2784 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/vmalloc.h>
-#include <linux/bitmap.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#ifdef CONFIG_X86_64
-#include <asm/pat.h>
-#endif
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-
-static void ipath_update_pio_bufs(struct ipath_devdata *);
-
-const char *ipath_get_unit_name(int unit)
-{
-	static char iname[16];
-	snprintf(iname, sizeof iname, "infinipath%u", unit);
-	return iname;
-}
-
-#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
-#define PFX IPATH_DRV_NAME ": "
-
-/*
- * The size has to be longer than this string, so we can append
- * board/chip information to it in the init code.
- */
-const char ib_ipath_version[] = IPATH_IDSTR "\n";
-
-static struct idr unit_table;
-DEFINE_SPINLOCK(ipath_devs_lock);
-LIST_HEAD(ipath_dev_list);
-
-wait_queue_head_t ipath_state_wait;
-
-unsigned ipath_debug = __IPATH_INFO;
-
-module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(debug, "mask for debug prints");
-EXPORT_SYMBOL_GPL(ipath_debug);
-
-unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
-module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
-MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
-
-static unsigned ipath_hol_timeout_ms = 13000;
-module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
-MODULE_PARM_DESC(hol_timeout_ms,
-	"duration of user app suspension after link failure");
-
-unsigned ipath_linkrecovery = 1;
-module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@qlogic.com>");
-MODULE_DESCRIPTION("QLogic InfiniPath driver");
-
-/*
- * Table to translate the LINKTRAININGSTATE portion of
- * IBCStatus to a human-readable form.
- */
-const char *ipath_ibcstatus_str[] = {
-	"Disabled",
-	"LinkUp",
-	"PollActive",
-	"PollQuiet",
-	"SleepDelay",
-	"SleepQuiet",
-	"LState6",		/* unused */
-	"LState7",		/* unused */
-	"CfgDebounce",
-	"CfgRcvfCfg",
-	"CfgWaitRmt",
-	"CfgIdle",
-	"RecovRetrain",
-	"CfgTxRevLane",		/* unused before IBA7220 */
-	"RecovWaitRmt",
-	"RecovIdle",
-	/* below were added for IBA7220 */
-	"CfgEnhanced",
-	"CfgTest",
-	"CfgWaitRmtTest",
-	"CfgWaitCfgEnhanced",
-	"SendTS_T",
-	"SendTstIdles",
-	"RcvTS_T",
-	"SendTst_TS1s",
-	"LTState18", "LTState19", "LTState1A", "LTState1B",
-	"LTState1C", "LTState1D", "LTState1E", "LTState1F"
-};
-
-static void ipath_remove_one(struct pci_dev *);
-static int ipath_init_one(struct pci_dev *, const struct pci_device_id *);
-
-/* Only needed for registration, nothing else needs this info */
-#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
-#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
-
-/* Number of seconds before our card status check...  */
-#define STATUS_TIMEOUT 60
-
-static const struct pci_device_id ipath_pci_tbl[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
-	{ 0, }
-};
-
-MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
-
-static struct pci_driver ipath_driver = {
-	.name = IPATH_DRV_NAME,
-	.probe = ipath_init_one,
-	.remove = ipath_remove_one,
-	.id_table = ipath_pci_tbl,
-	.driver = {
-		.groups = ipath_driver_attr_groups,
-	},
-};
-
-static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
-			     u32 *bar0, u32 *bar1)
-{
-	int ret;
-
-	ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
-	if (ret)
-		ipath_dev_err(dd, "failed to read bar0 before enable: "
-			      "error %d\n", -ret);
-
-	ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
-	if (ret)
-		ipath_dev_err(dd, "failed to read bar1 before enable: "
-			      "error %d\n", -ret);
-
-	ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
-}
-
-static void ipath_free_devdata(struct pci_dev *pdev,
-			       struct ipath_devdata *dd)
-{
-	unsigned long flags;
-
-	pci_set_drvdata(pdev, NULL);
-
-	if (dd->ipath_unit != -1) {
-		spin_lock_irqsave(&ipath_devs_lock, flags);
-		idr_remove(&unit_table, dd->ipath_unit);
-		list_del(&dd->ipath_list);
-		spin_unlock_irqrestore(&ipath_devs_lock, flags);
-	}
-	vfree(dd);
-}
-
-static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
-{
-	unsigned long flags;
-	struct ipath_devdata *dd;
-	int ret;
-
-	dd = vzalloc(sizeof(*dd));
-	if (!dd) {
-		dd = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-	dd->ipath_unit = -1;
-
-	idr_preload(GFP_KERNEL);
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT);
-	if (ret < 0) {
-		printk(KERN_ERR IPATH_DRV_NAME
-		       ": Could not allocate unit ID: error %d\n", -ret);
-		ipath_free_devdata(pdev, dd);
-		dd = ERR_PTR(ret);
-		goto bail_unlock;
-	}
-	dd->ipath_unit = ret;
-
-	dd->pcidev = pdev;
-	pci_set_drvdata(pdev, dd);
-
-	list_add(&dd->ipath_list, &ipath_dev_list);
-
-bail_unlock:
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-	idr_preload_end();
-bail:
-	return dd;
-}
-
-static inline struct ipath_devdata *__ipath_lookup(int unit)
-{
-	return idr_find(&unit_table, unit);
-}
-
-struct ipath_devdata *ipath_lookup(int unit)
-{
-	struct ipath_devdata *dd;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-	dd = __ipath_lookup(unit);
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-	return dd;
-}
-
-int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
-{
-	int nunits, npresent, nup;
-	struct ipath_devdata *dd;
-	unsigned long flags;
-	int maxports;
-
-	nunits = npresent = nup = maxports = 0;
-
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
-		nunits++;
-		if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
-			npresent++;
-		if (dd->ipath_lid &&
-		    !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
-					 | IPATH_LINKUNK)))
-			nup++;
-		if (dd->ipath_cfgports > maxports)
-			maxports = dd->ipath_cfgports;
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-	if (npresentp)
-		*npresentp = npresent;
-	if (nupp)
-		*nupp = nup;
-	if (maxportsp)
-		*maxportsp = maxports;
-
-	return nunits;
-}
-
-/*
- * These next two routines are placeholders in case we don't have per-arch
- * code for controlling write combining.  If explicit control of write
- * combining is not available, performance will probably be awful.
- */
-
-int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
-{
-	return -EOPNOTSUPP;
-}
-
-void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
-{
-}
-
-/*
- * Perform a PIO buffer bandwidth write test, to verify proper system
- * configuration.  Even when all the setup calls work, occasionally
- * BIOS or other issues can prevent write combining from working, or
- * can cause other bandwidth problems to the chip.
- *
- * This test simply writes the same buffer over and over again, and
- * measures close to the peak bandwidth to the chip (not testing
- * data bandwidth to the wire).   On chips that use an address-based
- * trigger to send packets to the wire, this is easy.  On chips that
- * use a count to trigger, we want to make sure that the packet doesn't
- * go out on the wire, or trigger flow control checks.
- */
-static void ipath_verify_pioperf(struct ipath_devdata *dd)
-{
-	u32 pbnum, cnt, lcnt;
-	u32 __iomem *piobuf;
-	u32 *addr;
-	u64 msecs, emsecs;
-
-	piobuf = ipath_getpiobuf(dd, 0, &pbnum);
-	if (!piobuf) {
-		dev_info(&dd->pcidev->dev,
-			"No PIObufs for checking perf, skipping\n");
-		return;
-	}
-
-	/*
-	 * Enough to give us a reasonable test, less than piobuf size, and
-	 * likely multiple of store buffer length.
-	 */
-	cnt = 1024;
-
-	addr = vmalloc(cnt);
-	if (!addr) {
-		dev_info(&dd->pcidev->dev,
-			"Couldn't get memory for checking PIO perf,"
-			" skipping\n");
-		goto done;
-	}
-
-	preempt_disable();  /* we want reasonably accurate elapsed time */
-	msecs = 1 + jiffies_to_msecs(jiffies);
-	for (lcnt = 0; lcnt < 10000U; lcnt++) {
-		/* wait until we cross msec boundary */
-		if (jiffies_to_msecs(jiffies) >= msecs)
-			break;
-		udelay(1);
-	}
-
-	ipath_disable_armlaunch(dd);
-
-	/*
-	 * length 0, no dwords actually sent, and mark as VL15
-	 * on chips where that may matter (due to IB flowcontrol)
-	 */
-	if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
-		writeq(1UL << 63, piobuf);
-	else
-		writeq(0, piobuf);
-	ipath_flush_wc();
-
-	/*
-	 * this is only roughly accurate, since even with preempt we
-	 * still take interrupts that could take a while.   Running for
-	 * >= 5 msec seems to get us "close enough" to accurate values
-	 */
-	msecs = jiffies_to_msecs(jiffies);
-	for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
-		__iowrite32_copy(piobuf + 64, addr, cnt >> 2);
-		emsecs = jiffies_to_msecs(jiffies) - msecs;
-	}
-
-	/* 1 GiB/sec, slightly over IB SDR line rate */
-	if (lcnt < (emsecs * 1024U))
-		ipath_dev_err(dd,
-			"Performance problem: bandwidth to PIO buffers is "
-			"only %u MiB/sec\n",
-			lcnt / (u32) emsecs);
-	else
-		ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
-			lcnt / (u32) emsecs);
-
-	preempt_enable();
-
-	vfree(addr);
-
-done:
-	/* disarm piobuf, so it's available again */
-	ipath_disarm_piobufs(dd, pbnum, 1);
-	ipath_enable_armlaunch(dd);
-}
-
-static void cleanup_device(struct ipath_devdata *dd);
-
-static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	int ret, len, j;
-	struct ipath_devdata *dd;
-	unsigned long long addr;
-	u32 bar0 = 0, bar1 = 0;
-
-#ifdef CONFIG_X86_64
-	if (pat_enabled()) {
-		pr_warn("ipath needs PAT disabled, boot with nopat kernel parameter\n");
-		ret = -ENODEV;
-		goto bail;
-	}
-#endif
-
-	dd = ipath_alloc_devdata(pdev);
-	if (IS_ERR(dd)) {
-		ret = PTR_ERR(dd);
-		printk(KERN_ERR IPATH_DRV_NAME
-		       ": Could not allocate devdata: error %d\n", -ret);
-		goto bail;
-	}
-
-	ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
-
-	ret = pci_enable_device(pdev);
-	if (ret) {
-		/* This can happen iff:
-		 *
-		 * We did a chip reset, and then failed to reprogram the
-		 * BAR, or the chip reset due to an internal error.  We then
-		 * unloaded the driver and reloaded it.
-		 *
-		 * Both reset cases set the BAR back to initial state.  For
-		 * the latter case, the AER sticky error bit at offset 0x718
-		 * should be set, but the Linux kernel doesn't yet know
-		 * about that, it appears.  If the original BAR was retained
-		 * in the kernel data structures, this may be OK.
-		 */
-		ipath_dev_err(dd, "enable unit %d failed: error %d\n",
-			      dd->ipath_unit, -ret);
-		goto bail_devdata;
-	}
-	addr = pci_resource_start(pdev, 0);
-	len = pci_resource_len(pdev, 0);
-	ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
-		   "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
-		   ent->device, ent->driver_data);
-
-	read_bars(dd, pdev, &bar0, &bar1);
-
-	if (!bar1 && !(bar0 & ~0xf)) {
-		if (addr) {
-			dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
-				 "rewriting as %llx\n", addr);
-			ret = pci_write_config_dword(
-				pdev, PCI_BASE_ADDRESS_0, addr);
-			if (ret) {
-				ipath_dev_err(dd, "rewrite of BAR0 "
-					      "failed: err %d\n", -ret);
-				goto bail_disable;
-			}
-			ret = pci_write_config_dword(
-				pdev, PCI_BASE_ADDRESS_1, addr >> 32);
-			if (ret) {
-				ipath_dev_err(dd, "rewrite of BAR1 "
-					      "failed: err %d\n", -ret);
-				goto bail_disable;
-			}
-		} else {
-			ipath_dev_err(dd, "BAR is 0 (probable RESET), "
-				      "not usable until reboot\n");
-			ret = -ENODEV;
-			goto bail_disable;
-		}
-	}
-
-	ret = pci_request_regions(pdev, IPATH_DRV_NAME);
-	if (ret) {
-		dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
-			 "err %d\n", dd->ipath_unit, -ret);
-		goto bail_disable;
-	}
-
-	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (ret) {
-		/*
-		 * if the 64 bit setup fails, try 32 bit.  Some systems
-		 * do not setup 64 bit maps on systems with 2GB or less
-		 * memory installed.
-		 */
-		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (ret) {
-			dev_info(&pdev->dev,
-				"Unable to set DMA mask for unit %u: %d\n",
-				dd->ipath_unit, ret);
-			goto bail_regions;
-		} else {
-			ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
-			ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-			if (ret)
-				dev_info(&pdev->dev,
-					"Unable to set DMA consistent mask "
-					"for unit %u: %d\n",
-					dd->ipath_unit, ret);
-
-		}
-	} else {
-		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-		if (ret)
-			dev_info(&pdev->dev,
-				"Unable to set DMA consistent mask "
-				"for unit %u: %d\n",
-				dd->ipath_unit, ret);
-	}
-
-	pci_set_master(pdev);
-
-	/*
-	 * Save BARs to rewrite after device reset.  Save all 64 bits of
-	 * BAR, just in case.
-	 */
-	dd->ipath_pcibar0 = addr;
-	dd->ipath_pcibar1 = addr >> 32;
-	dd->ipath_deviceid = ent->device;	/* save for later use */
-	dd->ipath_vendorid = ent->vendor;
-
-	/* setup the chip-specific functions, as early as possible. */
-	switch (ent->device) {
-	case PCI_DEVICE_ID_INFINIPATH_HT:
-		ipath_init_iba6110_funcs(dd);
-		break;
-
-	default:
-		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
-			      "failing\n", ent->device);
-		return -ENODEV;
-	}
-
-	for (j = 0; j < 6; j++) {
-		if (!pdev->resource[j].start)
-			continue;
-		ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",
-			   j, &pdev->resource[j],
-			   (unsigned long long)pci_resource_len(pdev, j));
-	}
-
-	if (!addr) {
-		ipath_dev_err(dd, "No valid address in BAR 0!\n");
-		ret = -ENODEV;
-		goto bail_regions;
-	}
-
-	dd->ipath_pcirev = pdev->revision;
-
-#if defined(__powerpc__)
-	/* There isn't a generic way to specify writethrough mappings */
-	dd->ipath_kregbase = __ioremap(addr, len,
-		(_PAGE_NO_CACHE|_PAGE_WRITETHRU));
-#else
-	/* XXX: split this properly to enable on PAT */
-	dd->ipath_kregbase = ioremap_nocache(addr, len);
-#endif
-
-	if (!dd->ipath_kregbase) {
-		ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
-			  addr);
-		ret = -ENOMEM;
-		goto bail_iounmap;
-	}
-	dd->ipath_kregend = (u64 __iomem *)
-		((void __iomem *)dd->ipath_kregbase + len);
-	dd->ipath_physaddr = addr;	/* used for io_remap, etc. */
-	/* for user mmap */
-	ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
-		   addr, dd->ipath_kregbase);
-
-	if (dd->ipath_f_bus(dd, pdev))
-		ipath_dev_err(dd, "Failed to setup config space; "
-			      "continuing anyway\n");
-
-	/*
-	 * set up our interrupt handler; IRQF_SHARED probably not needed,
-	 * since MSI interrupts shouldn't be shared but won't  hurt for now.
-	 * check 0 irq after we return from chip-specific bus setup, since
-	 * that can affect this due to setup
-	 */
-	if (!dd->ipath_irq)
-		ipath_dev_err(dd, "irq is 0, BIOS error?  Interrupts won't "
-			      "work\n");
-	else {
-		ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
-				  IPATH_DRV_NAME, dd);
-		if (ret) {
-			ipath_dev_err(dd, "Couldn't setup irq handler, "
-				      "irq=%d: %d\n", dd->ipath_irq, ret);
-			goto bail_iounmap;
-		}
-	}
-
-	ret = ipath_init_chip(dd, 0);	/* do the chip-specific init */
-	if (ret)
-		goto bail_irqsetup;
-
-	ret = ipath_enable_wc(dd);
-
-	if (ret)
-		ret = 0;
-
-	ipath_verify_pioperf(dd);
-
-	ipath_device_create_group(&pdev->dev, dd);
-	ipathfs_add_device(dd);
-	ipath_user_add(dd);
-	ipath_diag_add(dd);
-	ipath_register_ib_device(dd);
-
-	goto bail;
-
-bail_irqsetup:
-	cleanup_device(dd);
-
-	if (dd->ipath_irq)
-		dd->ipath_f_free_irq(dd);
-
-	if (dd->ipath_f_cleanup)
-		dd->ipath_f_cleanup(dd);
-
-bail_iounmap:
-	iounmap((volatile void __iomem *) dd->ipath_kregbase);
-
-bail_regions:
-	pci_release_regions(pdev);
-
-bail_disable:
-	pci_disable_device(pdev);
-
-bail_devdata:
-	ipath_free_devdata(pdev, dd);
-
-bail:
-	return ret;
-}
-
-static void cleanup_device(struct ipath_devdata *dd)
-{
-	int port;
-	struct ipath_portdata **tmp;
-	unsigned long flags;
-
-	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
-		/* can't do anything more with chip; needs re-init */
-		*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
-		if (dd->ipath_kregbase) {
-			/*
-			 * if we haven't already cleaned up before these are
-			 * to ensure any register reads/writes "fail" until
-			 * re-init
-			 */
-			dd->ipath_kregbase = NULL;
-			dd->ipath_uregbase = 0;
-			dd->ipath_sregbase = 0;
-			dd->ipath_cregbase = 0;
-			dd->ipath_kregsize = 0;
-		}
-		ipath_disable_wc(dd);
-	}
-
-	if (dd->ipath_spectriggerhit)
-		dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
-			 dd->ipath_spectriggerhit);
-
-	if (dd->ipath_pioavailregs_dma) {
-		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-				  (void *) dd->ipath_pioavailregs_dma,
-				  dd->ipath_pioavailregs_phys);
-		dd->ipath_pioavailregs_dma = NULL;
-	}
-	if (dd->ipath_dummy_hdrq) {
-		dma_free_coherent(&dd->pcidev->dev,
-			dd->ipath_pd[0]->port_rcvhdrq_size,
-			dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
-		dd->ipath_dummy_hdrq = NULL;
-	}
-
-	if (dd->ipath_pageshadow) {
-		struct page **tmpp = dd->ipath_pageshadow;
-		dma_addr_t *tmpd = dd->ipath_physshadow;
-		int i, cnt = 0;
-
-		ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
-			   "locked\n");
-		for (port = 0; port < dd->ipath_cfgports; port++) {
-			int port_tidbase = port * dd->ipath_rcvtidcnt;
-			int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-			for (i = port_tidbase; i < maxtid; i++) {
-				if (!tmpp[i])
-					continue;
-				pci_unmap_page(dd->pcidev, tmpd[i],
-					PAGE_SIZE, PCI_DMA_FROMDEVICE);
-				ipath_release_user_pages(&tmpp[i], 1);
-				tmpp[i] = NULL;
-				cnt++;
-			}
-		}
-		if (cnt) {
-			ipath_stats.sps_pageunlocks += cnt;
-			ipath_cdbg(VERBOSE, "There were still %u expTID "
-				   "entries locked\n", cnt);
-		}
-		if (ipath_stats.sps_pagelocks ||
-		    ipath_stats.sps_pageunlocks)
-			ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
-				   "unlocked via ipath_m{un}lock\n",
-				   (unsigned long long)
-				   ipath_stats.sps_pagelocks,
-				   (unsigned long long)
-				   ipath_stats.sps_pageunlocks);
-
-		ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
-			   dd->ipath_pageshadow);
-		tmpp = dd->ipath_pageshadow;
-		dd->ipath_pageshadow = NULL;
-		vfree(tmpp);
-
-		dd->ipath_egrtidbase = NULL;
-	}
-
-	/*
-	 * free any resources still in use (usually just kernel ports)
-	 * at unload; we do for portcnt, because that's what we allocate.
-	 * We acquire lock to be really paranoid that ipath_pd isn't being
-	 * accessed from some interrupt-related code (that should not happen,
-	 * but best to be sure).
-	 */
-	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-	tmp = dd->ipath_pd;
-	dd->ipath_pd = NULL;
-	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-	for (port = 0; port < dd->ipath_portcnt; port++) {
-		struct ipath_portdata *pd = tmp[port];
-		tmp[port] = NULL; /* debugging paranoia */
-		ipath_free_pddata(dd, pd);
-	}
-	kfree(tmp);
-}
-
-static void ipath_remove_one(struct pci_dev *pdev)
-{
-	struct ipath_devdata *dd = pci_get_drvdata(pdev);
-
-	ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
-
-	/*
-	 * disable the IB link early, to be sure no new packets arrive, which
-	 * complicates the shutdown process
-	 */
-	ipath_shutdown_device(dd);
-
-	flush_workqueue(ib_wq);
-
-	if (dd->verbs_dev)
-		ipath_unregister_ib_device(dd->verbs_dev);
-
-	ipath_diag_remove(dd);
-	ipath_user_remove(dd);
-	ipathfs_remove_device(dd);
-	ipath_device_remove_group(&pdev->dev, dd);
-
-	ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
-		   "unit %u\n", dd, (u32) dd->ipath_unit);
-
-	cleanup_device(dd);
-
-	/*
-	 * turn off rcv, send, and interrupts for all ports, all drivers
-	 * should also hard reset the chip here?
-	 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
-	 * for all versions of the driver, if they were allocated
-	 */
-	if (dd->ipath_irq) {
-		ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
-			   dd->ipath_unit, dd->ipath_irq);
-		dd->ipath_f_free_irq(dd);
-	} else
-		ipath_dbg("irq is 0, not doing free_irq "
-			  "for unit %u\n", dd->ipath_unit);
-	/*
-	 * we check for NULL here, because it's outside
-	 * the kregbase check, and we need to call it
-	 * after the free_irq.	Thus it's possible that
-	 * the function pointers were never initialized.
-	 */
-	if (dd->ipath_f_cleanup)
-		/* clean up chip-specific stuff */
-		dd->ipath_f_cleanup(dd);
-
-	ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
-	iounmap((volatile void __iomem *) dd->ipath_kregbase);
-	pci_release_regions(pdev);
-	ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
-	pci_disable_device(pdev);
-
-	ipath_free_devdata(pdev, dd);
-}
-
-/* general driver use */
-DEFINE_MUTEX(ipath_mutex);
-
-static DEFINE_SPINLOCK(ipath_pioavail_lock);
-
-/**
- * ipath_disarm_piobufs - cancel a range of PIO buffers
- * @dd: the infinipath device
- * @first: the first PIO buffer to cancel
- * @cnt: the number of PIO buffers to cancel
- *
- * cancel a range of PIO buffers, used when they might be armed, but
- * not triggered.  Used at init to ensure buffer state, and also user
- * process close, in case it died while writing to a PIO buffer
- * Also after errors.
- */
-void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
-			  unsigned cnt)
-{
-	unsigned i, last = first + cnt;
-	unsigned long flags;
-
-	ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
-	for (i = first; i < last; i++) {
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		/*
-		 * The disarm-related bits are write-only, so it
-		 * is ok to OR them in with our copy of sendctrl
-		 * while we hold the lock.
-		 */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl | INFINIPATH_S_DISARM |
-			(i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
-		/* can't disarm bufs back-to-back per iba7220 spec */
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-	}
-	/* on some older chips, update may not happen after cancel */
-	ipath_force_pio_avail_update(dd);
-}
-
-/**
- * ipath_wait_linkstate - wait for an IB link state change to occur
- * @dd: the infinipath device
- * @state: the state to wait for
- * @msecs: the number of milliseconds to wait
- *
- * wait up to msecs milliseconds for IB link state change to occur for
- * now, take the easy polling route.  Currently used only by
- * ipath_set_linkstate.  Returns 0 if state reached, otherwise
- * -ETIMEDOUT state can have multiple states set, for any of several
- * transitions.
- */
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
-{
-	dd->ipath_state_wanted = state;
-	wait_event_interruptible_timeout(ipath_state_wait,
-					 (dd->ipath_flags & state),
-					 msecs_to_jiffies(msecs));
-	dd->ipath_state_wanted = 0;
-
-	if (!(dd->ipath_flags & state)) {
-		u64 val;
-		ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
-			   " ms\n",
-			   /* test INIT ahead of DOWN, both can be set */
-			   (state & IPATH_LINKINIT) ? "INIT" :
-			   ((state & IPATH_LINKDOWN) ? "DOWN" :
-			    ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
-			   msecs);
-		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-		ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
-			   (unsigned long long) ipath_read_kreg64(
-				   dd, dd->ipath_kregs->kr_ibcctrl),
-			   (unsigned long long) val,
-			   ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
-	}
-	return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
-}
-
-static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
-	char *buf, size_t blen)
-{
-	static const struct {
-		ipath_err_t err;
-		const char *msg;
-	} errs[] = {
-		{ INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
-		{ INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
-		{ INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
-		{ INFINIPATH_E_SDMABASE, "SDmaBase" },
-		{ INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
-		{ INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
-		{ INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
-		{ INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
-		{ INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
-		{ INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
-		{ INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
-		{ INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
-	};
-	int i;
-	int expected;
-	size_t bidx = 0;
-
-	for (i = 0; i < ARRAY_SIZE(errs); i++) {
-		expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
-			test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-		if ((err & errs[i].err) && !expected)
-			bidx += snprintf(buf + bidx, blen - bidx,
-					 "%s ", errs[i].msg);
-	}
-}
-
-/*
- * Decode the error status into strings, deciding whether to always
- * print * it or not depending on "normal packet errors" vs everything
- * else.   Return 1 if "real" errors, otherwise 0 if only packet
- * errors, so caller can decide what to print with the string.
- */
-int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
-	ipath_err_t err)
-{
-	int iserr = 1;
-	*buf = '\0';
-	if (err & INFINIPATH_E_PKTERRS) {
-		if (!(err & ~INFINIPATH_E_PKTERRS))
-			iserr = 0; // if only packet errors.
-		if (ipath_debug & __IPATH_ERRPKTDBG) {
-			if (err & INFINIPATH_E_REBP)
-				strlcat(buf, "EBP ", blen);
-			if (err & INFINIPATH_E_RVCRC)
-				strlcat(buf, "VCRC ", blen);
-			if (err & INFINIPATH_E_RICRC) {
-				strlcat(buf, "CRC ", blen);
-				// clear for check below, so only once
-				err &= INFINIPATH_E_RICRC;
-			}
-			if (err & INFINIPATH_E_RSHORTPKTLEN)
-				strlcat(buf, "rshortpktlen ", blen);
-			if (err & INFINIPATH_E_SDROPPEDDATAPKT)
-				strlcat(buf, "sdroppeddatapkt ", blen);
-			if (err & INFINIPATH_E_SPKTLEN)
-				strlcat(buf, "spktlen ", blen);
-		}
-		if ((err & INFINIPATH_E_RICRC) &&
-			!(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
-			strlcat(buf, "CRC ", blen);
-		if (!iserr)
-			goto done;
-	}
-	if (err & INFINIPATH_E_RHDRLEN)
-		strlcat(buf, "rhdrlen ", blen);
-	if (err & INFINIPATH_E_RBADTID)
-		strlcat(buf, "rbadtid ", blen);
-	if (err & INFINIPATH_E_RBADVERSION)
-		strlcat(buf, "rbadversion ", blen);
-	if (err & INFINIPATH_E_RHDR)
-		strlcat(buf, "rhdr ", blen);
-	if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
-		strlcat(buf, "sendspecialtrigger ", blen);
-	if (err & INFINIPATH_E_RLONGPKTLEN)
-		strlcat(buf, "rlongpktlen ", blen);
-	if (err & INFINIPATH_E_RMAXPKTLEN)
-		strlcat(buf, "rmaxpktlen ", blen);
-	if (err & INFINIPATH_E_RMINPKTLEN)
-		strlcat(buf, "rminpktlen ", blen);
-	if (err & INFINIPATH_E_SMINPKTLEN)
-		strlcat(buf, "sminpktlen ", blen);
-	if (err & INFINIPATH_E_RFORMATERR)
-		strlcat(buf, "rformaterr ", blen);
-	if (err & INFINIPATH_E_RUNSUPVL)
-		strlcat(buf, "runsupvl ", blen);
-	if (err & INFINIPATH_E_RUNEXPCHAR)
-		strlcat(buf, "runexpchar ", blen);
-	if (err & INFINIPATH_E_RIBFLOW)
-		strlcat(buf, "ribflow ", blen);
-	if (err & INFINIPATH_E_SUNDERRUN)
-		strlcat(buf, "sunderrun ", blen);
-	if (err & INFINIPATH_E_SPIOARMLAUNCH)
-		strlcat(buf, "spioarmlaunch ", blen);
-	if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
-		strlcat(buf, "sunexperrpktnum ", blen);
-	if (err & INFINIPATH_E_SDROPPEDSMPPKT)
-		strlcat(buf, "sdroppedsmppkt ", blen);
-	if (err & INFINIPATH_E_SMAXPKTLEN)
-		strlcat(buf, "smaxpktlen ", blen);
-	if (err & INFINIPATH_E_SUNSUPVL)
-		strlcat(buf, "sunsupVL ", blen);
-	if (err & INFINIPATH_E_INVALIDADDR)
-		strlcat(buf, "invalidaddr ", blen);
-	if (err & INFINIPATH_E_RRCVEGRFULL)
-		strlcat(buf, "rcvegrfull ", blen);
-	if (err & INFINIPATH_E_RRCVHDRFULL)
-		strlcat(buf, "rcvhdrfull ", blen);
-	if (err & INFINIPATH_E_IBSTATUSCHANGED)
-		strlcat(buf, "ibcstatuschg ", blen);
-	if (err & INFINIPATH_E_RIBLOSTLINK)
-		strlcat(buf, "riblostlink ", blen);
-	if (err & INFINIPATH_E_HARDWARE)
-		strlcat(buf, "hardware ", blen);
-	if (err & INFINIPATH_E_RESET)
-		strlcat(buf, "reset ", blen);
-	if (err & INFINIPATH_E_SDMAERRS)
-		decode_sdma_errs(dd, err, buf, blen);
-	if (err & INFINIPATH_E_INVALIDEEPCMD)
-		strlcat(buf, "invalideepromcmd ", blen);
-done:
-	return iserr;
-}
-
-/**
- * get_rhf_errstring - decode RHF errors
- * @err: the err number
- * @msg: the output buffer
- * @len: the length of the output buffer
- *
- * only used one place now, may want more later
- */
-static void get_rhf_errstring(u32 err, char *msg, size_t len)
-{
-	/* if no errors, and so don't need to check what's first */
-	*msg = '\0';
-
-	if (err & INFINIPATH_RHF_H_ICRCERR)
-		strlcat(msg, "icrcerr ", len);
-	if (err & INFINIPATH_RHF_H_VCRCERR)
-		strlcat(msg, "vcrcerr ", len);
-	if (err & INFINIPATH_RHF_H_PARITYERR)
-		strlcat(msg, "parityerr ", len);
-	if (err & INFINIPATH_RHF_H_LENERR)
-		strlcat(msg, "lenerr ", len);
-	if (err & INFINIPATH_RHF_H_MTUERR)
-		strlcat(msg, "mtuerr ", len);
-	if (err & INFINIPATH_RHF_H_IHDRERR)
-		/* infinipath hdr checksum error */
-		strlcat(msg, "ipathhdrerr ", len);
-	if (err & INFINIPATH_RHF_H_TIDERR)
-		strlcat(msg, "tiderr ", len);
-	if (err & INFINIPATH_RHF_H_MKERR)
-		/* bad port, offset, etc. */
-		strlcat(msg, "invalid ipathhdr ", len);
-	if (err & INFINIPATH_RHF_H_IBERR)
-		strlcat(msg, "iberr ", len);
-	if (err & INFINIPATH_RHF_L_SWA)
-		strlcat(msg, "swA ", len);
-	if (err & INFINIPATH_RHF_L_SWB)
-		strlcat(msg, "swB ", len);
-}
-
-/**
- * ipath_get_egrbuf - get an eager buffer
- * @dd: the infinipath device
- * @bufnum: the eager buffer to get
- *
- * must only be called if ipath_pd[port] is known to be allocated
- */
-static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
-{
-	return dd->ipath_port0_skbinfo ?
-		(void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
-}
-
-/**
- * ipath_alloc_skb - allocate an skb and buffer with possible constraints
- * @dd: the infinipath device
- * @gfp_mask: the sk_buff SFP mask
- */
-struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
-				gfp_t gfp_mask)
-{
-	struct sk_buff *skb;
-	u32 len;
-
-	/*
-	 * Only fully supported way to handle this is to allocate lots
-	 * extra, align as needed, and then do skb_reserve().  That wastes
-	 * a lot of memory...  I'll have to hack this into infinipath_copy
-	 * also.
-	 */
-
-	/*
-	 * We need 2 extra bytes for ipath_ether data sent in the
-	 * key header.  In order to keep everything dword aligned,
-	 * we'll reserve 4 bytes.
-	 */
-	len = dd->ipath_ibmaxlen + 4;
-
-	if (dd->ipath_flags & IPATH_4BYTE_TID) {
-		/* We need a 2KB multiple alignment, and there is no way
-		 * to do it except to allocate extra and then skb_reserve
-		 * enough to bring it up to the right alignment.
-		 */
-		len += 2047;
-	}
-
-	skb = __dev_alloc_skb(len, gfp_mask);
-	if (!skb) {
-		ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
-			      len);
-		goto bail;
-	}
-
-	skb_reserve(skb, 4);
-
-	if (dd->ipath_flags & IPATH_4BYTE_TID) {
-		u32 una = (unsigned long)skb->data & 2047;
-		if (una)
-			skb_reserve(skb, 2048 - una);
-	}
-
-bail:
-	return skb;
-}
-
-static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
-			     u32 eflags,
-			     u32 l,
-			     u32 etail,
-			     __le32 *rhf_addr,
-			     struct ipath_message_header *hdr)
-{
-	char emsg[128];
-
-	get_rhf_errstring(eflags, emsg, sizeof emsg);
-	ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
-		   "tlen=%x opcode=%x egridx=%x: %s\n",
-		   eflags, l,
-		   ipath_hdrget_rcv_type(rhf_addr),
-		   ipath_hdrget_length_in_bytes(rhf_addr),
-		   be32_to_cpu(hdr->bth[0]) >> 24,
-		   etail, emsg);
-
-	/* Count local link integrity errors. */
-	if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
-		u8 n = (dd->ipath_ibcctrl >>
-			INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-			INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-
-		if (++dd->ipath_lli_counter > n) {
-			dd->ipath_lli_counter = 0;
-			dd->ipath_lli_errors++;
-		}
-	}
-}
-
-/*
- * ipath_kreceive - receive a packet
- * @pd: the infinipath port
- *
- * called from interrupt handler for errors or receive interrupt
- */
-void ipath_kreceive(struct ipath_portdata *pd)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	__le32 *rhf_addr;
-	void *ebuf;
-	const u32 rsize = dd->ipath_rcvhdrentsize;	/* words */
-	const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize;	/* words */
-	u32 etail = -1, l, hdrqtail;
-	struct ipath_message_header *hdr;
-	u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
-	static u64 totcalls;	/* stats, may eventually remove */
-	int last;
-
-	l = pd->port_head;
-	rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
-	if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-		u32 seq = ipath_hdrget_seq(rhf_addr);
-
-		if (seq != pd->port_seq_cnt)
-			goto bail;
-		hdrqtail = 0;
-	} else {
-		hdrqtail = ipath_get_rcvhdrtail(pd);
-		if (l == hdrqtail)
-			goto bail;
-		smp_rmb();
-	}
-
-reloop:
-	for (last = 0, i = 1; !last; i += !last) {
-		hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
-		eflags = ipath_hdrget_err_flags(rhf_addr);
-		etype = ipath_hdrget_rcv_type(rhf_addr);
-		/* total length */
-		tlen = ipath_hdrget_length_in_bytes(rhf_addr);
-		ebuf = NULL;
-		if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
-		    ipath_hdrget_use_egr_buf(rhf_addr) :
-		    (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
-			/*
-			 * It turns out that the chip uses an eager buffer
-			 * for all non-expected packets, whether it "needs"
-			 * one or not.  So always get the index, but don't
-			 * set ebuf (so we try to copy data) unless the
-			 * length requires it.
-			 */
-			etail = ipath_hdrget_index(rhf_addr);
-			updegr = 1;
-			if (tlen > sizeof(*hdr) ||
-			    etype == RCVHQ_RCV_TYPE_NON_KD)
-				ebuf = ipath_get_egrbuf(dd, etail);
-		}
-
-		/*
-		 * both tiderr and ipathhdrerr are set for all plain IB
-		 * packets; only ipathhdrerr should be set.
-		 */
-
-		if (etype != RCVHQ_RCV_TYPE_NON_KD &&
-		    etype != RCVHQ_RCV_TYPE_ERROR &&
-		    ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
-		    IPS_PROTO_VERSION)
-			ipath_cdbg(PKT, "Bad InfiniPath protocol version "
-				   "%x\n", etype);
-
-		if (unlikely(eflags))
-			ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
-		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-			ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
-			if (dd->ipath_lli_counter)
-				dd->ipath_lli_counter--;
-		} else if (etype == RCVHQ_RCV_TYPE_EAGER) {
-			u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
-			u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
-			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
-				   "qp=%x), len %x; ignored\n",
-				   etype, opcode, qp, tlen);
-		} else if (etype == RCVHQ_RCV_TYPE_EXPECTED) {
-			ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
-				  be32_to_cpu(hdr->bth[0]) >> 24);
-		} else {
-			/*
-			 * error packet, type of error unknown.
-			 * Probably type 3, but we don't know, so don't
-			 * even try to print the opcode, etc.
-			 * Usually caused by a "bad packet", that has no
-			 * BTH, when the LRH says it should.
-			 */
-			ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
-				  " %x, len %x hdrq+%x rhf: %Lx\n",
-				  etail, tlen, l, (unsigned long long)
-				  le64_to_cpu(*(__le64 *) rhf_addr));
-			if (ipath_debug & __IPATH_ERRPKTDBG) {
-				u32 j, *d, dw = rsize-2;
-				if (rsize > (tlen>>2))
-					dw = tlen>>2;
-				d = (u32 *)hdr;
-				printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
-					dw);
-				for (j = 0; j < dw; j++)
-					printk(KERN_DEBUG "%8x%s", d[j],
-						(j%8) == 7 ? "\n" : " ");
-				printk(KERN_DEBUG ".\n");
-			}
-		}
-		l += rsize;
-		if (l >= maxcnt)
-			l = 0;
-		rhf_addr = (__le32 *) pd->port_rcvhdrq +
-			l + dd->ipath_rhf_offset;
-		if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-			u32 seq = ipath_hdrget_seq(rhf_addr);
-
-			if (++pd->port_seq_cnt > 13)
-				pd->port_seq_cnt = 1;
-			if (seq != pd->port_seq_cnt)
-				last = 1;
-		} else if (l == hdrqtail) {
-			last = 1;
-		}
-		/*
-		 * update head regs on last packet, and every 16 packets.
-		 * Reduce bus traffic, while still trying to prevent
-		 * rcvhdrq overflows, for when the queue is nearly full
-		 */
-		if (last || !(i & 0xf)) {
-			u64 lval = l;
-
-			/* request IBA6120 and 7220 interrupt only on last */
-			if (last)
-				lval |= dd->ipath_rhdrhead_intr_off;
-			ipath_write_ureg(dd, ur_rcvhdrhead, lval,
-				pd->port_port);
-			if (updegr) {
-				ipath_write_ureg(dd, ur_rcvegrindexhead,
-						 etail, pd->port_port);
-				updegr = 0;
-			}
-		}
-	}
-
-	if (!dd->ipath_rhdrhead_intr_off && !reloop &&
-	    !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-		/* IBA6110 workaround; we can have a race clearing chip
-		 * interrupt with another interrupt about to be delivered,
-		 * and can clear it before it is delivered on the GPIO
-		 * workaround.  By doing the extra check here for the
-		 * in-memory tail register updating while we were doing
-		 * earlier packets, we "almost" guarantee we have covered
-		 * that case.
-		 */
-		u32 hqtail = ipath_get_rcvhdrtail(pd);
-		if (hqtail != hdrqtail) {
-			hdrqtail = hqtail;
-			reloop = 1; /* loop 1 extra time at most */
-			goto reloop;
-		}
-	}
-
-	pkttot += i;
-
-	pd->port_head = l;
-
-	if (pkttot > ipath_stats.sps_maxpkts_call)
-		ipath_stats.sps_maxpkts_call = pkttot;
-	ipath_stats.sps_port0pkts += pkttot;
-	ipath_stats.sps_avgpkts_call =
-		ipath_stats.sps_port0pkts / ++totcalls;
-
-bail:;
-}
-
-/**
- * ipath_update_pio_bufs - update shadow copy of the PIO availability map
- * @dd: the infinipath device
- *
- * called whenever our local copy indicates we have run out of send buffers
- * NOTE: This can be called from interrupt context by some code
- * and from non-interrupt context by ipath_getpiobuf().
- */
-
-static void ipath_update_pio_bufs(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-	int i;
-	const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
-
-	/* If the generation (check) bits have changed, then we update the
-	 * busy bit for the corresponding PIO buffer.  This algorithm will
-	 * modify positions to the value they already have in some cases
-	 * (i.e., no change), but it's faster than changing only the bits
-	 * that have changed.
-	 *
-	 * We would like to do this atomicly, to avoid spinlocks in the
-	 * critical send path, but that's not really possible, given the
-	 * type of changes, and that this routine could be called on
-	 * multiple cpu's simultaneously, so we lock in this routine only,
-	 * to avoid conflicting updates; all we change is the shadow, and
-	 * it's a single 64 bit memory location, so by definition the update
-	 * is atomic in terms of what other cpu's can see in testing the
-	 * bits.  The spin_lock overhead isn't too bad, since it only
-	 * happens when all buffers are in use, so only cpu overhead, not
-	 * latency or bandwidth is affected.
-	 */
-	if (!dd->ipath_pioavailregs_dma) {
-		ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
-		return;
-	}
-	if (ipath_debug & __IPATH_VERBDBG) {
-		/* only if packet debug and verbose */
-		volatile __le64 *dma = dd->ipath_pioavailregs_dma;
-		unsigned long *shadow = dd->ipath_pioavailshadow;
-
-		ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
-			   "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
-			   "s3=%lx\n",
-			   (unsigned long long) le64_to_cpu(dma[0]),
-			   shadow[0],
-			   (unsigned long long) le64_to_cpu(dma[1]),
-			   shadow[1],
-			   (unsigned long long) le64_to_cpu(dma[2]),
-			   shadow[2],
-			   (unsigned long long) le64_to_cpu(dma[3]),
-			   shadow[3]);
-		if (piobregs > 4)
-			ipath_cdbg(
-				PKT, "2nd group, dma4=%llx shad4=%lx, "
-				"d5=%llx s5=%lx, d6=%llx s6=%lx, "
-				"d7=%llx s7=%lx\n",
-				(unsigned long long) le64_to_cpu(dma[4]),
-				shadow[4],
-				(unsigned long long) le64_to_cpu(dma[5]),
-				shadow[5],
-				(unsigned long long) le64_to_cpu(dma[6]),
-				shadow[6],
-				(unsigned long long) le64_to_cpu(dma[7]),
-				shadow[7]);
-	}
-	spin_lock_irqsave(&ipath_pioavail_lock, flags);
-	for (i = 0; i < piobregs; i++) {
-		u64 pchbusy, pchg, piov, pnew;
-		/*
-		 * Chip Errata: bug 6641; even and odd qwords>3 are swapped
-		 */
-		if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
-			piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
-		else
-			piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
-		pchg = dd->ipath_pioavailkernel[i] &
-			~(dd->ipath_pioavailshadow[i] ^ piov);
-		pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
-		if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
-			pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
-			pnew |= piov & pchbusy;
-			dd->ipath_pioavailshadow[i] = pnew;
-		}
-	}
-	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-}
-
-/*
- * used to force update of pioavailshadow if we can't get a pio buffer.
- * Needed primarily due to exitting freeze mode after recovering
- * from errors.  Done lazily, because it's safer (known to not
- * be writing pio buffers).
- */
-static void ipath_reset_availshadow(struct ipath_devdata *dd)
-{
-	int i, im;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ipath_pioavail_lock, flags);
-	for (i = 0; i < dd->ipath_pioavregs; i++) {
-		u64 val, oldval;
-		/* deal with 6110 chip bug on high register #s */
-		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
-			i ^ 1 : i;
-		val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
-		/*
-		 * busy out the buffers not in the kernel avail list,
-		 * without changing the generation bits.
-		 */
-		oldval = dd->ipath_pioavailshadow[i];
-		dd->ipath_pioavailshadow[i] = val |
-			((~dd->ipath_pioavailkernel[i] <<
-			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
-			0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
-		if (oldval != dd->ipath_pioavailshadow[i])
-			ipath_dbg("shadow[%d] was %Lx, now %lx\n",
-				i, (unsigned long long) oldval,
-				dd->ipath_pioavailshadow[i]);
-	}
-	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-}
-
-/**
- * ipath_setrcvhdrsize - set the receive header size
- * @dd: the infinipath device
- * @rhdrsize: the receive header size
- *
- * called from user init code, and also layered driver init
- */
-int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
-{
-	int ret = 0;
-
-	if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
-		if (dd->ipath_rcvhdrsize != rhdrsize) {
-			dev_info(&dd->pcidev->dev,
-				 "Error: can't set protocol header "
-				 "size %u, already %u\n",
-				 rhdrsize, dd->ipath_rcvhdrsize);
-			ret = -EAGAIN;
-		} else
-			ipath_cdbg(VERBOSE, "Reuse same protocol header "
-				   "size %u\n", dd->ipath_rcvhdrsize);
-	} else if (rhdrsize > (dd->ipath_rcvhdrentsize -
-			       (sizeof(u64) / sizeof(u32)))) {
-		ipath_dbg("Error: can't set protocol header size %u "
-			  "(> max %u)\n", rhdrsize,
-			  dd->ipath_rcvhdrentsize -
-			  (u32) (sizeof(u64) / sizeof(u32)));
-		ret = -EOVERFLOW;
-	} else {
-		dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
-		dd->ipath_rcvhdrsize = rhdrsize;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
-				 dd->ipath_rcvhdrsize);
-		ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
-			   dd->ipath_rcvhdrsize);
-	}
-	return ret;
-}
-
-/*
- * debugging code and stats updates if no pio buffers available.
- */
-static noinline void no_pio_bufs(struct ipath_devdata *dd)
-{
-	unsigned long *shadow = dd->ipath_pioavailshadow;
-	__le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
-
-	dd->ipath_upd_pio_shadow = 1;
-
-	/*
-	 * not atomic, but if we lose a stat count in a while, that's OK
-	 */
-	ipath_stats.sps_nopiobufs++;
-	if (!(++dd->ipath_consec_nopiobuf % 100000)) {
-		ipath_force_pio_avail_update(dd); /* at start */
-		ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
-			"%llx %llx %llx %llx\n"
-			"ipath  shadow:  %lx %lx %lx %lx\n",
-			dd->ipath_consec_nopiobuf,
-			(unsigned long)get_cycles(),
-			(unsigned long long) le64_to_cpu(dma[0]),
-			(unsigned long long) le64_to_cpu(dma[1]),
-			(unsigned long long) le64_to_cpu(dma[2]),
-			(unsigned long long) le64_to_cpu(dma[3]),
-			shadow[0], shadow[1], shadow[2], shadow[3]);
-		/*
-		 * 4 buffers per byte, 4 registers above, cover rest
-		 * below
-		 */
-		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
-		    (sizeof(shadow[0]) * 4 * 4))
-			ipath_dbg("2nd group: dmacopy: "
-				  "%llx %llx %llx %llx\n"
-				  "ipath  shadow:  %lx %lx %lx %lx\n",
-				  (unsigned long long)le64_to_cpu(dma[4]),
-				  (unsigned long long)le64_to_cpu(dma[5]),
-				  (unsigned long long)le64_to_cpu(dma[6]),
-				  (unsigned long long)le64_to_cpu(dma[7]),
-				  shadow[4], shadow[5], shadow[6], shadow[7]);
-
-		/* at end, so update likely happened */
-		ipath_reset_availshadow(dd);
-	}
-}
-
-/*
- * common code for normal driver pio buffer allocation, and reserved
- * allocation.
- *
- * do appropriate marking as busy, etc.
- * returns buffer number if one found (>=0), negative number is error.
- */
-static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
-	u32 *pbufnum, u32 first, u32 last, u32 firsti)
-{
-	int i, j, updated = 0;
-	unsigned piobcnt;
-	unsigned long flags;
-	unsigned long *shadow = dd->ipath_pioavailshadow;
-	u32 __iomem *buf;
-
-	piobcnt = last - first;
-	if (dd->ipath_upd_pio_shadow) {
-		/*
-		 * Minor optimization.  If we had no buffers on last call,
-		 * start out by doing the update; continue and do scan even
-		 * if no buffers were updated, to be paranoid
-		 */
-		ipath_update_pio_bufs(dd);
-		updated++;
-		i = first;
-	} else
-		i = firsti;
-rescan:
-	/*
-	 * while test_and_set_bit() is atomic, we do that and then the
-	 * change_bit(), and the pair is not.  See if this is the cause
-	 * of the remaining armlaunch errors.
-	 */
-	spin_lock_irqsave(&ipath_pioavail_lock, flags);
-	for (j = 0; j < piobcnt; j++, i++) {
-		if (i >= last)
-			i = first;
-		if (__test_and_set_bit((2 * i) + 1, shadow))
-			continue;
-		/* flip generation bit */
-		__change_bit(2 * i, shadow);
-		break;
-	}
-	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-
-	if (j == piobcnt) {
-		if (!updated) {
-			/*
-			 * first time through; shadow exhausted, but may be
-			 * buffers available, try an update and then rescan.
-			 */
-			ipath_update_pio_bufs(dd);
-			updated++;
-			i = first;
-			goto rescan;
-		} else if (updated == 1 && piobcnt <=
-			((dd->ipath_sendctrl
-			>> INFINIPATH_S_UPDTHRESH_SHIFT) &
-			INFINIPATH_S_UPDTHRESH_MASK)) {
-			/*
-			 * for chips supporting and using the update
-			 * threshold we need to force an update of the
-			 * in-memory copy if the count is less than the
-			 * thershold, then check one more time.
-			 */
-			ipath_force_pio_avail_update(dd);
-			ipath_update_pio_bufs(dd);
-			updated++;
-			i = first;
-			goto rescan;
-		}
-
-		no_pio_bufs(dd);
-		buf = NULL;
-	} else {
-		if (i < dd->ipath_piobcnt2k)
-			buf = (u32 __iomem *) (dd->ipath_pio2kbase +
-					       i * dd->ipath_palign);
-		else
-			buf = (u32 __iomem *)
-				(dd->ipath_pio4kbase +
-				 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
-		if (pbufnum)
-			*pbufnum = i;
-	}
-
-	return buf;
-}
-
-/**
- * ipath_getpiobuf - find an available pio buffer
- * @dd: the infinipath device
- * @plen: the size of the PIO buffer needed in 32-bit words
- * @pbufnum: the buffer number is placed here
- */
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
-{
-	u32 __iomem *buf;
-	u32 pnum, nbufs;
-	u32 first, lasti;
-
-	if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
-		first = dd->ipath_piobcnt2k;
-		lasti = dd->ipath_lastpioindexl;
-	} else {
-		first = 0;
-		lasti = dd->ipath_lastpioindex;
-	}
-	nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-	buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
-
-	if (buf) {
-		/*
-		 * Set next starting place.  It's just an optimization,
-		 * it doesn't matter who wins on this, so no locking
-		 */
-		if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
-			dd->ipath_lastpioindexl = pnum + 1;
-		else
-			dd->ipath_lastpioindex = pnum + 1;
-		if (dd->ipath_upd_pio_shadow)
-			dd->ipath_upd_pio_shadow = 0;
-		if (dd->ipath_consec_nopiobuf)
-			dd->ipath_consec_nopiobuf = 0;
-		ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
-			   pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
-		if (pbufnum)
-			*pbufnum = pnum;
-
-	}
-	return buf;
-}
-
-/**
- * ipath_chg_pioavailkernel - change which send buffers are available for kernel
- * @dd: the infinipath device
- * @start: the starting send buffer number
- * @len: the number of send buffers
- * @avail: true if the buffers are available for kernel use, false otherwise
- */
-void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
-			      unsigned len, int avail)
-{
-	unsigned long flags;
-	unsigned end, cnt = 0;
-
-	/* There are two bits per send buffer (busy and generation) */
-	start *= 2;
-	end = start + len * 2;
-
-	spin_lock_irqsave(&ipath_pioavail_lock, flags);
-	/* Set or clear the busy bit in the shadow. */
-	while (start < end) {
-		if (avail) {
-			unsigned long dma;
-			int i, im;
-			/*
-			 * the BUSY bit will never be set, because we disarm
-			 * the user buffers before we hand them back to the
-			 * kernel.  We do have to make sure the generation
-			 * bit is set correctly in shadow, since it could
-			 * have changed many times while allocated to user.
-			 * We can't use the bitmap functions on the full
-			 * dma array because it is always little-endian, so
-			 * we have to flip to host-order first.
-			 * BITS_PER_LONG is slightly wrong, since it's
-			 * always 64 bits per register in chip...
-			 * We only work on 64 bit kernels, so that's OK.
-			 */
-			/* deal with 6110 chip bug on high register #s */
-			i = start / BITS_PER_LONG;
-			im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
-				i ^ 1 : i;
-			__clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
-				+ start, dd->ipath_pioavailshadow);
-			dma = (unsigned long) le64_to_cpu(
-				dd->ipath_pioavailregs_dma[im]);
-			if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-				+ start) % BITS_PER_LONG, &dma))
-				__set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-					+ start, dd->ipath_pioavailshadow);
-			else
-				__clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
-					+ start, dd->ipath_pioavailshadow);
-			__set_bit(start, dd->ipath_pioavailkernel);
-		} else {
-			__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
-				dd->ipath_pioavailshadow);
-			__clear_bit(start, dd->ipath_pioavailkernel);
-		}
-		start += 2;
-	}
-
-	if (dd->ipath_pioupd_thresh) {
-		end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-		cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
-	}
-	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
-
-	/*
-	 * When moving buffers from kernel to user, if number assigned to
-	 * the user is less than the pio update threshold, and threshold
-	 * is supported (cnt was computed > 0), drop the update threshold
-	 * so we update at least once per allocated number of buffers.
-	 * In any case, if the kernel buffers are less than the threshold,
-	 * drop the threshold.  We don't bother increasing it, having once
-	 * decreased it, since it would typically just cycle back and forth.
-	 * If we don't decrease below buffers in use, we can wait a long
-	 * time for an update, until some other context uses PIO buffers.
-	 */
-	if (!avail && len < cnt)
-		cnt = len;
-	if (cnt < dd->ipath_pioupd_thresh) {
-		dd->ipath_pioupd_thresh = cnt;
-		ipath_dbg("Decreased pio update threshold to %u\n",
-			dd->ipath_pioupd_thresh);
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
-			<< INFINIPATH_S_UPDTHRESH_SHIFT);
-		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-			<< INFINIPATH_S_UPDTHRESH_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-	}
-}
-
-/**
- * ipath_create_rcvhdrq - create a receive header queue
- * @dd: the infinipath device
- * @pd: the port data
- *
- * this must be contiguous memory (from an i/o perspective), and must be
- * DMA'able (which means for some systems, it will go through an IOMMU,
- * or be forced into a low address range).
- */
-int ipath_create_rcvhdrq(struct ipath_devdata *dd,
-			 struct ipath_portdata *pd)
-{
-	int ret = 0;
-
-	if (!pd->port_rcvhdrq) {
-		dma_addr_t phys_hdrqtail;
-		gfp_t gfp_flags = GFP_USER | __GFP_COMP;
-		int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
-				sizeof(u32), PAGE_SIZE);
-
-		pd->port_rcvhdrq = dma_alloc_coherent(
-			&dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
-			gfp_flags);
-
-		if (!pd->port_rcvhdrq) {
-			ipath_dev_err(dd, "attempt to allocate %d bytes "
-				      "for port %u rcvhdrq failed\n",
-				      amt, pd->port_port);
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-			pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
-				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
-				GFP_KERNEL);
-			if (!pd->port_rcvhdrtail_kvaddr) {
-				ipath_dev_err(dd, "attempt to allocate 1 page "
-					"for port %u rcvhdrqtailaddr "
-					"failed\n", pd->port_port);
-				ret = -ENOMEM;
-				dma_free_coherent(&dd->pcidev->dev, amt,
-					pd->port_rcvhdrq,
-					pd->port_rcvhdrq_phys);
-				pd->port_rcvhdrq = NULL;
-				goto bail;
-			}
-			pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
-			ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
-				   "physical\n", pd->port_port,
-				   (unsigned long long) phys_hdrqtail);
-		}
-
-		pd->port_rcvhdrq_size = amt;
-
-		ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
-			   "for port %u rcvhdr Q\n",
-			   amt >> PAGE_SHIFT, pd->port_rcvhdrq,
-			   (unsigned long) pd->port_rcvhdrq_phys,
-			   (unsigned long) pd->port_rcvhdrq_size,
-			   pd->port_port);
-	} else {
-		ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
-			   "hdrtailaddr@%p %llx physical\n",
-			   pd->port_port, pd->port_rcvhdrq,
-			   (unsigned long long) pd->port_rcvhdrq_phys,
-			   pd->port_rcvhdrtail_kvaddr, (unsigned long long)
-			   pd->port_rcvhdrqtailaddr_phys);
-	}
-	/* clear for security and sanity on each use */
-	memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
-	if (pd->port_rcvhdrtail_kvaddr)
-		memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
-
-	/*
-	 * tell chip each time we init it, even if we are re-using previous
-	 * memory (we zero the register at process close)
-	 */
-	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
-			      pd->port_port, pd->port_rcvhdrqtailaddr_phys);
-	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
-			      pd->port_port, pd->port_rcvhdrq_phys);
-
-bail:
-	return ret;
-}
-
-
-/*
- * Flush all sends that might be in the ready to send state, as well as any
- * that are in the process of being sent.   Used whenever we need to be
- * sure the send side is idle.  Cleans up all buffer state by canceling
- * all pio buffers, and issuing an abort, which cleans up anything in the
- * launch fifo.  The cancel is superfluous on some chip versions, but
- * it's safer to always do it.
- * PIOAvail bits are updated by the chip as if normal send had happened.
- */
-void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
-{
-	unsigned long flags;
-
-	if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
-		ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
-		goto bail;
-	}
-	/*
-	 * If we have SDMA, and it's not disabled, we have to kick off the
-	 * abort state machine, provided we aren't already aborting.
-	 * If we are in the process of aborting SDMA (!DISABLED, but ABORTING),
-	 * we skip the rest of this routine. It is already "in progress"
-	 */
-	if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
-		int skip_cancel;
-		unsigned long *statp = &dd->ipath_sdma_status;
-
-		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-		skip_cancel =
-			test_and_set_bit(IPATH_SDMA_ABORTING, statp)
-			&& !test_bit(IPATH_SDMA_DISABLED, statp);
-		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-		if (skip_cancel)
-			goto bail;
-	}
-
-	ipath_dbg("Cancelling all in-progress send buffers\n");
-
-	/* skip armlaunch errs for a while */
-	dd->ipath_lastcancel = jiffies + HZ / 2;
-
-	/*
-	 * The abort bit is auto-clearing.  We also don't want pioavail
-	 * update happening during this, and we don't want any other
-	 * sends going out, so turn those off for the duration.  We read
-	 * the scratch register to be sure that cancels and the abort
-	 * have taken effect in the chip.  Otherwise two parts are same
-	 * as ipath_force_pio_avail_update()
-	 */
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
-		| INFINIPATH_S_PIOENABLE);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-		dd->ipath_sendctrl | INFINIPATH_S_ABORT);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	/* disarm all send buffers */
-	ipath_disarm_piobufs(dd, 0,
-		dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-
-	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-		set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
-
-	if (restore_sendctrl) {
-		/* else done by caller later if needed */
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
-			INFINIPATH_S_PIOENABLE;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl);
-		/* and again, be sure all have hit the chip */
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-	}
-
-	if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
-	    !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
-	    test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
-		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-		/* only wait so long for intr */
-		dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
-		dd->ipath_sdma_reset_wait = 200;
-		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-	}
-bail:;
-}
-
-/*
- * Force an update of in-memory copy of the pioavail registers, when
- * needed for any of a variety of reasons.  We read the scratch register
- * to make it highly likely that the update will have happened by the
- * time we return.  If already off (as in cancel_sends above), this
- * routine is a nop, on the assumption that the caller will "do the
- * right thing".
- */
-void ipath_force_pio_avail_update(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl);
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	}
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-}
-
-static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
-				int linitcmd)
-{
-	u64 mod_wd;
-	static const char *what[4] = {
-		[0] = "NOP",
-		[INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
-		[INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
-		[INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
-	};
-
-	if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
-		/*
-		 * If we are told to disable, note that so link-recovery
-		 * code does not attempt to bring us back up.
-		 */
-		preempt_disable();
-		dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
-		preempt_enable();
-	} else if (linitcmd) {
-		/*
-		 * Any other linkinitcmd will lead to LINKDOWN and then
-		 * to INIT (if all is well), so clear flag to let
-		 * link-recovery code attempt to bring us back up.
-		 */
-		preempt_disable();
-		dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
-		preempt_enable();
-	}
-
-	mod_wd = (linkcmd << dd->ibcc_lc_shift) |
-		(linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-	ipath_cdbg(VERBOSE,
-		"Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
-		dd->ipath_unit, what[linkcmd], linitcmd,
-		ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
-			ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-			 dd->ipath_ibcctrl | mod_wd);
-	/* read from chip so write is flushed */
-	(void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-}
-
-int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
-{
-	u32 lstate;
-	int ret;
-
-	switch (newstate) {
-	case IPATH_IB_LINKDOWN_ONLY:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-					INFINIPATH_IBCC_LINKINITCMD_POLL);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN_SLEEP:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-					INFINIPATH_IBCC_LINKINITCMD_SLEEP);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN_DISABLE:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
-					INFINIPATH_IBCC_LINKINITCMD_DISABLE);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKARM:
-		if (dd->ipath_flags & IPATH_LINKARMED) {
-			ret = 0;
-			goto bail;
-		}
-		if (!(dd->ipath_flags &
-		      (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
-			ret = -EINVAL;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);
-
-		/*
-		 * Since the port can transition to ACTIVE by receiving
-		 * a non VL 15 packet, wait for either state.
-		 */
-		lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
-		break;
-
-	case IPATH_IB_LINKACTIVE:
-		if (dd->ipath_flags & IPATH_LINKACTIVE) {
-			ret = 0;
-			goto bail;
-		}
-		if (!(dd->ipath_flags & IPATH_LINKARMED)) {
-			ret = -EINVAL;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
-		lstate = IPATH_LINKACTIVE;
-		break;
-
-	case IPATH_IB_LINK_LOOPBACK:
-		dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
-		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-
-		/* turn heartbeat off, as it causes loopback to fail */
-		dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-				       IPATH_IB_HRTBT_OFF);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINK_EXTERNAL:
-		dev_info(&dd->pcidev->dev,
-			"Disabling IB local loopback (normal)\n");
-		dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-				       IPATH_IB_HRTBT_ON);
-		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	/*
-	 * Heartbeat can be explicitly enabled by the user via
-	 * "hrtbt_enable" "file", and if disabled, trying to enable here
-	 * will have no effect.  Implicit changes (heartbeat off when
-	 * loopback on, and vice versa) are included to ease testing.
-	 */
-	case IPATH_IB_LINK_HRTBT:
-		ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-			IPATH_IB_HRTBT_ON);
-		goto bail;
-
-	case IPATH_IB_LINK_NO_HRTBT:
-		ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
-			IPATH_IB_HRTBT_OFF);
-		goto bail;
-
-	default:
-		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
-		ret = -EINVAL;
-		goto bail;
-	}
-	ret = ipath_wait_linkstate(dd, lstate, 2000);
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_set_mtu - set the MTU
- * @dd: the infinipath device
- * @arg: the new MTU
- *
- * we can handle "any" incoming size, the issue here is whether we
- * need to restrict our outgoing size.   For now, we don't do any
- * sanity checking on this, and we don't deal with what happens to
- * programs that are already running when the size changes.
- * NOTE: changing the MTU will usually cause the IBC to go back to
- * link INIT state...
- */
-int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
-{
-	u32 piosize;
-	int changed = 0;
-	int ret;
-
-	/*
-	 * mtu is IB data payload max.  It's the largest power of 2 less
-	 * than piosize (or even larger, since it only really controls the
-	 * largest we can receive; we can send the max of the mtu and
-	 * piosize).  We check that it's one of the valid IB sizes.
-	 */
-	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
-	    (arg != 4096 || !ipath_mtu4096)) {
-		ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
-		ret = -EINVAL;
-		goto bail;
-	}
-	if (dd->ipath_ibmtu == arg) {
-		ret = 0;        /* same as current */
-		goto bail;
-	}
-
-	piosize = dd->ipath_ibmaxlen;
-	dd->ipath_ibmtu = arg;
-
-	if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
-		/* Only if it's not the initial value (or reset to it) */
-		if (piosize != dd->ipath_init_ibmaxlen) {
-			if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
-				piosize = dd->ipath_init_ibmaxlen;
-			dd->ipath_ibmaxlen = piosize;
-			changed = 1;
-		}
-	} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
-		piosize = arg + IPATH_PIO_MAXIBHDR;
-		ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
-			   "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
-			   arg);
-		dd->ipath_ibmaxlen = piosize;
-		changed = 1;
-	}
-
-	if (changed) {
-		u64 ibc = dd->ipath_ibcctrl, ibdw;
-		/*
-		 * update our housekeeping variables, and set IBC max
-		 * size, same as init code; max IBC is max we allow in
-		 * buffer, less the qword pbc, plus 1 for ICRC, in dwords
-		 */
-		dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
-		ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
-		ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
-			 dd->ibcc_mpl_shift);
-		ibc |= ibdw << dd->ibcc_mpl_shift;
-		dd->ipath_ibcctrl = ibc;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-		dd->ipath_f_tidtemplate(dd);
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)
-{
-	dd->ipath_lid = lid;
-	dd->ipath_lmc = lmc;
-
-	dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |
-		(~((1U << lmc) - 1)) << 16);
-
-	dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);
-
-	return 0;
-}
-
-
-/**
- * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
- * @dd: the infinipath device
- * @regno: the register number to write
- * @port: the port containing the register
- * @value: the value to write
- *
- * Registers that vary with the chip implementation constants (port)
- * use this routine.
- */
-void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
-			  unsigned port, u64 value)
-{
-	u16 where;
-
-	if (port < dd->ipath_portcnt &&
-	    (regno == dd->ipath_kregs->kr_rcvhdraddr ||
-	     regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
-		where = regno + port;
-	else
-		where = -1;
-
-	ipath_write_kreg(dd, where, value);
-}
-
-/*
- * Following deal with the "obviously simple" task of overriding the state
- * of the LEDS, which normally indicate link physical and logical status.
- * The complications arise in dealing with different hardware mappings
- * and the board-dependent routine being called from interrupts.
- * and then there's the requirement to _flash_ them.
- */
-#define LED_OVER_FREQ_SHIFT 8
-#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
-/* Below is "non-zero" to force override, but both actual LEDs are off */
-#define LED_OVER_BOTH_OFF (8)
-
-static void ipath_run_led_override(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-	int timeoff;
-	int pidx;
-	u64 lstate, ltstate, val;
-
-	if (!(dd->ipath_flags & IPATH_INITTED))
-		return;
-
-	pidx = dd->ipath_led_override_phase++ & 1;
-	dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
-	timeoff = dd->ipath_led_override_timeoff;
-
-	/*
-	 * below potentially restores the LED values per current status,
-	 * should also possibly setup the traffic-blink register,
-	 * but leave that to per-chip functions.
-	 */
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-	ltstate = ipath_ib_linktrstate(dd, val);
-	lstate = ipath_ib_linkstate(dd, val);
-
-	dd->ipath_f_setextled(dd, lstate, ltstate);
-	mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
-}
-
-void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
-{
-	int timeoff, freq;
-
-	if (!(dd->ipath_flags & IPATH_INITTED))
-		return;
-
-	/* First check if we are blinking. If not, use 1HZ polling */
-	timeoff = HZ;
-	freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
-
-	if (freq) {
-		/* For blink, set each phase from one nybble of val */
-		dd->ipath_led_override_vals[0] = val & 0xF;
-		dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
-		timeoff = (HZ << 4)/freq;
-	} else {
-		/* Non-blink set both phases the same. */
-		dd->ipath_led_override_vals[0] = val & 0xF;
-		dd->ipath_led_override_vals[1] = val & 0xF;
-	}
-	dd->ipath_led_override_timeoff = timeoff;
-
-	/*
-	 * If the timer has not already been started, do so. Use a "quick"
-	 * timeout so the function will be called soon, to look at our request.
-	 */
-	if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
-		/* Need to start timer */
-		setup_timer(&dd->ipath_led_override_timer,
-				ipath_run_led_override, (unsigned long)dd);
-
-		dd->ipath_led_override_timer.expires = jiffies + 1;
-		add_timer(&dd->ipath_led_override_timer);
-	} else
-		atomic_dec(&dd->ipath_led_override_timer_active);
-}
-
-/**
- * ipath_shutdown_device - shut down a device
- * @dd: the infinipath device
- *
- * This is called to make the device quiet when we are about to
- * unload the driver, and also when the device is administratively
- * disabled.   It does not free any data structures.
- * Everything it does has to be setup again by ipath_init_chip(dd,1)
- */
-void ipath_shutdown_device(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-
-	ipath_dbg("Shutting down the device\n");
-
-	ipath_hol_up(dd); /* make sure user processes aren't suspended */
-
-	dd->ipath_flags |= IPATH_LINKUNK;
-	dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
-			     IPATH_LINKINIT | IPATH_LINKARMED |
-			     IPATH_LINKACTIVE);
-	*dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
-				IPATH_STATUS_IB_READY);
-
-	/* mask interrupts, but not errors */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-
-	dd->ipath_rcvctrl = 0;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-
-	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-		teardown_sdma(dd);
-
-	/*
-	 * gracefully stop all sends allowing any in progress to trickle out
-	 * first.
-	 */
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl = 0;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	/* flush it */
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	/*
-	 * enough for anything that's going to trickle out to have actually
-	 * done so.
-	 */
-	udelay(5);
-
-	dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
-
-	ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
-	ipath_cancel_sends(dd, 0);
-
-	/*
-	 * we are shutting down, so tell components that care.  We don't do
-	 * this on just a link state change, much like ethernet, a cable
-	 * unplug, etc. doesn't change driver state
-	 */
-	signal_ib_event(dd, IB_EVENT_PORT_ERR);
-
-	/* disable IBC */
-	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-			 dd->ipath_control | INFINIPATH_C_FREEZEMODE);
-
-	/*
-	 * clear SerdesEnable and turn the leds off; do this here because
-	 * we are unloading, so don't count on interrupts to move along
-	 * Turn the LEDs off explicitly for the same reason.
-	 */
-	dd->ipath_f_quiet_serdes(dd);
-
-	/* stop all the timers that might still be running */
-	del_timer_sync(&dd->ipath_hol_timer);
-	if (dd->ipath_stats_timer_active) {
-		del_timer_sync(&dd->ipath_stats_timer);
-		dd->ipath_stats_timer_active = 0;
-	}
-	if (dd->ipath_intrchk_timer.data) {
-		del_timer_sync(&dd->ipath_intrchk_timer);
-		dd->ipath_intrchk_timer.data = 0;
-	}
-	if (atomic_read(&dd->ipath_led_override_timer_active)) {
-		del_timer_sync(&dd->ipath_led_override_timer);
-		atomic_set(&dd->ipath_led_override_timer_active, 0);
-	}
-
-	/*
-	 * clear all interrupts and errors, so that the next time the driver
-	 * is loaded or device is enabled, we know that whatever is set
-	 * happened while we were unloaded
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-			 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
-
-	ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
-	ipath_update_eeprom_log(dd);
-}
-
-/**
- * ipath_free_pddata - free a port's allocated data
- * @dd: the infinipath device
- * @pd: the portdata structure
- *
- * free up any allocated data for a port
- * This should not touch anything that would affect a simultaneous
- * re-allocation of port data, because it is called after ipath_mutex
- * is released (and can be called from reinit as well).
- * It should never change any chip state, or global driver state.
- * (The only exception to global state is freeing the port0 port0_skbs.)
- */
-void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
-{
-	if (!pd)
-		return;
-
-	if (pd->port_rcvhdrq) {
-		ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
-			   "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
-			   (unsigned long) pd->port_rcvhdrq_size);
-		dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
-				  pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
-		pd->port_rcvhdrq = NULL;
-		if (pd->port_rcvhdrtail_kvaddr) {
-			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-					 pd->port_rcvhdrtail_kvaddr,
-					 pd->port_rcvhdrqtailaddr_phys);
-			pd->port_rcvhdrtail_kvaddr = NULL;
-		}
-	}
-	if (pd->port_port && pd->port_rcvegrbuf) {
-		unsigned e;
-
-		for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
-			void *base = pd->port_rcvegrbuf[e];
-			size_t size = pd->port_rcvegrbuf_size;
-
-			ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
-				   "chunk %u/%u\n", base,
-				   (unsigned long) size,
-				   e, pd->port_rcvegrbuf_chunks);
-			dma_free_coherent(&dd->pcidev->dev, size,
-				base, pd->port_rcvegrbuf_phys[e]);
-		}
-		kfree(pd->port_rcvegrbuf);
-		pd->port_rcvegrbuf = NULL;
-		kfree(pd->port_rcvegrbuf_phys);
-		pd->port_rcvegrbuf_phys = NULL;
-		pd->port_rcvegrbuf_chunks = 0;
-	} else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
-		unsigned e;
-		struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
-
-		dd->ipath_port0_skbinfo = NULL;
-		ipath_cdbg(VERBOSE, "free closed port %d "
-			   "ipath_port0_skbinfo @ %p\n", pd->port_port,
-			   skbinfo);
-		for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)
-			if (skbinfo[e].skb) {
-				pci_unmap_single(dd->pcidev, skbinfo[e].phys,
-						 dd->ipath_ibmaxlen,
-						 PCI_DMA_FROMDEVICE);
-				dev_kfree_skb(skbinfo[e].skb);
-			}
-		vfree(skbinfo);
-	}
-	kfree(pd->port_tid_pg_list);
-	vfree(pd->subport_uregbase);
-	vfree(pd->subport_rcvegrbuf);
-	vfree(pd->subport_rcvhdr_base);
-	kfree(pd);
-}
-
-static int __init infinipath_init(void)
-{
-	int ret;
-
-	if (ipath_debug & __IPATH_DBG)
-		printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
-
-	/*
-	 * These must be called before the driver is registered with
-	 * the PCI subsystem.
-	 */
-	idr_init(&unit_table);
-
-	ret = pci_register_driver(&ipath_driver);
-	if (ret < 0) {
-		printk(KERN_ERR IPATH_DRV_NAME
-		       ": Unable to register driver: error %d\n", -ret);
-		goto bail_unit;
-	}
-
-	ret = ipath_init_ipathfs();
-	if (ret < 0) {
-		printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
-		       "ipathfs: error %d\n", -ret);
-		goto bail_pci;
-	}
-
-	goto bail;
-
-bail_pci:
-	pci_unregister_driver(&ipath_driver);
-
-bail_unit:
-	idr_destroy(&unit_table);
-
-bail:
-	return ret;
-}
-
-static void __exit infinipath_cleanup(void)
-{
-	ipath_exit_ipathfs();
-
-	ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
-	pci_unregister_driver(&ipath_driver);
-
-	idr_destroy(&unit_table);
-}
-
-/**
- * ipath_reset_device - reset the chip if possible
- * @unit: the device to reset
- *
- * Whether or not reset is successful, we attempt to re-initialize the chip
- * (that is, much like a driver unload/reload).  We clear the INITTED flag
- * so that the various entry points will fail until we reinitialize.  For
- * now, we only allow this if no user ports are open that use chip resources
- */
-int ipath_reset_device(int unit)
-{
-	int ret, i;
-	struct ipath_devdata *dd = ipath_lookup(unit);
-	unsigned long flags;
-
-	if (!dd) {
-		ret = -ENODEV;
-		goto bail;
-	}
-
-	if (atomic_read(&dd->ipath_led_override_timer_active)) {
-		/* Need to stop LED timer, _then_ shut off LEDs */
-		del_timer_sync(&dd->ipath_led_override_timer);
-		atomic_set(&dd->ipath_led_override_timer_active, 0);
-	}
-
-	/* Shut off LEDs after we are sure timer is not running */
-	dd->ipath_led_override = LED_OVER_BOTH_OFF;
-	dd->ipath_f_setextled(dd, 0, 0);
-
-	dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
-
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
-		dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
-			 "not initialized or not present\n", unit);
-		ret = -ENXIO;
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-	if (dd->ipath_pd)
-		for (i = 1; i < dd->ipath_cfgports; i++) {
-			if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
-				continue;
-			spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-			ipath_dbg("unit %u port %d is in use "
-				  "(PID %u cmd %s), can't reset\n",
-				  unit, i,
-				  pid_nr(dd->ipath_pd[i]->port_pid),
-				  dd->ipath_pd[i]->port_comm);
-			ret = -EBUSY;
-			goto bail;
-		}
-	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-
-	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-		teardown_sdma(dd);
-
-	dd->ipath_flags &= ~IPATH_INITTED;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-	ret = dd->ipath_f_reset(dd);
-	if (ret == 1) {
-		ipath_dbg("Reinitializing unit %u after reset attempt\n",
-			  unit);
-		ret = ipath_init_chip(dd, 1);
-	} else
-		ret = -EAGAIN;
-	if (ret)
-		ipath_dev_err(dd, "Reinitialize unit %u after "
-			      "reset failed with %d\n", unit, ret);
-	else
-		dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
-			 "resetting\n", unit);
-
-bail:
-	return ret;
-}
-
-/*
- * send a signal to all the processes that have the driver open
- * through the normal interfaces (i.e., everything other than diags
- * interface).  Returns number of signalled processes.
- */
-static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
-{
-	int i, sub, any = 0;
-	struct pid *pid;
-	unsigned long flags;
-
-	if (!dd->ipath_pd)
-		return 0;
-
-	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-	for (i = 1; i < dd->ipath_cfgports; i++) {
-		if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
-			continue;
-		pid = dd->ipath_pd[i]->port_pid;
-		if (!pid)
-			continue;
-
-		dev_info(&dd->pcidev->dev, "context %d in use "
-			  "(PID %u), sending signal %d\n",
-			  i, pid_nr(pid), sig);
-		kill_pid(pid, sig, 1);
-		any++;
-		for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
-			pid = dd->ipath_pd[i]->port_subpid[sub];
-			if (!pid)
-				continue;
-			dev_info(&dd->pcidev->dev, "sub-context "
-				"%d:%d in use (PID %u), sending "
-				"signal %d\n", i, sub, pid_nr(pid), sig);
-			kill_pid(pid, sig, 1);
-			any++;
-		}
-	}
-	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-	return any;
-}
-
-static void ipath_hol_signal_down(struct ipath_devdata *dd)
-{
-	if (ipath_signal_procs(dd, SIGSTOP))
-		ipath_dbg("Stopped some processes\n");
-	ipath_cancel_sends(dd, 1);
-}
-
-
-static void ipath_hol_signal_up(struct ipath_devdata *dd)
-{
-	if (ipath_signal_procs(dd, SIGCONT))
-		ipath_dbg("Continued some processes\n");
-}
-
-/*
- * link is down, stop any users processes, and flush pending sends
- * to prevent HoL blocking, then start the HoL timer that
- * periodically continues, then stop procs, so they can detect
- * link down if they want, and do something about it.
- * Timer may already be running, so use mod_timer, not add_timer.
- */
-void ipath_hol_down(struct ipath_devdata *dd)
-{
-	dd->ipath_hol_state = IPATH_HOL_DOWN;
-	ipath_hol_signal_down(dd);
-	dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
-	dd->ipath_hol_timer.expires = jiffies +
-		msecs_to_jiffies(ipath_hol_timeout_ms);
-	mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
-}
-
-/*
- * link is up, continue any user processes, and ensure timer
- * is a nop, if running.  Let timer keep running, if set; it
- * will nop when it sees the link is up
- */
-void ipath_hol_up(struct ipath_devdata *dd)
-{
-	ipath_hol_signal_up(dd);
-	dd->ipath_hol_state = IPATH_HOL_UP;
-}
-
-/*
- * toggle the running/not running state of user proceses
- * to prevent HoL blocking on chip resources, but still allow
- * user processes to do link down special case handling.
- * Should only be called via the timer
- */
-void ipath_hol_event(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-	if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
-		&& dd->ipath_hol_state != IPATH_HOL_UP) {
-		dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
-		ipath_dbg("Stopping processes\n");
-		ipath_hol_signal_down(dd);
-	} else { /* may do "extra" if also in ipath_hol_up() */
-		dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
-		ipath_dbg("Continuing processes\n");
-		ipath_hol_signal_up(dd);
-	}
-	if (dd->ipath_hol_state == IPATH_HOL_UP)
-		ipath_dbg("link's up, don't resched timer\n");
-	else {
-		dd->ipath_hol_timer.expires = jiffies +
-			msecs_to_jiffies(ipath_hol_timeout_ms);
-		mod_timer(&dd->ipath_hol_timer,
-			dd->ipath_hol_timer.expires);
-	}
-}
-
-int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
-{
-	u64 val;
-
-	if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
-		return -1;
-	if (dd->ipath_rx_pol_inv != new_pol_inv) {
-		dd->ipath_rx_pol_inv = new_pol_inv;
-		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-			 INFINIPATH_XGXS_RX_POL_SHIFT);
-		val |= ((u64)dd->ipath_rx_pol_inv) <<
-			INFINIPATH_XGXS_RX_POL_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-	}
-	return 0;
-}
-
-/*
- * Disable and enable the armlaunch error.  Used for PIO bandwidth testing on
- * the 7220, which is count-based, rather than trigger-based.  Safe for the
- * driver check, since it's at init.   Not completely safe when used for
- * user-mode checking, since some error checking can be lost, but not
- * particularly risky, and only has problematic side-effects in the face of
- * very buggy user code.  There is no reference counting, but that's also
- * fine, given the intended use.
- */
-void ipath_enable_armlaunch(struct ipath_devdata *dd)
-{
-	dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-		INFINIPATH_E_SPIOARMLAUNCH);
-	dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-		dd->ipath_errormask);
-}
-
-void ipath_disable_armlaunch(struct ipath_devdata *dd)
-{
-	/* so don't re-enable if already set */
-	dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
-	dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-		dd->ipath_errormask);
-}
-
-module_init(infinipath_init);
-module_exit(infinipath_cleanup);
diff --git a/drivers/staging/rdma/ipath/ipath_eeprom.c b/drivers/staging/rdma/ipath/ipath_eeprom.c
deleted file mode 100644
index ef84107c7ce0..000000000000
--- a/drivers/staging/rdma/ipath/ipath_eeprom.c
+++ /dev/null
@@ -1,1183 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_kernel.h"
-
-/*
- * InfiniPath I2C driver for a serial eeprom.  This is not a generic
- * I2C interface.  For a start, the device we're using (Atmel AT24C11)
- * doesn't work like a regular I2C device.  It looks like one
- * electrically, but not logically.  Normal I2C devices have a single
- * 7-bit or 10-bit I2C address that they respond to.  Valid 7-bit
- * addresses range from 0x03 to 0x77.  Addresses 0x00 to 0x02 and 0x78
- * to 0x7F are special reserved addresses (e.g. 0x00 is the "general
- * call" address.)  The Atmel device, on the other hand, responds to ALL
- * 7-bit addresses.  It's designed to be the only device on a given I2C
- * bus.  A 7-bit address corresponds to the memory address within the
- * Atmel device itself.
- *
- * Also, the timing requirements mean more than simple software
- * bitbanging, with readbacks from chip to ensure timing (simple udelay
- * is not enough).
- *
- * This all means that accessing the device is specialized enough
- * that using the standard kernel I2C bitbanging interface would be
- * impossible.  For example, the core I2C eeprom driver expects to find
- * a device at one or more of a limited set of addresses only.  It doesn't
- * allow writing to an eeprom.  It also doesn't provide any means of
- * accessing eeprom contents from within the kernel, only via sysfs.
- */
-
-/* Added functionality for IBA7220-based cards */
-#define IPATH_EEPROM_DEV_V1 0xA0
-#define IPATH_EEPROM_DEV_V2 0xA2
-#define IPATH_TEMP_DEV 0x98
-#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2)
-#define IPATH_NO_DEV (0xFF)
-
-/*
- * The number of I2C chains is proliferating. Table below brings
- * some order to the madness. The basic principle is that the
- * table is scanned from the top, and a "probe" is made to the
- * device probe_dev. If that succeeds, the chain is considered
- * to be of that type, and dd->i2c_chain_type is set to the index+1
- * of the entry.
- * The +1 is so static initialization can mean "unknown, do probe."
- */
-static struct i2c_chain_desc {
-	u8 probe_dev;	/* If seen at probe, chain is this type */
-	u8 eeprom_dev;	/* Dev addr (if any) for EEPROM */
-	u8 temp_dev;	/* Dev Addr (if any) for Temp-sense */
-} i2c_chains[] = {
-	{ IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */
-	{ IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */
-	{ IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */
-	{ IPATH_NO_DEV }
-};
-
-enum i2c_type {
-	i2c_line_scl = 0,
-	i2c_line_sda
-};
-
-enum i2c_state {
-	i2c_line_low = 0,
-	i2c_line_high
-};
-
-#define READ_CMD 1
-#define WRITE_CMD 0
-
-/**
- * i2c_gpio_set - set a GPIO line
- * @dd: the infinipath device
- * @line: the line to set
- * @new_line_state: the state to set
- *
- * Returns 0 if the line was set to the new state successfully, non-zero
- * on error.
- */
-static int i2c_gpio_set(struct ipath_devdata *dd,
-			enum i2c_type line,
-			enum i2c_state new_line_state)
-{
-	u64 out_mask, dir_mask, *gpioval;
-	unsigned long flags = 0;
-
-	gpioval = &dd->ipath_gpio_out;
-
-	if (line == i2c_line_scl) {
-		dir_mask = dd->ipath_gpio_scl;
-		out_mask = (1UL << dd->ipath_gpio_scl_num);
-	} else {
-		dir_mask = dd->ipath_gpio_sda;
-		out_mask = (1UL << dd->ipath_gpio_sda_num);
-	}
-
-	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-	if (new_line_state == i2c_line_high) {
-		/* tri-state the output rather than force high */
-		dd->ipath_extctrl &= ~dir_mask;
-	} else {
-		/* config line to be an output */
-		dd->ipath_extctrl |= dir_mask;
-	}
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
-
-	/* set output as well (no real verify) */
-	if (new_line_state == i2c_line_high)
-		*gpioval |= out_mask;
-	else
-		*gpioval &= ~out_mask;
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
-	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-	return 0;
-}
-
-/**
- * i2c_gpio_get - get a GPIO line state
- * @dd: the infinipath device
- * @line: the line to get
- * @curr_statep: where to put the line state
- *
- * Returns 0 if the line was set to the new state successfully, non-zero
- * on error.  curr_state is not set on error.
- */
-static int i2c_gpio_get(struct ipath_devdata *dd,
-			enum i2c_type line,
-			enum i2c_state *curr_statep)
-{
-	u64 read_val, mask;
-	int ret;
-	unsigned long flags = 0;
-
-	/* check args */
-	if (curr_statep == NULL) {
-		ret = 1;
-		goto bail;
-	}
-
-	/* config line to be an input */
-	if (line == i2c_line_scl)
-		mask = dd->ipath_gpio_scl;
-	else
-		mask = dd->ipath_gpio_sda;
-
-	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-	dd->ipath_extctrl &= ~mask;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
-	/*
-	 * Below is very unlikely to reflect true input state if Output
-	 * Enable actually changed.
-	 */
-	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-	if (read_val & mask)
-		*curr_statep = i2c_line_high;
-	else
-		*curr_statep = i2c_line_low;
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * i2c_wait_for_writes - wait for a write
- * @dd: the infinipath device
- *
- * We use this instead of udelay directly, so we can make sure
- * that previous register writes have been flushed all the way
- * to the chip.  Since we are delaying anyway, the cost doesn't
- * hurt, and makes the bit twiddling more regular
- */
-static void i2c_wait_for_writes(struct ipath_devdata *dd)
-{
-	(void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
-	rmb();
-}
-
-static void scl_out(struct ipath_devdata *dd, u8 bit)
-{
-	udelay(1);
-	i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
-
-	i2c_wait_for_writes(dd);
-}
-
-static void sda_out(struct ipath_devdata *dd, u8 bit)
-{
-	i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low);
-
-	i2c_wait_for_writes(dd);
-}
-
-static u8 sda_in(struct ipath_devdata *dd, int wait)
-{
-	enum i2c_state bit;
-
-	if (i2c_gpio_get(dd, i2c_line_sda, &bit))
-		ipath_dbg("get bit failed!\n");
-
-	if (wait)
-		i2c_wait_for_writes(dd);
-
-	return bit == i2c_line_high ? 1U : 0;
-}
-
-/**
- * i2c_ackrcv - see if ack following write is true
- * @dd: the infinipath device
- */
-static int i2c_ackrcv(struct ipath_devdata *dd)
-{
-	u8 ack_received;
-
-	/* AT ENTRY SCL = LOW */
-	/* change direction, ignore data */
-	ack_received = sda_in(dd, 1);
-	scl_out(dd, i2c_line_high);
-	ack_received = sda_in(dd, 1) == 0;
-	scl_out(dd, i2c_line_low);
-	return ack_received;
-}
-
-/**
- * rd_byte - read a byte, leaving ACK, STOP, etc up to caller
- * @dd: the infinipath device
- *
- * Returns byte shifted out of device
- */
-static int rd_byte(struct ipath_devdata *dd)
-{
-	int bit_cntr, data;
-
-	data = 0;
-
-	for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
-		data <<= 1;
-		scl_out(dd, i2c_line_high);
-		data |= sda_in(dd, 0);
-		scl_out(dd, i2c_line_low);
-	}
-	return data;
-}
-
-/**
- * wr_byte - write a byte, one bit at a time
- * @dd: the infinipath device
- * @data: the byte to write
- *
- * Returns 0 if we got the following ack, otherwise 1
- */
-static int wr_byte(struct ipath_devdata *dd, u8 data)
-{
-	int bit_cntr;
-	u8 bit;
-
-	for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
-		bit = (data >> bit_cntr) & 1;
-		sda_out(dd, bit);
-		scl_out(dd, i2c_line_high);
-		scl_out(dd, i2c_line_low);
-	}
-	return (!i2c_ackrcv(dd)) ? 1 : 0;
-}
-
-static void send_ack(struct ipath_devdata *dd)
-{
-	sda_out(dd, i2c_line_low);
-	scl_out(dd, i2c_line_high);
-	scl_out(dd, i2c_line_low);
-	sda_out(dd, i2c_line_high);
-}
-
-/**
- * i2c_startcmd - transmit the start condition, followed by address/cmd
- * @dd: the infinipath device
- * @offset_dir: direction byte
- *
- *      (both clock/data high, clock high, data low while clock is high)
- */
-static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir)
-{
-	int res;
-
-	/* issue start sequence */
-	sda_out(dd, i2c_line_high);
-	scl_out(dd, i2c_line_high);
-	sda_out(dd, i2c_line_low);
-	scl_out(dd, i2c_line_low);
-
-	/* issue length and direction byte */
-	res = wr_byte(dd, offset_dir);
-
-	if (res)
-		ipath_cdbg(VERBOSE, "No ack to complete start\n");
-
-	return res;
-}
-
-/**
- * stop_cmd - transmit the stop condition
- * @dd: the infinipath device
- *
- * (both clock/data low, clock high, data high while clock is high)
- */
-static void stop_cmd(struct ipath_devdata *dd)
-{
-	scl_out(dd, i2c_line_low);
-	sda_out(dd, i2c_line_low);
-	scl_out(dd, i2c_line_high);
-	sda_out(dd, i2c_line_high);
-	udelay(2);
-}
-
-/**
- * eeprom_reset - reset I2C communication
- * @dd: the infinipath device
- */
-
-static int eeprom_reset(struct ipath_devdata *dd)
-{
-	int clock_cycles_left = 9;
-	u64 *gpioval = &dd->ipath_gpio_out;
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-	/* Make sure shadows are consistent */
-	dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
-	*gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
-	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
-	ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
-		   "is %llx\n", (unsigned long long) *gpioval);
-
-	/*
-	 * This is to get the i2c into a known state, by first going low,
-	 * then tristate sda (and then tristate scl as first thing
-	 * in loop)
-	 */
-	scl_out(dd, i2c_line_low);
-	sda_out(dd, i2c_line_high);
-
-	/* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */
-	while (clock_cycles_left--) {
-		scl_out(dd, i2c_line_high);
-
-		/* SDA seen high, issue START by dropping it while SCL high */
-		if (sda_in(dd, 0)) {
-			sda_out(dd, i2c_line_low);
-			scl_out(dd, i2c_line_low);
-			/* ATMEL spec says must be followed by STOP. */
-			scl_out(dd, i2c_line_high);
-			sda_out(dd, i2c_line_high);
-			ret = 0;
-			goto bail;
-		}
-
-		scl_out(dd, i2c_line_low);
-	}
-
-	ret = 1;
-
-bail:
-	return ret;
-}
-
-/*
- * Probe for I2C device at specified address. Returns 0 for "success"
- * to match rest of this file.
- * Leave bus in "reasonable" state for further commands.
- */
-static int i2c_probe(struct ipath_devdata *dd, int devaddr)
-{
-	int ret;
-
-	ret = eeprom_reset(dd);
-	if (ret) {
-		ipath_dev_err(dd, "Failed reset probing device 0x%02X\n",
-			      devaddr);
-		return ret;
-	}
-	/*
-	 * Reset no longer leaves bus in start condition, so normal
-	 * i2c_startcmd() will do.
-	 */
-	ret = i2c_startcmd(dd, devaddr | READ_CMD);
-	if (ret)
-		ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n",
-			   devaddr);
-	else {
-		/*
-		 * Device did respond. Complete a single-byte read, because some
-		 * devices apparently cannot handle STOP immediately after they
-		 * ACK the start-cmd.
-		 */
-		int data;
-		data = rd_byte(dd);
-		stop_cmd(dd);
-		ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr);
-	}
-	return ret;
-}
-
-/*
- * Returns the "i2c type". This is a pointer to a struct that describes
- * the I2C chain on this board. To minimize impact on struct ipath_devdata,
- * the (small integer) index into the table is actually memoized, rather
- * then the pointer.
- * Memoization is because the type is determined on the first call per chip.
- * An alternative would be to move type determination to early
- * init code.
- */
-static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd)
-{
-	int idx;
-
-	/* Get memoized index, from previous successful probes */
-	idx = dd->ipath_i2c_chain_type - 1;
-	if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1))
-		goto done;
-
-	idx = 0;
-	while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) {
-		/* if probe succeeds, this is type */
-		if (!i2c_probe(dd, i2c_chains[idx].probe_dev))
-			break;
-		++idx;
-	}
-
-	/*
-	 * Old EEPROM (first entry) may require a reset after probe,
-	 * rather than being able to "start" after "stop"
-	 */
-	if (idx == 0)
-		eeprom_reset(dd);
-
-	if (i2c_chains[idx].probe_dev == IPATH_NO_DEV)
-		idx = -1;
-	else
-		dd->ipath_i2c_chain_type = idx + 1;
-done:
-	return (idx >= 0) ? i2c_chains + idx : NULL;
-}
-
-static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
-					u8 eeprom_offset, void *buffer, int len)
-{
-	int ret;
-	struct i2c_chain_desc *icd;
-	u8 *bp = buffer;
-
-	ret = 1;
-	icd = ipath_i2c_type(dd);
-	if (!icd)
-		goto bail;
-
-	if (icd->eeprom_dev == IPATH_NO_DEV) {
-		/* legacy not-really-I2C */
-		ipath_cdbg(VERBOSE, "Start command only address\n");
-		eeprom_offset = (eeprom_offset << 1) | READ_CMD;
-		ret = i2c_startcmd(dd, eeprom_offset);
-	} else {
-		/* Actual I2C */
-		ipath_cdbg(VERBOSE, "Start command uses devaddr\n");
-		if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
-			ipath_dbg("Failed EEPROM startcmd\n");
-			stop_cmd(dd);
-			ret = 1;
-			goto bail;
-		}
-		ret = wr_byte(dd, eeprom_offset);
-		stop_cmd(dd);
-		if (ret) {
-			ipath_dev_err(dd, "Failed to write EEPROM address\n");
-			ret = 1;
-			goto bail;
-		}
-		ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD);
-	}
-	if (ret) {
-		ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev);
-		stop_cmd(dd);
-		ret = 1;
-		goto bail;
-	}
-
-	/*
-	 * eeprom keeps clocking data out as long as we ack, automatically
-	 * incrementing the address.
-	 */
-	while (len-- > 0) {
-		/* get and store data */
-		*bp++ = rd_byte(dd);
-		/* send ack if not the last byte */
-		if (len)
-			send_ack(dd);
-	}
-
-	stop_cmd(dd);
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
-				       const void *buffer, int len)
-{
-	int sub_len;
-	const u8 *bp = buffer;
-	int max_wait_time, i;
-	int ret;
-	struct i2c_chain_desc *icd;
-
-	ret = 1;
-	icd = ipath_i2c_type(dd);
-	if (!icd)
-		goto bail;
-
-	while (len > 0) {
-		if (icd->eeprom_dev == IPATH_NO_DEV) {
-			if (i2c_startcmd(dd,
-					 (eeprom_offset << 1) | WRITE_CMD)) {
-				ipath_dbg("Failed to start cmd offset %u\n",
-					eeprom_offset);
-				goto failed_write;
-			}
-		} else {
-			/* Real I2C */
-			if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
-				ipath_dbg("Failed EEPROM startcmd\n");
-				goto failed_write;
-			}
-			ret = wr_byte(dd, eeprom_offset);
-			if (ret) {
-				ipath_dev_err(dd, "Failed to write EEPROM "
-					      "address\n");
-				goto failed_write;
-			}
-		}
-
-		sub_len = min(len, 4);
-		eeprom_offset += sub_len;
-		len -= sub_len;
-
-		for (i = 0; i < sub_len; i++) {
-			if (wr_byte(dd, *bp++)) {
-				ipath_dbg("no ack after byte %u/%u (%u "
-					  "total remain)\n", i, sub_len,
-					  len + sub_len - i);
-				goto failed_write;
-			}
-		}
-
-		stop_cmd(dd);
-
-		/*
-		 * wait for write complete by waiting for a successful
-		 * read (the chip replies with a zero after the write
-		 * cmd completes, and before it writes to the eeprom.
-		 * The startcmd for the read will fail the ack until
-		 * the writes have completed.   We do this inline to avoid
-		 * the debug prints that are in the real read routine
-		 * if the startcmd fails.
-		 * We also use the proper device address, so it doesn't matter
-		 * whether we have real eeprom_dev. legacy likes any address.
-		 */
-		max_wait_time = 100;
-		while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) {
-			stop_cmd(dd);
-			if (!--max_wait_time) {
-				ipath_dbg("Did not get successful read to "
-					  "complete write\n");
-				goto failed_write;
-			}
-		}
-		/* now read (and ignore) the resulting byte */
-		rd_byte(dd);
-		stop_cmd(dd);
-	}
-
-	ret = 0;
-	goto bail;
-
-failed_write:
-	stop_cmd(dd);
-	ret = 1;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_eeprom_read - receives bytes from the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: address to read from
- * @buffer: where to store result
- * @len: number of bytes to receive
- */
-int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
-			void *buff, int len)
-{
-	int ret;
-
-	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-	if (!ret) {
-		ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
-		mutex_unlock(&dd->ipath_eep_lock);
-	}
-
-	return ret;
-}
-
-/**
- * ipath_eeprom_write - writes data to the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: where to place data
- * @buffer: data to write
- * @len: number of bytes to write
- */
-int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
-			const void *buff, int len)
-{
-	int ret;
-
-	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-	if (!ret) {
-		ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
-		mutex_unlock(&dd->ipath_eep_lock);
-	}
-
-	return ret;
-}
-
-static u8 flash_csum(struct ipath_flash *ifp, int adjust)
-{
-	u8 *ip = (u8 *) ifp;
-	u8 csum = 0, len;
-
-	/*
-	 * Limit length checksummed to max length of actual data.
-	 * Checksum of erased eeprom will still be bad, but we avoid
-	 * reading past the end of the buffer we were passed.
-	 */
-	len = ifp->if_length;
-	if (len > sizeof(struct ipath_flash))
-		len = sizeof(struct ipath_flash);
-	while (len--)
-		csum += *ip++;
-	csum -= ifp->if_csum;
-	csum = ~csum;
-	if (adjust)
-		ifp->if_csum = csum;
-
-	return csum;
-}
-
-/**
- * ipath_get_guid - get the GUID from the i2c device
- * @dd: the infinipath device
- *
- * We have the capability to use the ipath_nguid field, and get
- * the guid from the first chip's flash, to use for all of them.
- */
-void ipath_get_eeprom_info(struct ipath_devdata *dd)
-{
-	void *buf;
-	struct ipath_flash *ifp;
-	__be64 guid;
-	int len, eep_stat;
-	u8 csum, *bguid;
-	int t = dd->ipath_unit;
-	struct ipath_devdata *dd0 = ipath_lookup(0);
-
-	if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
-		u8 oguid;
-		dd->ipath_guid = dd0->ipath_guid;
-		bguid = (u8 *) & dd->ipath_guid;
-
-		oguid = bguid[7];
-		bguid[7] += t;
-		if (oguid > bguid[7]) {
-			if (bguid[6] == 0xff) {
-				if (bguid[5] == 0xff) {
-					ipath_dev_err(
-						dd,
-						"Can't set %s GUID from "
-						"base, wraps to OUI!\n",
-						ipath_get_unit_name(t));
-					dd->ipath_guid = 0;
-					goto bail;
-				}
-				bguid[5]++;
-			}
-			bguid[6]++;
-		}
-		dd->ipath_nguid = 1;
-
-		ipath_dbg("nguid %u, so adding %u to device 0 guid, "
-			  "for %llx\n",
-			  dd0->ipath_nguid, t,
-			  (unsigned long long) be64_to_cpu(dd->ipath_guid));
-		goto bail;
-	}
-
-	/*
-	 * read full flash, not just currently used part, since it may have
-	 * been written with a newer definition
-	 * */
-	len = sizeof(struct ipath_flash);
-	buf = vmalloc(len);
-	if (!buf) {
-		ipath_dev_err(dd, "Couldn't allocate memory to read %u "
-			      "bytes from eeprom for GUID\n", len);
-		goto bail;
-	}
-
-	mutex_lock(&dd->ipath_eep_lock);
-	eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
-	mutex_unlock(&dd->ipath_eep_lock);
-
-	if (eep_stat) {
-		ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
-		goto done;
-	}
-	ifp = (struct ipath_flash *)buf;
-
-	csum = flash_csum(ifp, 0);
-	if (csum != ifp->if_csum) {
-		dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: "
-			 "0x%x, not 0x%x\n", csum, ifp->if_csum);
-		goto done;
-	}
-	if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
-	    *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
-		ipath_dev_err(dd, "Invalid GUID %llx from flash; "
-			      "ignoring\n",
-			      *(unsigned long long *) ifp->if_guid);
-		/* don't allow GUID if all 0 or all 1's */
-		goto done;
-	}
-
-	/* complain, but allow it */
-	if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
-		dev_info(&dd->pcidev->dev, "Warning, GUID %llx is "
-			 "default, probably not correct!\n",
-			 *(unsigned long long *) ifp->if_guid);
-
-	bguid = ifp->if_guid;
-	if (!bguid[0] && !bguid[1] && !bguid[2]) {
-		/* original incorrect GUID format in flash; fix in
-		 * core copy, by shifting up 2 octets; don't need to
-		 * change top octet, since both it and shifted are
-		 * 0.. */
-		bguid[1] = bguid[3];
-		bguid[2] = bguid[4];
-		bguid[3] = bguid[4] = 0;
-		guid = *(__be64 *) ifp->if_guid;
-		ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, "
-			   "shifting 2 octets\n");
-	} else
-		guid = *(__be64 *) ifp->if_guid;
-	dd->ipath_guid = guid;
-	dd->ipath_nguid = ifp->if_numguid;
-	/*
-	 * Things are slightly complicated by the desire to transparently
-	 * support both the Pathscale 10-digit serial number and the QLogic
-	 * 13-character version.
-	 */
-	if ((ifp->if_fversion > 1) && ifp->if_sprefix[0]
-		&& ((u8 *)ifp->if_sprefix)[0] != 0xFF) {
-		/* This board has a Serial-prefix, which is stored
-		 * elsewhere for backward-compatibility.
-		 */
-		char *snp = dd->ipath_serial;
-		memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix);
-		snp[sizeof ifp->if_sprefix] = '\0';
-		len = strlen(snp);
-		snp += len;
-		len = (sizeof dd->ipath_serial) - len;
-		if (len > sizeof ifp->if_serial) {
-			len = sizeof ifp->if_serial;
-		}
-		memcpy(snp, ifp->if_serial, len);
-	} else
-		memcpy(dd->ipath_serial, ifp->if_serial,
-		       sizeof ifp->if_serial);
-	if (!strstr(ifp->if_comment, "Tested successfully"))
-		ipath_dev_err(dd, "Board SN %s did not pass functional "
-			"test: %s\n", dd->ipath_serial,
-			ifp->if_comment);
-
-	ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
-		   (unsigned long long) be64_to_cpu(dd->ipath_guid));
-
-	memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
-	/*
-	 * Power-on (actually "active") hours are kept as little-endian value
-	 * in EEPROM, but as seconds in a (possibly as small as 24-bit)
-	 * atomic_t while running.
-	 */
-	atomic_set(&dd->ipath_active_time, 0);
-	dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
-
-done:
-	vfree(buf);
-
-bail:;
-}
-
-/**
- * ipath_update_eeprom_log - copy active-time and error counters to eeprom
- * @dd: the infinipath device
- *
- * Although the time is kept as seconds in the ipath_devdata struct, it is
- * rounded to hours for re-write, as we have only 16 bits in EEPROM.
- * First-cut code reads whole (expected) struct ipath_flash, modifies,
- * re-writes. Future direction: read/write only what we need, assuming
- * that the EEPROM had to have been "good enough" for driver init, and
- * if not, we aren't making it worse.
- *
- */
-
-int ipath_update_eeprom_log(struct ipath_devdata *dd)
-{
-	void *buf;
-	struct ipath_flash *ifp;
-	int len, hi_water;
-	uint32_t new_time, new_hrs;
-	u8 csum;
-	int ret, idx;
-	unsigned long flags;
-
-	/* first, check if we actually need to do anything. */
-	ret = 0;
-	for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-		if (dd->ipath_eep_st_new_errs[idx]) {
-			ret = 1;
-			break;
-		}
-	}
-	new_time = atomic_read(&dd->ipath_active_time);
-
-	if (ret == 0 && new_time < 3600)
-		return 0;
-
-	/*
-	 * The quick-check above determined that there is something worthy
-	 * of logging, so get current contents and do a more detailed idea.
-	 * read full flash, not just currently used part, since it may have
-	 * been written with a newer definition
-	 */
-	len = sizeof(struct ipath_flash);
-	buf = vmalloc(len);
-	ret = 1;
-	if (!buf) {
-		ipath_dev_err(dd, "Couldn't allocate memory to read %u "
-				"bytes from eeprom for logging\n", len);
-		goto bail;
-	}
-
-	/* Grab semaphore and read current EEPROM. If we get an
-	 * error, let go, but if not, keep it until we finish write.
-	 */
-	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-	if (ret) {
-		ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
-		goto free_bail;
-	}
-	ret = ipath_eeprom_internal_read(dd, 0, buf, len);
-	if (ret) {
-		mutex_unlock(&dd->ipath_eep_lock);
-		ipath_dev_err(dd, "Unable read EEPROM for logging\n");
-		goto free_bail;
-	}
-	ifp = (struct ipath_flash *)buf;
-
-	csum = flash_csum(ifp, 0);
-	if (csum != ifp->if_csum) {
-		mutex_unlock(&dd->ipath_eep_lock);
-		ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
-				csum, ifp->if_csum);
-		ret = 1;
-		goto free_bail;
-	}
-	hi_water = 0;
-	spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-	for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-		int new_val = dd->ipath_eep_st_new_errs[idx];
-		if (new_val) {
-			/*
-			 * If we have seen any errors, add to EEPROM values
-			 * We need to saturate at 0xFF (255) and we also
-			 * would need to adjust the checksum if we were
-			 * trying to minimize EEPROM traffic
-			 * Note that we add to actual current count in EEPROM,
-			 * in case it was altered while we were running.
-			 */
-			new_val += ifp->if_errcntp[idx];
-			if (new_val > 0xFF)
-				new_val = 0xFF;
-			if (ifp->if_errcntp[idx] != new_val) {
-				ifp->if_errcntp[idx] = new_val;
-				hi_water = offsetof(struct ipath_flash,
-						if_errcntp) + idx;
-			}
-			/*
-			 * update our shadow (used to minimize EEPROM
-			 * traffic), to match what we are about to write.
-			 */
-			dd->ipath_eep_st_errs[idx] = new_val;
-			dd->ipath_eep_st_new_errs[idx] = 0;
-		}
-	}
-	/*
-	 * now update active-time. We would like to round to the nearest hour
-	 * but unless atomic_t are sure to be proper signed ints we cannot,
-	 * because we need to account for what we "transfer" to EEPROM and
-	 * if we log an hour at 31 minutes, then we would need to set
-	 * active_time to -29 to accurately count the _next_ hour.
-	 */
-	if (new_time >= 3600) {
-		new_hrs = new_time / 3600;
-		atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
-		new_hrs += dd->ipath_eep_hrs;
-		if (new_hrs > 0xFFFF)
-			new_hrs = 0xFFFF;
-		dd->ipath_eep_hrs = new_hrs;
-		if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
-			ifp->if_powerhour[0] = new_hrs & 0xFF;
-			hi_water = offsetof(struct ipath_flash, if_powerhour);
-		}
-		if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
-			ifp->if_powerhour[1] = new_hrs >> 8;
-			hi_water = offsetof(struct ipath_flash, if_powerhour)
-					+ 1;
-		}
-	}
-	/*
-	 * There is a tiny possibility that we could somehow fail to write
-	 * the EEPROM after updating our shadows, but problems from holding
-	 * the spinlock too long are a much bigger issue.
-	 */
-	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-	if (hi_water) {
-		/* we made some change to the data, uopdate cksum and write */
-		csum = flash_csum(ifp, 1);
-		ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
-	}
-	mutex_unlock(&dd->ipath_eep_lock);
-	if (ret)
-		ipath_dev_err(dd, "Failed updating EEPROM\n");
-
-free_bail:
-	vfree(buf);
-bail:
-	return ret;
-
-}
-
-/**
- * ipath_inc_eeprom_err - increment one of the four error counters
- * that are logged to EEPROM.
- * @dd: the infinipath device
- * @eidx: 0..3, the counter to increment
- * @incr: how much to add
- *
- * Each counter is 8-bits, and saturates at 255 (0xFF). They
- * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
- * is called, but it can only be called in a context that allows sleep.
- * This function can be called even at interrupt level.
- */
-
-void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
-{
-	uint new_val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-	new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
-	if (new_val > 255)
-		new_val = 255;
-	dd->ipath_eep_st_new_errs[eidx] = new_val;
-	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-	return;
-}
-
-static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum)
-{
-	int ret;
-	struct i2c_chain_desc *icd;
-
-	ret = -ENOENT;
-
-	icd = ipath_i2c_type(dd);
-	if (!icd)
-		goto bail;
-
-	if (icd->temp_dev == IPATH_NO_DEV) {
-		/* tempsense only exists on new, real-I2C boards */
-		ret = -ENXIO;
-		goto bail;
-	}
-
-	if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
-		ipath_dbg("Failed tempsense startcmd\n");
-		stop_cmd(dd);
-		ret = -ENXIO;
-		goto bail;
-	}
-	ret = wr_byte(dd, regnum);
-	stop_cmd(dd);
-	if (ret) {
-		ipath_dev_err(dd, "Failed tempsense WR command %02X\n",
-			      regnum);
-		ret = -ENXIO;
-		goto bail;
-	}
-	if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) {
-		ipath_dbg("Failed tempsense RD startcmd\n");
-		stop_cmd(dd);
-		ret = -ENXIO;
-		goto bail;
-	}
-	/*
-	 * We can only clock out one byte per command, sensibly
-	 */
-	ret = rd_byte(dd);
-	stop_cmd(dd);
-
-bail:
-	return ret;
-}
-
-#define VALID_TS_RD_REG_MASK 0xBF
-
-/**
- * ipath_tempsense_read - read register of temp sensor via I2C
- * @dd: the infinipath device
- * @regnum: register to read from
- *
- * returns reg contents (0..255) or < 0 for error
- */
-int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum)
-{
-	int ret;
-
-	if (regnum > 7)
-		return -EINVAL;
-
-	/* return a bogus value for (the one) register we do not have */
-	if (!((1 << regnum) & VALID_TS_RD_REG_MASK))
-		return 0;
-
-	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-	if (!ret) {
-		ret = ipath_tempsense_internal_read(dd, regnum);
-		mutex_unlock(&dd->ipath_eep_lock);
-	}
-
-	/*
-	 * There are three possibilities here:
-	 * ret is actual value (0..255)
-	 * ret is -ENXIO or -EINVAL from code in this file
-	 * ret is -EINTR from mutex_lock_interruptible.
-	 */
-	return ret;
-}
-
-static int ipath_tempsense_internal_write(struct ipath_devdata *dd,
-					  u8 regnum, u8 data)
-{
-	int ret = -ENOENT;
-	struct i2c_chain_desc *icd;
-
-	icd = ipath_i2c_type(dd);
-	if (!icd)
-		goto bail;
-
-	if (icd->temp_dev == IPATH_NO_DEV) {
-		/* tempsense only exists on new, real-I2C boards */
-		ret = -ENXIO;
-		goto bail;
-	}
-	if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
-		ipath_dbg("Failed tempsense startcmd\n");
-		stop_cmd(dd);
-		ret = -ENXIO;
-		goto bail;
-	}
-	ret = wr_byte(dd, regnum);
-	if (ret) {
-		stop_cmd(dd);
-		ipath_dev_err(dd, "Failed to write tempsense command %02X\n",
-			      regnum);
-		ret = -ENXIO;
-		goto bail;
-	}
-	ret = wr_byte(dd, data);
-	stop_cmd(dd);
-	ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD);
-	if (ret) {
-		ipath_dev_err(dd, "Failed tempsense data wrt to %02X\n",
-			      regnum);
-		ret = -ENXIO;
-	}
-
-bail:
-	return ret;
-}
-
-#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD))
-
-/**
- * ipath_tempsense_write - write register of temp sensor via I2C
- * @dd: the infinipath device
- * @regnum: register to write
- * @data: data to write
- *
- * returns 0 for success or < 0 for error
- */
-int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data)
-{
-	int ret;
-
-	if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK))
-		return -EINVAL;
-
-	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
-	if (!ret) {
-		ret = ipath_tempsense_internal_write(dd, regnum, data);
-		mutex_unlock(&dd->ipath_eep_lock);
-	}
-
-	/*
-	 * There are three possibilities here:
-	 * ret is 0 for success
-	 * ret is -ENXIO or -EINVAL from code in this file
-	 * ret is -EINTR from mutex_lock_interruptible.
-	 */
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
deleted file mode 100644
index 6187b848b3ca..000000000000
--- a/drivers/staging/rdma/ipath/ipath_file_ops.c
+++ /dev/null
@@ -1,2619 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/poll.h>
-#include <linux/cdev.h>
-#include <linux/swap.h>
-#include <linux/export.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/io.h>
-#include <linux/jiffies.h>
-#include <linux/cpu.h>
-#include <linux/uio.h>
-#include <asm/pgtable.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-#include "ipath_user_sdma.h"
-
-static int ipath_open(struct inode *, struct file *);
-static int ipath_close(struct inode *, struct file *);
-static ssize_t ipath_write(struct file *, const char __user *, size_t,
-			   loff_t *);
-static ssize_t ipath_write_iter(struct kiocb *, struct iov_iter *from);
-static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
-static int ipath_mmap(struct file *, struct vm_area_struct *);
-
-/*
- * This is really, really weird shit - write() and writev() here
- * have completely unrelated semantics.  Sucky userland ABI,
- * film at 11.
- */
-static const struct file_operations ipath_file_ops = {
-	.owner = THIS_MODULE,
-	.write = ipath_write,
-	.write_iter = ipath_write_iter,
-	.open = ipath_open,
-	.release = ipath_close,
-	.poll = ipath_poll,
-	.mmap = ipath_mmap,
-	.llseek = noop_llseek,
-};
-
-/*
- * Convert kernel virtual addresses to physical addresses so they don't
- * potentially conflict with the chip addresses used as mmap offsets.
- * It doesn't really matter what mmap offset we use as long as we can
- * interpret it correctly.
- */
-static u64 cvt_kvaddr(void *p)
-{
-	struct page *page;
-	u64 paddr = 0;
-
-	page = vmalloc_to_page(p);
-	if (page)
-		paddr = page_to_pfn(page) << PAGE_SHIFT;
-
-	return paddr;
-}
-
-static int ipath_get_base_info(struct file *fp,
-			       void __user *ubase, size_t ubase_size)
-{
-	struct ipath_portdata *pd = port_fp(fp);
-	int ret = 0;
-	struct ipath_base_info *kinfo = NULL;
-	struct ipath_devdata *dd = pd->port_dd;
-	unsigned subport_cnt;
-	int shared, master;
-	size_t sz;
-
-	subport_cnt = pd->port_subport_cnt;
-	if (!subport_cnt) {
-		shared = 0;
-		master = 0;
-		subport_cnt = 1;
-	} else {
-		shared = 1;
-		master = !subport_fp(fp);
-	}
-
-	sz = sizeof(*kinfo);
-	/* If port sharing is not requested, allow the old size structure */
-	if (!shared)
-		sz -= 7 * sizeof(u64);
-	if (ubase_size < sz) {
-		ipath_cdbg(PROC,
-			   "Base size %zu, need %zu (version mismatch?)\n",
-			   ubase_size, sz);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
-	if (kinfo == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	ret = dd->ipath_f_get_base_info(pd, kinfo);
-	if (ret < 0)
-		goto bail;
-
-	kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
-	kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
-	kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
-	kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
-	/*
-	 * have to mmap whole thing
-	 */
-	kinfo->spi_rcv_egrbuftotlen =
-		pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
-	kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
-	kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
-		pd->port_rcvegrbuf_chunks;
-	kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
-	if (master)
-		kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
-	/*
-	 * for this use, may be ipath_cfgports summed over all chips that
-	 * are are configured and present
-	 */
-	kinfo->spi_nports = dd->ipath_cfgports;
-	/* unit (chip/board) our port is on */
-	kinfo->spi_unit = dd->ipath_unit;
-	/* for now, only a single page */
-	kinfo->spi_tid_maxsize = PAGE_SIZE;
-
-	/*
-	 * Doing this per port, and based on the skip value, etc.  This has
-	 * to be the actual buffer size, since the protocol code treats it
-	 * as an array.
-	 *
-	 * These have to be set to user addresses in the user code via mmap.
-	 * These values are used on return to user code for the mmap target
-	 * addresses only.  For 32 bit, same 44 bit address problem, so use
-	 * the physical address, not virtual.  Before 2.6.11, using the
-	 * page_address() macro worked, but in 2.6.11, even that returns the
-	 * full 64 bit address (upper bits all 1's).  So far, using the
-	 * physical addresses (or chip offsets, for chip mapping) works, but
-	 * no doubt some future kernel release will change that, and we'll be
-	 * on to yet another method of dealing with this.
-	 */
-	kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
-	kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
-	kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
-	kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
-	kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
-		(void *) dd->ipath_statusp -
-		(void *) dd->ipath_pioavailregs_dma;
-	if (!shared) {
-		kinfo->spi_piocnt = pd->port_piocnt;
-		kinfo->spi_piobufbase = (u64) pd->port_piobufs;
-		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
-			dd->ipath_ureg_align * pd->port_port;
-	} else if (master) {
-		kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
-				    (pd->port_piocnt % subport_cnt);
-		/* Master's PIO buffers are after all the slave's */
-		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
-			dd->ipath_palign *
-			(pd->port_piocnt - kinfo->spi_piocnt);
-	} else {
-		unsigned slave = subport_fp(fp) - 1;
-
-		kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
-		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
-			dd->ipath_palign * kinfo->spi_piocnt * slave;
-	}
-
-	if (shared) {
-		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
-			dd->ipath_ureg_align * pd->port_port;
-		kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
-		kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
-		kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
-
-		kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
-			PAGE_SIZE * subport_fp(fp));
-
-		kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
-			pd->port_rcvhdrq_size * subport_fp(fp));
-		kinfo->spi_rcvhdr_tailaddr = 0;
-		kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
-			pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
-			subport_fp(fp));
-
-		kinfo->spi_subport_uregbase =
-			cvt_kvaddr(pd->subport_uregbase);
-		kinfo->spi_subport_rcvegrbuf =
-			cvt_kvaddr(pd->subport_rcvegrbuf);
-		kinfo->spi_subport_rcvhdr_base =
-			cvt_kvaddr(pd->subport_rcvhdr_base);
-		ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
-			kinfo->spi_port, kinfo->spi_runtime_flags,
-			(unsigned long long) kinfo->spi_subport_uregbase,
-			(unsigned long long) kinfo->spi_subport_rcvegrbuf,
-			(unsigned long long) kinfo->spi_subport_rcvhdr_base);
-	}
-
-	/*
-	 * All user buffers are 2KB buffers.  If we ever support
-	 * giving 4KB buffers to user processes, this will need some
-	 * work.
-	 */
-	kinfo->spi_pioindex = (kinfo->spi_piobufbase -
-		(dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
-	kinfo->spi_pioalign = dd->ipath_palign;
-
-	kinfo->spi_qpair = IPATH_KD_QP;
-	/*
-	 * user mode PIO buffers are always 2KB, even when 4KB can
-	 * be received, and sent via the kernel; this is ibmaxlen
-	 * for 2K MTU.
-	 */
-	kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
-	kinfo->spi_mtu = dd->ipath_ibmaxlen;	/* maxlen, not ibmtu */
-	kinfo->spi_port = pd->port_port;
-	kinfo->spi_subport = subport_fp(fp);
-	kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
-	kinfo->spi_hw_version = dd->ipath_revision;
-
-	if (master) {
-		kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
-	}
-
-	sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
-	if (copy_to_user(ubase, kinfo, sz))
-		ret = -EFAULT;
-
-bail:
-	kfree(kinfo);
-	return ret;
-}
-
-/**
- * ipath_tid_update - update a port TID
- * @pd: the port
- * @fp: the ipath device file
- * @ti: the TID information
- *
- * The new implementation as of Oct 2004 is that the driver assigns
- * the tid and returns it to the caller.   To make it easier to
- * catch bugs, and to reduce search time, we keep a cursor for
- * each port, walking the shadow tid array to find one that's not
- * in use.
- *
- * For now, if we can't allocate the full list, we fail, although
- * in the long run, we'll allocate as many as we can, and the
- * caller will deal with that by trying the remaining pages later.
- * That means that when we fail, we have to mark the tids as not in
- * use again, in our shadow copy.
- *
- * It's up to the caller to free the tids when they are done.
- * We'll unlock the pages as they free them.
- *
- * Also, right now we are locking one page at a time, but since
- * the intended use of this routine is for a single group of
- * virtually contiguous pages, that should change to improve
- * performance.
- */
-static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
-			    const struct ipath_tid_info *ti)
-{
-	int ret = 0, ntids;
-	u32 tid, porttid, cnt, i, tidcnt, tidoff;
-	u16 *tidlist;
-	struct ipath_devdata *dd = pd->port_dd;
-	u64 physaddr;
-	unsigned long vaddr;
-	u64 __iomem *tidbase;
-	unsigned long tidmap[8];
-	struct page **pagep = NULL;
-	unsigned subport = subport_fp(fp);
-
-	if (!dd->ipath_pageshadow) {
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	cnt = ti->tidcnt;
-	if (!cnt) {
-		ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
-			  (unsigned long long) ti->tidlist);
-		/*
-		 * Should we treat as success?  likely a bug
-		 */
-		ret = -EFAULT;
-		goto done;
-	}
-	porttid = pd->port_port * dd->ipath_rcvtidcnt;
-	if (!pd->port_subport_cnt) {
-		tidcnt = dd->ipath_rcvtidcnt;
-		tid = pd->port_tidcursor;
-		tidoff = 0;
-	} else if (!subport) {
-		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
-			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
-		tidoff = dd->ipath_rcvtidcnt - tidcnt;
-		porttid += tidoff;
-		tid = tidcursor_fp(fp);
-	} else {
-		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
-		tidoff = tidcnt * (subport - 1);
-		porttid += tidoff;
-		tid = tidcursor_fp(fp);
-	}
-	if (cnt > tidcnt) {
-		/* make sure it all fits in port_tid_pg_list */
-		dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
-			 "TIDs, only trying max (%u)\n", cnt, tidcnt);
-		cnt = tidcnt;
-	}
-	pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
-	tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
-
-	memset(tidmap, 0, sizeof(tidmap));
-	/* before decrement; chip actual # */
-	ntids = tidcnt;
-	tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
-				   dd->ipath_rcvtidbase +
-				   porttid * sizeof(*tidbase));
-
-	ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
-		   pd->port_port, cnt, tid, tidbase);
-
-	/* virtual address of first page in transfer */
-	vaddr = ti->tidvaddr;
-	if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
-		       cnt * PAGE_SIZE)) {
-		ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
-			  (void *)vaddr, cnt);
-		ret = -EFAULT;
-		goto done;
-	}
-	ret = ipath_get_user_pages(vaddr, cnt, pagep);
-	if (ret) {
-		if (ret == -EBUSY) {
-			ipath_dbg("Failed to lock addr %p, %u pages "
-				  "(already locked)\n",
-				  (void *) vaddr, cnt);
-			/*
-			 * for now, continue, and see what happens but with
-			 * the new implementation, this should never happen,
-			 * unless perhaps the user has mpin'ed the pages
-			 * themselves (something we need to test)
-			 */
-			ret = 0;
-		} else {
-			dev_info(&dd->pcidev->dev,
-				 "Failed to lock addr %p, %u pages: "
-				 "errno %d\n", (void *) vaddr, cnt, -ret);
-			goto done;
-		}
-	}
-	for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
-		for (; ntids--; tid++) {
-			if (tid == tidcnt)
-				tid = 0;
-			if (!dd->ipath_pageshadow[porttid + tid])
-				break;
-		}
-		if (ntids < 0) {
-			/*
-			 * oops, wrapped all the way through their TIDs,
-			 * and didn't have enough free; see comments at
-			 * start of routine
-			 */
-			ipath_dbg("Not enough free TIDs for %u pages "
-				  "(index %d), failing\n", cnt, i);
-			i--;	/* last tidlist[i] not filled in */
-			ret = -ENOMEM;
-			break;
-		}
-		tidlist[i] = tid + tidoff;
-		ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
-			   "vaddr %lx\n", i, tid + tidoff, vaddr);
-		/* we "know" system pages and TID pages are same size */
-		dd->ipath_pageshadow[porttid + tid] = pagep[i];
-		dd->ipath_physshadow[porttid + tid] = ipath_map_page(
-			dd->pcidev, pagep[i], 0, PAGE_SIZE,
-			PCI_DMA_FROMDEVICE);
-		/*
-		 * don't need atomic or it's overhead
-		 */
-		__set_bit(tid, tidmap);
-		physaddr = dd->ipath_physshadow[porttid + tid];
-		ipath_stats.sps_pagelocks++;
-		ipath_cdbg(VERBOSE,
-			   "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
-			   tid, vaddr, (unsigned long long) physaddr,
-			   pagep[i]);
-		dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
-				    physaddr);
-		/*
-		 * don't check this tid in ipath_portshadow, since we
-		 * just filled it in; start with the next one.
-		 */
-		tid++;
-	}
-
-	if (ret) {
-		u32 limit;
-	cleanup:
-		/* jump here if copy out of updated info failed... */
-		ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
-			  -ret, i, cnt);
-		/* same code that's in ipath_free_tid() */
-		limit = sizeof(tidmap) * BITS_PER_BYTE;
-		if (limit > tidcnt)
-			/* just in case size changes in future */
-			limit = tidcnt;
-		tid = find_first_bit((const unsigned long *)tidmap, limit);
-		for (; tid < limit; tid++) {
-			if (!test_bit(tid, tidmap))
-				continue;
-			if (dd->ipath_pageshadow[porttid + tid]) {
-				ipath_cdbg(VERBOSE, "Freeing TID %u\n",
-					   tid);
-				dd->ipath_f_put_tid(dd, &tidbase[tid],
-						    RCVHQ_RCV_TYPE_EXPECTED,
-						    dd->ipath_tidinvalid);
-				pci_unmap_page(dd->pcidev,
-					dd->ipath_physshadow[porttid + tid],
-					PAGE_SIZE, PCI_DMA_FROMDEVICE);
-				dd->ipath_pageshadow[porttid + tid] = NULL;
-				ipath_stats.sps_pageunlocks++;
-			}
-		}
-		ipath_release_user_pages(pagep, cnt);
-	} else {
-		/*
-		 * Copy the updated array, with ipath_tid's filled in, back
-		 * to user.  Since we did the copy in already, this "should
-		 * never fail" If it does, we have to clean up...
-		 */
-		if (copy_to_user((void __user *)
-				 (unsigned long) ti->tidlist,
-				 tidlist, cnt * sizeof(*tidlist))) {
-			ret = -EFAULT;
-			goto cleanup;
-		}
-		if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
-				 tidmap, sizeof tidmap)) {
-			ret = -EFAULT;
-			goto cleanup;
-		}
-		if (tid == tidcnt)
-			tid = 0;
-		if (!pd->port_subport_cnt)
-			pd->port_tidcursor = tid;
-		else
-			tidcursor_fp(fp) = tid;
-	}
-
-done:
-	if (ret)
-		ipath_dbg("Failed to map %u TID pages, failing with %d\n",
-			  ti->tidcnt, -ret);
-	return ret;
-}
-
-/**
- * ipath_tid_free - free a port TID
- * @pd: the port
- * @subport: the subport
- * @ti: the TID info
- *
- * right now we are unlocking one page at a time, but since
- * the intended use of this routine is for a single group of
- * virtually contiguous pages, that should change to improve
- * performance.  We check that the TID is in range for this port
- * but otherwise don't check validity; if user has an error and
- * frees the wrong tid, it's only their own data that can thereby
- * be corrupted.  We do check that the TID was in use, for sanity
- * We always use our idea of the saved address, not the address that
- * they pass in to us.
- */
-
-static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
-			  const struct ipath_tid_info *ti)
-{
-	int ret = 0;
-	u32 tid, porttid, cnt, limit, tidcnt;
-	struct ipath_devdata *dd = pd->port_dd;
-	u64 __iomem *tidbase;
-	unsigned long tidmap[8];
-
-	if (!dd->ipath_pageshadow) {
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
-			   sizeof tidmap)) {
-		ret = -EFAULT;
-		goto done;
-	}
-
-	porttid = pd->port_port * dd->ipath_rcvtidcnt;
-	if (!pd->port_subport_cnt)
-		tidcnt = dd->ipath_rcvtidcnt;
-	else if (!subport) {
-		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
-			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
-		porttid += dd->ipath_rcvtidcnt - tidcnt;
-	} else {
-		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
-		porttid += tidcnt * (subport - 1);
-	}
-	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-				   dd->ipath_rcvtidbase +
-				   porttid * sizeof(*tidbase));
-
-	limit = sizeof(tidmap) * BITS_PER_BYTE;
-	if (limit > tidcnt)
-		/* just in case size changes in future */
-		limit = tidcnt;
-	tid = find_first_bit(tidmap, limit);
-	ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
-		   "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
-		   limit, tid, porttid);
-	for (cnt = 0; tid < limit; tid++) {
-		/*
-		 * small optimization; if we detect a run of 3 or so without
-		 * any set, use find_first_bit again.  That's mainly to
-		 * accelerate the case where we wrapped, so we have some at
-		 * the beginning, and some at the end, and a big gap
-		 * in the middle.
-		 */
-		if (!test_bit(tid, tidmap))
-			continue;
-		cnt++;
-		if (dd->ipath_pageshadow[porttid + tid]) {
-			struct page *p;
-			p = dd->ipath_pageshadow[porttid + tid];
-			dd->ipath_pageshadow[porttid + tid] = NULL;
-			ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
-				   pid_nr(pd->port_pid), tid);
-			dd->ipath_f_put_tid(dd, &tidbase[tid],
-					    RCVHQ_RCV_TYPE_EXPECTED,
-					    dd->ipath_tidinvalid);
-			pci_unmap_page(dd->pcidev,
-				dd->ipath_physshadow[porttid + tid],
-				PAGE_SIZE, PCI_DMA_FROMDEVICE);
-			ipath_release_user_pages(&p, 1);
-			ipath_stats.sps_pageunlocks++;
-		} else
-			ipath_dbg("Unused tid %u, ignoring\n", tid);
-	}
-	if (cnt != ti->tidcnt)
-		ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
-			  ti->tidcnt, cnt);
-done:
-	if (ret)
-		ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
-			  ti->tidcnt, -ret);
-	return ret;
-}
-
-/**
- * ipath_set_part_key - set a partition key
- * @pd: the port
- * @key: the key
- *
- * We can have up to 4 active at a time (other than the default, which is
- * always allowed).  This is somewhat tricky, since multiple ports may set
- * the same key, so we reference count them, and clean up at exit.  All 4
- * partition keys are packed into a single infinipath register.  It's an
- * error for a process to set the same pkey multiple times.  We provide no
- * mechanism to de-allocate a pkey at this time, we may eventually need to
- * do that.  I've used the atomic operations, and no locking, and only make
- * a single pass through what's available.  This should be more than
- * adequate for some time. I'll think about spinlocks or the like if and as
- * it's necessary.
- */
-static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	int i, any = 0, pidx = -1;
-	u16 lkey = key & 0x7FFF;
-	int ret;
-
-	if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
-		/* nothing to do; this key always valid */
-		ret = 0;
-		goto bail;
-	}
-
-	ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
-		   "%hx:%x %hx:%x %hx:%x %hx:%x\n",
-		   pd->port_port, key, dd->ipath_pkeys[0],
-		   atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
-		   atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
-		   atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
-		   atomic_read(&dd->ipath_pkeyrefs[3]));
-
-	if (!lkey) {
-		ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
-			   pd->port_port);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Set the full membership bit, because it has to be
-	 * set in the register or the packet, and it seems
-	 * cleaner to set in the register than to force all
-	 * callers to set it. (see bug 4331)
-	 */
-	key |= 0x8000;
-
-	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-		if (!pd->port_pkeys[i] && pidx == -1)
-			pidx = i;
-		if (pd->port_pkeys[i] == key) {
-			ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
-				   "(%x) more than once\n",
-				   pd->port_port, key);
-			ret = -EEXIST;
-			goto bail;
-		}
-	}
-	if (pidx == -1) {
-		ipath_dbg("All pkeys for port %u already in use, "
-			  "can't set %x\n", pd->port_port, key);
-		ret = -EBUSY;
-		goto bail;
-	}
-	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i]) {
-			any++;
-			continue;
-		}
-		if (dd->ipath_pkeys[i] == key) {
-			atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];
-
-			if (atomic_inc_return(pkrefs) > 1) {
-				pd->port_pkeys[pidx] = key;
-				ipath_cdbg(VERBOSE, "p%u set key %x "
-					   "matches #%d, count now %d\n",
-					   pd->port_port, key, i,
-					   atomic_read(pkrefs));
-				ret = 0;
-				goto bail;
-			} else {
-				/*
-				 * lost race, decrement count, catch below
-				 */
-				atomic_dec(pkrefs);
-				ipath_cdbg(VERBOSE, "Lost race, count was "
-					   "0, after dec, it's %d\n",
-					   atomic_read(pkrefs));
-				any++;
-			}
-		}
-		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-			/*
-			 * It makes no sense to have both the limited and
-			 * full membership PKEY set at the same time since
-			 * the unlimited one will disable the limited one.
-			 */
-			ret = -EEXIST;
-			goto bail;
-		}
-	}
-	if (!any) {
-		ipath_dbg("port %u, all pkeys already in use, "
-			  "can't set %x\n", pd->port_port, key);
-		ret = -EBUSY;
-		goto bail;
-	}
-	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i] &&
-		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-			u64 pkey;
-
-			/* for ipathstats, etc. */
-			ipath_stats.sps_pkeys[i] = lkey;
-			pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
-			pkey =
-				(u64) dd->ipath_pkeys[0] |
-				((u64) dd->ipath_pkeys[1] << 16) |
-				((u64) dd->ipath_pkeys[2] << 32) |
-				((u64) dd->ipath_pkeys[3] << 48);
-			ipath_cdbg(PROC, "p%u set key %x in #%d, "
-				   "portidx %d, new pkey reg %llx\n",
-				   pd->port_port, key, i, pidx,
-				   (unsigned long long) pkey);
-			ipath_write_kreg(
-				dd, dd->ipath_kregs->kr_partitionkey, pkey);
-
-			ret = 0;
-			goto bail;
-		}
-	}
-	ipath_dbg("port %u, all pkeys already in use 2nd pass, "
-		  "can't set %x\n", pd->port_port, key);
-	ret = -EBUSY;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_manage_rcvq - manage a port's receive queue
- * @pd: the port
- * @subport: the subport
- * @start_stop: action to carry out
- *
- * start_stop == 0 disables receive on the port, for use in queue
- * overflow conditions.  start_stop==1 re-enables, to be used to
- * re-init the software copy of the head register
- */
-static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
-			     int start_stop)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-
-	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
-		   start_stop ? "en" : "dis", dd->ipath_unit,
-		   pd->port_port, subport);
-	if (subport)
-		goto bail;
-	/* atomically clear receive enable port. */
-	if (start_stop) {
-		/*
-		 * On enable, force in-memory copy of the tail register to
-		 * 0, so that protocol code doesn't have to worry about
-		 * whether or not the chip has yet updated the in-memory
-		 * copy or not on return from the system call. The chip
-		 * always resets it's tail register back to 0 on a
-		 * transition from disabled to enabled.  This could cause a
-		 * problem if software was broken, and did the enable w/o
-		 * the disable, but eventually the in-memory copy will be
-		 * updated and correct itself, even in the face of software
-		 * bugs.
-		 */
-		if (pd->port_rcvhdrtail_kvaddr)
-			ipath_clear_rcvhdrtail(pd);
-		set_bit(dd->ipath_r_portenable_shift + pd->port_port,
-			&dd->ipath_rcvctrl);
-	} else
-		clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
-			  &dd->ipath_rcvctrl);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-	/* now be sure chip saw it before we return */
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	if (start_stop) {
-		/*
-		 * And try to be sure that tail reg update has happened too.
-		 * This should in theory interlock with the RXE changes to
-		 * the tail register.  Don't assign it to the tail register
-		 * in memory copy, since we could overwrite an update by the
-		 * chip if we did.
-		 */
-		ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
-	}
-	/* always; new head should be equal to new tail; see above */
-bail:
-	return 0;
-}
-
-static void ipath_clean_part_key(struct ipath_portdata *pd,
-				 struct ipath_devdata *dd)
-{
-	int i, j, pchanged = 0;
-	u64 oldpkey;
-
-	/* for debugging only */
-	oldpkey = (u64) dd->ipath_pkeys[0] |
-		((u64) dd->ipath_pkeys[1] << 16) |
-		((u64) dd->ipath_pkeys[2] << 32) |
-		((u64) dd->ipath_pkeys[3] << 48);
-
-	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-		if (!pd->port_pkeys[i])
-			continue;
-		ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
-			   pd->port_pkeys[i]);
-		for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
-			/* check for match independent of the global bit */
-			if ((dd->ipath_pkeys[j] & 0x7fff) !=
-			    (pd->port_pkeys[i] & 0x7fff))
-				continue;
-			if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
-				ipath_cdbg(VERBOSE, "p%u clear key "
-					   "%x matches #%d\n",
-					   pd->port_port,
-					   pd->port_pkeys[i], j);
-				ipath_stats.sps_pkeys[j] =
-					dd->ipath_pkeys[j] = 0;
-				pchanged++;
-			} else {
-				ipath_cdbg(VERBOSE, "p%u key %x matches #%d, "
-					   "but ref still %d\n", pd->port_port,
-					   pd->port_pkeys[i], j,
-					   atomic_read(&dd->ipath_pkeyrefs[j]));
-				break;
-			}
-		}
-		pd->port_pkeys[i] = 0;
-	}
-	if (pchanged) {
-		u64 pkey = (u64) dd->ipath_pkeys[0] |
-			((u64) dd->ipath_pkeys[1] << 16) |
-			((u64) dd->ipath_pkeys[2] << 32) |
-			((u64) dd->ipath_pkeys[3] << 48);
-		ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
-			   "new pkey reg %llx\n", pd->port_port,
-			   (unsigned long long) oldpkey,
-			   (unsigned long long) pkey);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-				 pkey);
-	}
-}
-
-/*
- * Initialize the port data with the receive buffer sizes
- * so this can be done while the master port is locked.
- * Otherwise, there is a race with a slave opening the port
- * and seeing these fields uninitialized.
- */
-static void init_user_egr_sizes(struct ipath_portdata *pd)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	unsigned egrperchunk, egrcnt, size;
-
-	/*
-	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
-	 * physically contiguous memory, advance through it until used up
-	 * and then allocate more.  Of course, we need memory to store those
-	 * extra pointers, now.  Started out with 256KB, but under heavy
-	 * memory pressure (creating large files and then copying them over
-	 * NFS while doing lots of MPI jobs), we hit some allocation
-	 * failures, even though we can sleep...  (2.6.10) Still get
-	 * failures at 64K.  32K is the lowest we can go without wasting
-	 * additional memory.
-	 */
-	size = 0x8000;
-	egrperchunk = size / dd->ipath_rcvegrbufsize;
-	egrcnt = dd->ipath_rcvegrcnt;
-	pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
-	pd->port_rcvegrbufs_perchunk = egrperchunk;
-	pd->port_rcvegrbuf_size = size;
-}
-
-/**
- * ipath_create_user_egr - allocate eager TID buffers
- * @pd: the port to allocate TID buffers for
- *
- * This routine is now quite different for user and kernel, because
- * the kernel uses skb's, for the accelerated network performance
- * This is the user port version
- *
- * Allocate the eager TID buffers and program them into infinipath
- * They are no longer completely contiguous, we do multiple allocation
- * calls.
- */
-static int ipath_create_user_egr(struct ipath_portdata *pd)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
-	size_t size;
-	int ret;
-	gfp_t gfp_flags;
-
-	/*
-	 * GFP_USER, but without GFP_FS, so buffer cache can be
-	 * coalesced (we hope); otherwise, even at order 4,
-	 * heavy filesystem activity makes these fail, and we can
-	 * use compound pages.
-	 */
-	gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
-	egrcnt = dd->ipath_rcvegrcnt;
-	/* TID number offset for this port */
-	egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
-	egrsize = dd->ipath_rcvegrbufsize;
-	ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
-		   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
-
-	chunk = pd->port_rcvegrbuf_chunks;
-	egrperchunk = pd->port_rcvegrbufs_perchunk;
-	size = pd->port_rcvegrbuf_size;
-	pd->port_rcvegrbuf = kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf[0]),
-					   GFP_KERNEL);
-	if (!pd->port_rcvegrbuf) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	pd->port_rcvegrbuf_phys =
-		kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf_phys[0]),
-			      GFP_KERNEL);
-	if (!pd->port_rcvegrbuf_phys) {
-		ret = -ENOMEM;
-		goto bail_rcvegrbuf;
-	}
-	for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
-
-		pd->port_rcvegrbuf[e] = dma_alloc_coherent(
-			&dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
-			gfp_flags);
-
-		if (!pd->port_rcvegrbuf[e]) {
-			ret = -ENOMEM;
-			goto bail_rcvegrbuf_phys;
-		}
-	}
-
-	pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];
-
-	for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
-		dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
-		unsigned i;
-
-		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
-			dd->ipath_f_put_tid(dd, e + egroff +
-					    (u64 __iomem *)
-					    ((char __iomem *)
-					     dd->ipath_kregbase +
-					     dd->ipath_rcvegrbase),
-					    RCVHQ_RCV_TYPE_EAGER, pa);
-			pa += egrsize;
-		}
-		cond_resched();	/* don't hog the cpu */
-	}
-
-	ret = 0;
-	goto bail;
-
-bail_rcvegrbuf_phys:
-	for (e = 0; e < pd->port_rcvegrbuf_chunks &&
-		pd->port_rcvegrbuf[e]; e++) {
-		dma_free_coherent(&dd->pcidev->dev, size,
-				  pd->port_rcvegrbuf[e],
-				  pd->port_rcvegrbuf_phys[e]);
-
-	}
-	kfree(pd->port_rcvegrbuf_phys);
-	pd->port_rcvegrbuf_phys = NULL;
-bail_rcvegrbuf:
-	kfree(pd->port_rcvegrbuf);
-	pd->port_rcvegrbuf = NULL;
-bail:
-	return ret;
-}
-
-
-/* common code for the mappings on dma_alloc_coherent mem */
-static int ipath_mmap_mem(struct vm_area_struct *vma,
-	struct ipath_portdata *pd, unsigned len, int write_ok,
-	void *kvaddr, char *what)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	unsigned long pfn;
-	int ret;
-
-	if ((vma->vm_end - vma->vm_start) > len) {
-		dev_info(&dd->pcidev->dev,
-		         "FAIL on %s: len %lx > %x\n", what,
-			 vma->vm_end - vma->vm_start, len);
-		ret = -EFAULT;
-		goto bail;
-	}
-
-	if (!write_ok) {
-		if (vma->vm_flags & VM_WRITE) {
-			dev_info(&dd->pcidev->dev,
-				 "%s must be mapped readonly\n", what);
-			ret = -EPERM;
-			goto bail;
-		}
-
-		/* don't allow them to later change with mprotect */
-		vma->vm_flags &= ~VM_MAYWRITE;
-	}
-
-	pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
-	ret = remap_pfn_range(vma, vma->vm_start, pfn,
-			      len, vma->vm_page_prot);
-	if (ret)
-		dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
-			 "bytes r%c failed: %d\n", what, pd->port_port,
-			 pfn, len, write_ok?'w':'o', ret);
-	else
-		ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
-			   "r%c\n", what, pd->port_port, pfn, len,
-			   write_ok?'w':'o');
-bail:
-	return ret;
-}
-
-static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
-		     u64 ureg)
-{
-	unsigned long phys;
-	int ret;
-
-	/*
-	 * This is real hardware, so use io_remap.  This is the mechanism
-	 * for the user process to update the head registers for their port
-	 * in the chip.
-	 */
-	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
-		dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
-			 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
-		ret = -EFAULT;
-	} else {
-		phys = dd->ipath_physaddr + ureg;
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-		ret = io_remap_pfn_range(vma, vma->vm_start,
-					 phys >> PAGE_SHIFT,
-					 vma->vm_end - vma->vm_start,
-					 vma->vm_page_prot);
-	}
-	return ret;
-}
-
-static int mmap_piobufs(struct vm_area_struct *vma,
-			struct ipath_devdata *dd,
-			struct ipath_portdata *pd,
-			unsigned piobufs, unsigned piocnt)
-{
-	unsigned long phys;
-	int ret;
-
-	/*
-	 * When we map the PIO buffers in the chip, we want to map them as
-	 * writeonly, no read possible.   This prevents access to previous
-	 * process data, and catches users who might try to read the i/o
-	 * space due to a bug.
-	 */
-	if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
-		dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
-			 "reqlen %lx > PAGE\n",
-			 vma->vm_end - vma->vm_start);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	phys = dd->ipath_physaddr + piobufs;
-
-#if defined(__powerpc__)
-	/* There isn't a generic way to specify writethrough mappings */
-	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
-	pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
-	pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
-#endif
-
-	/*
-	 * don't allow them to later change to readable with mprotect (for when
-	 * not initially mapped readable, as is normally the case)
-	 */
-	vma->vm_flags &= ~VM_MAYREAD;
-	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
-
-	ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
-				 vma->vm_end - vma->vm_start,
-				 vma->vm_page_prot);
-bail:
-	return ret;
-}
-
-static int mmap_rcvegrbufs(struct vm_area_struct *vma,
-			   struct ipath_portdata *pd)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	unsigned long start, size;
-	size_t total_size, i;
-	unsigned long pfn;
-	int ret;
-
-	size = pd->port_rcvegrbuf_size;
-	total_size = pd->port_rcvegrbuf_chunks * size;
-	if ((vma->vm_end - vma->vm_start) > total_size) {
-		dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
-			 "reqlen %lx > actual %lx\n",
-			 vma->vm_end - vma->vm_start,
-			 (unsigned long) total_size);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	if (vma->vm_flags & VM_WRITE) {
-		dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
-			 "writable (flags=%lx)\n", vma->vm_flags);
-		ret = -EPERM;
-		goto bail;
-	}
-	/* don't allow them to later change to writeable with mprotect */
-	vma->vm_flags &= ~VM_MAYWRITE;
-
-	start = vma->vm_start;
-
-	for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
-		pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
-		ret = remap_pfn_range(vma, start, pfn, size,
-				      vma->vm_page_prot);
-		if (ret < 0)
-			goto bail;
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/*
- * ipath_file_vma_fault - handle a VMA page fault.
- */
-static int ipath_file_vma_fault(struct vm_area_struct *vma,
-					struct vm_fault *vmf)
-{
-	struct page *page;
-
-	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
-	if (!page)
-		return VM_FAULT_SIGBUS;
-	get_page(page);
-	vmf->page = page;
-
-	return 0;
-}
-
-static const struct vm_operations_struct ipath_file_vm_ops = {
-	.fault = ipath_file_vma_fault,
-};
-
-static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
-		       struct ipath_portdata *pd, unsigned subport)
-{
-	unsigned long len;
-	struct ipath_devdata *dd;
-	void *addr;
-	size_t size;
-	int ret = 0;
-
-	/* If the port is not shared, all addresses should be physical */
-	if (!pd->port_subport_cnt)
-		goto bail;
-
-	dd = pd->port_dd;
-	size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
-
-	/*
-	 * Each process has all the subport uregbase, rcvhdrq, and
-	 * rcvegrbufs mmapped - as an array for all the processes,
-	 * and also separately for this process.
-	 */
-	if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
-		addr = pd->subport_uregbase;
-		size = PAGE_SIZE * pd->port_subport_cnt;
-	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
-		addr = pd->subport_rcvhdr_base;
-		size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
-	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
-		addr = pd->subport_rcvegrbuf;
-		size *= pd->port_subport_cnt;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
-                                        PAGE_SIZE * subport)) {
-                addr = pd->subport_uregbase + PAGE_SIZE * subport;
-                size = PAGE_SIZE;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
-                                pd->port_rcvhdrq_size * subport)) {
-                addr = pd->subport_rcvhdr_base +
-                        pd->port_rcvhdrq_size * subport;
-                size = pd->port_rcvhdrq_size;
-        } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
-                               size * subport)) {
-                addr = pd->subport_rcvegrbuf + size * subport;
-                /* rcvegrbufs are read-only on the slave */
-                if (vma->vm_flags & VM_WRITE) {
-                        dev_info(&dd->pcidev->dev,
-                                 "Can't map eager buffers as "
-                                 "writable (flags=%lx)\n", vma->vm_flags);
-                        ret = -EPERM;
-                        goto bail;
-                }
-                /*
-                 * Don't allow permission to later change to writeable
-                 * with mprotect.
-                 */
-                vma->vm_flags &= ~VM_MAYWRITE;
-	} else {
-		goto bail;
-	}
-	len = vma->vm_end - vma->vm_start;
-	if (len > size) {
-		ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
-	vma->vm_ops = &ipath_file_vm_ops;
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-	ret = 1;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_mmap - mmap various structures into user space
- * @fp: the file pointer
- * @vma: the VM area
- *
- * We use this to have a shared buffer between the kernel and the user code
- * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
- * buffers in the chip.  We have the open and close entries so we can bump
- * the ref count and keep the driver from being unloaded while still mapped.
- */
-static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
-{
-	struct ipath_portdata *pd;
-	struct ipath_devdata *dd;
-	u64 pgaddr, ureg;
-	unsigned piobufs, piocnt;
-	int ret;
-
-	pd = port_fp(fp);
-	if (!pd) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	dd = pd->port_dd;
-
-	/*
-	 * This is the ipath_do_user_init() code, mapping the shared buffers
-	 * into the user process. The address referred to by vm_pgoff is the
-	 * file offset passed via mmap().  For shared ports, this is the
-	 * kernel vmalloc() address of the pages to share with the master.
-	 * For non-shared or master ports, this is a physical address.
-	 * We only do one mmap for each space mapped.
-	 */
-	pgaddr = vma->vm_pgoff << PAGE_SHIFT;
-
-	/*
-	 * Check for 0 in case one of the allocations failed, but user
-	 * called mmap anyway.
-	 */
-	if (!pgaddr)  {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
-		   (unsigned long long) pgaddr, vma->vm_start,
-		   vma->vm_end - vma->vm_start, dd->ipath_unit,
-		   pd->port_port, subport_fp(fp));
-
-	/*
-	 * Physical addresses must fit in 40 bits for our hardware.
-	 * Check for kernel virtual addresses first, anything else must
-	 * match a HW or memory address.
-	 */
-	ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
-	if (ret) {
-		if (ret > 0)
-			ret = 0;
-		goto bail;
-	}
-
-	ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
-	if (!pd->port_subport_cnt) {
-		/* port is not shared */
-		piocnt = pd->port_piocnt;
-		piobufs = pd->port_piobufs;
-	} else if (!subport_fp(fp)) {
-		/* caller is the master */
-		piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
-			 (pd->port_piocnt % pd->port_subport_cnt);
-		piobufs = pd->port_piobufs +
-			dd->ipath_palign * (pd->port_piocnt - piocnt);
-	} else {
-		unsigned slave = subport_fp(fp) - 1;
-
-		/* caller is a slave */
-		piocnt = pd->port_piocnt / pd->port_subport_cnt;
-		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
-	}
-
-	if (pgaddr == ureg)
-		ret = mmap_ureg(vma, dd, ureg);
-	else if (pgaddr == piobufs)
-		ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
-	else if (pgaddr == dd->ipath_pioavailregs_phys)
-		/* in-memory copy of pioavail registers */
-		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-			      	     (void *) dd->ipath_pioavailregs_dma,
-				     "pioavail registers");
-	else if (pgaddr == pd->port_rcvegr_phys)
-		ret = mmap_rcvegrbufs(vma, pd);
-	else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
-		/*
-		 * The rcvhdrq itself; readonly except on HT (so have
-		 * to allow writable mapping), multiple pages, contiguous
-		 * from an i/o perspective.
-		 */
-		ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
-				     pd->port_rcvhdrq,
-				     "rcvhdrq");
-	else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
-		/* in-memory copy of rcvhdrq tail register */
-		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-				     pd->port_rcvhdrtail_kvaddr,
-				     "rcvhdrq tail");
-	else
-		ret = -EINVAL;
-
-	vma->vm_private_data = NULL;
-
-	if (ret < 0)
-		dev_info(&dd->pcidev->dev,
-			 "Failure %d on off %llx len %lx\n",
-			 -ret, (unsigned long long)pgaddr,
-			 vma->vm_end - vma->vm_start);
-bail:
-	return ret;
-}
-
-static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
-{
-	unsigned pollflag = 0;
-
-	if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
-	    pd->port_hdrqfull != pd->port_hdrqfull_poll) {
-		pollflag |= POLLIN | POLLRDNORM;
-		pd->port_hdrqfull_poll = pd->port_hdrqfull;
-	}
-
-	return pollflag;
-}
-
-static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
-				      struct file *fp,
-				      struct poll_table_struct *pt)
-{
-	unsigned pollflag = 0;
-	struct ipath_devdata *dd;
-
-	dd = pd->port_dd;
-
-	/* variable access in ipath_poll_hdrqfull() needs this */
-	rmb();
-	pollflag = ipath_poll_hdrqfull(pd);
-
-	if (pd->port_urgent != pd->port_urgent_poll) {
-		pollflag |= POLLIN | POLLRDNORM;
-		pd->port_urgent_poll = pd->port_urgent;
-	}
-
-	if (!pollflag) {
-		/* this saves a spin_lock/unlock in interrupt handler... */
-		set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
-		/* flush waiting flag so don't miss an event... */
-		wmb();
-		poll_wait(fp, &pd->port_wait, pt);
-	}
-
-	return pollflag;
-}
-
-static unsigned int ipath_poll_next(struct ipath_portdata *pd,
-				    struct file *fp,
-				    struct poll_table_struct *pt)
-{
-	u32 head;
-	u32 tail;
-	unsigned pollflag = 0;
-	struct ipath_devdata *dd;
-
-	dd = pd->port_dd;
-
-	/* variable access in ipath_poll_hdrqfull() needs this */
-	rmb();
-	pollflag = ipath_poll_hdrqfull(pd);
-
-	head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
-	if (pd->port_rcvhdrtail_kvaddr)
-		tail = ipath_get_rcvhdrtail(pd);
-	else
-		tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
-
-	if (head != tail)
-		pollflag |= POLLIN | POLLRDNORM;
-	else {
-		/* this saves a spin_lock/unlock in interrupt handler */
-		set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
-		/* flush waiting flag so we don't miss an event */
-		wmb();
-
-		set_bit(pd->port_port + dd->ipath_r_intravail_shift,
-			&dd->ipath_rcvctrl);
-
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-				 dd->ipath_rcvctrl);
-
-		if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
-			ipath_write_ureg(dd, ur_rcvhdrhead,
-					 dd->ipath_rhdrhead_intr_off | head,
-					 pd->port_port);
-
-		poll_wait(fp, &pd->port_wait, pt);
-	}
-
-	return pollflag;
-}
-
-static unsigned int ipath_poll(struct file *fp,
-			       struct poll_table_struct *pt)
-{
-	struct ipath_portdata *pd;
-	unsigned pollflag;
-
-	pd = port_fp(fp);
-	if (!pd)
-		pollflag = 0;
-	else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
-		pollflag = ipath_poll_urgent(pd, fp, pt);
-	else
-		pollflag = ipath_poll_next(pd, fp, pt);
-
-	return pollflag;
-}
-
-static int ipath_supports_subports(int user_swmajor, int user_swminor)
-{
-	/* no subport implementation prior to software version 1.3 */
-	return (user_swmajor > 1) || (user_swminor >= 3);
-}
-
-static int ipath_compatible_subports(int user_swmajor, int user_swminor)
-{
-	/* this code is written long-hand for clarity */
-	if (IPATH_USER_SWMAJOR != user_swmajor) {
-		/* no promise of compatibility if major mismatch */
-		return 0;
-	}
-	if (IPATH_USER_SWMAJOR == 1) {
-		switch (IPATH_USER_SWMINOR) {
-		case 0:
-		case 1:
-		case 2:
-			/* no subport implementation so cannot be compatible */
-			return 0;
-		case 3:
-			/* 3 is only compatible with itself */
-			return user_swminor == 3;
-		default:
-			/* >= 4 are compatible (or are expected to be) */
-			return user_swminor >= 4;
-		}
-	}
-	/* make no promises yet for future major versions */
-	return 0;
-}
-
-static int init_subports(struct ipath_devdata *dd,
-			 struct ipath_portdata *pd,
-			 const struct ipath_user_info *uinfo)
-{
-	int ret = 0;
-	unsigned num_subports;
-	size_t size;
-
-	/*
-	 * If the user is requesting zero subports,
-	 * skip the subport allocation.
-	 */
-	if (uinfo->spu_subport_cnt <= 0)
-		goto bail;
-
-	/* Self-consistency check for ipath_compatible_subports() */
-	if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
-	    !ipath_compatible_subports(IPATH_USER_SWMAJOR,
-				       IPATH_USER_SWMINOR)) {
-		dev_info(&dd->pcidev->dev,
-			 "Inconsistent ipath_compatible_subports()\n");
-		goto bail;
-	}
-
-	/* Check for subport compatibility */
-	if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
-				       uinfo->spu_userversion & 0xffff)) {
-		dev_info(&dd->pcidev->dev,
-			 "Mismatched user version (%d.%d) and driver "
-			 "version (%d.%d) while port sharing. Ensure "
-                         "that driver and library are from the same "
-                         "release.\n",
-			 (int) (uinfo->spu_userversion >> 16),
-                         (int) (uinfo->spu_userversion & 0xffff),
-			 IPATH_USER_SWMAJOR,
-	                 IPATH_USER_SWMINOR);
-		goto bail;
-	}
-	if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	num_subports = uinfo->spu_subport_cnt;
-	pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
-	if (!pd->subport_uregbase) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
-	size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
-		     sizeof(u32), PAGE_SIZE) * num_subports;
-	pd->subport_rcvhdr_base = vzalloc(size);
-	if (!pd->subport_rcvhdr_base) {
-		ret = -ENOMEM;
-		goto bail_ureg;
-	}
-
-	pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks *
-					pd->port_rcvegrbuf_size *
-					num_subports);
-	if (!pd->subport_rcvegrbuf) {
-		ret = -ENOMEM;
-		goto bail_rhdr;
-	}
-
-	pd->port_subport_cnt = uinfo->spu_subport_cnt;
-	pd->port_subport_id = uinfo->spu_subport_id;
-	pd->active_slaves = 1;
-	set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
-	goto bail;
-
-bail_rhdr:
-	vfree(pd->subport_rcvhdr_base);
-bail_ureg:
-	vfree(pd->subport_uregbase);
-	pd->subport_uregbase = NULL;
-bail:
-	return ret;
-}
-
-static int try_alloc_port(struct ipath_devdata *dd, int port,
-			  struct file *fp,
-			  const struct ipath_user_info *uinfo)
-{
-	struct ipath_portdata *pd;
-	int ret;
-
-	if (!(pd = dd->ipath_pd[port])) {
-		void *ptmp;
-
-		pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
-
-		/*
-		 * Allocate memory for use in ipath_tid_update() just once
-		 * at open, not per call.  Reduces cost of expected send
-		 * setup.
-		 */
-		ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
-			       dd->ipath_rcvtidcnt * sizeof(struct page **),
-			       GFP_KERNEL);
-		if (!pd || !ptmp) {
-			ipath_dev_err(dd, "Unable to allocate portdata "
-				      "memory, failing open\n");
-			ret = -ENOMEM;
-			kfree(pd);
-			kfree(ptmp);
-			goto bail;
-		}
-		dd->ipath_pd[port] = pd;
-		dd->ipath_pd[port]->port_port = port;
-		dd->ipath_pd[port]->port_dd = dd;
-		dd->ipath_pd[port]->port_tid_pg_list = ptmp;
-		init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
-	}
-	if (!pd->port_cnt) {
-		pd->userversion = uinfo->spu_userversion;
-		init_user_egr_sizes(pd);
-		if ((ret = init_subports(dd, pd, uinfo)) != 0)
-			goto bail;
-		ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
-			   current->comm, current->pid, dd->ipath_unit,
-			   port);
-		pd->port_cnt = 1;
-		port_fp(fp) = pd;
-		pd->port_pid = get_pid(task_pid(current));
-		strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
-		ipath_stats.sps_ports++;
-		ret = 0;
-	} else
-		ret = -EBUSY;
-
-bail:
-	return ret;
-}
-
-static inline int usable(struct ipath_devdata *dd)
-{
-	return dd &&
-		(dd->ipath_flags & IPATH_PRESENT) &&
-		dd->ipath_kregbase &&
-		dd->ipath_lid &&
-		!(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
-				     | IPATH_LINKUNK));
-}
-
-static int find_free_port(int unit, struct file *fp,
-			  const struct ipath_user_info *uinfo)
-{
-	struct ipath_devdata *dd = ipath_lookup(unit);
-	int ret, i;
-
-	if (!dd) {
-		ret = -ENODEV;
-		goto bail;
-	}
-
-	if (!usable(dd)) {
-		ret = -ENETDOWN;
-		goto bail;
-	}
-
-	for (i = 1; i < dd->ipath_cfgports; i++) {
-		ret = try_alloc_port(dd, i, fp, uinfo);
-		if (ret != -EBUSY)
-			goto bail;
-	}
-	ret = -EBUSY;
-
-bail:
-	return ret;
-}
-
-static int find_best_unit(struct file *fp,
-			  const struct ipath_user_info *uinfo)
-{
-	int ret = 0, i, prefunit = -1, devmax;
-	int maxofallports, npresent, nup;
-	int ndev;
-
-	devmax = ipath_count_units(&npresent, &nup, &maxofallports);
-
-	/*
-	 * This code is present to allow a knowledgeable person to
-	 * specify the layout of processes to processors before opening
-	 * this driver, and then we'll assign the process to the "closest"
-	 * InfiniPath chip to that processor (we assume reasonable connectivity,
-	 * for now).  This code assumes that if affinity has been set
-	 * before this point, that at most one cpu is set; for now this
-	 * is reasonable.  I check for both cpumask_empty() and cpumask_full(),
-	 * in case some kernel variant sets none of the bits when no
-	 * affinity is set.  2.6.11 and 12 kernels have all present
-	 * cpus set.  Some day we'll have to fix it up further to handle
-	 * a cpu subset.  This algorithm fails for two HT chips connected
-	 * in tunnel fashion.  Eventually this needs real topology
-	 * information.  There may be some issues with dual core numbering
-	 * as well.  This needs more work prior to release.
-	 */
-	if (!cpumask_empty(tsk_cpus_allowed(current)) &&
-	    !cpumask_full(tsk_cpus_allowed(current))) {
-		int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
-		get_online_cpus();
-		for_each_online_cpu(i)
-			if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) {
-				ipath_cdbg(PROC, "%s[%u] affinity set for "
-					   "cpu %d/%d\n", current->comm,
-					   current->pid, i, ncpus);
-				curcpu = i;
-				nset++;
-			}
-		put_online_cpus();
-		if (curcpu != -1 && nset != ncpus) {
-			if (npresent) {
-				prefunit = curcpu / (ncpus / npresent);
-				ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
-					  "%d cpus/chip, select unit %d\n",
-					  current->comm, current->pid,
-					  npresent, ncpus, ncpus / npresent,
-					  prefunit);
-			}
-		}
-	}
-
-	/*
-	 * user ports start at 1, kernel port is 0
-	 * For now, we do round-robin access across all chips
-	 */
-
-	if (prefunit != -1)
-		devmax = prefunit + 1;
-recheck:
-	for (i = 1; i < maxofallports; i++) {
-		for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
-		     ndev++) {
-			struct ipath_devdata *dd = ipath_lookup(ndev);
-
-			if (!usable(dd))
-				continue; /* can't use this unit */
-			if (i >= dd->ipath_cfgports)
-				/*
-				 * Maxed out on users of this unit. Try
-				 * next.
-				 */
-				continue;
-			ret = try_alloc_port(dd, i, fp, uinfo);
-			if (!ret)
-				goto done;
-		}
-	}
-
-	if (npresent) {
-		if (nup == 0) {
-			ret = -ENETDOWN;
-			ipath_dbg("No ports available (none initialized "
-				  "and ready)\n");
-		} else {
-			if (prefunit > 0) {
-				/* if started above 0, retry from 0 */
-				ipath_cdbg(PROC,
-					   "%s[%u] no ports on prefunit "
-					   "%d, clear and re-check\n",
-					   current->comm, current->pid,
-					   prefunit);
-				devmax = ipath_count_units(NULL, NULL,
-							   NULL);
-				prefunit = -1;
-				goto recheck;
-			}
-			ret = -EBUSY;
-			ipath_dbg("No ports available\n");
-		}
-	} else {
-		ret = -ENXIO;
-		ipath_dbg("No boards found\n");
-	}
-
-done:
-	return ret;
-}
-
-static int find_shared_port(struct file *fp,
-			    const struct ipath_user_info *uinfo)
-{
-	int devmax, ndev, i;
-	int ret = 0;
-
-	devmax = ipath_count_units(NULL, NULL, NULL);
-
-	for (ndev = 0; ndev < devmax; ndev++) {
-		struct ipath_devdata *dd = ipath_lookup(ndev);
-
-		if (!usable(dd))
-			continue;
-		for (i = 1; i < dd->ipath_cfgports; i++) {
-			struct ipath_portdata *pd = dd->ipath_pd[i];
-
-			/* Skip ports which are not yet open */
-			if (!pd || !pd->port_cnt)
-				continue;
-			/* Skip port if it doesn't match the requested one */
-			if (pd->port_subport_id != uinfo->spu_subport_id)
-				continue;
-			/* Verify the sharing process matches the master */
-			if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
-			    pd->userversion != uinfo->spu_userversion ||
-			    pd->port_cnt >= pd->port_subport_cnt) {
-				ret = -EINVAL;
-				goto done;
-			}
-			port_fp(fp) = pd;
-			subport_fp(fp) = pd->port_cnt++;
-			pd->port_subpid[subport_fp(fp)] =
-				get_pid(task_pid(current));
-			tidcursor_fp(fp) = 0;
-			pd->active_slaves |= 1 << subport_fp(fp);
-			ipath_cdbg(PROC,
-				   "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
-				   current->comm, current->pid,
-				   subport_fp(fp),
-				   pd->port_comm, pid_nr(pd->port_pid),
-				   dd->ipath_unit, pd->port_port);
-			ret = 1;
-			goto done;
-		}
-	}
-
-done:
-	return ret;
-}
-
-static int ipath_open(struct inode *in, struct file *fp)
-{
-	/* The real work is performed later in ipath_assign_port() */
-	fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
-	return fp->private_data ? 0 : -ENOMEM;
-}
-
-/* Get port early, so can set affinity prior to memory allocation */
-static int ipath_assign_port(struct file *fp,
-			      const struct ipath_user_info *uinfo)
-{
-	int ret;
-	int i_minor;
-	unsigned swmajor, swminor;
-
-	/* Check to be sure we haven't already initialized this file */
-	if (port_fp(fp)) {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	/* for now, if major version is different, bail */
-	swmajor = uinfo->spu_userversion >> 16;
-	if (swmajor != IPATH_USER_SWMAJOR) {
-		ipath_dbg("User major version %d not same as driver "
-			  "major %d\n", uinfo->spu_userversion >> 16,
-			  IPATH_USER_SWMAJOR);
-		ret = -ENODEV;
-		goto done;
-	}
-
-	swminor = uinfo->spu_userversion & 0xffff;
-	if (swminor != IPATH_USER_SWMINOR)
-		ipath_dbg("User minor version %d not same as driver "
-			  "minor %d\n", swminor, IPATH_USER_SWMINOR);
-
-	mutex_lock(&ipath_mutex);
-
-	if (ipath_compatible_subports(swmajor, swminor) &&
-	    uinfo->spu_subport_cnt &&
-	    (ret = find_shared_port(fp, uinfo))) {
-		if (ret > 0)
-			ret = 0;
-		goto done_chk_sdma;
-	}
-
-	i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE;
-	ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
-		   (long)file_inode(fp)->i_rdev, i_minor);
-
-	if (i_minor)
-		ret = find_free_port(i_minor - 1, fp, uinfo);
-	else
-		ret = find_best_unit(fp, uinfo);
-
-done_chk_sdma:
-	if (!ret) {
-		struct ipath_filedata *fd = fp->private_data;
-		const struct ipath_portdata *pd = fd->pd;
-		const struct ipath_devdata *dd = pd->port_dd;
-
-		fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
-						      dd->ipath_unit,
-						      pd->port_port,
-						      fd->subport);
-
-		if (!fd->pq)
-			ret = -ENOMEM;
-	}
-
-	mutex_unlock(&ipath_mutex);
-
-done:
-	return ret;
-}
-
-
-static int ipath_do_user_init(struct file *fp,
-			      const struct ipath_user_info *uinfo)
-{
-	int ret;
-	struct ipath_portdata *pd = port_fp(fp);
-	struct ipath_devdata *dd;
-	u32 head32;
-
-	/* Subports don't need to initialize anything since master did it. */
-	if (subport_fp(fp)) {
-		ret = wait_event_interruptible(pd->port_wait,
-			!test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
-		goto done;
-	}
-
-	dd = pd->port_dd;
-
-	if (uinfo->spu_rcvhdrsize) {
-		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
-		if (ret)
-			goto done;
-	}
-
-	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
-
-	/* some ports may get extra buffers, calculate that here */
-	if (pd->port_port <= dd->ipath_ports_extrabuf)
-		pd->port_piocnt = dd->ipath_pbufsport + 1;
-	else
-		pd->port_piocnt = dd->ipath_pbufsport;
-
-	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
-	if (pd->port_port <= dd->ipath_ports_extrabuf)
-		pd->port_pio_base = (dd->ipath_pbufsport + 1)
-			* (pd->port_port - 1);
-	else
-		pd->port_pio_base = dd->ipath_ports_extrabuf +
-			dd->ipath_pbufsport * (pd->port_port - 1);
-	pd->port_piobufs = dd->ipath_piobufbase +
-		pd->port_pio_base * dd->ipath_palign;
-	ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
-		" first pio %u\n", pd->port_port, pd->port_piobufs,
-		pd->port_piocnt, pd->port_pio_base);
-	ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
-
-	/*
-	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
-	 * array for time being.  If pd->port_port > chip-supported,
-	 * we need to do extra stuff here to handle by handling overflow
-	 * through port 0, someday
-	 */
-	ret = ipath_create_rcvhdrq(dd, pd);
-	if (!ret)
-		ret = ipath_create_user_egr(pd);
-	if (ret)
-		goto done;
-
-	/*
-	 * set the eager head register for this port to the current values
-	 * of the tail pointers, since we don't know if they were
-	 * updated on last use of the port.
-	 */
-	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
-	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
-	pd->port_lastrcvhdrqtail = -1;
-	ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
-		pd->port_port, head32);
-	pd->port_tidcursor = 0;	/* start at beginning after open */
-
-	/* initialize poll variables... */
-	pd->port_urgent = 0;
-	pd->port_urgent_poll = 0;
-	pd->port_hdrqfull_poll = pd->port_hdrqfull;
-
-	/*
-	 * Now enable the port for receive.
-	 * For chips that are set to DMA the tail register to memory
-	 * when they change (and when the update bit transitions from
-	 * 0 to 1.  So for those chips, we turn it off and then back on.
-	 * This will (very briefly) affect any other open ports, but the
-	 * duration is very short, and therefore isn't an issue.  We
-	 * explicitly set the in-memory tail copy to 0 beforehand, so we
-	 * don't have to wait to be sure the DMA update has happened
-	 * (chip resets head/tail to 0 on transition to enable).
-	 */
-	set_bit(dd->ipath_r_portenable_shift + pd->port_port,
-		&dd->ipath_rcvctrl);
-	if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
-		if (pd->port_rcvhdrtail_kvaddr)
-			ipath_clear_rcvhdrtail(pd);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			dd->ipath_rcvctrl &
-			~(1ULL << dd->ipath_r_tailupd_shift));
-	}
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-	/* Notify any waiting slaves */
-	if (pd->port_subport_cnt) {
-		clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
-		wake_up(&pd->port_wait);
-	}
-done:
-	return ret;
-}
-
-/**
- * unlock_exptid - unlock any expected TID entries port still had in use
- * @pd: port
- *
- * We don't actually update the chip here, because we do a bulk update
- * below, using ipath_f_clear_tids.
- */
-static void unlock_expected_tids(struct ipath_portdata *pd)
-{
-	struct ipath_devdata *dd = pd->port_dd;
-	int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
-	int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-
-	ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
-		   pd->port_port);
-	for (i = port_tidbase; i < maxtid; i++) {
-		struct page *ps = dd->ipath_pageshadow[i];
-
-		if (!ps)
-			continue;
-
-		dd->ipath_pageshadow[i] = NULL;
-		pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
-			PAGE_SIZE, PCI_DMA_FROMDEVICE);
-		ipath_release_user_pages_on_close(&ps, 1);
-		cnt++;
-		ipath_stats.sps_pageunlocks++;
-	}
-	if (cnt)
-		ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n",
-			   pd->port_port, cnt);
-
-	if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
-		ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
-			   (unsigned long long) ipath_stats.sps_pagelocks,
-			   (unsigned long long)
-			   ipath_stats.sps_pageunlocks);
-}
-
-static int ipath_close(struct inode *in, struct file *fp)
-{
-	struct ipath_filedata *fd;
-	struct ipath_portdata *pd;
-	struct ipath_devdata *dd;
-	unsigned long flags;
-	unsigned port;
-	struct pid *pid;
-
-	ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
-		   (long)in->i_rdev, fp->private_data);
-
-	mutex_lock(&ipath_mutex);
-
-	fd = fp->private_data;
-	fp->private_data = NULL;
-	pd = fd->pd;
-	if (!pd) {
-		mutex_unlock(&ipath_mutex);
-		goto bail;
-	}
-
-	dd = pd->port_dd;
-
-	/* drain user sdma queue */
-	ipath_user_sdma_queue_drain(dd, fd->pq);
-	ipath_user_sdma_queue_destroy(fd->pq);
-
-	if (--pd->port_cnt) {
-		/*
-		 * XXX If the master closes the port before the slave(s),
-		 * revoke the mmap for the eager receive queue so
-		 * the slave(s) don't wait for receive data forever.
-		 */
-		pd->active_slaves &= ~(1 << fd->subport);
-		put_pid(pd->port_subpid[fd->subport]);
-		pd->port_subpid[fd->subport] = NULL;
-		mutex_unlock(&ipath_mutex);
-		goto bail;
-	}
-	/* early; no interrupt users after this */
-	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
-	port = pd->port_port;
-	dd->ipath_pd[port] = NULL;
-	pid = pd->port_pid;
-	pd->port_pid = NULL;
-	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
-
-	if (pd->port_rcvwait_to || pd->port_piowait_to
-	    || pd->port_rcvnowait || pd->port_pionowait) {
-		ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; "
-			   "%u rcv %u, pio already\n",
-			   pd->port_port, pd->port_rcvwait_to,
-			   pd->port_piowait_to, pd->port_rcvnowait,
-			   pd->port_pionowait);
-		pd->port_rcvwait_to = pd->port_piowait_to =
-			pd->port_rcvnowait = pd->port_pionowait = 0;
-	}
-	if (pd->port_flag) {
-		ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
-			  pd->port_port, pd->port_flag);
-		pd->port_flag = 0;
-	}
-
-	if (dd->ipath_kregbase) {
-		/* atomically clear receive enable port and intr avail. */
-		clear_bit(dd->ipath_r_portenable_shift + port,
-			  &dd->ipath_rcvctrl);
-		clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
-			  &dd->ipath_rcvctrl);
-		ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
-			dd->ipath_rcvctrl);
-		/* and read back from chip to be sure that nothing
-		 * else is in flight when we do the rest */
-		(void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-		/* clean up the pkeys for this port user */
-		ipath_clean_part_key(pd, dd);
-		/*
-		 * be paranoid, and never write 0's to these, just use an
-		 * unused part of the port 0 tail page.  Of course,
-		 * rcvhdraddr points to a large chunk of memory, so this
-		 * could still trash things, but at least it won't trash
-		 * page 0, and by disabling the port, it should stop "soon",
-		 * even if a packet or two is in already in flight after we
-		 * disabled the port.
-		 */
-		ipath_write_kreg_port(dd,
-		        dd->ipath_kregs->kr_rcvhdrtailaddr, port,
-			dd->ipath_dummy_hdrq_phys);
-		ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
-			pd->port_port, dd->ipath_dummy_hdrq_phys);
-
-		ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
-		ipath_chg_pioavailkernel(dd, pd->port_pio_base,
-			pd->port_piocnt, 1);
-
-		dd->ipath_f_clear_tids(dd, pd->port_port);
-
-		if (dd->ipath_pageshadow)
-			unlock_expected_tids(pd);
-		ipath_stats.sps_ports--;
-		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
-			   pd->port_comm, pid_nr(pid),
-			   dd->ipath_unit, port);
-	}
-
-	put_pid(pid);
-	mutex_unlock(&ipath_mutex);
-	ipath_free_pddata(dd, pd); /* after releasing the mutex */
-
-bail:
-	kfree(fd);
-	return 0;
-}
-
-static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
-			   struct ipath_port_info __user *uinfo)
-{
-	struct ipath_port_info info;
-	int nup;
-	int ret;
-	size_t sz;
-
-	(void) ipath_count_units(NULL, &nup, NULL);
-	info.num_active = nup;
-	info.unit = pd->port_dd->ipath_unit;
-	info.port = pd->port_port;
-	info.subport = subport;
-	/* Don't return new fields if old library opened the port. */
-	if (ipath_supports_subports(pd->userversion >> 16,
-				    pd->userversion & 0xffff)) {
-		/* Number of user ports available for this device. */
-		info.num_ports = pd->port_dd->ipath_cfgports - 1;
-		info.num_subports = pd->port_subport_cnt;
-		sz = sizeof(info);
-	} else
-		sz = sizeof(info) - 2 * sizeof(u16);
-
-	if (copy_to_user(uinfo, &info, sz)) {
-		ret = -EFAULT;
-		goto bail;
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-static int ipath_get_slave_info(struct ipath_portdata *pd,
-				void __user *slave_mask_addr)
-{
-	int ret = 0;
-
-	if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
-		ret = -EFAULT;
-	return ret;
-}
-
-static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
-				   u32 __user *inflightp)
-{
-	const u32 val = ipath_user_sdma_inflight_counter(pq);
-
-	if (put_user(val, inflightp))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int ipath_sdma_get_complete(struct ipath_devdata *dd,
-				   struct ipath_user_sdma_queue *pq,
-				   u32 __user *completep)
-{
-	u32 val;
-	int err;
-
-	err = ipath_user_sdma_make_progress(dd, pq);
-	if (err < 0)
-		return err;
-
-	val = ipath_user_sdma_complete_counter(pq);
-	if (put_user(val, completep))
-		return -EFAULT;
-
-	return 0;
-}
-
-static ssize_t ipath_write(struct file *fp, const char __user *data,
-			   size_t count, loff_t *off)
-{
-	const struct ipath_cmd __user *ucmd;
-	struct ipath_portdata *pd;
-	const void __user *src;
-	size_t consumed, copy;
-	struct ipath_cmd cmd;
-	ssize_t ret = 0;
-	void *dest;
-
-	if (count < sizeof(cmd.type)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	ucmd = (const struct ipath_cmd __user *) data;
-
-	if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
-		ret = -EFAULT;
-		goto bail;
-	}
-
-	consumed = sizeof(cmd.type);
-
-	switch (cmd.type) {
-	case IPATH_CMD_ASSIGN_PORT:
-	case __IPATH_CMD_USER_INIT:
-	case IPATH_CMD_USER_INIT:
-		copy = sizeof(cmd.cmd.user_info);
-		dest = &cmd.cmd.user_info;
-		src = &ucmd->cmd.user_info;
-		break;
-	case IPATH_CMD_RECV_CTRL:
-		copy = sizeof(cmd.cmd.recv_ctrl);
-		dest = &cmd.cmd.recv_ctrl;
-		src = &ucmd->cmd.recv_ctrl;
-		break;
-	case IPATH_CMD_PORT_INFO:
-		copy = sizeof(cmd.cmd.port_info);
-		dest = &cmd.cmd.port_info;
-		src = &ucmd->cmd.port_info;
-		break;
-	case IPATH_CMD_TID_UPDATE:
-	case IPATH_CMD_TID_FREE:
-		copy = sizeof(cmd.cmd.tid_info);
-		dest = &cmd.cmd.tid_info;
-		src = &ucmd->cmd.tid_info;
-		break;
-	case IPATH_CMD_SET_PART_KEY:
-		copy = sizeof(cmd.cmd.part_key);
-		dest = &cmd.cmd.part_key;
-		src = &ucmd->cmd.part_key;
-		break;
-	case __IPATH_CMD_SLAVE_INFO:
-		copy = sizeof(cmd.cmd.slave_mask_addr);
-		dest = &cmd.cmd.slave_mask_addr;
-		src = &ucmd->cmd.slave_mask_addr;
-		break;
-	case IPATH_CMD_PIOAVAILUPD:	// force an update of PIOAvail reg
-		copy = 0;
-		src = NULL;
-		dest = NULL;
-		break;
-	case IPATH_CMD_POLL_TYPE:
-		copy = sizeof(cmd.cmd.poll_type);
-		dest = &cmd.cmd.poll_type;
-		src = &ucmd->cmd.poll_type;
-		break;
-	case IPATH_CMD_ARMLAUNCH_CTRL:
-		copy = sizeof(cmd.cmd.armlaunch_ctrl);
-		dest = &cmd.cmd.armlaunch_ctrl;
-		src = &ucmd->cmd.armlaunch_ctrl;
-		break;
-	case IPATH_CMD_SDMA_INFLIGHT:
-		copy = sizeof(cmd.cmd.sdma_inflight);
-		dest = &cmd.cmd.sdma_inflight;
-		src = &ucmd->cmd.sdma_inflight;
-		break;
-	case IPATH_CMD_SDMA_COMPLETE:
-		copy = sizeof(cmd.cmd.sdma_complete);
-		dest = &cmd.cmd.sdma_complete;
-		src = &ucmd->cmd.sdma_complete;
-		break;
-	default:
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	if (copy) {
-		if ((count - consumed) < copy) {
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		if (copy_from_user(dest, src, copy)) {
-			ret = -EFAULT;
-			goto bail;
-		}
-
-		consumed += copy;
-	}
-
-	pd = port_fp(fp);
-	if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
-		cmd.type != IPATH_CMD_ASSIGN_PORT) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	switch (cmd.type) {
-	case IPATH_CMD_ASSIGN_PORT:
-		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
-		if (ret)
-			goto bail;
-		break;
-	case __IPATH_CMD_USER_INIT:
-		/* backwards compatibility, get port first */
-		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
-		if (ret)
-			goto bail;
-		/* and fall through to current version. */
-	case IPATH_CMD_USER_INIT:
-		ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
-		if (ret)
-			goto bail;
-		ret = ipath_get_base_info(
-			fp, (void __user *) (unsigned long)
-			cmd.cmd.user_info.spu_base_info,
-			cmd.cmd.user_info.spu_base_info_size);
-		break;
-	case IPATH_CMD_RECV_CTRL:
-		ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
-		break;
-	case IPATH_CMD_PORT_INFO:
-		ret = ipath_port_info(pd, subport_fp(fp),
-				      (struct ipath_port_info __user *)
-				      (unsigned long) cmd.cmd.port_info);
-		break;
-	case IPATH_CMD_TID_UPDATE:
-		ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
-		break;
-	case IPATH_CMD_TID_FREE:
-		ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
-		break;
-	case IPATH_CMD_SET_PART_KEY:
-		ret = ipath_set_part_key(pd, cmd.cmd.part_key);
-		break;
-	case __IPATH_CMD_SLAVE_INFO:
-		ret = ipath_get_slave_info(pd,
-					   (void __user *) (unsigned long)
-					   cmd.cmd.slave_mask_addr);
-		break;
-	case IPATH_CMD_PIOAVAILUPD:
-		ipath_force_pio_avail_update(pd->port_dd);
-		break;
-	case IPATH_CMD_POLL_TYPE:
-		pd->poll_type = cmd.cmd.poll_type;
-		break;
-	case IPATH_CMD_ARMLAUNCH_CTRL:
-		if (cmd.cmd.armlaunch_ctrl)
-			ipath_enable_armlaunch(pd->port_dd);
-		else
-			ipath_disable_armlaunch(pd->port_dd);
-		break;
-	case IPATH_CMD_SDMA_INFLIGHT:
-		ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
-					      (u32 __user *) (unsigned long)
-					      cmd.cmd.sdma_inflight);
-		break;
-	case IPATH_CMD_SDMA_COMPLETE:
-		ret = ipath_sdma_get_complete(pd->port_dd,
-					      user_sdma_queue_fp(fp),
-					      (u32 __user *) (unsigned long)
-					      cmd.cmd.sdma_complete);
-		break;
-	}
-
-	if (ret >= 0)
-		ret = consumed;
-
-bail:
-	return ret;
-}
-
-static ssize_t ipath_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
-	struct file *filp = iocb->ki_filp;
-	struct ipath_filedata *fp = filp->private_data;
-	struct ipath_portdata *pd = port_fp(filp);
-	struct ipath_user_sdma_queue *pq = fp->pq;
-
-	if (!iter_is_iovec(from) || !from->nr_segs)
-		return -EINVAL;
-
-	return ipath_user_sdma_writev(pd->port_dd, pq, from->iov, from->nr_segs);
-}
-
-static struct class *ipath_class;
-
-static int init_cdev(int minor, char *name, const struct file_operations *fops,
-		     struct cdev **cdevp, struct device **devp)
-{
-	const dev_t dev = MKDEV(IPATH_MAJOR, minor);
-	struct cdev *cdev = NULL;
-	struct device *device = NULL;
-	int ret;
-
-	cdev = cdev_alloc();
-	if (!cdev) {
-		printk(KERN_ERR IPATH_DRV_NAME
-		       ": Could not allocate cdev for minor %d, %s\n",
-		       minor, name);
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	cdev->owner = THIS_MODULE;
-	cdev->ops = fops;
-	kobject_set_name(&cdev->kobj, name);
-
-	ret = cdev_add(cdev, dev, 1);
-	if (ret < 0) {
-		printk(KERN_ERR IPATH_DRV_NAME
-		       ": Could not add cdev for minor %d, %s (err %d)\n",
-		       minor, name, -ret);
-		goto err_cdev;
-	}
-
-	device = device_create(ipath_class, NULL, dev, NULL, name);
-
-	if (IS_ERR(device)) {
-		ret = PTR_ERR(device);
-		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
-		       "device for minor %d, %s (err %d)\n",
-		       minor, name, -ret);
-		goto err_cdev;
-	}
-
-	goto done;
-
-err_cdev:
-	cdev_del(cdev);
-	cdev = NULL;
-
-done:
-	if (ret >= 0) {
-		*cdevp = cdev;
-		*devp = device;
-	} else {
-		*cdevp = NULL;
-		*devp = NULL;
-	}
-
-	return ret;
-}
-
-int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
-		    struct cdev **cdevp, struct device **devp)
-{
-	return init_cdev(minor, name, fops, cdevp, devp);
-}
-
-static void cleanup_cdev(struct cdev **cdevp,
-			 struct device **devp)
-{
-	struct device *dev = *devp;
-
-	if (dev) {
-		device_unregister(dev);
-		*devp = NULL;
-	}
-
-	if (*cdevp) {
-		cdev_del(*cdevp);
-		*cdevp = NULL;
-	}
-}
-
-void ipath_cdev_cleanup(struct cdev **cdevp,
-			struct device **devp)
-{
-	cleanup_cdev(cdevp, devp);
-}
-
-static struct cdev *wildcard_cdev;
-static struct device *wildcard_dev;
-
-static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
-
-static int user_init(void)
-{
-	int ret;
-
-	ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
-	if (ret < 0) {
-		printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
-		       "chrdev region (err %d)\n", -ret);
-		goto done;
-	}
-
-	ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);
-
-	if (IS_ERR(ipath_class)) {
-		ret = PTR_ERR(ipath_class);
-		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
-		       "device class (err %d)\n", -ret);
-		goto bail;
-	}
-
-	goto done;
-bail:
-	unregister_chrdev_region(dev, IPATH_NMINORS);
-done:
-	return ret;
-}
-
-static void user_cleanup(void)
-{
-	if (ipath_class) {
-		class_destroy(ipath_class);
-		ipath_class = NULL;
-	}
-
-	unregister_chrdev_region(dev, IPATH_NMINORS);
-}
-
-static atomic_t user_count = ATOMIC_INIT(0);
-static atomic_t user_setup = ATOMIC_INIT(0);
-
-int ipath_user_add(struct ipath_devdata *dd)
-{
-	char name[10];
-	int ret;
-
-	if (atomic_inc_return(&user_count) == 1) {
-		ret = user_init();
-		if (ret < 0) {
-			ipath_dev_err(dd, "Unable to set up user support: "
-				      "error %d\n", -ret);
-			goto bail;
-		}
-		ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
-				&wildcard_dev);
-		if (ret < 0) {
-			ipath_dev_err(dd, "Could not create wildcard "
-				      "minor: error %d\n", -ret);
-			goto bail_user;
-		}
-
-		atomic_set(&user_setup, 1);
-	}
-
-	snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
-
-	ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
-			&dd->user_cdev, &dd->user_dev);
-	if (ret < 0)
-		ipath_dev_err(dd, "Could not create user minor %d, %s\n",
-			      dd->ipath_unit + 1, name);
-
-	goto bail;
-
-bail_user:
-	user_cleanup();
-bail:
-	return ret;
-}
-
-void ipath_user_remove(struct ipath_devdata *dd)
-{
-	cleanup_cdev(&dd->user_cdev, &dd->user_dev);
-
-	if (atomic_dec_return(&user_count) == 0) {
-		if (atomic_read(&user_setup) == 0)
-			goto bail;
-
-		cleanup_cdev(&wildcard_cdev, &wildcard_dev);
-		user_cleanup();
-
-		atomic_set(&user_setup, 0);
-	}
-bail:
-	return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_fs.c b/drivers/staging/rdma/ipath/ipath_fs.c
deleted file mode 100644
index 476fcdf05acb..000000000000
--- a/drivers/staging/rdma/ipath/ipath_fs.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/namei.h>
-#include <linux/slab.h>
-
-#include "ipath_kernel.h"
-
-#define IPATHFS_MAGIC 0x726a77
-
-static struct super_block *ipath_super;
-
-static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
-			 umode_t mode, const struct file_operations *fops,
-			 void *data)
-{
-	int error;
-	struct inode *inode = new_inode(dir->i_sb);
-
-	if (!inode) {
-		error = -EPERM;
-		goto bail;
-	}
-
-	inode->i_ino = get_next_ino();
-	inode->i_mode = mode;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	inode->i_private = data;
-	if (S_ISDIR(mode)) {
-		inode->i_op = &simple_dir_inode_operations;
-		inc_nlink(inode);
-		inc_nlink(dir);
-	}
-
-	inode->i_fop = fops;
-
-	d_instantiate(dentry, inode);
-	error = 0;
-
-bail:
-	return error;
-}
-
-static int create_file(const char *name, umode_t mode,
-		       struct dentry *parent, struct dentry **dentry,
-		       const struct file_operations *fops, void *data)
-{
-	int error;
-
-	inode_lock(d_inode(parent));
-	*dentry = lookup_one_len(name, parent, strlen(name));
-	if (!IS_ERR(*dentry))
-		error = ipathfs_mknod(d_inode(parent), *dentry,
-				      mode, fops, data);
-	else
-		error = PTR_ERR(*dentry);
-	inode_unlock(d_inode(parent));
-
-	return error;
-}
-
-static ssize_t atomic_stats_read(struct file *file, char __user *buf,
-				 size_t count, loff_t *ppos)
-{
-	return simple_read_from_buffer(buf, count, ppos, &ipath_stats,
-				       sizeof ipath_stats);
-}
-
-static const struct file_operations atomic_stats_ops = {
-	.read = atomic_stats_read,
-	.llseek = default_llseek,
-};
-
-static ssize_t atomic_counters_read(struct file *file, char __user *buf,
-				    size_t count, loff_t *ppos)
-{
-	struct infinipath_counters counters;
-	struct ipath_devdata *dd;
-
-	dd = file_inode(file)->i_private;
-	dd->ipath_f_read_counters(dd, &counters);
-
-	return simple_read_from_buffer(buf, count, ppos, &counters,
-				       sizeof counters);
-}
-
-static const struct file_operations atomic_counters_ops = {
-	.read = atomic_counters_read,
-	.llseek = default_llseek,
-};
-
-static ssize_t flash_read(struct file *file, char __user *buf,
-			  size_t count, loff_t *ppos)
-{
-	struct ipath_devdata *dd;
-	ssize_t ret;
-	loff_t pos;
-	char *tmp;
-
-	pos = *ppos;
-
-	if ( pos < 0) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	if (pos >= sizeof(struct ipath_flash)) {
-		ret = 0;
-		goto bail;
-	}
-
-	if (count > sizeof(struct ipath_flash) - pos)
-		count = sizeof(struct ipath_flash) - pos;
-
-	tmp = kmalloc(count, GFP_KERNEL);
-	if (!tmp) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	dd = file_inode(file)->i_private;
-	if (ipath_eeprom_read(dd, pos, tmp, count)) {
-		ipath_dev_err(dd, "failed to read from flash\n");
-		ret = -ENXIO;
-		goto bail_tmp;
-	}
-
-	if (copy_to_user(buf, tmp, count)) {
-		ret = -EFAULT;
-		goto bail_tmp;
-	}
-
-	*ppos = pos + count;
-	ret = count;
-
-bail_tmp:
-	kfree(tmp);
-
-bail:
-	return ret;
-}
-
-static ssize_t flash_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
-{
-	struct ipath_devdata *dd;
-	ssize_t ret;
-	loff_t pos;
-	char *tmp;
-
-	pos = *ppos;
-
-	if (pos != 0) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	if (count != sizeof(struct ipath_flash)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	tmp = memdup_user(buf, count);
-	if (IS_ERR(tmp))
-		return PTR_ERR(tmp);
-
-	dd = file_inode(file)->i_private;
-	if (ipath_eeprom_write(dd, pos, tmp, count)) {
-		ret = -ENXIO;
-		ipath_dev_err(dd, "failed to write to flash\n");
-		goto bail_tmp;
-	}
-
-	*ppos = pos + count;
-	ret = count;
-
-bail_tmp:
-	kfree(tmp);
-
-bail:
-	return ret;
-}
-
-static const struct file_operations flash_ops = {
-	.read = flash_read,
-	.write = flash_write,
-	.llseek = default_llseek,
-};
-
-static int create_device_files(struct super_block *sb,
-			       struct ipath_devdata *dd)
-{
-	struct dentry *dir, *tmp;
-	char unit[10];
-	int ret;
-
-	snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
-	ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
-			  &simple_dir_operations, dd);
-	if (ret) {
-		printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
-		goto bail;
-	}
-
-	ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp,
-			  &atomic_counters_ops, dd);
-	if (ret) {
-		printk(KERN_ERR "create_file(%s/atomic_counters) "
-		       "failed: %d\n", unit, ret);
-		goto bail;
-	}
-
-	ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
-			  &flash_ops, dd);
-	if (ret) {
-		printk(KERN_ERR "create_file(%s/flash) "
-		       "failed: %d\n", unit, ret);
-		goto bail;
-	}
-
-bail:
-	return ret;
-}
-
-static int remove_file(struct dentry *parent, char *name)
-{
-	struct dentry *tmp;
-	int ret;
-
-	tmp = lookup_one_len(name, parent, strlen(name));
-
-	if (IS_ERR(tmp)) {
-		ret = PTR_ERR(tmp);
-		goto bail;
-	}
-
-	spin_lock(&tmp->d_lock);
-	if (simple_positive(tmp)) {
-		dget_dlock(tmp);
-		__d_drop(tmp);
-		spin_unlock(&tmp->d_lock);
-		simple_unlink(d_inode(parent), tmp);
-	} else
-		spin_unlock(&tmp->d_lock);
-
-	ret = 0;
-bail:
-	/*
-	 * We don't expect clients to care about the return value, but
-	 * it's there if they need it.
-	 */
-	return ret;
-}
-
-static int remove_device_files(struct super_block *sb,
-			       struct ipath_devdata *dd)
-{
-	struct dentry *dir, *root;
-	char unit[10];
-	int ret;
-
-	root = dget(sb->s_root);
-	inode_lock(d_inode(root));
-	snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
-	dir = lookup_one_len(unit, root, strlen(unit));
-
-	if (IS_ERR(dir)) {
-		ret = PTR_ERR(dir);
-		printk(KERN_ERR "Lookup of %s failed\n", unit);
-		goto bail;
-	}
-
-	remove_file(dir, "flash");
-	remove_file(dir, "atomic_counters");
-	d_delete(dir);
-	ret = simple_rmdir(d_inode(root), dir);
-
-bail:
-	inode_unlock(d_inode(root));
-	dput(root);
-	return ret;
-}
-
-static int ipathfs_fill_super(struct super_block *sb, void *data,
-			      int silent)
-{
-	struct ipath_devdata *dd, *tmp;
-	unsigned long flags;
-	int ret;
-
-	static struct tree_descr files[] = {
-		[2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
-		{""},
-	};
-
-	ret = simple_fill_super(sb, IPATHFS_MAGIC, files);
-	if (ret) {
-		printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-		spin_unlock_irqrestore(&ipath_devs_lock, flags);
-		ret = create_device_files(sb, dd);
-		if (ret)
-			goto bail;
-		spin_lock_irqsave(&ipath_devs_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-bail:
-	return ret;
-}
-
-static struct dentry *ipathfs_mount(struct file_system_type *fs_type,
-			int flags, const char *dev_name, void *data)
-{
-	struct dentry *ret;
-	ret = mount_single(fs_type, flags, data, ipathfs_fill_super);
-	if (!IS_ERR(ret))
-		ipath_super = ret->d_sb;
-	return ret;
-}
-
-static void ipathfs_kill_super(struct super_block *s)
-{
-	kill_litter_super(s);
-	ipath_super = NULL;
-}
-
-int ipathfs_add_device(struct ipath_devdata *dd)
-{
-	int ret;
-
-	if (ipath_super == NULL) {
-		ret = 0;
-		goto bail;
-	}
-
-	ret = create_device_files(ipath_super, dd);
-
-bail:
-	return ret;
-}
-
-int ipathfs_remove_device(struct ipath_devdata *dd)
-{
-	int ret;
-
-	if (ipath_super == NULL) {
-		ret = 0;
-		goto bail;
-	}
-
-	ret = remove_device_files(ipath_super, dd);
-
-bail:
-	return ret;
-}
-
-static struct file_system_type ipathfs_fs_type = {
-	.owner =	THIS_MODULE,
-	.name =		"ipathfs",
-	.mount =	ipathfs_mount,
-	.kill_sb =	ipathfs_kill_super,
-};
-MODULE_ALIAS_FS("ipathfs");
-
-int __init ipath_init_ipathfs(void)
-{
-	return register_filesystem(&ipathfs_fs_type);
-}
-
-void __exit ipath_exit_ipathfs(void)
-{
-	unregister_filesystem(&ipathfs_fs_type);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_iba6110.c b/drivers/staging/rdma/ipath/ipath_iba6110.c
deleted file mode 100644
index 5f13572a5e24..000000000000
--- a/drivers/staging/rdma/ipath/ipath_iba6110.c
+++ /dev/null
@@ -1,1939 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains all of the code that is specific to the InfiniPath
- * HT chip.
- */
-
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/htirq.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-
-static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
-
-
-/*
- * This lists the InfiniPath registers, in the actual chip layout.
- * This structure should never be directly accessed.
- *
- * The names are in InterCap form because they're taken straight from
- * the chip specification.  Since they're only used in this file, they
- * don't pollute the rest of the source.
-*/
-
-struct _infinipath_do_not_use_kernel_regs {
-	unsigned long long Revision;
-	unsigned long long Control;
-	unsigned long long PageAlign;
-	unsigned long long PortCnt;
-	unsigned long long DebugPortSelect;
-	unsigned long long DebugPort;
-	unsigned long long SendRegBase;
-	unsigned long long UserRegBase;
-	unsigned long long CounterRegBase;
-	unsigned long long Scratch;
-	unsigned long long ReservedMisc1;
-	unsigned long long InterruptConfig;
-	unsigned long long IntBlocked;
-	unsigned long long IntMask;
-	unsigned long long IntStatus;
-	unsigned long long IntClear;
-	unsigned long long ErrorMask;
-	unsigned long long ErrorStatus;
-	unsigned long long ErrorClear;
-	unsigned long long HwErrMask;
-	unsigned long long HwErrStatus;
-	unsigned long long HwErrClear;
-	unsigned long long HwDiagCtrl;
-	unsigned long long MDIO;
-	unsigned long long IBCStatus;
-	unsigned long long IBCCtrl;
-	unsigned long long ExtStatus;
-	unsigned long long ExtCtrl;
-	unsigned long long GPIOOut;
-	unsigned long long GPIOMask;
-	unsigned long long GPIOStatus;
-	unsigned long long GPIOClear;
-	unsigned long long RcvCtrl;
-	unsigned long long RcvBTHQP;
-	unsigned long long RcvHdrSize;
-	unsigned long long RcvHdrCnt;
-	unsigned long long RcvHdrEntSize;
-	unsigned long long RcvTIDBase;
-	unsigned long long RcvTIDCnt;
-	unsigned long long RcvEgrBase;
-	unsigned long long RcvEgrCnt;
-	unsigned long long RcvBufBase;
-	unsigned long long RcvBufSize;
-	unsigned long long RxIntMemBase;
-	unsigned long long RxIntMemSize;
-	unsigned long long RcvPartitionKey;
-	unsigned long long ReservedRcv[10];
-	unsigned long long SendCtrl;
-	unsigned long long SendPIOBufBase;
-	unsigned long long SendPIOSize;
-	unsigned long long SendPIOBufCnt;
-	unsigned long long SendPIOAvailAddr;
-	unsigned long long TxIntMemBase;
-	unsigned long long TxIntMemSize;
-	unsigned long long ReservedSend[9];
-	unsigned long long SendBufferError;
-	unsigned long long SendBufferErrorCONT1;
-	unsigned long long SendBufferErrorCONT2;
-	unsigned long long SendBufferErrorCONT3;
-	unsigned long long ReservedSBE[4];
-	unsigned long long RcvHdrAddr0;
-	unsigned long long RcvHdrAddr1;
-	unsigned long long RcvHdrAddr2;
-	unsigned long long RcvHdrAddr3;
-	unsigned long long RcvHdrAddr4;
-	unsigned long long RcvHdrAddr5;
-	unsigned long long RcvHdrAddr6;
-	unsigned long long RcvHdrAddr7;
-	unsigned long long RcvHdrAddr8;
-	unsigned long long ReservedRHA[7];
-	unsigned long long RcvHdrTailAddr0;
-	unsigned long long RcvHdrTailAddr1;
-	unsigned long long RcvHdrTailAddr2;
-	unsigned long long RcvHdrTailAddr3;
-	unsigned long long RcvHdrTailAddr4;
-	unsigned long long RcvHdrTailAddr5;
-	unsigned long long RcvHdrTailAddr6;
-	unsigned long long RcvHdrTailAddr7;
-	unsigned long long RcvHdrTailAddr8;
-	unsigned long long ReservedRHTA[7];
-	unsigned long long Sync;	/* Software only */
-	unsigned long long Dump;	/* Software only */
-	unsigned long long SimVer;	/* Software only */
-	unsigned long long ReservedSW[5];
-	unsigned long long SerdesConfig0;
-	unsigned long long SerdesConfig1;
-	unsigned long long SerdesStatus;
-	unsigned long long XGXSConfig;
-	unsigned long long ReservedSW2[4];
-};
-
-struct _infinipath_do_not_use_counters {
-	__u64 LBIntCnt;
-	__u64 LBFlowStallCnt;
-	__u64 Reserved1;
-	__u64 TxUnsupVLErrCnt;
-	__u64 TxDataPktCnt;
-	__u64 TxFlowPktCnt;
-	__u64 TxDwordCnt;
-	__u64 TxLenErrCnt;
-	__u64 TxMaxMinLenErrCnt;
-	__u64 TxUnderrunCnt;
-	__u64 TxFlowStallCnt;
-	__u64 TxDroppedPktCnt;
-	__u64 RxDroppedPktCnt;
-	__u64 RxDataPktCnt;
-	__u64 RxFlowPktCnt;
-	__u64 RxDwordCnt;
-	__u64 RxLenErrCnt;
-	__u64 RxMaxMinLenErrCnt;
-	__u64 RxICRCErrCnt;
-	__u64 RxVCRCErrCnt;
-	__u64 RxFlowCtrlErrCnt;
-	__u64 RxBadFormatCnt;
-	__u64 RxLinkProblemCnt;
-	__u64 RxEBPCnt;
-	__u64 RxLPCRCErrCnt;
-	__u64 RxBufOvflCnt;
-	__u64 RxTIDFullErrCnt;
-	__u64 RxTIDValidErrCnt;
-	__u64 RxPKeyMismatchCnt;
-	__u64 RxP0HdrEgrOvflCnt;
-	__u64 RxP1HdrEgrOvflCnt;
-	__u64 RxP2HdrEgrOvflCnt;
-	__u64 RxP3HdrEgrOvflCnt;
-	__u64 RxP4HdrEgrOvflCnt;
-	__u64 RxP5HdrEgrOvflCnt;
-	__u64 RxP6HdrEgrOvflCnt;
-	__u64 RxP7HdrEgrOvflCnt;
-	__u64 RxP8HdrEgrOvflCnt;
-	__u64 Reserved6;
-	__u64 Reserved7;
-	__u64 IBStatusChangeCnt;
-	__u64 IBLinkErrRecoveryCnt;
-	__u64 IBLinkDownedCnt;
-	__u64 IBSymbolErrCnt;
-};
-
-#define IPATH_KREG_OFFSET(field) (offsetof( \
-	struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
-#define IPATH_CREG_OFFSET(field) (offsetof( \
-	struct _infinipath_do_not_use_counters, field) / sizeof(u64))
-
-static const struct ipath_kregs ipath_ht_kregs = {
-	.kr_control = IPATH_KREG_OFFSET(Control),
-	.kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
-	.kr_debugport = IPATH_KREG_OFFSET(DebugPort),
-	.kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
-	.kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
-	.kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
-	.kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
-	.kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
-	.kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
-	.kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
-	.kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
-	.kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
-	.kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
-	.kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
-	.kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
-	.kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
-	.kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
-	.kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
-	.kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
-	.kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
-	.kr_intclear = IPATH_KREG_OFFSET(IntClear),
-	.kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig),
-	.kr_intmask = IPATH_KREG_OFFSET(IntMask),
-	.kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
-	.kr_mdio = IPATH_KREG_OFFSET(MDIO),
-	.kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
-	.kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
-	.kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
-	.kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
-	.kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
-	.kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
-	.kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
-	.kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
-	.kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
-	.kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
-	.kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
-	.kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
-	.kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
-	.kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
-	.kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
-	.kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
-	.kr_revision = IPATH_KREG_OFFSET(Revision),
-	.kr_scratch = IPATH_KREG_OFFSET(Scratch),
-	.kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
-	.kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
-	.kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
-	.kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
-	.kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
-	.kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
-	.kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
-	.kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
-	.kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
-	.kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
-	.kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
-	.kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
-	.kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
-	.kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
-	/*
-	 * These should not be used directly via ipath_write_kreg64(),
-	 * use them with ipath_write_kreg64_port(),
-	 */
-	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
-	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
-};
-
-static const struct ipath_cregs ipath_ht_cregs = {
-	.cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
-	.cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
-	.cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
-	.cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
-	.cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
-	.cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
-	.cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
-	.cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
-	.cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
-	.cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
-	.cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
-	.cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
-	/* calc from Reg_CounterRegBase + offset */
-	.cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
-	.cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
-	.cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
-	.cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
-	.cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
-	.cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
-	.cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
-	.cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
-	.cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
-	.cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
-	.cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
-	.cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
-	.cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
-	.cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
-	.cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
-	.cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
-	.cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
-	.cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
-	.cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
-	.cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
-	.cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
-};
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
-#define INFINIPATH_I_RCVURG_SHIFT 0
-#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
-#define INFINIPATH_I_RCVAVAIL_SHIFT 12
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
-#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL
-#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR   0x0000000000800000ULL
-#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR   0x0000000001000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR   0x0000000002000000ULL
-#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR   0x0000000004000000ULL
-#define INFINIPATH_HWE_HTCMISCERR4          0x0000000008000000ULL
-#define INFINIPATH_HWE_HTCMISCERR5          0x0000000010000000ULL
-#define INFINIPATH_HWE_HTCMISCERR6          0x0000000020000000ULL
-#define INFINIPATH_HWE_HTCMISCERR7          0x0000000040000000ULL
-#define INFINIPATH_HWE_HTCBUSTREQPARITYERR  0x0000000080000000ULL
-#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL
-#define INFINIPATH_HWE_HTCBUSIREQPARITYERR  0x0000000200000000ULL
-#define INFINIPATH_HWE_COREPLL_FBSLIP       0x0080000000000000ULL
-#define INFINIPATH_HWE_COREPLL_RFSLIP       0x0100000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_FBSLIP        0x0200000000000000ULL
-#define INFINIPATH_HWE_HTBPLL_RFSLIP        0x0400000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_FBSLIP        0x0800000000000000ULL
-#define INFINIPATH_HWE_HTAPLL_RFSLIP        0x1000000000000000ULL
-#define INFINIPATH_HWE_SERDESPLLFAILED      0x2000000000000000ULL
-
-#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf
-#define IBA6110_IBCS_LINKSTATE_SHIFT 4
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_FREQSEL 0x2
-#define INFINIPATH_EXTS_SERDESSEL 0x4
-#define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
-#define INFINIPATH_EXTS_MEMBIST_CORRECT     0x0000000000008000
-
-
-/* TID entries (memory), HT-only */
-#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL	/* 40 bits valid */
-#define INFINIPATH_RT_VALID 0x8000000000000000ULL
-#define INFINIPATH_RT_ADDR_SHIFT 0
-#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
-#define INFINIPATH_RT_BUFSIZE_SHIFT 48
-
-#define INFINIPATH_R_INTRAVAIL_SHIFT 16
-#define INFINIPATH_R_TAILUPD_SHIFT 31
-
-/* kr_xgxsconfig bits */
-#define INFINIPATH_XGXS_RESET          0x7ULL
-
-/*
- * masks and bits that are different in different chips, or present only
- * in one
- */
-static const ipath_err_t infinipath_hwe_htcmemparityerr_mask =
-    INFINIPATH_HWE_HTCMEMPARITYERR_MASK;
-static const ipath_err_t infinipath_hwe_htcmemparityerr_shift =
-    INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT;
-
-static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr =
-    INFINIPATH_HWE_HTCLNKABYTE1CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE0CRCERR;
-static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
-    INFINIPATH_HWE_HTCLNKBBYTE1CRCERR;
-
-#define _IPATH_GPIO_SDA_NUM 1
-#define _IPATH_GPIO_SCL_NUM 0
-
-#define IPATH_GPIO_SDA \
-	(1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-#define IPATH_GPIO_SCL \
-	(1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-
-/* keep the code below somewhat more readable; not used elsewhere */
-#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |	\
-				infinipath_hwe_htclnkabyte1crcerr)
-#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr |	\
-				infinipath_hwe_htclnkbbyte1crcerr)
-#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr |	\
-				infinipath_hwe_htclnkbbyte0crcerr)
-#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr |	\
-				infinipath_hwe_htclnkbbyte1crcerr)
-
-static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
-			  char *msg, size_t msgl)
-{
-	char bitsmsg[64];
-	ipath_err_t crcbits = hwerrs &
-		(_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS);
-	/* don't check if 8bit HT */
-	if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-		crcbits &= ~infinipath_hwe_htclnkabyte1crcerr;
-	/* don't check if 8bit HT */
-	if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-		crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr;
-	/*
-	 * we'll want to ignore link errors on link that is
-	 * not in use, if any.  For now, complain about both
-	 */
-	if (crcbits) {
-		u16 ctrl0, ctrl1;
-		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
-			 !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
-			 "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
-				    ? "1 (B)" : "0+1 (A+B)"),
-			 !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0"
-			 : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" :
-			    "0+1"), (unsigned long long) crcbits);
-		strlcat(msg, bitsmsg, msgl);
-
-		/*
-		 * print extra info for debugging.  slave/primary
-		 * config word 4, 8 (link control 0, 1)
-		 */
-
-		if (pci_read_config_word(dd->pcidev,
-					 dd->ipath_ht_slave_off + 0x4,
-					 &ctrl0))
-			dev_info(&dd->pcidev->dev, "Couldn't read "
-				 "linkctrl0 of slave/primary "
-				 "config block\n");
-		else if (!(ctrl0 & 1 << 6))
-			/* not if EOC bit set */
-			ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0,
-				  ((ctrl0 >> 8) & 7) ? " CRC" : "",
-				  ((ctrl0 >> 4) & 1) ? "linkfail" :
-				  "");
-		if (pci_read_config_word(dd->pcidev,
-					 dd->ipath_ht_slave_off + 0x8,
-					 &ctrl1))
-			dev_info(&dd->pcidev->dev, "Couldn't read "
-				 "linkctrl1 of slave/primary "
-				 "config block\n");
-		else if (!(ctrl1 & 1 << 6))
-			/* not if EOC bit set */
-			ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1,
-				  ((ctrl1 >> 8) & 7) ? " CRC" : "",
-				  ((ctrl1 >> 4) & 1) ? "linkfail" :
-				  "");
-
-		/* disable until driver reloaded */
-		dd->ipath_hwerrmask &= ~crcbits;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-				 dd->ipath_hwerrmask);
-		ipath_dbg("HT crc errs: %s\n", msg);
-	} else
-		ipath_dbg("ignoring HT crc errors 0x%llx, "
-			  "not in use\n", (unsigned long long)
-			  (hwerrs & (_IPATH_HTLINK0_CRCBITS |
-				     _IPATH_HTLINK1_CRCBITS)));
-}
-
-/* 6110 specific hardware errors... */
-static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
-	INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
-	INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
-	INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
-	INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
-	INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
-	INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
-	INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
-	INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
-};
-
-#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
-		        INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
-		        << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
-#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
-			  << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
-
-static void ipath_ht_txe_recover(struct ipath_devdata *dd)
-{
-	++ipath_stats.sps_txeparity;
-	dev_info(&dd->pcidev->dev,
-		"Recovering from TXE PIO parity error\n");
-}
-
-
-/**
- * ipath_ht_handle_hwerrors - display hardware errors.
- * @dd: the infinipath device
- * @msg: the output buffer
- * @msgl: the size of the output buffer
- *
- * Use same msg buffer as regular errors to avoid excessive stack
- * use.  Most hardware errors are catastrophic, but for right now,
- * we'll print them and continue.  We reuse the same message buffer as
- * ipath_handle_errors() to avoid excessive stack usage.
- */
-static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
-				     size_t msgl)
-{
-	ipath_err_t hwerrs;
-	u32 bits, ctrl;
-	int isfatal = 0;
-	char bitsmsg[64];
-	int log_idx;
-
-	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-
-	if (!hwerrs) {
-		ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
-		/*
-		 * better than printing cofusing messages
-		 * This seems to be related to clearing the crc error, or
-		 * the pll error during init.
-		 */
-		goto bail;
-	} else if (hwerrs == -1LL) {
-		ipath_dev_err(dd, "Read of hardware error status failed "
-			      "(all bits set); ignoring\n");
-		goto bail;
-	}
-	ipath_stats.sps_hwerrs++;
-
-	/* Always clear the error status register, except MEMBISTFAIL,
-	 * regardless of whether we continue or stop using the chip.
-	 * We want that set so we know it failed, even across driver reload.
-	 * We'll still ignore it in the hwerrmask.  We do this partly for
-	 * diagnostics, but also for support */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-			 hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
-
-	hwerrs &= dd->ipath_hwerrmask;
-
-	/* We log some errors to EEPROM, check if we have any of those. */
-	for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
-		if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
-			ipath_inc_eeprom_err(dd, log_idx, 1);
-
-	/*
-	 * make sure we get this much out, unless told to be quiet,
-	 * it's a parity error we may recover from,
-	 * or it's occurred within the last 5 seconds
-	 */
-	if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
-		RXE_EAGER_PARITY)) ||
-		(ipath_debug & __IPATH_VERBDBG))
-		dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
-			 "(cleared)\n", (unsigned long long) hwerrs);
-	dd->ipath_lasthwerror |= hwerrs;
-
-	if (hwerrs & ~dd->ipath_hwe_bitsextant)
-		ipath_dev_err(dd, "hwerror interrupt with unknown errors "
-			      "%llx set\n", (unsigned long long)
-			      (hwerrs & ~dd->ipath_hwe_bitsextant));
-
-	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-	if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
-		/*
-		 * parity errors in send memory are recoverable,
-		 * just cancel the send (if indicated in * sendbuffererror),
-		 * count the occurrence, unfreeze (if no other handled
-		 * hardware error bits are set), and continue. They can
-		 * occur if a processor speculative read is done to the PIO
-		 * buffer while we are sending a packet, for example.
-		 */
-		if (hwerrs & TXE_PIO_PARITY) {
-			ipath_ht_txe_recover(dd);
-			hwerrs &= ~TXE_PIO_PARITY;
-		}
-
-		if (!hwerrs) {
-			ipath_dbg("Clearing freezemode on ignored or "
-				  "recovered hardware error\n");
-			ipath_clear_freeze(dd);
-		}
-	}
-
-	*msg = '\0';
-
-	/*
-	 * may someday want to decode into which bits are which
-	 * functional area for parity errors, etc.
-	 */
-	if (hwerrs & (infinipath_hwe_htcmemparityerr_mask
-		      << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) {
-		bits = (u32) ((hwerrs >>
-			       INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) &
-			      INFINIPATH_HWE_HTCMEMPARITYERR_MASK);
-		snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ",
-			 bits);
-		strlcat(msg, bitsmsg, msgl);
-	}
-
-	ipath_format_hwerrors(hwerrs,
-			      ipath_6110_hwerror_msgs,
-			      ARRAY_SIZE(ipath_6110_hwerror_msgs),
-			      msg, msgl);
-
-	if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
-		hwerr_crcbits(dd, hwerrs, msg, msgl);
-
-	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
-			msgl);
-		/* ignore from now on, so disable until driver reloaded */
-		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-				 dd->ipath_hwerrmask);
-	}
-#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |	\
-			 INFINIPATH_HWE_COREPLL_RFSLIP |	\
-			 INFINIPATH_HWE_HTBPLL_FBSLIP |		\
-			 INFINIPATH_HWE_HTBPLL_RFSLIP |		\
-			 INFINIPATH_HWE_HTAPLL_FBSLIP |		\
-			 INFINIPATH_HWE_HTAPLL_RFSLIP)
-
-	if (hwerrs & _IPATH_PLL_FAIL) {
-		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[PLL failed (%llx), InfiniPath hardware unusable]",
-			 (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
-		strlcat(msg, bitsmsg, msgl);
-		/* ignore from now on, so disable until driver reloaded */
-		dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-				 dd->ipath_hwerrmask);
-	}
-
-	if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
-		/*
-		 * If it occurs, it is left masked since the eternal
-		 * interface is unused
-		 */
-		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-				 dd->ipath_hwerrmask);
-	}
-
-	if (hwerrs) {
-		/*
-		 * if any set that we aren't ignoring; only
-		 * make the complaint once, in case it's stuck
-		 * or recurring, and we get here multiple
-		 * times.
-		 * force link down, so switch knows, and
-		 * LEDs are turned off
-		 */
-		if (dd->ipath_flags & IPATH_INITTED) {
-			ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
-			ipath_setup_ht_setextled(dd,
-				INFINIPATH_IBCS_L_STATE_DOWN,
-				INFINIPATH_IBCS_LT_STATE_DISABLED);
-			ipath_dev_err(dd, "Fatal Hardware Error (freeze "
-					  "mode), no longer usable, SN %.16s\n",
-					  dd->ipath_serial);
-			isfatal = 1;
-		}
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-		/* mark as having had error */
-		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-		/*
-		 * mark as not usable, at a minimum until driver
-		 * is reloaded, probably until reboot, since no
-		 * other reset is possible.
-		 */
-		dd->ipath_flags &= ~IPATH_INITTED;
-	} else {
-		*msg = 0; /* recovered from all of them */
-	}
-	if (*msg)
-		ipath_dev_err(dd, "%s hardware error\n", msg);
-	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
-		/*
-		 * for status file; if no trailing brace is copied,
-		 * we'll know it was truncated.
-		 */
-		snprintf(dd->ipath_freezemsg,
-			 dd->ipath_freezelen, "{%s}", msg);
-
-bail:;
-}
-
-/**
- * ipath_ht_boardname - fill in the board name
- * @dd: the infinipath device
- * @name: the output buffer
- * @namelen: the size of the output buffer
- *
- * fill in the board name, based on the board revision register
- */
-static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
-			      size_t namelen)
-{
-	char *n = NULL;
-	u8 boardrev = dd->ipath_boardrev;
-	int ret = 0;
-
-	switch (boardrev) {
-	case 5:
-		/*
-		 * original production board; two production levels, with
-		 * different serial number ranges.   See ipath_ht_early_init() for
-		 * case where we enable IPATH_GPIO_INTR for later serial # range.
-		 * Original 112* serial number is no longer supported.
-		 */
-		n = "InfiniPath_QHT7040";
-		break;
-	case 7:
-		/* small form factor production board */
-		n = "InfiniPath_QHT7140";
-		break;
-	default:		/* don't know, just print the number */
-		ipath_dev_err(dd, "Don't yet know about board "
-			      "with ID %u\n", boardrev);
-		snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
-			 boardrev);
-		break;
-	}
-	if (n)
-		snprintf(name, namelen, "%s", n);
-
-	if (ret) {
-		ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
-		goto bail;
-	}
-	if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
-		dd->ipath_minrev > 4)) {
-		/*
-		 * This version of the driver only supports Rev 3.2 - 3.4
-		 */
-		ipath_dev_err(dd,
-			      "Unsupported InfiniPath hardware revision %u.%u!\n",
-			      dd->ipath_majrev, dd->ipath_minrev);
-		ret = 1;
-		goto bail;
-	}
-	/*
-	 * pkt/word counters are 32 bit, and therefore wrap fast enough
-	 * that we snapshot them from a timer, and maintain 64 bit shadow
-	 * copies
-	 */
-	dd->ipath_flags |= IPATH_32BITCOUNTERS;
-	dd->ipath_flags |= IPATH_GPIO_INTR;
-	if (dd->ipath_lbus_speed != 800)
-		ipath_dev_err(dd,
-			      "Incorrectly configured for HT @ %uMHz\n",
-			      dd->ipath_lbus_speed);
-
-	/*
-	 * set here, not in ipath_init_*_funcs because we have to do
-	 * it after we can read chip registers.
-	 */
-	dd->ipath_ureg_align =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-
-bail:
-	return ret;
-}
-
-static void ipath_check_htlink(struct ipath_devdata *dd)
-{
-	u8 linkerr, link_off, i;
-
-	for (i = 0; i < 2; i++) {
-		link_off = dd->ipath_ht_slave_off + i * 4 + 0xd;
-		if (pci_read_config_byte(dd->pcidev, link_off, &linkerr))
-			dev_info(&dd->pcidev->dev, "Couldn't read "
-				 "linkerror%d of HT slave/primary block\n",
-				 i);
-		else if (linkerr & 0xf0) {
-			ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-				   "clearing\n", linkerr >> 4, i);
-			/*
-			 * writing the linkerr bits that are set should
-			 * clear them
-			 */
-			if (pci_write_config_byte(dd->pcidev, link_off,
-						  linkerr))
-				ipath_dbg("Failed write to clear HT "
-					  "linkerror%d\n", i);
-			if (pci_read_config_byte(dd->pcidev, link_off,
-						 &linkerr))
-				dev_info(&dd->pcidev->dev,
-					 "Couldn't reread linkerror%d of "
-					 "HT slave/primary block\n", i);
-			else if (linkerr & 0xf0)
-				dev_info(&dd->pcidev->dev,
-					 "HT linkerror%d bits 0x%x "
-					 "couldn't be cleared\n",
-					 i, linkerr >> 4);
-		}
-	}
-}
-
-static int ipath_setup_ht_reset(struct ipath_devdata *dd)
-{
-	ipath_dbg("No reset possible for this InfiniPath hardware\n");
-	return 0;
-}
-
-#define HT_INTR_DISC_CONFIG  0x80	/* HT interrupt and discovery cap */
-#define HT_INTR_REG_INDEX    2	/* intconfig requires indirect accesses */
-
-/*
- * Bits 13-15 of command==0 is slave/primary block.  Clear any HT CRC
- * errors.  We only bother to do this at load time, because it's OK if
- * it happened before we were loaded (first time after boot/reset),
- * but any time after that, it's fatal anyway.  Also need to not check
- * for upper byte errors if we are in 8 bit mode, so figure out
- * our width.  For now, at least, also complain if it's 8 bit.
- */
-static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
-			     int pos, u8 cap_type)
-{
-	u8 linkwidth = 0, linkerr, link_a_b_off, link_off;
-	u16 linkctrl = 0;
-	int i;
-
-	dd->ipath_ht_slave_off = pos;
-	/* command word, master_host bit */
-	/* master host || slave */
-	if ((cap_type >> 2) & 1)
-		link_a_b_off = 4;
-	else
-		link_a_b_off = 0;
-	ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n",
-		   link_a_b_off ? 1 : 0,
-		   link_a_b_off ? 'B' : 'A');
-
-	link_a_b_off += pos;
-
-	/*
-	 * check both link control registers; clear both HT CRC sets if
-	 * necessary.
-	 */
-	for (i = 0; i < 2; i++) {
-		link_off = pos + i * 4 + 0x4;
-		if (pci_read_config_word(pdev, link_off, &linkctrl))
-			ipath_dev_err(dd, "Couldn't read HT link control%d "
-				      "register\n", i);
-		else if (linkctrl & (0xf << 8)) {
-			ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error "
-				   "bits %x\n", i, linkctrl & (0xf << 8));
-			/*
-			 * now write them back to clear the error.
-			 */
-			pci_write_config_word(pdev, link_off,
-					      linkctrl & (0xf << 8));
-		}
-	}
-
-	/*
-	 * As with HT CRC bits, same for protocol errors that might occur
-	 * during boot.
-	 */
-	for (i = 0; i < 2; i++) {
-		link_off = pos + i * 4 + 0xd;
-		if (pci_read_config_byte(pdev, link_off, &linkerr))
-			dev_info(&pdev->dev, "Couldn't read linkerror%d "
-				 "of HT slave/primary block\n", i);
-		else if (linkerr & 0xf0) {
-			ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
-				   "clearing\n", linkerr >> 4, i);
-			/*
-			 * writing the linkerr bits that are set will clear
-			 * them
-			 */
-			if (pci_write_config_byte
-			    (pdev, link_off, linkerr))
-				ipath_dbg("Failed write to clear HT "
-					  "linkerror%d\n", i);
-			if (pci_read_config_byte(pdev, link_off, &linkerr))
-				dev_info(&pdev->dev, "Couldn't reread "
-					 "linkerror%d of HT slave/primary "
-					 "block\n", i);
-			else if (linkerr & 0xf0)
-				dev_info(&pdev->dev, "HT linkerror%d bits "
-					 "0x%x couldn't be cleared\n",
-					 i, linkerr >> 4);
-		}
-	}
-
-	/*
-	 * this is just for our link to the host, not devices connected
-	 * through tunnel.
-	 */
-
-	if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth))
-		ipath_dev_err(dd, "Couldn't read HT link width "
-			      "config register\n");
-	else {
-		u32 width;
-		switch (linkwidth & 7) {
-		case 5:
-			width = 4;
-			break;
-		case 4:
-			width = 2;
-			break;
-		case 3:
-			width = 32;
-			break;
-		case 1:
-			width = 16;
-			break;
-		case 0:
-		default:	/* if wrong, assume 8 bit */
-			width = 8;
-			break;
-		}
-
-		dd->ipath_lbus_width = width;
-
-		if (linkwidth != 0x11) {
-			ipath_dev_err(dd, "Not configured for 16 bit HT "
-				      "(%x)\n", linkwidth);
-			if (!(linkwidth & 0xf)) {
-				ipath_dbg("Will ignore HT lane1 errors\n");
-				dd->ipath_flags |= IPATH_8BIT_IN_HT0;
-			}
-		}
-	}
-
-	/*
-	 * this is just for our link to the host, not devices connected
-	 * through tunnel.
-	 */
-	if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth))
-		ipath_dev_err(dd, "Couldn't read HT link frequency "
-			      "config register\n");
-	else {
-		u32 speed;
-		switch (linkwidth & 0xf) {
-		case 6:
-			speed = 1000;
-			break;
-		case 5:
-			speed = 800;
-			break;
-		case 4:
-			speed = 600;
-			break;
-		case 3:
-			speed = 500;
-			break;
-		case 2:
-			speed = 400;
-			break;
-		case 1:
-			speed = 300;
-			break;
-		default:
-			/*
-			 * assume reserved and vendor-specific are 200...
-			 */
-		case 0:
-			speed = 200;
-			break;
-		}
-		dd->ipath_lbus_speed = speed;
-	}
-
-	snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
-		"HyperTransport,%uMHz,x%u\n",
-		dd->ipath_lbus_speed,
-		dd->ipath_lbus_width);
-}
-
-static int ipath_ht_intconfig(struct ipath_devdata *dd)
-{
-	int ret;
-
-	if (dd->ipath_intconfig) {
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
-				 dd->ipath_intconfig);	/* interrupt address */
-		ret = 0;
-	} else {
-		ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
-			      "interrupt address\n");
-		ret = -EINVAL;
-	}
-
-	return ret;
-}
-
-static void ipath_ht_irq_update(struct pci_dev *dev, int irq,
-				struct ht_irq_msg *msg)
-{
-	struct ipath_devdata *dd = pci_get_drvdata(dev);
-	u64 prev_intconfig = dd->ipath_intconfig;
-
-	dd->ipath_intconfig = msg->address_lo;
-	dd->ipath_intconfig |= ((u64) msg->address_hi) << 32;
-
-	/*
-	 * If the previous value of dd->ipath_intconfig is zero, we're
-	 * getting configured for the first time, and must not program the
-	 * intconfig register here (it will be programmed later, when the
-	 * hardware is ready).  Otherwise, we should.
-	 */
-	if (prev_intconfig)
-		ipath_ht_intconfig(dd);
-}
-
-/**
- * ipath_setup_ht_config - setup the interruptconfig register
- * @dd: the infinipath device
- * @pdev: the PCI device
- *
- * setup the interruptconfig register from the HT config info.
- * Also clear CRC errors in HT linkcontrol, if necessary.
- * This is done only for the real hardware.  It is done before
- * chip address space is initted, so can't touch infinipath registers
- */
-static int ipath_setup_ht_config(struct ipath_devdata *dd,
-				 struct pci_dev *pdev)
-{
-	int pos, ret;
-
-	ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update);
-	if (ret < 0) {
-		ipath_dev_err(dd, "Couldn't create interrupt handler: "
-			      "err %d\n", ret);
-		goto bail;
-	}
-	dd->ipath_irq = ret;
-	ret = 0;
-
-	/*
-	 * Handle clearing CRC errors in linkctrl register if necessary.  We
-	 * do this early, before we ever enable errors or hardware errors,
-	 * mostly to avoid causing the chip to enter freeze mode.
-	 */
-	pos = pci_find_capability(pdev, PCI_CAP_ID_HT);
-	if (!pos) {
-		ipath_dev_err(dd, "Couldn't find HyperTransport "
-			      "capability; no interrupts\n");
-		ret = -ENODEV;
-		goto bail;
-	}
-	do {
-		u8 cap_type;
-
-		/*
-		 * The HT capability type byte is 3 bytes after the
-		 * capability byte.
-		 */
-		if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
-			dev_info(&pdev->dev, "Couldn't read config "
-				 "command @ %d\n", pos);
-			continue;
-		}
-		if (!(cap_type & 0xE0))
-			slave_or_pri_blk(dd, pdev, pos, cap_type);
-	} while ((pos = pci_find_next_capability(pdev, pos,
-						 PCI_CAP_ID_HT)));
-
-	dd->ipath_flags |= IPATH_SWAP_PIOBUFS;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff
- * @dd: the infinipath device
- *
- * Called during driver unload.
- * This is currently a nop for the HT chip, not for all chips
- */
-static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
-{
-}
-
-/**
- * ipath_setup_ht_setextled - set the state of the two external LEDs
- * @dd: the infinipath device
- * @lst: the L state
- * @ltst: the LT state
- *
- * Set the state of the two external LEDs, to indicate physical and
- * logical state of IB link.   For this chip (at least with recommended
- * board pinouts), LED1 is Green (physical state), and LED2 is Yellow
- * (logical state)
- *
- * Note:  We try to match the Mellanox HCA LED behavior as best
- * we can.  Green indicates physical link state is OK (something is
- * plugged in, and we can train).
- * Amber indicates the link is logically up (ACTIVE).
- * Mellanox further blinks the amber LED to indicate data packet
- * activity, but we have no hardware support for that, so it would
- * require waking up every 10-20 msecs and checking the counters
- * on the chip, and then turning the LED off if appropriate.  That's
- * visible overhead, so not something we will do.
- *
- */
-static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
-				     u64 lst, u64 ltst)
-{
-	u64 extctl;
-	unsigned long flags = 0;
-
-	/* the diags use the LED to indicate diag info, so we leave
-	 * the external LED alone when the diags are running */
-	if (ipath_diag_inuse)
-		return;
-
-	/* Allow override of LED display for, e.g. Locating system in rack */
-	if (dd->ipath_led_override) {
-		ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
-			? INFINIPATH_IBCS_LT_STATE_LINKUP
-			: INFINIPATH_IBCS_LT_STATE_DISABLED;
-		lst = (dd->ipath_led_override & IPATH_LED_LOG)
-			? INFINIPATH_IBCS_L_STATE_ACTIVE
-			: INFINIPATH_IBCS_L_STATE_DOWN;
-	}
-
-	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-	/*
-	 * start by setting both LED control bits to off, then turn
-	 * on the appropriate bit(s).
-	 */
-	if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */
-		/*
-		 * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF
-		 * is inverted,  because it is normally used to indicate
-		 * a hardware fault at reset, if there were errors
-		 */
-		extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON)
-			| INFINIPATH_EXTC_LEDGBLERR_OFF;
-		if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-			extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF;
-		if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-			extctl |= INFINIPATH_EXTC_LEDGBLOK_ON;
-	} else {
-		extctl = dd->ipath_extctrl &
-			~(INFINIPATH_EXTC_LED1PRIPORT_ON |
-			  INFINIPATH_EXTC_LED2PRIPORT_ON);
-		if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-			extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
-		if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-			extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
-	}
-	dd->ipath_extctrl = extctl;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
-	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-}
-
-static void ipath_init_ht_variables(struct ipath_devdata *dd)
-{
-	/*
-	 * setup the register offsets, since they are different for each
-	 * chip
-	 */
-	dd->ipath_kregs = &ipath_ht_kregs;
-	dd->ipath_cregs = &ipath_ht_cregs;
-
-	dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
-	dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
-	dd->ipath_gpio_sda = IPATH_GPIO_SDA;
-	dd->ipath_gpio_scl = IPATH_GPIO_SCL;
-
-	/*
-	 * Fill in data for field-values that change in newer chips.
-	 * We dynamically specify only the mask for LINKTRAININGSTATE
-	 * and only the shift for LINKSTATE, as they are the only ones
-	 * that change.  Also precalculate the 3 link states of interest
-	 * and the combined mask.
-	 */
-	dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT;
-	dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK;
-	dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
-		dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
-	dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-		INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-		(INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
-	dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-		INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-		(INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
-	dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
-		INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
-		(INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
-
-	/*
-	 * Fill in data for ibcc field-values that change in newer chips.
-	 * We dynamically specify only the mask for LINKINITCMD
-	 * and only the shift for LINKCMD and MAXPKTLEN, as they are
-	 * the only ones that change.
-	 */
-	dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
-	dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
-	dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-
-	/* Fill in shifts for RcvCtrl. */
-	dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
-	dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
-	dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
-	dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */
-
-	dd->ipath_i_bitsextant =
-		(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
-		(INFINIPATH_I_RCVAVAIL_MASK <<
-		 INFINIPATH_I_RCVAVAIL_SHIFT) |
-		INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
-		INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
-
-	dd->ipath_e_bitsextant =
-		INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
-		INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
-		INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
-		INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
-		INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
-		INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
-		INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-		INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
-		INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
-		INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
-		INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
-		INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
-		INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
-		INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
-		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
-		INFINIPATH_E_HARDWARE;
-
-	dd->ipath_hwe_bitsextant =
-		(INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
-		 INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
-		(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-		 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
-		(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-		 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
-		INFINIPATH_HWE_HTCLNKABYTE0CRCERR |
-		INFINIPATH_HWE_HTCLNKABYTE1CRCERR |
-		INFINIPATH_HWE_HTCLNKBBYTE0CRCERR |
-		INFINIPATH_HWE_HTCLNKBBYTE1CRCERR |
-		INFINIPATH_HWE_HTCMISCERR4 |
-		INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 |
-		INFINIPATH_HWE_HTCMISCERR7 |
-		INFINIPATH_HWE_HTCBUSTREQPARITYERR |
-		INFINIPATH_HWE_HTCBUSTRESPPARITYERR |
-		INFINIPATH_HWE_HTCBUSIREQPARITYERR |
-		INFINIPATH_HWE_RXDSYNCMEMPARITYERR |
-		INFINIPATH_HWE_MEMBISTFAILED |
-		INFINIPATH_HWE_COREPLL_FBSLIP |
-		INFINIPATH_HWE_COREPLL_RFSLIP |
-		INFINIPATH_HWE_HTBPLL_FBSLIP |
-		INFINIPATH_HWE_HTBPLL_RFSLIP |
-		INFINIPATH_HWE_HTAPLL_FBSLIP |
-		INFINIPATH_HWE_HTAPLL_RFSLIP |
-		INFINIPATH_HWE_SERDESPLLFAILED |
-		INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
-		INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
-
-	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
-	dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
-	dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
-
-	/*
-	 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
-	 * 2 is Some Misc, 3 is reserved for future.
-	 */
-	dd->ipath_eep_st_masks[0].hwerrs_to_log =
-		INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-		INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
-
-	dd->ipath_eep_st_masks[1].hwerrs_to_log =
-		INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-		INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
-
-	dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
-
-	dd->delay_mult = 2; /* SDR, 4X, can't change */
-
-	dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
-	dd->ipath_link_speed_supported = IPATH_IB_SDR;
-	dd->ipath_link_width_enabled = IB_WIDTH_4X;
-	dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
-	/* these can't change for this chip, so set once */
-	dd->ipath_link_width_active = dd->ipath_link_width_enabled;
-	dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
-}
-
-/**
- * ipath_ht_init_hwerrors - enable hardware errors
- * @dd: the infinipath device
- *
- * now that we have finished initializing everything that might reasonably
- * cause a hardware error, and cleared those errors bits as they occur,
- * we can enable hardware errors in the mask (potentially enabling
- * freeze mode), and enable hardware errors as errors (along with
- * everything else) in errormask
- */
-static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
-{
-	ipath_err_t val;
-	u64 extsval;
-
-	extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-
-	if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
-		ipath_dev_err(dd, "MemBIST did not complete!\n");
-	if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
-		ipath_dbg("MemBIST corrected\n");
-
-	ipath_check_htlink(dd);
-
-	/* barring bugs, all hwerrors become interrupts, which can */
-	val = -1LL;
-	/* don't look at crc lane1 if 8 bit */
-	if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
-		val &= ~infinipath_hwe_htclnkabyte1crcerr;
-	/* don't look at crc lane1 if 8 bit */
-	if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
-		val &= ~infinipath_hwe_htclnkbbyte1crcerr;
-
-	/*
-	 * disable RXDSYNCMEMPARITY because external serdes is unused,
-	 * and therefore the logic will never be used or initialized,
-	 * and uninitialized state will normally result in this error
-	 * being asserted.  Similarly for the external serdess pll
-	 * lock signal.
-	 */
-	val &= ~(INFINIPATH_HWE_SERDESPLLFAILED |
-		 INFINIPATH_HWE_RXDSYNCMEMPARITYERR);
-
-	/*
-	 * Disable MISCERR4 because of an inversion in the HT core
-	 * logic checking for errors that cause this bit to be set.
-	 * The errata can also cause the protocol error bit to be set
-	 * in the HT config space linkerror register(s).
-	 */
-	val &= ~INFINIPATH_HWE_HTCMISCERR4;
-
-	/*
-	 * PLL ignored because unused MDIO interface has a logic problem
-	 */
-	if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
-		val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-	dd->ipath_hwerrmask = val;
-}
-
-
-
-
-/**
- * ipath_ht_bringup_serdes - bring up the serdes
- * @dd: the infinipath device
- */
-static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
-{
-	u64 val, config1;
-	int ret = 0, change = 0;
-
-	ipath_dbg("Trying to bringup serdes\n");
-
-	if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
-	    INFINIPATH_HWE_SERDESPLLFAILED)
-	{
-		ipath_dbg("At start, serdes PLL failed bit set in "
-			  "hwerrstatus, clearing and continuing\n");
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-				 INFINIPATH_HWE_SERDESPLLFAILED);
-	}
-
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-	config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
-
-	ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx "
-		   "config1=%llx, sstatus=%llx xgxs %llx\n",
-		   (unsigned long long) val, (unsigned long long) config1,
-		   (unsigned long long)
-		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-		   (unsigned long long)
-		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-	/* force reset on */
-	val |= INFINIPATH_SERDC0_RESET_PLL
-		/* | INFINIPATH_SERDC0_RESET_MASK */
-		;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-	udelay(15);		/* need pll reset set at least for a bit */
-
-	if (val & INFINIPATH_SERDC0_RESET_PLL) {
-		u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL;
-		/* set lane resets, and tx idle, during pll reset */
-		val2 |= INFINIPATH_SERDC0_RESET_MASK |
-			INFINIPATH_SERDC0_TXIDLE;
-		ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing "
-			   "%llx)\n", (unsigned long long) val2);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-				 val2);
-		/*
-		 * be sure chip saw it
-		 */
-		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		/*
-		 * need pll reset clear at least 11 usec before lane
-		 * resets cleared; give it a few more
-		 */
-		udelay(15);
-		val = val2;	/* for check below */
-	}
-
-	if (val & (INFINIPATH_SERDC0_RESET_PLL |
-		   INFINIPATH_SERDC0_RESET_MASK |
-		   INFINIPATH_SERDC0_TXIDLE)) {
-		val &= ~(INFINIPATH_SERDC0_RESET_PLL |
-			 INFINIPATH_SERDC0_RESET_MASK |
-			 INFINIPATH_SERDC0_TXIDLE);
-		/* clear them */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
-				 val);
-	}
-
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-	if (val & INFINIPATH_XGXS_RESET) {
-		/* normally true after boot */
-		val &= ~INFINIPATH_XGXS_RESET;
-		change = 1;
-	}
-	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
-	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
-		/* need to compensate for Tx inversion in partner */
-		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
-		         INFINIPATH_XGXS_RX_POL_SHIFT);
-		val |= dd->ipath_rx_pol_inv <<
-			INFINIPATH_XGXS_RX_POL_SHIFT;
-		change = 1;
-	}
-	if (change)
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-	/* clear current and de-emphasis bits */
-	config1 &= ~0x0ffffffff00ULL;
-	/* set current to 20ma */
-	config1 |= 0x00000000000ULL;
-	/* set de-emphasis to -5.68dB */
-	config1 |= 0x0cccc000000ULL;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
-
-	ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx "
-		   "config1=%llx, sstatus=%llx xgxs %llx\n",
-		   (unsigned long long) val, (unsigned long long) config1,
-		   (unsigned long long)
-		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-		   (unsigned long long)
-		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-	return ret;		/* for now, say we always succeeded */
-}
-
-/**
- * ipath_ht_quiet_serdes - set serdes to txidle
- * @dd: the infinipath device
- * driver is being unloaded
- */
-static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
-{
-	u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-	val |= INFINIPATH_SERDC0_TXIDLE;
-	ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
-		  (unsigned long long) val);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-}
-
-/**
- * ipath_pe_put_tid - write a TID in chip
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for special locking etc.
- * It's used for both the full cleanup on exit, as well as the normal
- * setup and teardown.
- */
-static void ipath_ht_put_tid(struct ipath_devdata *dd,
-			     u64 __iomem *tidptr, u32 type,
-			     unsigned long pa)
-{
-	if (!dd->ipath_kregbase)
-		return;
-
-	if (pa != dd->ipath_tidinvalid) {
-		if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
-			dev_info(&dd->pcidev->dev,
-				 "physaddr %lx has more than "
-				 "40 bits, using only 40!!!\n", pa);
-			pa &= INFINIPATH_RT_ADDR_MASK;
-		}
-		if (type == RCVHQ_RCV_TYPE_EAGER)
-			pa |= dd->ipath_tidtemplate;
-		else {
-			/* in words (fixed, full page).  */
-			u64 lenvalid = PAGE_SIZE >> 2;
-			lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-			pa |= lenvalid | INFINIPATH_RT_VALID;
-		}
-	}
-
-	writeq(pa, tidptr);
-}
-
-
-/**
- * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
- * @dd: the infinipath device
- * @port: the port
- *
- * Used from ipath_close(), and at chip initialization.
- */
-static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
-{
-	u64 __iomem *tidbase;
-	int i;
-
-	if (!dd->ipath_kregbase)
-		return;
-
-	ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
-
-	/*
-	 * need to invalidate all of the expected TID entries for this
-	 * port, so we don't have valid entries that might somehow get
-	 * used (early in next use of this port, or through some bug)
-	 */
-	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-				   dd->ipath_rcvtidbase +
-				   port * dd->ipath_rcvtidcnt *
-				   sizeof(*tidbase));
-	for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-		ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
-				 dd->ipath_tidinvalid);
-
-	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
-				   dd->ipath_rcvegrbase +
-				   port * dd->ipath_rcvegrcnt *
-				   sizeof(*tidbase));
-
-	for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-		ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
-				 dd->ipath_tidinvalid);
-}
-
-/**
- * ipath_ht_tidtemplate - setup constants for TID updates
- * @dd: the infinipath device
- *
- * We setup stuff that we use a lot, to avoid calculating each time
- */
-static void ipath_ht_tidtemplate(struct ipath_devdata *dd)
-{
-	dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2;
-	dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT;
-	dd->ipath_tidtemplate |= INFINIPATH_RT_VALID;
-
-	/*
-	 * work around chip errata bug 7358, by marking invalid tids
-	 * as having max length
-	 */
-	dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) <<
-		INFINIPATH_RT_BUFSIZE_SHIFT;
-}
-
-static int ipath_ht_early_init(struct ipath_devdata *dd)
-{
-	u32 __iomem *piobuf;
-	u32 pioincr, val32;
-	int i;
-
-	/*
-	 * one cache line; long IB headers will spill over into received
-	 * buffer
-	 */
-	dd->ipath_rcvhdrentsize = 16;
-	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
-
-	/*
-	 * For HT, we allocate a somewhat overly large eager buffer,
-	 * such that we can guarantee that we can receive the largest
-	 * packet that we can send out.  To truly support a 4KB MTU,
-	 * we need to bump this to a large value.  To date, other than
-	 * testing, we have never encountered an HCA that can really
-	 * send 4KB MTU packets, so we do not handle that (we'll get
-	 * errors interrupts if we ever see one).
-	 */
-	dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
-
-	/*
-	 * the min() check here is currently a nop, but it may not
-	 * always be, depending on just how we do ipath_rcvegrbufsize
-	 */
-	dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
-				 dd->ipath_rcvegrbufsize);
-	dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
-	ipath_ht_tidtemplate(dd);
-
-	/*
-	 * zero all the TID entries at startup.  We do this for sanity,
-	 * in case of a previous driver crash of some kind, and also
-	 * because the chip powers up with these memories in an unknown
-	 * state.  Use portcnt, not cfgports, since this is for the
-	 * full chip, not for current (possibly different) configuration
-	 * value.
-	 * Chip Errata bug 6447
-	 */
-	for (val32 = 0; val32 < dd->ipath_portcnt; val32++)
-		ipath_ht_clear_tids(dd, val32);
-
-	/*
-	 * write the pbc of each buffer, to be sure it's initialized, then
-	 * cancel all the buffers, and also abort any packets that might
-	 * have been in flight for some reason (the latter is for driver
-	 * unload/reload, but isn't a bad idea at first init).	PIO send
-	 * isn't enabled at this point, so there is no danger of sending
-	 * these out on the wire.
-	 * Chip Errata bug 6610
-	 */
-	piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) +
-				  dd->ipath_piobufbase);
-	pioincr = dd->ipath_palign / sizeof(*piobuf);
-	for (i = 0; i < dd->ipath_piobcnt2k; i++) {
-		/*
-		 * reasonable word count, just to init pbc
-		 */
-		writel(16, piobuf);
-		piobuf += pioincr;
-	}
-
-	ipath_get_eeprom_info(dd);
-	if (dd->ipath_boardrev == 5) {
-		/*
-		 * Later production QHT7040 has same changes as QHT7140, so
-		 * can use GPIO interrupts.  They have serial #'s starting
-		 * with 128, rather than 112.
-		 */
-		if (dd->ipath_serial[0] == '1' &&
-		    dd->ipath_serial[1] == '2' &&
-		    dd->ipath_serial[2] == '8')
-			dd->ipath_flags |= IPATH_GPIO_INTR;
-		else {
-			ipath_dev_err(dd, "Unsupported InfiniPath board "
-				"(serial number %.16s)!\n",
-				dd->ipath_serial);
-			return 1;
-		}
-	}
-
-	if (dd->ipath_minrev >= 4) {
-		/* Rev4+ reports extra errors via internal GPIO pins */
-		dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
-		dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 dd->ipath_gpio_mask);
-	}
-
-	return 0;
-}
-
-
-/**
- * ipath_init_ht_get_base_info - set chip-specific flags for user code
- * @dd: the infinipath device
- * @kbase: ipath_base_info pointer
- *
- * We set the PCIE flag because the lower bandwidth on PCIe vs
- * HyperTransport can affect some user packet algorithms.
- */
-static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
-{
-	struct ipath_base_info *kinfo = kbase;
-
-	kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
-		IPATH_RUNTIME_PIO_REGSWAPPED;
-
-	if (pd->port_dd->ipath_minrev < 4)
-		kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY;
-
-	return 0;
-}
-
-static void ipath_ht_free_irq(struct ipath_devdata *dd)
-{
-	free_irq(dd->ipath_irq, dd);
-	ht_destroy_irq(dd->ipath_irq);
-	dd->ipath_irq = 0;
-	dd->ipath_intconfig = 0;
-}
-
-static struct ipath_message_header *
-ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
-{
-	return (struct ipath_message_header *)
-		&rhf_addr[sizeof(u64) / sizeof(u32)];
-}
-
-static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports)
-{
-	dd->ipath_portcnt =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
-	dd->ipath_p0_rcvegrcnt =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-}
-
-static void ipath_ht_read_counters(struct ipath_devdata *dd,
-				   struct infinipath_counters *cntrs)
-{
-	cntrs->LBIntCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
-	cntrs->LBFlowStallCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
-	cntrs->TxSDmaDescCnt = 0;
-	cntrs->TxUnsupVLErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
-	cntrs->TxDataPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
-	cntrs->TxFlowPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
-	cntrs->TxDwordCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
-	cntrs->TxLenErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
-	cntrs->TxMaxMinLenErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
-	cntrs->TxUnderrunCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
-	cntrs->TxFlowStallCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
-	cntrs->TxDroppedPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
-	cntrs->RxDroppedPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
-	cntrs->RxDataPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
-	cntrs->RxFlowPktCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
-	cntrs->RxDwordCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
-	cntrs->RxLenErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
-	cntrs->RxMaxMinLenErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
-	cntrs->RxICRCErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
-	cntrs->RxVCRCErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
-	cntrs->RxFlowCtrlErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
-	cntrs->RxBadFormatCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
-	cntrs->RxLinkProblemCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
-	cntrs->RxEBPCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
-	cntrs->RxLPCRCErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
-	cntrs->RxBufOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
-	cntrs->RxTIDFullErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
-	cntrs->RxTIDValidErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
-	cntrs->RxPKeyMismatchCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
-	cntrs->RxP0HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
-	cntrs->RxP1HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
-	cntrs->RxP2HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
-	cntrs->RxP3HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
-	cntrs->RxP4HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
-	cntrs->RxP5HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt));
-	cntrs->RxP6HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt));
-	cntrs->RxP7HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt));
-	cntrs->RxP8HdrEgrOvflCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt));
-	cntrs->RxP9HdrEgrOvflCnt = 0;
-	cntrs->RxP10HdrEgrOvflCnt = 0;
-	cntrs->RxP11HdrEgrOvflCnt = 0;
-	cntrs->RxP12HdrEgrOvflCnt = 0;
-	cntrs->RxP13HdrEgrOvflCnt = 0;
-	cntrs->RxP14HdrEgrOvflCnt = 0;
-	cntrs->RxP15HdrEgrOvflCnt = 0;
-	cntrs->RxP16HdrEgrOvflCnt = 0;
-	cntrs->IBStatusChangeCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
-	cntrs->IBLinkErrRecoveryCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
-	cntrs->IBLinkDownedCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
-	cntrs->IBSymbolErrCnt =
-		ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
-	cntrs->RxVL15DroppedPktCnt = 0;
-	cntrs->RxOtherLocalPhyErrCnt = 0;
-	cntrs->PcieRetryBufDiagQwordCnt = 0;
-	cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
-	cntrs->LocalLinkIntegrityErrCnt =
-		(dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-		dd->ipath_lli_errs : dd->ipath_lli_errors;
-	cntrs->RxVlErrCnt = 0;
-	cntrs->RxDlidFltrCnt = 0;
-}
-
-
-/* no interrupt fallback for these chips */
-static int ipath_ht_nointr_fallback(struct ipath_devdata *dd)
-{
-	return 0;
-}
-
-
-/*
- * reset the XGXS (between serdes and IBC).  Slightly less intrusive
- * than resetting the IBC or external link state, and useful in some
- * cases to cause some retraining.  To do this right, we reset IBC
- * as well.
- */
-static void ipath_ht_xgxs_reset(struct ipath_devdata *dd)
-{
-	u64 val, prev_val;
-
-	prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
-	val = prev_val | INFINIPATH_XGXS_RESET;
-	prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-			 dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-	ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-			 dd->ipath_control);
-}
-
-
-static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which)
-{
-	int ret;
-
-	switch (which) {
-	case IPATH_IB_CFG_LWID:
-		ret = dd->ipath_link_width_active;
-		break;
-	case IPATH_IB_CFG_SPD:
-		ret = dd->ipath_link_speed_active;
-		break;
-	case IPATH_IB_CFG_LWID_ENB:
-		ret = dd->ipath_link_width_enabled;
-		break;
-	case IPATH_IB_CFG_SPD_ENB:
-		ret = dd->ipath_link_speed_enabled;
-		break;
-	default:
-		ret =  -ENOTSUPP;
-		break;
-	}
-	return ret;
-}
-
-
-/* we assume range checking is already done, if needed */
-static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
-{
-	int ret = 0;
-
-	if (which == IPATH_IB_CFG_LWID_ENB)
-		dd->ipath_link_width_enabled = val;
-	else if (which == IPATH_IB_CFG_SPD_ENB)
-		dd->ipath_link_speed_enabled = val;
-	else
-		ret = -ENOTSUPP;
-	return ret;
-}
-
-
-static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
-{
-}
-
-
-static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
-{
-	ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs),
-		ipath_ib_linktrstate(dd, ibcs));
-	return 0;
-}
-
-
-/**
- * ipath_init_iba6110_funcs - set up the chip-specific function pointers
- * @dd: the infinipath device
- *
- * This is global, and is called directly at init to set up the
- * chip-specific function pointers for later use.
- */
-void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
-{
-	dd->ipath_f_intrsetup = ipath_ht_intconfig;
-	dd->ipath_f_bus = ipath_setup_ht_config;
-	dd->ipath_f_reset = ipath_setup_ht_reset;
-	dd->ipath_f_get_boardname = ipath_ht_boardname;
-	dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
-	dd->ipath_f_early_init = ipath_ht_early_init;
-	dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors;
-	dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes;
-	dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes;
-	dd->ipath_f_clear_tids = ipath_ht_clear_tids;
-	dd->ipath_f_put_tid = ipath_ht_put_tid;
-	dd->ipath_f_cleanup = ipath_setup_ht_cleanup;
-	dd->ipath_f_setextled = ipath_setup_ht_setextled;
-	dd->ipath_f_get_base_info = ipath_ht_get_base_info;
-	dd->ipath_f_free_irq = ipath_ht_free_irq;
-	dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
-	dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback;
-	dd->ipath_f_get_msgheader = ipath_ht_get_msgheader;
-	dd->ipath_f_config_ports = ipath_ht_config_ports;
-	dd->ipath_f_read_counters = ipath_ht_read_counters;
-	dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset;
-	dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg;
-	dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg;
-	dd->ipath_f_config_jint = ipath_ht_config_jint;
-	dd->ipath_f_ib_updown = ipath_ht_ib_updown;
-
-	/*
-	 * initialize chip-specific variables
-	 */
-	ipath_init_ht_variables(dd);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_init_chip.c b/drivers/staging/rdma/ipath/ipath_init_chip.c
deleted file mode 100644
index a5eea199f733..000000000000
--- a/drivers/staging/rdma/ipath/ipath_init_chip.c
+++ /dev/null
@@ -1,1062 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/moduleparam.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_kernel.h"
-#include "ipath_common.h"
-
-/*
- * min buffers we want to have per port, after driver
- */
-#define IPATH_MIN_USER_PORT_BUFCNT 7
-
-/*
- * Number of ports we are configured to use (to allow for more pio
- * buffers per port, etc.)  Zero means use chip value.
- */
-static ushort ipath_cfgports;
-
-module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
-MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
-
-/*
- * Number of buffers reserved for driver (verbs and layered drivers.)
- * Initialized based on number of PIO buffers if not set via module interface.
- * The problem with this is that it's global, but we'll use different
- * numbers for different chip types.
- */
-static ushort ipath_kpiobufs;
-
-static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);
-
-module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort,
-		  &ipath_kpiobufs, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
-
-/**
- * create_port0_egr - allocate the eager TID buffers
- * @dd: the infinipath device
- *
- * This code is now quite different for user and kernel, because
- * the kernel uses skb's, for the accelerated network performance.
- * This is the kernel (port0) version.
- *
- * Allocate the eager TID buffers and program them into infinipath.
- * We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like verbs
- * packets, or pass the buffers up to the ipath layered driver and
- * thence the network layer, replacing them as we do so (see
- * ipath_rcv_layer()).
- */
-static int create_port0_egr(struct ipath_devdata *dd)
-{
-	unsigned e, egrcnt;
-	struct ipath_skbinfo *skbinfo;
-	int ret;
-
-	egrcnt = dd->ipath_p0_rcvegrcnt;
-
-	skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
-	if (skbinfo == NULL) {
-		ipath_dev_err(dd, "allocation error for eager TID "
-			      "skb array\n");
-		ret = -ENOMEM;
-		goto bail;
-	}
-	for (e = 0; e < egrcnt; e++) {
-		/*
-		 * This is a bit tricky in that we allocate extra
-		 * space for 2 bytes of the 14 byte ethernet header.
-		 * These two bytes are passed in the ipath header so
-		 * the rest of the data is word aligned.  We allocate
-		 * 4 bytes so that the data buffer stays word aligned.
-		 * See ipath_kreceive() for more details.
-		 */
-		skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
-		if (!skbinfo[e].skb) {
-			ipath_dev_err(dd, "SKB allocation error for "
-				      "eager TID %u\n", e);
-			while (e != 0)
-				dev_kfree_skb(skbinfo[--e].skb);
-			vfree(skbinfo);
-			ret = -ENOMEM;
-			goto bail;
-		}
-	}
-	/*
-	 * After loop above, so we can test non-NULL to see if ready
-	 * to use at receive, etc.
-	 */
-	dd->ipath_port0_skbinfo = skbinfo;
-
-	for (e = 0; e < egrcnt; e++) {
-		dd->ipath_port0_skbinfo[e].phys =
-		  ipath_map_single(dd->pcidev,
-				   dd->ipath_port0_skbinfo[e].skb->data,
-				   dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
-		dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
-				    ((char __iomem *) dd->ipath_kregbase +
-				     dd->ipath_rcvegrbase),
-				    RCVHQ_RCV_TYPE_EAGER,
-				    dd->ipath_port0_skbinfo[e].phys);
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-static int bringup_link(struct ipath_devdata *dd)
-{
-	u64 val, ibc;
-	int ret = 0;
-
-	/* hold IBC in reset */
-	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-			 dd->ipath_control);
-
-	/*
-	 * set initial max size pkt IBC will send, including ICRC; it's the
-	 * PIO buffer size in dwords, less 1; also see ipath_set_mtu()
-	 */
-	val = (dd->ipath_ibmaxlen >> 2) + 1;
-	ibc = val << dd->ibcc_mpl_shift;
-
-	/* flowcontrolwatermark is in units of KBytes */
-	ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
-	/*
-	 * How often flowctrl sent.  More or less in usecs; balance against
-	 * watermark value, so that in theory senders always get a flow
-	 * control update in time to not let the IB link go idle.
-	 */
-	ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT;
-	/* max error tolerance */
-	ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-	/* use "real" buffer space for */
-	ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT;
-	/* IB credit flow control. */
-	ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-	/* initially come up waiting for TS1, without sending anything. */
-	dd->ipath_ibcctrl = ibc;
-	/*
-	 * Want to start out with both LINKCMD and LINKINITCMD in NOP
-	 * (0 and 0).  Don't put linkinitcmd in ipath_ibcctrl, want that
-	 * to stay a NOP. Flag that we are disabled, for the (unlikely)
-	 * case that some recovery path is trying to bring the link up
-	 * before we are ready.
-	 */
-	ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
-		INFINIPATH_IBCC_LINKINITCMD_SHIFT;
-	dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
-	ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n",
-		   (unsigned long long) ibc);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc);
-
-	// be sure chip saw it
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-	ret = dd->ipath_f_bringup_serdes(dd);
-
-	if (ret)
-		dev_info(&dd->pcidev->dev, "Could not initialize SerDes, "
-			 "not usable\n");
-	else {
-		/* enable IBC */
-		dd->ipath_control |= INFINIPATH_C_LINKENABLE;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-				 dd->ipath_control);
-	}
-
-	return ret;
-}
-
-static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
-{
-	struct ipath_portdata *pd;
-
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (pd) {
-		pd->port_dd = dd;
-		pd->port_cnt = 1;
-		/* The port 0 pkey table is used by the layer interface. */
-		pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
-		pd->port_seq_cnt = 1;
-	}
-	return pd;
-}
-
-static int init_chip_first(struct ipath_devdata *dd)
-{
-	struct ipath_portdata *pd;
-	int ret = 0;
-	u64 val;
-
-	spin_lock_init(&dd->ipath_kernel_tid_lock);
-	spin_lock_init(&dd->ipath_user_tid_lock);
-	spin_lock_init(&dd->ipath_sendctrl_lock);
-	spin_lock_init(&dd->ipath_uctxt_lock);
-	spin_lock_init(&dd->ipath_sdma_lock);
-	spin_lock_init(&dd->ipath_gpio_lock);
-	spin_lock_init(&dd->ipath_eep_st_lock);
-	spin_lock_init(&dd->ipath_sdepb_lock);
-	mutex_init(&dd->ipath_eep_lock);
-
-	/*
-	 * skip cfgports stuff because we are not allocating memory,
-	 * and we don't want problems if the portcnt changed due to
-	 * cfgports.  We do still check and report a difference, if
-	 * not same (should be impossible).
-	 */
-	dd->ipath_f_config_ports(dd, ipath_cfgports);
-	if (!ipath_cfgports)
-		dd->ipath_cfgports = dd->ipath_portcnt;
-	else if (ipath_cfgports <= dd->ipath_portcnt) {
-		dd->ipath_cfgports = ipath_cfgports;
-		ipath_dbg("Configured to use %u ports out of %u in chip\n",
-			  dd->ipath_cfgports, ipath_read_kreg32(dd,
-			  dd->ipath_kregs->kr_portcnt));
-	} else {
-		dd->ipath_cfgports = dd->ipath_portcnt;
-		ipath_dbg("Tried to configured to use %u ports; chip "
-			  "only supports %u\n", ipath_cfgports,
-			  ipath_read_kreg32(dd,
-				  dd->ipath_kregs->kr_portcnt));
-	}
-	/*
-	 * Allocate full portcnt array, rather than just cfgports, because
-	 * cleanup iterates across all possible ports.
-	 */
-	dd->ipath_pd = kcalloc(dd->ipath_portcnt, sizeof(*dd->ipath_pd),
-			       GFP_KERNEL);
-
-	if (!dd->ipath_pd) {
-		ipath_dev_err(dd, "Unable to allocate portdata array, "
-			      "failing\n");
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	pd = create_portdata0(dd);
-	if (!pd) {
-		ipath_dev_err(dd, "Unable to allocate portdata for port "
-			      "0, failing\n");
-		ret = -ENOMEM;
-		goto done;
-	}
-	dd->ipath_pd[0] = pd;
-
-	dd->ipath_rcvtidcnt =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
-	dd->ipath_rcvtidbase =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
-	dd->ipath_rcvegrcnt =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-	dd->ipath_rcvegrbase =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
-	dd->ipath_palign =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-	dd->ipath_piobufbase =
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase);
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
-	dd->ipath_piosize2k = val & ~0U;
-	dd->ipath_piosize4k = val >> 32;
-	if (dd->ipath_piosize4k == 0 && ipath_mtu4096)
-		ipath_mtu4096 = 0; /* 4KB not supported by this chip */
-	dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048;
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
-	dd->ipath_piobcnt2k = val & ~0U;
-	dd->ipath_piobcnt4k = val >> 32;
-	dd->ipath_pio2kbase =
-		(u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
-				 (dd->ipath_piobufbase & 0xffffffff));
-	if (dd->ipath_piobcnt4k) {
-		dd->ipath_pio4kbase = (u32 __iomem *)
-			(((char __iomem *) dd->ipath_kregbase) +
-			 (dd->ipath_piobufbase >> 32));
-		/*
-		 * 4K buffers take 2 pages; we use roundup just to be
-		 * paranoid; we calculate it once here, rather than on
-		 * ever buf allocate
-		 */
-		dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k,
-					  dd->ipath_palign);
-		ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p "
-			  "(%x aligned)\n",
-			  dd->ipath_piobcnt2k, dd->ipath_piosize2k,
-			  dd->ipath_pio2kbase, dd->ipath_piobcnt4k,
-			  dd->ipath_piosize4k, dd->ipath_pio4kbase,
-			  dd->ipath_4kalign);
-	} else {
-		ipath_dbg("%u 2k piobufs @ %p\n",
-			  dd->ipath_piobcnt2k, dd->ipath_pio2kbase);
-	}
-done:
-	return ret;
-}
-
-/**
- * init_chip_reset - re-initialize after a reset, or enable
- * @dd: the infinipath device
- *
- * sanity check at least some of the values after reset, and
- * ensure no receive or transmit (explicitly, in case reset
- * failed
- */
-static int init_chip_reset(struct ipath_devdata *dd)
-{
-	u32 rtmp;
-	int i;
-	unsigned long flags;
-
-	/*
-	 * ensure chip does no sends or receives, tail updates, or
-	 * pioavail updates while we re-initialize
-	 */
-	dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift);
-	for (i = 0; i < dd->ipath_portcnt; i++) {
-		clear_bit(dd->ipath_r_portenable_shift + i,
-			  &dd->ipath_rcvctrl);
-		clear_bit(dd->ipath_r_intravail_shift + i,
-			  &dd->ipath_rcvctrl);
-	}
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-		dd->ipath_rcvctrl);
-
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl = 0U; /* no sdma, etc */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
-
-	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
-	if (rtmp != dd->ipath_rcvtidcnt)
-		dev_info(&dd->pcidev->dev, "tidcnt was %u before "
-			 "reset, now %u, using original\n",
-			 dd->ipath_rcvtidcnt, rtmp);
-	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
-	if (rtmp != dd->ipath_rcvtidbase)
-		dev_info(&dd->pcidev->dev, "tidbase was %u before "
-			 "reset, now %u, using original\n",
-			 dd->ipath_rcvtidbase, rtmp);
-	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-	if (rtmp != dd->ipath_rcvegrcnt)
-		dev_info(&dd->pcidev->dev, "egrcnt was %u before "
-			 "reset, now %u, using original\n",
-			 dd->ipath_rcvegrcnt, rtmp);
-	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
-	if (rtmp != dd->ipath_rcvegrbase)
-		dev_info(&dd->pcidev->dev, "egrbase was %u before "
-			 "reset, now %u, using original\n",
-			 dd->ipath_rcvegrbase, rtmp);
-
-	return 0;
-}
-
-static int init_pioavailregs(struct ipath_devdata *dd)
-{
-	int ret;
-
-	dd->ipath_pioavailregs_dma = dma_alloc_coherent(
-		&dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys,
-		GFP_KERNEL);
-	if (!dd->ipath_pioavailregs_dma) {
-		ipath_dev_err(dd, "failed to allocate PIOavail reg area "
-			      "in memory\n");
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	/*
-	 * we really want L2 cache aligned, but for current CPUs of
-	 * interest, they are the same.
-	 */
-	dd->ipath_statusp = (u64 *)
-		((char *)dd->ipath_pioavailregs_dma +
-		 ((2 * L1_CACHE_BYTES +
-		   dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
-	/* copy the current value now that it's really allocated */
-	*dd->ipath_statusp = dd->_ipath_status;
-	/*
-	 * setup buffer to hold freeze msg, accessible to apps,
-	 * following statusp
-	 */
-	dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1];
-	/* and its length */
-	dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
-
-	ret = 0;
-
-done:
-	return ret;
-}
-
-/**
- * init_shadow_tids - allocate the shadow TID array
- * @dd: the infinipath device
- *
- * allocate the shadow TID array, so we can ipath_munlock previous
- * entries.  It may make more sense to move the pageshadow to the
- * port data structure, so we only allocate memory for ports actually
- * in use, since we at 8k per port, now.
- */
-static void init_shadow_tids(struct ipath_devdata *dd)
-{
-	struct page **pages;
-	dma_addr_t *addrs;
-
-	pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-			sizeof(struct page *));
-	if (!pages) {
-		ipath_dev_err(dd, "failed to allocate shadow page * "
-			      "array, no expected sends!\n");
-		dd->ipath_pageshadow = NULL;
-		return;
-	}
-
-	addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-			sizeof(dma_addr_t));
-	if (!addrs) {
-		ipath_dev_err(dd, "failed to allocate shadow dma handle "
-			      "array, no expected sends!\n");
-		vfree(pages);
-		dd->ipath_pageshadow = NULL;
-		return;
-	}
-
-	dd->ipath_pageshadow = pages;
-	dd->ipath_physshadow = addrs;
-}
-
-static void enable_chip(struct ipath_devdata *dd, int reinit)
-{
-	u32 val;
-	u64 rcvmask;
-	unsigned long flags;
-	int i;
-
-	if (!reinit)
-		init_waitqueue_head(&ipath_state_wait);
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	/* Enable PIO send, and update of PIOavail regs to memory. */
-	dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
-		INFINIPATH_S_PIOBUFAVAILUPD;
-
-	/*
-	 * Set the PIO avail update threshold to host memory
-	 * on chips that support it.
-	 */
-	if (dd->ipath_pioupd_thresh)
-		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-			<< INFINIPATH_S_UPDTHRESH_SHIFT;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	/*
-	 * Enable kernel ports' receive and receive interrupt.
-	 * Other ports done as user opens and inits them.
-	 */
-	rcvmask = 1ULL;
-	dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) |
-		(rcvmask << dd->ipath_r_intravail_shift);
-	if (!(dd->ipath_flags & IPATH_NODMA_RTAIL))
-		dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift);
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-
-	/*
-	 * now ready for use.  this should be cleared whenever we
-	 * detect a reset, or initiate one.
-	 */
-	dd->ipath_flags |= IPATH_INITTED;
-
-	/*
-	 * Init our shadow copies of head from tail values,
-	 * and write head values to match.
-	 */
-	val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
-	ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
-
-	/* Initialize so we interrupt on next packet received */
-	ipath_write_ureg(dd, ur_rcvhdrhead,
-			 dd->ipath_rhdrhead_intr_off |
-			 dd->ipath_pd[0]->port_head, 0);
-
-	/*
-	 * by now pioavail updates to memory should have occurred, so
-	 * copy them into our working/shadow registers; this is in
-	 * case something went wrong with abort, but mostly to get the
-	 * initial values of the generation bit correct.
-	 */
-	for (i = 0; i < dd->ipath_pioavregs; i++) {
-		__le64 pioavail;
-
-		/*
-		 * Chip Errata bug 6641; even and odd qwords>3 are swapped.
-		 */
-		if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
-			pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
-		else
-			pioavail = dd->ipath_pioavailregs_dma[i];
-		/*
-		 * don't need to worry about ipath_pioavailkernel here
-		 * because we will call ipath_chg_pioavailkernel() later
-		 * in initialization, to busy out buffers as needed
-		 */
-		dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
-	}
-	/* can get counters, stats, etc. */
-	dd->ipath_flags |= IPATH_PRESENT;
-}
-
-static int init_housekeeping(struct ipath_devdata *dd, int reinit)
-{
-	char boardn[40];
-	int ret = 0;
-
-	/*
-	 * have to clear shadow copies of registers at init that are
-	 * not otherwise set here, or all kinds of bizarre things
-	 * happen with driver on chip reset
-	 */
-	dd->ipath_rcvhdrsize = 0;
-
-	/*
-	 * Don't clear ipath_flags as 8bit mode was set before
-	 * entering this func. However, we do set the linkstate to
-	 * unknown, so we can watch for a transition.
-	 * PRESENT is set because we want register reads to work,
-	 * and the kernel infrastructure saw it in config space;
-	 * We clear it if we have failures.
-	 */
-	dd->ipath_flags |= IPATH_LINKUNK | IPATH_PRESENT;
-	dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED |
-			     IPATH_LINKDOWN | IPATH_LINKINIT);
-
-	ipath_cdbg(VERBOSE, "Try to read spc chip revision\n");
-	dd->ipath_revision =
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
-
-	/*
-	 * set up fundamental info we need to use the chip; we assume
-	 * if the revision reg and these regs are OK, we don't need to
-	 * special case the rest
-	 */
-	dd->ipath_sregbase =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase);
-	dd->ipath_cregbase =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase);
-	dd->ipath_uregbase =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase);
-	ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, "
-		   "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase,
-		   dd->ipath_uregbase, dd->ipath_cregbase);
-	if ((dd->ipath_revision & 0xffffffff) == 0xffffffff
-	    || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff
-	    || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff
-	    || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) {
-		ipath_dev_err(dd, "Register read failures from chip, "
-			      "giving up initialization\n");
-		dd->ipath_flags &= ~IPATH_PRESENT;
-		ret = -ENODEV;
-		goto done;
-	}
-
-
-	/* clear diagctrl register, in case diags were running and crashed */
-	ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
-
-	/* clear the initial reset flag, in case first driver load */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-			 INFINIPATH_E_RESET);
-
-	ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n",
-		   (unsigned long long) dd->ipath_revision,
-		   dd->ipath_pcirev);
-
-	if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) &
-	     INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {
-		ipath_dev_err(dd, "Driver only handles version %d, "
-			      "chip swversion is %d (%llx), failng\n",
-			      IPATH_CHIP_SWVERSION,
-			      (int)(dd->ipath_revision >>
-				    INFINIPATH_R_SOFTWARE_SHIFT) &
-			      INFINIPATH_R_SOFTWARE_MASK,
-			      (unsigned long long) dd->ipath_revision);
-		ret = -ENOSYS;
-		goto done;
-	}
-	dd->ipath_majrev = (u8) ((dd->ipath_revision >>
-				  INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
-				 INFINIPATH_R_CHIPREVMAJOR_MASK);
-	dd->ipath_minrev = (u8) ((dd->ipath_revision >>
-				  INFINIPATH_R_CHIPREVMINOR_SHIFT) &
-				 INFINIPATH_R_CHIPREVMINOR_MASK);
-	dd->ipath_boardrev = (u8) ((dd->ipath_revision >>
-				    INFINIPATH_R_BOARDID_SHIFT) &
-				   INFINIPATH_R_BOARDID_MASK);
-
-	ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
-
-	snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
-		 "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
-		 "SW Compat %u\n",
-		 IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
-		 (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
-		 INFINIPATH_R_ARCH_MASK,
-		 dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev,
-		 (unsigned)(dd->ipath_revision >>
-			    INFINIPATH_R_SOFTWARE_SHIFT) &
-		 INFINIPATH_R_SOFTWARE_MASK);
-
-	ipath_dbg("%s", dd->ipath_boardversion);
-
-	if (ret)
-		goto done;
-
-	if (reinit)
-		ret = init_chip_reset(dd);
-	else
-		ret = init_chip_first(dd);
-
-done:
-	return ret;
-}
-
-static void verify_interrupt(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-
-	if (!dd)
-		return; /* being torn down */
-
-	/*
-	 * If we don't have any interrupts, let the user know and
-	 * don't bother checking again.
-	 */
-	if (dd->ipath_int_counter == 0) {
-		if (!dd->ipath_f_intr_fallback(dd))
-			dev_err(&dd->pcidev->dev, "No interrupts detected, "
-				"not usable.\n");
-		else /* re-arm the timer to see if fallback works */
-			mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2);
-	} else
-		ipath_cdbg(VERBOSE, "%u interrupts at timer check\n",
-			dd->ipath_int_counter);
-}
-
-/**
- * ipath_init_chip - do the actual initialization sequence on the chip
- * @dd: the infinipath device
- * @reinit: reinitializing, so don't allocate new memory
- *
- * Do the actual initialization sequence on the chip.  This is done
- * both from the init routine called from the PCI infrastructure, and
- * when we reset the chip, or detect that it was reset internally,
- * or it's administratively re-enabled.
- *
- * Memory allocation here and in called routines is only done in
- * the first case (reinit == 0).  We have to be careful, because even
- * without memory allocation, we need to re-write all the chip registers
- * TIDs, etc. after the reset or enable has completed.
- */
-int ipath_init_chip(struct ipath_devdata *dd, int reinit)
-{
-	int ret = 0;
-	u32 kpiobufs, defkbufs;
-	u32 piobufs, uports;
-	u64 val;
-	struct ipath_portdata *pd;
-	gfp_t gfp_flags = GFP_USER | __GFP_COMP;
-
-	ret = init_housekeeping(dd, reinit);
-	if (ret)
-		goto done;
-
-	/*
-	 * We could bump this to allow for full rcvegrcnt + rcvtidcnt,
-	 * but then it no longer nicely fits power of two, and since
-	 * we now use routines that backend onto __get_free_pages, the
-	 * rest would be wasted.
-	 */
-	dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt,
-			 dd->ipath_rcvhdrcnt);
-
-	/*
-	 * Set up the shadow copies of the piobufavail registers,
-	 * which we compare against the chip registers for now, and
-	 * the in memory DMA'ed copies of the registers.  This has to
-	 * be done early, before we calculate lastport, etc.
-	 */
-	piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-	/*
-	 * calc number of pioavail registers, and save it; we have 2
-	 * bits per buffer.
-	 */
-	dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
-		/ (sizeof(u64) * BITS_PER_BYTE / 2);
-	uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
-	if (piobufs > 144)
-		defkbufs = 32 + dd->ipath_pioreserved;
-	else
-		defkbufs = 16 + dd->ipath_pioreserved;
-
-	if (ipath_kpiobufs && (ipath_kpiobufs +
-		(uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
-		int i = (int) piobufs -
-			(int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
-		if (i < 1)
-			i = 1;
-		dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
-			 "%d for kernel leaves too few for %d user ports "
-			 "(%d each); using %u\n", ipath_kpiobufs,
-			 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
-		/*
-		 * shouldn't change ipath_kpiobufs, because could be
-		 * different for different devices...
-		 */
-		kpiobufs = i;
-	} else if (ipath_kpiobufs)
-		kpiobufs = ipath_kpiobufs;
-	else
-		kpiobufs = defkbufs;
-	dd->ipath_lastport_piobuf = piobufs - kpiobufs;
-	dd->ipath_pbufsport =
-		uports ? dd->ipath_lastport_piobuf / uports : 0;
-	/* if not an even divisor, some user ports get extra buffers */
-	dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
-		(dd->ipath_pbufsport * uports);
-	if (dd->ipath_ports_extrabuf)
-		ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
-			"ports <= %u\n", dd->ipath_pbufsport,
-			dd->ipath_ports_extrabuf);
-	dd->ipath_lastpioindex = 0;
-	dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
-	/* ipath_pioavailshadow initialized earlier */
-	ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
-		   "each for %u user ports\n", kpiobufs,
-		   piobufs, dd->ipath_pbufsport, uports);
-	ret = dd->ipath_f_early_init(dd);
-	if (ret) {
-		ipath_dev_err(dd, "Early initialization failure\n");
-		goto done;
-	}
-
-	/*
-	 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
-	 * done after early_init.
-	 */
-	dd->ipath_hdrqlast =
-		dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize,
-			 dd->ipath_rcvhdrentsize);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
-			 dd->ipath_rcvhdrsize);
-
-	if (!reinit) {
-		ret = init_pioavailregs(dd);
-		init_shadow_tids(dd);
-		if (ret)
-			goto done;
-	}
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
-			 dd->ipath_pioavailregs_phys);
-
-	/*
-	 * this is to detect s/w errors, which the h/w works around by
-	 * ignoring the low 6 bits of address, if it wasn't aligned.
-	 */
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr);
-	if (val != dd->ipath_pioavailregs_phys) {
-		ipath_dev_err(dd, "Catastrophic software error, "
-			      "SendPIOAvailAddr written as %lx, "
-			      "read back as %llx\n",
-			      (unsigned long) dd->ipath_pioavailregs_phys,
-			      (unsigned long long) val);
-		ret = -EINVAL;
-		goto done;
-	}
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
-
-	/*
-	 * make sure we are not in freeze, and PIO send enabled, so
-	 * writes to pbc happen
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-			 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
-
-	/*
-	 * before error clears, since we expect serdes pll errors during
-	 * this, the first time after reset
-	 */
-	if (bringup_link(dd)) {
-		dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n");
-		ret = -ENETDOWN;
-		goto done;
-	}
-
-	/*
-	 * clear any "expected" hwerrs from reset and/or initialization
-	 * clear any that aren't enabled (at least this once), and then
-	 * set the enable mask
-	 */
-	dd->ipath_f_init_hwerrors(dd);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
-			 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
-			 dd->ipath_hwerrmask);
-
-	/* clear all */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
-	/* enable errors that are masked, at least this first time. */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-			 ~dd->ipath_maskederrs);
-	dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */
-	dd->ipath_errormask =
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
-	/* clear any interrupts up to this point (ints still not enabled) */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
-
-	dd->ipath_f_tidtemplate(dd);
-
-	/*
-	 * Set up the port 0 (kernel) rcvhdr q and egr TIDs.  If doing
-	 * re-init, the simplest way to handle this is to free
-	 * existing, and re-allocate.
-	 * Need to re-create rest of port 0 portdata as well.
-	 */
-	pd = dd->ipath_pd[0];
-	if (reinit) {
-		struct ipath_portdata *npd;
-
-		/*
-		 * Alloc and init new ipath_portdata for port0,
-		 * Then free old pd. Could lead to fragmentation, but also
-		 * makes later support for hot-swap easier.
-		 */
-		npd = create_portdata0(dd);
-		if (npd) {
-			ipath_free_pddata(dd, pd);
-			dd->ipath_pd[0] = npd;
-			pd = npd;
-		} else {
-			ipath_dev_err(dd, "Unable to allocate portdata"
-				      " for port 0, failing\n");
-			ret = -ENOMEM;
-			goto done;
-		}
-	}
-	ret = ipath_create_rcvhdrq(dd, pd);
-	if (!ret)
-		ret = create_port0_egr(dd);
-	if (ret) {
-		ipath_dev_err(dd, "failed to allocate kernel port's "
-			      "rcvhdrq and/or egr bufs\n");
-		goto done;
-	} else {
-		enable_chip(dd, reinit);
-	}
-
-	/* after enable_chip, so pioavailshadow setup */
-	ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
-
-	/*
-	 * Cancel any possible active sends from early driver load.
-	 * Follows early_init because some chips have to initialize
-	 * PIO buffers in early_init to avoid false parity errors.
-	 * After enable and ipath_chg_pioavailkernel so we can safely
-	 * enable pioavail updates and PIOENABLE; packets are now
-	 * ready to go out.
-	 */
-	ipath_cancel_sends(dd, 1);
-
-	if (!reinit) {
-		/*
-		 * Used when we close a port, for DMA already in flight
-		 * at close.
-		 */
-		dd->ipath_dummy_hdrq = dma_alloc_coherent(
-			&dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size,
-			&dd->ipath_dummy_hdrq_phys,
-			gfp_flags);
-		if (!dd->ipath_dummy_hdrq) {
-			dev_info(&dd->pcidev->dev,
-				"Couldn't allocate 0x%lx bytes for dummy hdrq\n",
-				dd->ipath_pd[0]->port_rcvhdrq_size);
-			/* fallback to just 0'ing */
-			dd->ipath_dummy_hdrq_phys = 0UL;
-		}
-	}
-
-	/*
-	 * cause retrigger of pending interrupts ignored during init,
-	 * even if we had errors
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-
-	if (!dd->ipath_stats_timer_active) {
-		/*
-		 * first init, or after an admin disable/enable
-		 * set up stats retrieval timer, even if we had errors
-		 * in last portion of setup
-		 */
-		setup_timer(&dd->ipath_stats_timer, ipath_get_faststats,
-				(unsigned long)dd);
-		/* every 5 seconds; */
-		dd->ipath_stats_timer.expires = jiffies + 5 * HZ;
-		/* takes ~16 seconds to overflow at full IB 4x bandwdith */
-		add_timer(&dd->ipath_stats_timer);
-		dd->ipath_stats_timer_active = 1;
-	}
-
-	/* Set up SendDMA if chip supports it */
-	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-		ret = setup_sdma(dd);
-
-	/* Set up HoL state */
-	setup_timer(&dd->ipath_hol_timer, ipath_hol_event, (unsigned long)dd);
-
-	dd->ipath_hol_state = IPATH_HOL_UP;
-
-done:
-	if (!ret) {
-		*dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
-		if (!dd->ipath_f_intrsetup(dd)) {
-			/* now we can enable all interrupts from the chip */
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
-					 -1LL);
-			/* force re-interrupt of any pending interrupts. */
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear,
-					 0ULL);
-			/* chip is usable; mark it as initialized */
-			*dd->ipath_statusp |= IPATH_STATUS_INITTED;
-
-			/*
-			 * setup to verify we get an interrupt, and fallback
-			 * to an alternate if necessary and possible
-			 */
-			if (!reinit) {
-				setup_timer(&dd->ipath_intrchk_timer,
-						verify_interrupt,
-						(unsigned long)dd);
-			}
-			dd->ipath_intrchk_timer.expires = jiffies + HZ/2;
-			add_timer(&dd->ipath_intrchk_timer);
-		} else
-			ipath_dev_err(dd, "No interrupts enabled, couldn't "
-				      "setup interrupt address\n");
-
-		if (dd->ipath_cfgports > ipath_stats.sps_nports)
-			/*
-			 * sps_nports is a global, so, we set it to
-			 * the highest number of ports of any of the
-			 * chips we find; we never decrement it, at
-			 * least for now.  Since this might have changed
-			 * over disable/enable or prior to reset, always
-			 * do the check and potentially adjust.
-			 */
-			ipath_stats.sps_nports = dd->ipath_cfgports;
-	} else
-		ipath_dbg("Failed (%d) to initialize chip\n", ret);
-
-	/* if ret is non-zero, we probably should do some cleanup
-	   here... */
-	return ret;
-}
-
-static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
-{
-	struct ipath_devdata *dd;
-	unsigned long flags;
-	unsigned short val;
-	int ret;
-
-	ret = ipath_parse_ushort(str, &val);
-
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	if (ret < 0)
-		goto bail;
-
-	if (val == 0) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
-		if (dd->ipath_kregbase)
-			continue;
-		if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
-			   (dd->ipath_cfgports *
-			    IPATH_MIN_USER_PORT_BUFCNT)))
-		{
-			ipath_dev_err(
-				dd,
-				"Allocating %d PIO bufs for kernel leaves "
-				"too few for %d user ports (%d each)\n",
-				val, dd->ipath_cfgports - 1,
-				IPATH_MIN_USER_PORT_BUFCNT);
-			ret = -EINVAL;
-			goto bail;
-		}
-		dd->ipath_lastport_piobuf =
-			dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
-	}
-
-	ipath_kpiobufs = val;
-	ret = 0;
-bail:
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_intr.c b/drivers/staging/rdma/ipath/ipath_intr.c
deleted file mode 100644
index 0403fa28ed8d..000000000000
--- a/drivers/staging/rdma/ipath/ipath_intr.c
+++ /dev/null
@@ -1,1271 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-
-/*
- * Called when we might have an error that is specific to a particular
- * PIO buffer, and may need to cancel that buffer, so it can be re-used.
- */
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
-{
-	u32 piobcnt;
-	unsigned long sbuf[4];
-	/*
-	 * it's possible that sendbuffererror could have bits set; might
-	 * have already done this as a result of hardware error handling
-	 */
-	piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-	/* read these before writing errorclear */
-	sbuf[0] = ipath_read_kreg64(
-		dd, dd->ipath_kregs->kr_sendbuffererror);
-	sbuf[1] = ipath_read_kreg64(
-		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
-	if (piobcnt > 128)
-		sbuf[2] = ipath_read_kreg64(
-			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
-	if (piobcnt > 192)
-		sbuf[3] = ipath_read_kreg64(
-			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
-	else
-		sbuf[3] = 0;
-
-	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
-		int i;
-		if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
-			time_after(dd->ipath_lastcancel, jiffies)) {
-			__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
-					  "SendbufErrs %lx %lx", sbuf[0],
-					  sbuf[1]);
-			if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
-				printk(" %lx %lx ", sbuf[2], sbuf[3]);
-			printk("\n");
-		}
-
-		for (i = 0; i < piobcnt; i++)
-			if (test_bit(i, sbuf))
-				ipath_disarm_piobufs(dd, i, 1);
-		/* ignore armlaunch errs for a bit */
-		dd->ipath_lastcancel = jiffies+3;
-	}
-}
-
-
-/* These are all rcv-related errors which we want to count for stats */
-#define E_SUM_PKTERRS \
-	(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
-	 INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
-	 INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
-	 INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
-	 INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
-	 INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
-
-/* These are all send-related errors which we want to count for stats */
-#define E_SUM_ERRS \
-	(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
-	 INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-	 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
-	 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
-	 INFINIPATH_E_INVALIDADDR)
-
-/*
- * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore
- * errors not related to freeze and cancelling buffers.  Can't ignore
- * armlaunch because could get more while still cleaning up, and need
- * to cancel those as they happen.
- */
-#define E_SPKT_ERRS_IGNORE \
-	 (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-	 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
-	 INFINIPATH_E_SPKTLEN)
-
-/*
- * these are errors that can occur when the link changes state while
- * a packet is being sent or received.  This doesn't cover things
- * like EBP or VCRC that can be the result of a sending having the
- * link change state, so we receive a "known bad" packet.
- */
-#define E_SUM_LINK_PKTERRS \
-	(INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
-	 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
-	 INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
-	 INFINIPATH_E_RUNEXPCHAR)
-
-static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
-{
-	u64 ignore_this_time = 0;
-
-	ipath_disarm_senderrbufs(dd);
-	if ((errs & E_SUM_LINK_PKTERRS) &&
-	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-		/*
-		 * This can happen when SMA is trying to bring the link
-		 * up, but the IB link changes state at the "wrong" time.
-		 * The IB logic then complains that the packet isn't
-		 * valid.  We don't want to confuse people, so we just
-		 * don't print them, except at debug
-		 */
-		ipath_dbg("Ignoring packet errors %llx, because link not "
-			  "ACTIVE\n", (unsigned long long) errs);
-		ignore_this_time = errs & E_SUM_LINK_PKTERRS;
-	}
-
-	return ignore_this_time;
-}
-
-/* generic hw error messages... */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
-	{ \
-		.mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a <<    \
-			  INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ),   \
-		.msg = "TXE " #a " Memory Parity"	     \
-	}
-#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
-	{ \
-		.mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a <<    \
-			  INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ),   \
-		.msg = "RXE " #a " Memory Parity"	     \
-	}
-
-static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
-	INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
-	INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
-
-	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
-	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
-	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
-
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
-	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
-};
-
-/**
- * ipath_format_hwmsg - format a single hwerror message
- * @msg message buffer
- * @msgl length of message buffer
- * @hwmsg message to add to message buffer
- */
-static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
-{
-	strlcat(msg, "[", msgl);
-	strlcat(msg, hwmsg, msgl);
-	strlcat(msg, "]", msgl);
-}
-
-/**
- * ipath_format_hwerrors - format hardware error messages for display
- * @hwerrs hardware errors bit vector
- * @hwerrmsgs hardware error descriptions
- * @nhwerrmsgs number of hwerrmsgs
- * @msg message buffer
- * @msgl message buffer length
- */
-void ipath_format_hwerrors(u64 hwerrs,
-			   const struct ipath_hwerror_msgs *hwerrmsgs,
-			   size_t nhwerrmsgs,
-			   char *msg, size_t msgl)
-{
-	int i;
-	const int glen =
-	    ARRAY_SIZE(ipath_generic_hwerror_msgs);
-
-	for (i=0; i<glen; i++) {
-		if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
-			ipath_format_hwmsg(msg, msgl,
-					   ipath_generic_hwerror_msgs[i].msg);
-		}
-	}
-
-	for (i=0; i<nhwerrmsgs; i++) {
-		if (hwerrs & hwerrmsgs[i].mask) {
-			ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
-		}
-	}
-}
-
-/* return the strings for the most common link states */
-static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
-{
-	char *ret;
-	u32 state;
-
-	state = ipath_ib_state(dd, ibcs);
-	if (state == dd->ib_init)
-		ret = "Init";
-	else if (state == dd->ib_arm)
-		ret = "Arm";
-	else if (state == dd->ib_active)
-		ret = "Active";
-	else
-		ret = "Down";
-	return ret;
-}
-
-void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
-{
-	struct ib_event event;
-
-	event.device = &dd->verbs_dev->ibdev;
-	event.element.port_num = 1;
-	event.event = ev;
-	ib_dispatch_event(&event);
-}
-
-static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
-				     ipath_err_t errs)
-{
-	u32 ltstate, lstate, ibstate, lastlstate;
-	u32 init = dd->ib_init;
-	u32 arm = dd->ib_arm;
-	u32 active = dd->ib_active;
-	const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-
-	lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
-	ibstate = ipath_ib_state(dd, ibcs);
-	/* linkstate at last interrupt */
-	lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
-	ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingtate */
-
-	/*
-	 * Since going into a recovery state causes the link state to go
-	 * down and since recovery is transitory, it is better if we "miss"
-	 * ever seeing the link training state go into recovery (i.e.,
-	 * ignore this transition for link state special handling purposes)
-	 * without even updating ipath_lastibcstat.
-	 */
-	if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
-	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
-	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
-		goto done;
-
-	/*
-	 * if linkstate transitions into INIT from any of the various down
-	 * states, or if it transitions from any of the up (INIT or better)
-	 * states into any of the down states (except link recovery), then
-	 * call the chip-specific code to take appropriate actions.
-	 */
-	if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
-		lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
-		/* transitioned to UP */
-		if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
-			/* link came up, so we must no longer be disabled */
-			dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
-			ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
-			goto skip_ibchange; /* chip-code handled */
-		}
-	} else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
-		(dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
-		ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
-		ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
-		int handled;
-		handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
-		dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
-		if (handled) {
-			ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
-			goto skip_ibchange; /* chip-code handled */
-		}
-	}
-
-	/*
-	 * Significant enough to always print and get into logs, if it was
-	 * unexpected.  If it was a requested state change, we'll have
-	 * already cleared the flags, so we won't print this warning
-	 */
-	if ((ibstate != arm && ibstate != active) &&
-	    (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
-		dev_info(&dd->pcidev->dev, "Link state changed from %s "
-			 "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
-			 "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
-	}
-
-	if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
-	    ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-		u32 lastlts;
-		lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
-		/*
-		 * Ignore cycling back and forth from Polling.Active to
-		 * Polling.Quiet while waiting for the other end of the link
-		 * to come up, except to try and decide if we are connected
-		 * to a live IB device or not.  We will cycle back and
-		 * forth between them if no cable is plugged in, the other
-		 * device is powered off or disabled, etc.
-		 */
-		if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
-		    lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-			if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
-			     (++dd->ipath_ibpollcnt == 40)) {
-				dd->ipath_flags |= IPATH_NOCABLE;
-				*dd->ipath_statusp |=
-					IPATH_STATUS_IB_NOCABLE;
-				ipath_cdbg(LINKVERB, "Set NOCABLE\n");
-			}
-			ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
-				ipath_ibcstatus_str[ltstate], ibstate);
-			goto skip_ibchange;
-		}
-	}
-
-	dd->ipath_ibpollcnt = 0; /* not poll*, now */
-	ipath_stats.sps_iblink++;
-
-	if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
-		u64 linkrecov;
-		linkrecov = ipath_snap_cntr(dd,
-			dd->ipath_cregs->cr_iblinkerrrecovcnt);
-		if (linkrecov != dd->ipath_lastlinkrecov) {
-			ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
-				(unsigned long long) ibcs,
-				ib_linkstate(dd, ibcs),
-				ipath_ibcstatus_str[ltstate],
-				(unsigned long long) linkrecov);
-			/* and no more until active again */
-			dd->ipath_lastlinkrecov = 0;
-			ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
-			goto skip_ibchange;
-		}
-	}
-
-	if (ibstate == init || ibstate == arm || ibstate == active) {
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
-		if (ibstate == init || ibstate == arm) {
-			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-			if (dd->ipath_flags & IPATH_LINKACTIVE)
-				signal_ib_event(dd, IB_EVENT_PORT_ERR);
-		}
-		if (ibstate == arm) {
-			dd->ipath_flags |= IPATH_LINKARMED;
-			dd->ipath_flags &= ~(IPATH_LINKUNK |
-				IPATH_LINKINIT | IPATH_LINKDOWN |
-				IPATH_LINKACTIVE | IPATH_NOCABLE);
-			ipath_hol_down(dd);
-		} else  if (ibstate == init) {
-			/*
-			 * set INIT and DOWN.  Down is checked by
-			 * most of the other code, but INIT is
-			 * useful to know in a few places.
-			 */
-			dd->ipath_flags |= IPATH_LINKINIT |
-				IPATH_LINKDOWN;
-			dd->ipath_flags &= ~(IPATH_LINKUNK |
-				IPATH_LINKARMED | IPATH_LINKACTIVE |
-				IPATH_NOCABLE);
-			ipath_hol_down(dd);
-		} else {  /* active */
-			dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
-				dd->ipath_cregs->cr_iblinkerrrecovcnt);
-			*dd->ipath_statusp |=
-				IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
-			dd->ipath_flags |= IPATH_LINKACTIVE;
-			dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-				| IPATH_LINKDOWN | IPATH_LINKARMED |
-				IPATH_NOCABLE);
-			if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-				ipath_restart_sdma(dd);
-			signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
-			/* LED active not handled in chip _f_updown */
-			dd->ipath_f_setextled(dd, lstate, ltstate);
-			ipath_hol_up(dd);
-		}
-
-		/*
-		 * print after we've already done the work, so as not to
-		 * delay the state changes and notifications, for debugging
-		 */
-		if (lstate == lastlstate)
-			ipath_cdbg(LINKVERB, "Unchanged from last: %s "
-				"(%x)\n", ib_linkstate(dd, ibcs), ibstate);
-		else
-			ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
-				  dd->ipath_unit, ib_linkstate(dd, ibcs),
-				  ipath_ibcstatus_str[ltstate],  ibstate);
-	} else { /* down */
-		if (dd->ipath_flags & IPATH_LINKACTIVE)
-			signal_ib_event(dd, IB_EVENT_PORT_ERR);
-		dd->ipath_flags |= IPATH_LINKDOWN;
-		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-				     | IPATH_LINKACTIVE |
-				     IPATH_LINKARMED);
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-		dd->ipath_lli_counter = 0;
-
-		if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
-			ipath_cdbg(VERBOSE, "Unit %u link state down "
-				   "(state 0x%x), from %s\n",
-				   dd->ipath_unit, lstate,
-				   ib_linkstate(dd, dd->ipath_lastibcstat));
-		else
-			ipath_cdbg(LINKVERB, "Unit %u link state changed "
-				   "to %s (0x%x) from down (%x)\n",
-				   dd->ipath_unit,
-				   ipath_ibcstatus_str[ltstate],
-				   ibstate, lastlstate);
-	}
-
-skip_ibchange:
-	dd->ipath_lastibcstat = ibcs;
-done:
-	return;
-}
-
-static void handle_supp_msgs(struct ipath_devdata *dd,
-			     unsigned supp_msgs, char *msg, u32 msgsz)
-{
-	/*
-	 * Print the message unless it's ibc status change only, which
-	 * happens so often we never want to count it.
-	 */
-	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
-		int iserr;
-		ipath_err_t mask;
-		iserr = ipath_decode_err(dd, msg, msgsz,
-					 dd->ipath_lasterror &
-					 ~INFINIPATH_E_IBSTATUSCHANGED);
-
-		mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-			INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;
-
-		/* if we're in debug, then don't mask SDMADISABLED msgs */
-		if (ipath_debug & __IPATH_DBG)
-			mask &= ~INFINIPATH_E_SDMADISABLED;
-
-		if (dd->ipath_lasterror & ~mask)
-			ipath_dev_err(dd, "Suppressed %u messages for "
-				      "fast-repeating errors (%s) (%llx)\n",
-				      supp_msgs, msg,
-				      (unsigned long long)
-				      dd->ipath_lasterror);
-		else {
-			/*
-			 * rcvegrfull and rcvhdrqfull are "normal", for some
-			 * types of processes (mostly benchmarks) that send
-			 * huge numbers of messages, while not processing
-			 * them. So only complain about these at debug
-			 * level.
-			 */
-			if (iserr)
-				ipath_dbg("Suppressed %u messages for %s\n",
-					  supp_msgs, msg);
-			else
-				ipath_cdbg(ERRPKT,
-					"Suppressed %u messages for %s\n",
-					  supp_msgs, msg);
-		}
-	}
-}
-
-static unsigned handle_frequent_errors(struct ipath_devdata *dd,
-				       ipath_err_t errs, char *msg,
-				       u32 msgsz, int *noprint)
-{
-	unsigned long nc;
-	static unsigned long nextmsg_time;
-	static unsigned nmsgs, supp_msgs;
-
-	/*
-	 * Throttle back "fast" messages to no more than 10 per 5 seconds.
-	 * This isn't perfect, but it's a reasonable heuristic. If we get
-	 * more than 10, give a 6x longer delay.
-	 */
-	nc = jiffies;
-	if (nmsgs > 10) {
-		if (time_before(nc, nextmsg_time)) {
-			*noprint = 1;
-			if (!supp_msgs++)
-				nextmsg_time = nc + HZ * 3;
-		} else if (supp_msgs) {
-			handle_supp_msgs(dd, supp_msgs, msg, msgsz);
-			supp_msgs = 0;
-			nmsgs = 0;
-		}
-	} else if (!nmsgs++ || time_after(nc, nextmsg_time)) {
-		nextmsg_time = nc + HZ / 2;
-	}
-
-	return supp_msgs;
-}
-
-static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
-{
-	unsigned long flags;
-	int expected;
-
-	if (ipath_debug & __IPATH_DBG) {
-		char msg[128];
-		ipath_decode_err(dd, msg, sizeof msg, errs &
-			INFINIPATH_E_SDMAERRS);
-		ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
-	}
-	if (ipath_debug & __IPATH_VERBDBG) {
-		unsigned long tl, hd, status, lengen;
-		tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
-		hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
-		status = ipath_read_kreg64(dd
-			, dd->ipath_kregs->kr_senddmastatus);
-		lengen = ipath_read_kreg64(dd,
-			dd->ipath_kregs->kr_senddmalengen);
-		ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
-			"lengen 0x%lx\n", tl, hd, status, lengen);
-	}
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-	__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-	expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-	if (!expected)
-		ipath_cancel_sends(dd, 1);
-}
-
-static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
-{
-	unsigned long flags;
-	int expected;
-
-	if ((istat & INFINIPATH_I_SDMAINT) &&
-	    !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-		ipath_sdma_intr(dd);
-
-	if (istat & INFINIPATH_I_SDMADISABLED) {
-		expected = test_bit(IPATH_SDMA_ABORTING,
-			&dd->ipath_sdma_status);
-		ipath_dbg("%s SDmaDisabled intr\n",
-			expected ? "expected" : "unexpected");
-		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-		__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-		if (!expected)
-			ipath_cancel_sends(dd, 1);
-		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-	}
-}
-
-static int handle_hdrq_full(struct ipath_devdata *dd)
-{
-	int chkerrpkts = 0;
-	u32 hd, tl;
-	u32 i;
-
-	ipath_stats.sps_hdrqfull++;
-	for (i = 0; i < dd->ipath_cfgports; i++) {
-		struct ipath_portdata *pd = dd->ipath_pd[i];
-
-		if (i == 0) {
-			/*
-			 * For kernel receive queues, we just want to know
-			 * if there are packets in the queue that we can
-			 * process.
-			 */
-			if (pd->port_head != ipath_get_hdrqtail(pd))
-				chkerrpkts |= 1 << i;
-			continue;
-		}
-
-		/* Skip if user context is not open */
-		if (!pd || !pd->port_cnt)
-			continue;
-
-		/* Don't report the same point multiple times. */
-		if (dd->ipath_flags & IPATH_NODMA_RTAIL)
-			tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
-		else
-			tl = ipath_get_rcvhdrtail(pd);
-		if (tl == pd->port_lastrcvhdrqtail)
-			continue;
-
-		hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
-		if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
-			pd->port_lastrcvhdrqtail = tl;
-			pd->port_hdrqfull++;
-			/* flush hdrqfull so that poll() sees it */
-			wmb();
-			wake_up_interruptible(&pd->port_wait);
-		}
-	}
-
-	return chkerrpkts;
-}
-
-static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
-{
-	char msg[128];
-	u64 ignore_this_time = 0;
-	u64 iserr = 0;
-	int chkerrpkts = 0, noprint = 0;
-	unsigned supp_msgs;
-	int log_idx;
-
-	/*
-	 * don't report errors that are masked, either at init
-	 * (not set in ipath_errormask), or temporarily (set in
-	 * ipath_maskederrs)
-	 */
-	errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;
-
-	supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
-		&noprint);
-
-	/* do these first, they are most important */
-	if (errs & INFINIPATH_E_HARDWARE) {
-		/* reuse same msg buf */
-		dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
-	} else {
-		u64 mask;
-		for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
-			mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
-			if (errs & mask)
-				ipath_inc_eeprom_err(dd, log_idx, 1);
-		}
-	}
-
-	if (errs & INFINIPATH_E_SDMAERRS)
-		handle_sdma_errors(dd, errs);
-
-	if (!noprint && (errs & ~dd->ipath_e_bitsextant))
-		ipath_dev_err(dd, "error interrupt with unknown errors "
-			      "%llx set\n", (unsigned long long)
-			      (errs & ~dd->ipath_e_bitsextant));
-
-	if (errs & E_SUM_ERRS)
-		ignore_this_time = handle_e_sum_errs(dd, errs);
-	else if ((errs & E_SUM_LINK_PKTERRS) &&
-	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-		/*
-		 * This can happen when SMA is trying to bring the link
-		 * up, but the IB link changes state at the "wrong" time.
-		 * The IB logic then complains that the packet isn't
-		 * valid.  We don't want to confuse people, so we just
-		 * don't print them, except at debug
-		 */
-		ipath_dbg("Ignoring packet errors %llx, because link not "
-			  "ACTIVE\n", (unsigned long long) errs);
-		ignore_this_time = errs & E_SUM_LINK_PKTERRS;
-	}
-
-	if (supp_msgs == 250000) {
-		int s_iserr;
-		/*
-		 * It's not entirely reasonable assuming that the errors set
-		 * in the last clear period are all responsible for the
-		 * problem, but the alternative is to assume it's the only
-		 * ones on this particular interrupt, which also isn't great
-		 */
-		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
-
-		dd->ipath_errormask &= ~dd->ipath_maskederrs;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-				 dd->ipath_errormask);
-		s_iserr = ipath_decode_err(dd, msg, sizeof msg,
-					   dd->ipath_maskederrs);
-
-		if (dd->ipath_maskederrs &
-		    ~(INFINIPATH_E_RRCVEGRFULL |
-		      INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
-			ipath_dev_err(dd, "Temporarily disabling "
-			    "error(s) %llx reporting; too frequent (%s)\n",
-				(unsigned long long) dd->ipath_maskederrs,
-				msg);
-		else {
-			/*
-			 * rcvegrfull and rcvhdrqfull are "normal",
-			 * for some types of processes (mostly benchmarks)
-			 * that send huge numbers of messages, while not
-			 * processing them.  So only complain about
-			 * these at debug level.
-			 */
-			if (s_iserr)
-				ipath_dbg("Temporarily disabling reporting "
-				    "too frequent queue full errors (%s)\n",
-				    msg);
-			else
-				ipath_cdbg(ERRPKT,
-				    "Temporarily disabling reporting too"
-				    " frequent packet errors (%s)\n",
-				    msg);
-		}
-
-		/*
-		 * Re-enable the masked errors after around 3 minutes.  in
-		 * ipath_get_faststats().  If we have a series of fast
-		 * repeating but different errors, the interval will keep
-		 * stretching out, but that's OK, as that's pretty
-		 * catastrophic.
-		 */
-		dd->ipath_unmasktime = jiffies + HZ * 180;
-	}
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
-	if (ignore_this_time)
-		errs &= ~ignore_this_time;
-	if (errs & ~dd->ipath_lasterror) {
-		errs &= ~dd->ipath_lasterror;
-		/* never suppress duplicate hwerrors or ibstatuschange */
-		dd->ipath_lasterror |= errs &
-			~(INFINIPATH_E_HARDWARE |
-			  INFINIPATH_E_IBSTATUSCHANGED);
-	}
-
-	if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
-		dd->ipath_spectriggerhit++;
-		ipath_dbg("%lu special trigger hits\n",
-			dd->ipath_spectriggerhit);
-	}
-
-	/* likely due to cancel; so suppress message unless verbose */
-	if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
-		time_after(dd->ipath_lastcancel, jiffies)) {
-		/* armlaunch takes precedence; it often causes both. */
-		ipath_cdbg(VERBOSE,
-			"Suppressed %s error (%llx) after sendbuf cancel\n",
-			(errs &  INFINIPATH_E_SPIOARMLAUNCH) ?
-			"armlaunch" : "sendpktlen", (unsigned long long)errs);
-		errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
-	}
-
-	if (!errs)
-		return 0;
-
-	if (!noprint) {
-		ipath_err_t mask;
-		/*
-		 * The ones we mask off are handled specially below
-		 * or above.  Also mask SDMADISABLED by default as it
-		 * is too chatty.
-		 */
-		mask = INFINIPATH_E_IBSTATUSCHANGED |
-			INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-			INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;
-
-		/* if we're in debug, then don't mask SDMADISABLED msgs */
-		if (ipath_debug & __IPATH_DBG)
-			mask &= ~INFINIPATH_E_SDMADISABLED;
-
-		ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
-	} else
-		/* so we don't need if (!noprint) at strlcat's below */
-		*msg = 0;
-
-	if (errs & E_SUM_PKTERRS) {
-		ipath_stats.sps_pkterrs++;
-		chkerrpkts = 1;
-	}
-	if (errs & E_SUM_ERRS)
-		ipath_stats.sps_errs++;
-
-	if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
-		ipath_stats.sps_crcerrs++;
-		chkerrpkts = 1;
-	}
-	iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
-
-
-	/*
-	 * We don't want to print these two as they happen, or we can make
-	 * the situation even worse, because it takes so long to print
-	 * messages to serial consoles.  Kernel ports get printed from
-	 * fast_stats, no more than every 5 seconds, user ports get printed
-	 * on close
-	 */
-	if (errs & INFINIPATH_E_RRCVHDRFULL)
-		chkerrpkts |= handle_hdrq_full(dd);
-	if (errs & INFINIPATH_E_RRCVEGRFULL) {
-		struct ipath_portdata *pd = dd->ipath_pd[0];
-
-		/*
-		 * since this is of less importance and not likely to
-		 * happen without also getting hdrfull, only count
-		 * occurrences; don't check each port (or even the kernel
-		 * vs user)
-		 */
-		ipath_stats.sps_etidfull++;
-		if (pd->port_head != ipath_get_hdrqtail(pd))
-			chkerrpkts |= 1;
-	}
-
-	/*
-	 * do this before IBSTATUSCHANGED, in case both bits set in a single
-	 * interrupt; we want the STATUSCHANGE to "win", so we do our
-	 * internal copy of state machine correctly
-	 */
-	if (errs & INFINIPATH_E_RIBLOSTLINK) {
-		/*
-		 * force through block below
-		 */
-		errs |= INFINIPATH_E_IBSTATUSCHANGED;
-		ipath_stats.sps_iblink++;
-		dd->ipath_flags |= IPATH_LINKDOWN;
-		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
-				     | IPATH_LINKARMED | IPATH_LINKACTIVE);
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-
-		ipath_dbg("Lost link, link now down (%s)\n",
-			ipath_ibcstatus_str[ipath_read_kreg64(dd,
-			dd->ipath_kregs->kr_ibcstatus) & 0xf]);
-	}
-	if (errs & INFINIPATH_E_IBSTATUSCHANGED)
-		handle_e_ibstatuschanged(dd, errs);
-
-	if (errs & INFINIPATH_E_RESET) {
-		if (!noprint)
-			ipath_dev_err(dd, "Got reset, requires re-init "
-				      "(unload and reload driver)\n");
-		dd->ipath_flags &= ~IPATH_INITTED;	/* needs re-init */
-		/* mark as having had error */
-		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
-	}
-
-	if (!noprint && *msg) {
-		if (iserr)
-			ipath_dev_err(dd, "%s error\n", msg);
-	}
-	if (dd->ipath_state_wanted & dd->ipath_flags) {
-		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
-			   "waking\n", dd->ipath_state_wanted,
-			   dd->ipath_flags);
-		wake_up_interruptible(&ipath_state_wait);
-	}
-
-	return chkerrpkts;
-}
-
-/*
- * try to cleanup as much as possible for anything that might have gone
- * wrong while in freeze mode, such as pio buffers being written by user
- * processes (causing armlaunch), send errors due to going into freeze mode,
- * etc., and try to avoid causing extra interrupts while doing so.
- * Forcibly update the in-memory pioavail register copies after cleanup
- * because the chip won't do it while in freeze mode (the register values
- * themselves are kept correct).
- * Make sure that we don't lose any important interrupts by using the chip
- * feature that says that writing 0 to a bit in *clear that is set in
- * *status will cause an interrupt to be generated again (if allowed by
- * the *mask value).
- */
-void ipath_clear_freeze(struct ipath_devdata *dd)
-{
-	/* disable error interrupts, to avoid confusion */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
-
-	/* also disable interrupts; errormask is sometimes overwriten */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-
-	ipath_cancel_sends(dd, 1);
-
-	/* clear the freeze, and be sure chip saw it */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-			 dd->ipath_control);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
-	/* force in-memory update now we are out of freeze */
-	ipath_force_pio_avail_update(dd);
-
-	/*
-	 * force new interrupt if any hwerr, error or interrupt bits are
-	 * still set, and clear "safe" send packet errors related to freeze
-	 * and cancelling sends.  Re-enable error interrupts before possible
-	 * force of re-interrupt on pending interrupts.
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
-		E_SPKT_ERRS_IGNORE);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-		dd->ipath_errormask);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-}
-
-
-/* this is separate to allow for better optimization of ipath_intr() */
-
-static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
-{
-	/*
-	 * sometimes happen during driver init and unload, don't want
-	 * to process any interrupts at that point
-	 */
-
-	/* this is just a bandaid, not a fix, if something goes badly
-	 * wrong */
-	if (++*unexpectp > 100) {
-		if (++*unexpectp > 105) {
-			/*
-			 * ok, we must be taking somebody else's interrupts,
-			 * due to a messed up mptable and/or PIRQ table, so
-			 * unregister the interrupt.  We've seen this during
-			 * linuxbios development work, and it may happen in
-			 * the future again.
-			 */
-			if (dd->pcidev && dd->ipath_irq) {
-				ipath_dev_err(dd, "Now %u unexpected "
-					      "interrupts, unregistering "
-					      "interrupt handler\n",
-					      *unexpectp);
-				ipath_dbg("free_irq of irq %d\n",
-					  dd->ipath_irq);
-				dd->ipath_f_free_irq(dd);
-			}
-		}
-		if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
-			ipath_dev_err(dd, "%u unexpected interrupts, "
-				      "disabling interrupts completely\n",
-				      *unexpectp);
-			/*
-			 * disable all interrupts, something is very wrong
-			 */
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
-					 0ULL);
-		}
-	} else if (*unexpectp > 1)
-		ipath_dbg("Interrupt when not ready, should not happen, "
-			  "ignoring\n");
-}
-
-static noinline void ipath_bad_regread(struct ipath_devdata *dd)
-{
-	static int allbits;
-
-	/* separate routine, for better optimization of ipath_intr() */
-
-	/*
-	 * We print the message and disable interrupts, in hope of
-	 * having a better chance of debugging the problem.
-	 */
-	ipath_dev_err(dd,
-		      "Read of interrupt status failed (all bits set)\n");
-	if (allbits++) {
-		/* disable all interrupts, something is very wrong */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
-		if (allbits == 2) {
-			ipath_dev_err(dd, "Still bad interrupt status, "
-				      "unregistering interrupt\n");
-			dd->ipath_f_free_irq(dd);
-		} else if (allbits > 2) {
-			if ((allbits % 10000) == 0)
-				printk(".");
-		} else
-			ipath_dev_err(dd, "Disabling interrupts, "
-				      "multiple errors\n");
-	}
-}
-
-static void handle_layer_pioavail(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-	int ret;
-
-	ret = ipath_ib_piobufavail(dd->verbs_dev);
-	if (ret > 0)
-		goto set;
-
-	return;
-set:
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-}
-
-/*
- * Handle receive interrupts for user ports; this means a user
- * process was waiting for a packet to arrive, and didn't want
- * to poll
- */
-static void handle_urcv(struct ipath_devdata *dd, u64 istat)
-{
-	u64 portr;
-	int i;
-	int rcvdint = 0;
-
-	/*
-	 * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
-	 * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
-	 * would both like timely updates of the bits so that
-	 * we don't pass them by unnecessarily.  the rmb()
-	 * here ensures that we see them promptly -- the
-	 * corresponding wmb()'s are in ipath_poll_urgent()
-	 * and ipath_poll_next()...
-	 */
-	rmb();
-	portr = ((istat >> dd->ipath_i_rcvavail_shift) &
-		 dd->ipath_i_rcvavail_mask) |
-		((istat >> dd->ipath_i_rcvurg_shift) &
-		 dd->ipath_i_rcvurg_mask);
-	for (i = 1; i < dd->ipath_cfgports; i++) {
-		struct ipath_portdata *pd = dd->ipath_pd[i];
-
-		if (portr & (1 << i) && pd && pd->port_cnt) {
-			if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
-					       &pd->port_flag)) {
-				clear_bit(i + dd->ipath_r_intravail_shift,
-					  &dd->ipath_rcvctrl);
-				wake_up_interruptible(&pd->port_wait);
-				rcvdint = 1;
-			} else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
-						      &pd->port_flag)) {
-				pd->port_urgent++;
-				wake_up_interruptible(&pd->port_wait);
-			}
-		}
-	}
-	if (rcvdint) {
-		/* only want to take one interrupt, so turn off the rcv
-		 * interrupt for all the ports that we set the rcv_waiting
-		 * (but never for kernel port)
-		 */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-				 dd->ipath_rcvctrl);
-	}
-}
-
-irqreturn_t ipath_intr(int irq, void *data)
-{
-	struct ipath_devdata *dd = data;
-	u64 istat, chk0rcv = 0;
-	ipath_err_t estat = 0;
-	irqreturn_t ret;
-	static unsigned unexpected = 0;
-	u64 kportrbits;
-
-	ipath_stats.sps_ints++;
-
-	if (dd->ipath_int_counter != (u32) -1)
-		dd->ipath_int_counter++;
-
-	if (!(dd->ipath_flags & IPATH_PRESENT)) {
-		/*
-		 * This return value is not great, but we do not want the
-		 * interrupt core code to remove our interrupt handler
-		 * because we don't appear to be handling an interrupt
-		 * during a chip reset.
-		 */
-		return IRQ_HANDLED;
-	}
-
-	/*
-	 * this needs to be flags&initted, not statusp, so we keep
-	 * taking interrupts even after link goes down, etc.
-	 * Also, we *must* clear the interrupt at some point, or we won't
-	 * take it again, which can be real bad for errors, etc...
-	 */
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		ipath_bad_intr(dd, &unexpected);
-		ret = IRQ_NONE;
-		goto bail;
-	}
-
-	istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);
-
-	if (unlikely(!istat)) {
-		ipath_stats.sps_nullintr++;
-		ret = IRQ_NONE; /* not our interrupt, or already handled */
-		goto bail;
-	}
-	if (unlikely(istat == -1)) {
-		ipath_bad_regread(dd);
-		/* don't know if it was our interrupt or not */
-		ret = IRQ_NONE;
-		goto bail;
-	}
-
-	if (unexpected)
-		unexpected = 0;
-
-	if (unlikely(istat & ~dd->ipath_i_bitsextant))
-		ipath_dev_err(dd,
-			      "interrupt with unknown interrupts %Lx set\n",
-			      (unsigned long long)
-			      istat & ~dd->ipath_i_bitsextant);
-	else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
-		ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n",
-			(unsigned long long) istat);
-
-	if (istat & INFINIPATH_I_ERROR) {
-		ipath_stats.sps_errints++;
-		estat = ipath_read_kreg64(dd,
-					  dd->ipath_kregs->kr_errorstatus);
-		if (!estat)
-			dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
-				 "but no error bits set!\n",
-				 (unsigned long long) istat);
-		else if (estat == -1LL)
-			/*
-			 * should we try clearing all, or hope next read
-			 * works?
-			 */
-			ipath_dev_err(dd, "Read of error status failed "
-				      "(all bits set); ignoring\n");
-		else
-			chk0rcv |= handle_errors(dd, estat);
-	}
-
-	if (istat & INFINIPATH_I_GPIO) {
-		/*
-		 * GPIO interrupts fall in two broad classes:
-		 * GPIO_2 indicates (on some HT4xx boards) that a packet
-		 *        has arrived for Port 0. Checking for this
-		 *        is controlled by flag IPATH_GPIO_INTR.
-		 * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
-		 *        errors that we need to count. Checking for this
-		 *        is controlled by flag IPATH_GPIO_ERRINTRS.
-		 */
-		u32 gpiostatus;
-		u32 to_clear = 0;
-
-		gpiostatus = ipath_read_kreg32(
-			dd, dd->ipath_kregs->kr_gpio_status);
-		/* First the error-counter case. */
-		if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
-		    (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
-			/* want to clear the bits we see asserted. */
-			to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
-
-			/*
-			 * Count appropriately, clear bits out of our copy,
-			 * as they have been "handled".
-			 */
-			if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
-				ipath_dbg("FlowCtl on UnsupVL\n");
-				dd->ipath_rxfc_unsupvl_errs++;
-			}
-			if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
-				ipath_dbg("Overrun Threshold exceeded\n");
-				dd->ipath_overrun_thresh_errs++;
-			}
-			if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
-				ipath_dbg("Local Link Integrity error\n");
-				dd->ipath_lli_errs++;
-			}
-			gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
-		}
-		/* Now the Port0 Receive case */
-		if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
-		    (dd->ipath_flags & IPATH_GPIO_INTR)) {
-			/*
-			 * GPIO status bit 2 is set, and we expected it.
-			 * clear it and indicate in p0bits.
-			 * This probably only happens if a Port0 pkt
-			 * arrives at _just_ the wrong time, and we
-			 * handle that by seting chk0rcv;
-			 */
-			to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
-			gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
-			chk0rcv = 1;
-		}
-		if (gpiostatus) {
-			/*
-			 * Some unexpected bits remain. If they could have
-			 * caused the interrupt, complain and clear.
-			 * To avoid repetition of this condition, also clear
-			 * the mask. It is almost certainly due to error.
-			 */
-			const u32 mask = (u32) dd->ipath_gpio_mask;
-
-			if (mask & gpiostatus) {
-				ipath_dbg("Unexpected GPIO IRQ bits %x\n",
-				  gpiostatus & mask);
-				to_clear |= (gpiostatus & mask);
-				dd->ipath_gpio_mask &= ~(gpiostatus & mask);
-				ipath_write_kreg(dd,
-					dd->ipath_kregs->kr_gpio_mask,
-					dd->ipath_gpio_mask);
-			}
-		}
-		if (to_clear) {
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
-					(u64) to_clear);
-		}
-	}
-
-	/*
-	 * Clear the interrupt bits we found set, unless they are receive
-	 * related, in which case we already cleared them above, and don't
-	 * want to clear them again, because we might lose an interrupt.
-	 * Clear it early, so we "know" know the chip will have seen this by
-	 * the time we process the queue, and will re-interrupt if necessary.
-	 * The processor itself won't take the interrupt again until we return.
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
-
-	/*
-	 * Handle kernel receive queues before checking for pio buffers
-	 * available since receives can overflow; piobuf waiters can afford
-	 * a few extra cycles, since they were waiting anyway, and user's
-	 * waiting for receive are at the bottom.
-	 */
-	kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
-		(1ULL << dd->ipath_i_rcvurg_shift);
-	if (chk0rcv || (istat & kportrbits)) {
-		istat &= ~kportrbits;
-		ipath_kreceive(dd->ipath_pd[0]);
-	}
-
-	if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
-		     (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
-		handle_urcv(dd, istat);
-
-	if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
-		handle_sdma_intr(dd, istat);
-
-	if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-				 dd->ipath_sendctrl);
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-		/* always process; sdma verbs uses PIO for acks and VL15  */
-		handle_layer_pioavail(dd);
-	}
-
-	ret = IRQ_HANDLED;
-
-bail:
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_kernel.h b/drivers/staging/rdma/ipath/ipath_kernel.h
deleted file mode 100644
index 66c934a5f839..000000000000
--- a/drivers/staging/rdma/ipath/ipath_kernel.h
+++ /dev/null
@@ -1,1374 +0,0 @@
-#ifndef _IPATH_KERNEL_H
-#define _IPATH_KERNEL_H
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This header file is the base header file for infinipath kernel code
- * ipath_user.h serves a similar purpose for user code.
- */
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/scatterlist.h>
-#include <linux/sched.h>
-#include <asm/io.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_common.h"
-#include "ipath_debug.h"
-#include "ipath_registers.h"
-
-/* only s/w major version of InfiniPath we can handle */
-#define IPATH_CHIP_VERS_MAJ 2U
-
-/* don't care about this except printing */
-#define IPATH_CHIP_VERS_MIN 0U
-
-/* temporary, maybe always */
-extern struct infinipath_stats ipath_stats;
-
-#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
-/*
- * First-cut critierion for "device is active" is
- * two thousand dwords combined Tx, Rx traffic per
- * 5-second interval. SMA packets are 64 dwords,
- * and occur "a few per second", presumably each way.
- */
-#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
-/*
- * Struct used to indicate which errors are logged in each of the
- * error-counters that are logged to EEPROM. A counter is incremented
- * _once_ (saturating at 255) for each event with any bits set in
- * the error or hwerror register masks below.
- */
-#define IPATH_EEP_LOG_CNT (4)
-struct ipath_eep_log_mask {
-	u64 errs_to_log;
-	u64 hwerrs_to_log;
-};
-
-struct ipath_portdata {
-	void **port_rcvegrbuf;
-	dma_addr_t *port_rcvegrbuf_phys;
-	/* rcvhdrq base, needs mmap before useful */
-	void *port_rcvhdrq;
-	/* kernel virtual address where hdrqtail is updated */
-	void *port_rcvhdrtail_kvaddr;
-	/*
-	 * temp buffer for expected send setup, allocated at open, instead
-	 * of each setup call
-	 */
-	void *port_tid_pg_list;
-	/* when waiting for rcv or pioavail */
-	wait_queue_head_t port_wait;
-	/*
-	 * rcvegr bufs base, physical, must fit
-	 * in 44 bits so 32 bit programs mmap64 44 bit works)
-	 */
-	dma_addr_t port_rcvegr_phys;
-	/* mmap of hdrq, must fit in 44 bits */
-	dma_addr_t port_rcvhdrq_phys;
-	dma_addr_t port_rcvhdrqtailaddr_phys;
-	/*
-	 * number of opens (including slave subports) on this instance
-	 * (ignoring forks, dup, etc. for now)
-	 */
-	int port_cnt;
-	/*
-	 * how much space to leave at start of eager TID entries for
-	 * protocol use, on each TID
-	 */
-	/* instead of calculating it */
-	unsigned port_port;
-	/* non-zero if port is being shared. */
-	u16 port_subport_cnt;
-	/* non-zero if port is being shared. */
-	u16 port_subport_id;
-	/* number of pio bufs for this port (all procs, if shared) */
-	u32 port_piocnt;
-	/* first pio buffer for this port */
-	u32 port_pio_base;
-	/* chip offset of PIO buffers for this port */
-	u32 port_piobufs;
-	/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
-	u32 port_rcvegrbuf_chunks;
-	/* how many egrbufs per chunk */
-	u32 port_rcvegrbufs_perchunk;
-	/* order for port_rcvegrbuf_pages */
-	size_t port_rcvegrbuf_size;
-	/* rcvhdrq size (for freeing) */
-	size_t port_rcvhdrq_size;
-	/* next expected TID to check when looking for free */
-	u32 port_tidcursor;
-	/* next expected TID to check */
-	unsigned long port_flag;
-	/* what happened */
-	unsigned long int_flag;
-	/* WAIT_RCV that timed out, no interrupt */
-	u32 port_rcvwait_to;
-	/* WAIT_PIO that timed out, no interrupt */
-	u32 port_piowait_to;
-	/* WAIT_RCV already happened, no wait */
-	u32 port_rcvnowait;
-	/* WAIT_PIO already happened, no wait */
-	u32 port_pionowait;
-	/* total number of rcvhdrqfull errors */
-	u32 port_hdrqfull;
-	/*
-	 * Used to suppress multiple instances of same
-	 * port staying stuck at same point.
-	 */
-	u32 port_lastrcvhdrqtail;
-	/* saved total number of rcvhdrqfull errors for poll edge trigger */
-	u32 port_hdrqfull_poll;
-	/* total number of polled urgent packets */
-	u32 port_urgent;
-	/* saved total number of polled urgent packets for poll edge trigger */
-	u32 port_urgent_poll;
-	/* pid of process using this port */
-	struct pid *port_pid;
-	struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
-	/* same size as task_struct .comm[] */
-	char port_comm[TASK_COMM_LEN];
-	/* pkeys set by this use of this port */
-	u16 port_pkeys[4];
-	/* so file ops can get at unit */
-	struct ipath_devdata *port_dd;
-	/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
-	void *subport_uregbase;
-	/* An array of pages for the eager receive buffers * N */
-	void *subport_rcvegrbuf;
-	/* An array of pages for the eager header queue entries * N */
-	void *subport_rcvhdr_base;
-	/* The version of the library which opened this port */
-	u32 userversion;
-	/* Bitmask of active slaves */
-	u32 active_slaves;
-	/* Type of packets or conditions we want to poll for */
-	u16 poll_type;
-	/* port rcvhdrq head offset */
-	u32 port_head;
-	/* receive packet sequence counter */
-	u32 port_seq_cnt;
-};
-
-struct sk_buff;
-struct ipath_sge_state;
-struct ipath_verbs_txreq;
-
-/*
- * control information for layered drivers
- */
-struct _ipath_layer {
-	void *l_arg;
-};
-
-struct ipath_skbinfo {
-	struct sk_buff *skb;
-	dma_addr_t phys;
-};
-
-struct ipath_sdma_txreq {
-	int                 flags;
-	int                 sg_count;
-	union {
-		struct scatterlist *sg;
-		void *map_addr;
-	};
-	void              (*callback)(void *, int);
-	void               *callback_cookie;
-	int                 callback_status;
-	u16                 start_idx;  /* sdma private */
-	u16                 next_descq_idx;  /* sdma private */
-	struct list_head    list;       /* sdma private */
-};
-
-struct ipath_sdma_desc {
-	__le64 qw[2];
-};
-
-#define IPATH_SDMA_TXREQ_F_USELARGEBUF  0x1
-#define IPATH_SDMA_TXREQ_F_HEADTOHOST   0x2
-#define IPATH_SDMA_TXREQ_F_INTREQ       0x4
-#define IPATH_SDMA_TXREQ_F_FREEBUF      0x8
-#define IPATH_SDMA_TXREQ_F_FREEDESC     0x10
-#define IPATH_SDMA_TXREQ_F_VL15         0x20
-
-#define IPATH_SDMA_TXREQ_S_OK        0
-#define IPATH_SDMA_TXREQ_S_SENDERROR 1
-#define IPATH_SDMA_TXREQ_S_ABORTED   2
-#define IPATH_SDMA_TXREQ_S_SHUTDOWN  3
-
-#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG	(1ull << 63)
-#define IPATH_SDMA_STATUS_ABORT_IN_PROG			(1ull << 62)
-#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE		(1ull << 61)
-#define IPATH_SDMA_STATUS_SCB_EMPTY			(1ull << 30)
-
-/* max dwords in small buffer packet */
-#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2)
-
-/*
- * Possible IB config parameters for ipath_f_get/set_ib_cfg()
- */
-#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */
-#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */
-#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */
-#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */
-#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */
-#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */
-#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */
-#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */
-#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */
-#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */
-#define IPATH_IB_CFG_LINKLATENCY 8 /* Get Auto-Lane-reversal enable */
-
-
-struct ipath_devdata {
-	struct list_head ipath_list;
-
-	struct ipath_kregs const *ipath_kregs;
-	struct ipath_cregs const *ipath_cregs;
-
-	/* mem-mapped pointer to base of chip regs */
-	u64 __iomem *ipath_kregbase;
-	/* end of mem-mapped chip space; range checking */
-	u64 __iomem *ipath_kregend;
-	/* physical address of chip for io_remap, etc. */
-	unsigned long ipath_physaddr;
-	/* base of memory alloced for ipath_kregbase, for free */
-	u64 *ipath_kregalloc;
-	/* ipath_cfgports pointers */
-	struct ipath_portdata **ipath_pd;
-	/* sk_buffs used by port 0 eager receive queue */
-	struct ipath_skbinfo *ipath_port0_skbinfo;
-	/* kvirt address of 1st 2k pio buffer */
-	void __iomem *ipath_pio2kbase;
-	/* kvirt address of 1st 4k pio buffer */
-	void __iomem *ipath_pio4kbase;
-	/*
-	 * points to area where PIOavail registers will be DMA'ed.
-	 * Has to be on a page of it's own, because the page will be
-	 * mapped into user program space.  This copy is *ONLY* ever
-	 * written by DMA, not by the driver!  Need a copy per device
-	 * when we get to multiple devices
-	 */
-	volatile __le64 *ipath_pioavailregs_dma;
-	/* physical address where updates occur */
-	dma_addr_t ipath_pioavailregs_phys;
-	struct _ipath_layer ipath_layer;
-	/* setup intr */
-	int (*ipath_f_intrsetup)(struct ipath_devdata *);
-	/* fallback to alternate interrupt type if possible */
-	int (*ipath_f_intr_fallback)(struct ipath_devdata *);
-	/* setup on-chip bus config */
-	int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *);
-	/* hard reset chip */
-	int (*ipath_f_reset)(struct ipath_devdata *);
-	int (*ipath_f_get_boardname)(struct ipath_devdata *, char *,
-				     size_t);
-	void (*ipath_f_init_hwerrors)(struct ipath_devdata *);
-	void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *,
-					size_t);
-	void (*ipath_f_quiet_serdes)(struct ipath_devdata *);
-	int (*ipath_f_bringup_serdes)(struct ipath_devdata *);
-	int (*ipath_f_early_init)(struct ipath_devdata *);
-	void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned);
-	void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*,
-				u32, unsigned long);
-	void (*ipath_f_tidtemplate)(struct ipath_devdata *);
-	void (*ipath_f_cleanup)(struct ipath_devdata *);
-	void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
-	/* fill out chip-specific fields */
-	int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
-	/* free irq */
-	void (*ipath_f_free_irq)(struct ipath_devdata *);
-	struct ipath_message_header *(*ipath_f_get_msgheader)
-					(struct ipath_devdata *, __le32 *);
-	void (*ipath_f_config_ports)(struct ipath_devdata *, ushort);
-	int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int);
-	int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32);
-	void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16);
-	void (*ipath_f_read_counters)(struct ipath_devdata *,
-					struct infinipath_counters *);
-	void (*ipath_f_xgxs_reset)(struct ipath_devdata *);
-	/* per chip actions needed for IB Link up/down changes */
-	int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64);
-
-	unsigned ipath_lastegr_idx;
-	struct ipath_ibdev *verbs_dev;
-	struct timer_list verbs_timer;
-	/* total dwords sent (summed from counter) */
-	u64 ipath_sword;
-	/* total dwords rcvd (summed from counter) */
-	u64 ipath_rword;
-	/* total packets sent (summed from counter) */
-	u64 ipath_spkts;
-	/* total packets rcvd (summed from counter) */
-	u64 ipath_rpkts;
-	/* ipath_statusp initially points to this. */
-	u64 _ipath_status;
-	/* GUID for this interface, in network order */
-	__be64 ipath_guid;
-	/*
-	 * aggregrate of error bits reported since last cleared, for
-	 * limiting of error reporting
-	 */
-	ipath_err_t ipath_lasterror;
-	/*
-	 * aggregrate of error bits reported since last cleared, for
-	 * limiting of hwerror reporting
-	 */
-	ipath_err_t ipath_lasthwerror;
-	/* errors masked because they occur too fast */
-	ipath_err_t ipath_maskederrs;
-	u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
-	/* these 5 fields are used to establish deltas for IB Symbol
-	 * errors and linkrecovery errors. They can be reported on
-	 * some chips during link negotiation prior to INIT, and with
-	 * DDR when faking DDR negotiations with non-IBTA switches.
-	 * The chip counters are adjusted at driver unload if there is
-	 * a non-zero delta.
-	 */
-	u64 ibdeltainprog;
-	u64 ibsymdelta;
-	u64 ibsymsnap;
-	u64 iblnkerrdelta;
-	u64 iblnkerrsnap;
-
-	/* time in jiffies at which to re-enable maskederrs */
-	unsigned long ipath_unmasktime;
-	/* count of egrfull errors, combined for all ports */
-	u64 ipath_last_tidfull;
-	/* for ipath_qcheck() */
-	u64 ipath_lastport0rcv_cnt;
-	/* template for writing TIDs  */
-	u64 ipath_tidtemplate;
-	/* value to write to free TIDs */
-	u64 ipath_tidinvalid;
-	/* IBA6120 rcv interrupt setup */
-	u64 ipath_rhdrhead_intr_off;
-
-	/* size of memory at ipath_kregbase */
-	u32 ipath_kregsize;
-	/* number of registers used for pioavail */
-	u32 ipath_pioavregs;
-	/* IPATH_POLL, etc. */
-	u32 ipath_flags;
-	/* ipath_flags driver is waiting for */
-	u32 ipath_state_wanted;
-	/* last buffer for user use, first buf for kernel use is this
-	 * index. */
-	u32 ipath_lastport_piobuf;
-	/* is a stats timer active */
-	u32 ipath_stats_timer_active;
-	/* number of interrupts for this device -- saturates... */
-	u32 ipath_int_counter;
-	/* dwords sent read from counter */
-	u32 ipath_lastsword;
-	/* dwords received read from counter */
-	u32 ipath_lastrword;
-	/* sent packets read from counter */
-	u32 ipath_lastspkts;
-	/* received packets read from counter */
-	u32 ipath_lastrpkts;
-	/* pio bufs allocated per port */
-	u32 ipath_pbufsport;
-	/* if remainder on bufs/port, ports < extrabuf get 1 extra */
-	u32 ipath_ports_extrabuf;
-	u32 ipath_pioupd_thresh; /* update threshold, some chips */
-	/*
-	 * number of ports configured as max; zero is set to number chip
-	 * supports, less gives more pio bufs/port, etc.
-	 */
-	u32 ipath_cfgports;
-	/* count of port 0 hdrqfull errors */
-	u32 ipath_p0_hdrqfull;
-	/* port 0 number of receive eager buffers */
-	u32 ipath_p0_rcvegrcnt;
-
-	/*
-	 * index of last piobuffer we used.  Speeds up searching, by
-	 * starting at this point.  Doesn't matter if multiple cpu's use and
-	 * update, last updater is only write that matters.  Whenever it
-	 * wraps, we update shadow copies.  Need a copy per device when we
-	 * get to multiple devices
-	 */
-	u32 ipath_lastpioindex;
-	u32 ipath_lastpioindexl;
-	/* max length of freezemsg */
-	u32 ipath_freezelen;
-	/*
-	 * consecutive times we wanted a PIO buffer but were unable to
-	 * get one
-	 */
-	u32 ipath_consec_nopiobuf;
-	/*
-	 * hint that we should update ipath_pioavailshadow before
-	 * looking for a PIO buffer
-	 */
-	u32 ipath_upd_pio_shadow;
-	/* so we can rewrite it after a chip reset */
-	u32 ipath_pcibar0;
-	/* so we can rewrite it after a chip reset */
-	u32 ipath_pcibar1;
-	u32 ipath_x1_fix_tries;
-	u32 ipath_autoneg_tries;
-	u32 serdes_first_init_done;
-
-	struct ipath_relock {
-		atomic_t ipath_relock_timer_active;
-		struct timer_list ipath_relock_timer;
-		unsigned int ipath_relock_interval; /* in jiffies */
-	} ipath_relock_singleton;
-
-	/* interrupt number */
-	int ipath_irq;
-	/* HT/PCI Vendor ID (here for NodeInfo) */
-	u16 ipath_vendorid;
-	/* HT/PCI Device ID (here for NodeInfo) */
-	u16 ipath_deviceid;
-	/* offset in HT config space of slave/primary interface block */
-	u8 ipath_ht_slave_off;
-	/* for write combining settings */
-	int wc_cookie;
-	/* ref count for each pkey */
-	atomic_t ipath_pkeyrefs[4];
-	/* shadow copy of struct page *'s for exp tid pages */
-	struct page **ipath_pageshadow;
-	/* shadow copy of dma handles for exp tid pages */
-	dma_addr_t *ipath_physshadow;
-	u64 __iomem *ipath_egrtidbase;
-	/* lock to workaround chip bug 9437 and others */
-	spinlock_t ipath_kernel_tid_lock;
-	spinlock_t ipath_user_tid_lock;
-	spinlock_t ipath_sendctrl_lock;
-	/* around ipath_pd and (user ports) port_cnt use (intr vs free) */
-	spinlock_t ipath_uctxt_lock;
-
-	/*
-	 * IPATH_STATUS_*,
-	 * this address is mapped readonly into user processes so they can
-	 * get status cheaply, whenever they want.
-	 */
-	u64 *ipath_statusp;
-	/* freeze msg if hw error put chip in freeze */
-	char *ipath_freezemsg;
-	/* pci access data structure */
-	struct pci_dev *pcidev;
-	struct cdev *user_cdev;
-	struct cdev *diag_cdev;
-	struct device *user_dev;
-	struct device *diag_dev;
-	/* timer used to prevent stats overflow, error throttling, etc. */
-	struct timer_list ipath_stats_timer;
-	/* timer to verify interrupts work, and fallback if possible */
-	struct timer_list ipath_intrchk_timer;
-	void *ipath_dummy_hdrq;	/* used after port close */
-	dma_addr_t ipath_dummy_hdrq_phys;
-
-	/* SendDMA related entries */
-	spinlock_t            ipath_sdma_lock;
-	unsigned long         ipath_sdma_status;
-	unsigned long         ipath_sdma_abort_jiffies;
-	unsigned long         ipath_sdma_abort_intr_timeout;
-	unsigned long         ipath_sdma_buf_jiffies;
-	struct ipath_sdma_desc *ipath_sdma_descq;
-	u64		      ipath_sdma_descq_added;
-	u64		      ipath_sdma_descq_removed;
-	int		      ipath_sdma_desc_nreserved;
-	u16                   ipath_sdma_descq_cnt;
-	u16                   ipath_sdma_descq_tail;
-	u16                   ipath_sdma_descq_head;
-	u16                   ipath_sdma_next_intr;
-	u16                   ipath_sdma_reset_wait;
-	u8                    ipath_sdma_generation;
-	struct tasklet_struct ipath_sdma_abort_task;
-	struct tasklet_struct ipath_sdma_notify_task;
-	struct list_head      ipath_sdma_activelist;
-	struct list_head      ipath_sdma_notifylist;
-	atomic_t              ipath_sdma_vl15_count;
-	struct timer_list     ipath_sdma_vl15_timer;
-
-	dma_addr_t       ipath_sdma_descq_phys;
-	volatile __le64 *ipath_sdma_head_dma;
-	dma_addr_t       ipath_sdma_head_phys;
-
-	unsigned long ipath_ureg_align; /* user register alignment */
-
-	struct delayed_work ipath_autoneg_work;
-	wait_queue_head_t ipath_autoneg_wait;
-
-	/* HoL blocking / user app forward-progress state */
-	unsigned          ipath_hol_state;
-	unsigned          ipath_hol_next;
-	struct timer_list ipath_hol_timer;
-
-	/*
-	 * Shadow copies of registers; size indicates read access size.
-	 * Most of them are readonly, but some are write-only register,
-	 * where we manipulate the bits in the shadow copy, and then write
-	 * the shadow copy to infinipath.
-	 *
-	 * We deliberately make most of these 32 bits, since they have
-	 * restricted range.  For any that we read, we won't to generate 32
-	 * bit accesses, since Opteron will generate 2 separate 32 bit HT
-	 * transactions for a 64 bit read, and we want to avoid unnecessary
-	 * HT transactions.
-	 */
-
-	/* This is the 64 bit group */
-
-	/*
-	 * shadow of pioavail, check to be sure it's large enough at
-	 * init time.
-	 */
-	unsigned long ipath_pioavailshadow[8];
-	/* bitmap of send buffers available for the kernel to use with PIO. */
-	unsigned long ipath_pioavailkernel[8];
-	/* shadow of kr_gpio_out, for rmw ops */
-	u64 ipath_gpio_out;
-	/* shadow the gpio mask register */
-	u64 ipath_gpio_mask;
-	/* shadow the gpio output enable, etc... */
-	u64 ipath_extctrl;
-	/* kr_revision shadow */
-	u64 ipath_revision;
-	/*
-	 * shadow of ibcctrl, for interrupt handling of link changes,
-	 * etc.
-	 */
-	u64 ipath_ibcctrl;
-	/*
-	 * last ibcstatus, to suppress "duplicate" status change messages,
-	 * mostly from 2 to 3
-	 */
-	u64 ipath_lastibcstat;
-	/* hwerrmask shadow */
-	ipath_err_t ipath_hwerrmask;
-	ipath_err_t ipath_errormask; /* errormask shadow */
-	/* interrupt config reg shadow */
-	u64 ipath_intconfig;
-	/* kr_sendpiobufbase value */
-	u64 ipath_piobufbase;
-	/* kr_ibcddrctrl shadow */
-	u64 ipath_ibcddrctrl;
-
-	/* these are the "32 bit" regs */
-
-	/*
-	 * number of GUIDs in the flash for this interface; may need some
-	 * rethinking for setting on other ifaces
-	 */
-	u32 ipath_nguid;
-	/*
-	 * the following two are 32-bit bitmasks, but {test,clear,set}_bit
-	 * all expect bit fields to be "unsigned long"
-	 */
-	/* shadow kr_rcvctrl */
-	unsigned long ipath_rcvctrl;
-	/* shadow kr_sendctrl */
-	unsigned long ipath_sendctrl;
-	/* to not count armlaunch after cancel */
-	unsigned long ipath_lastcancel;
-	/* count cases where special trigger was needed (double write) */
-	unsigned long ipath_spectriggerhit;
-
-	/* value we put in kr_rcvhdrcnt */
-	u32 ipath_rcvhdrcnt;
-	/* value we put in kr_rcvhdrsize */
-	u32 ipath_rcvhdrsize;
-	/* value we put in kr_rcvhdrentsize */
-	u32 ipath_rcvhdrentsize;
-	/* offset of last entry in rcvhdrq */
-	u32 ipath_hdrqlast;
-	/* kr_portcnt value */
-	u32 ipath_portcnt;
-	/* kr_pagealign value */
-	u32 ipath_palign;
-	/* number of "2KB" PIO buffers */
-	u32 ipath_piobcnt2k;
-	/* size in bytes of "2KB" PIO buffers */
-	u32 ipath_piosize2k;
-	/* number of "4KB" PIO buffers */
-	u32 ipath_piobcnt4k;
-	/* size in bytes of "4KB" PIO buffers */
-	u32 ipath_piosize4k;
-	u32 ipath_pioreserved; /* reserved special-inkernel; */
-	/* kr_rcvegrbase value */
-	u32 ipath_rcvegrbase;
-	/* kr_rcvegrcnt value */
-	u32 ipath_rcvegrcnt;
-	/* kr_rcvtidbase value */
-	u32 ipath_rcvtidbase;
-	/* kr_rcvtidcnt value */
-	u32 ipath_rcvtidcnt;
-	/* kr_sendregbase */
-	u32 ipath_sregbase;
-	/* kr_userregbase */
-	u32 ipath_uregbase;
-	/* kr_counterregbase */
-	u32 ipath_cregbase;
-	/* shadow the control register contents */
-	u32 ipath_control;
-	/* PCI revision register (HTC rev on FPGA) */
-	u32 ipath_pcirev;
-
-	/* chip address space used by 4k pio buffers */
-	u32 ipath_4kalign;
-	/* The MTU programmed for this unit */
-	u32 ipath_ibmtu;
-	/*
-	 * The max size IB packet, included IB headers that we can send.
-	 * Starts same as ipath_piosize, but is affected when ibmtu is
-	 * changed, or by size of eager buffers
-	 */
-	u32 ipath_ibmaxlen;
-	/*
-	 * ibmaxlen at init time, limited by chip and by receive buffer
-	 * size.  Not changed after init.
-	 */
-	u32 ipath_init_ibmaxlen;
-	/* size of each rcvegrbuffer */
-	u32 ipath_rcvegrbufsize;
-	/* localbus width (1, 2,4,8,16,32) from config space  */
-	u32 ipath_lbus_width;
-	/* localbus speed (HT: 200,400,800,1000; PCIe 2500) */
-	u32 ipath_lbus_speed;
-	/*
-	 * number of sequential ibcstatus change for polling active/quiet
-	 * (i.e., link not coming up).
-	 */
-	u32 ipath_ibpollcnt;
-	/* low and high portions of MSI capability/vector */
-	u32 ipath_msi_lo;
-	/* saved after PCIe init for restore after reset */
-	u32 ipath_msi_hi;
-	/* MSI data (vector) saved for restore */
-	u16 ipath_msi_data;
-	/* MLID programmed for this instance */
-	u16 ipath_mlid;
-	/* LID programmed for this instance */
-	u16 ipath_lid;
-	/* list of pkeys programmed; 0 if not set */
-	u16 ipath_pkeys[4];
-	/*
-	 * ASCII serial number, from flash, large enough for original
-	 * all digit strings, and longer QLogic serial number format
-	 */
-	u8 ipath_serial[16];
-	/* human readable board version */
-	u8 ipath_boardversion[96];
-	u8 ipath_lbus_info[32]; /* human readable localbus info */
-	/* chip major rev, from ipath_revision */
-	u8 ipath_majrev;
-	/* chip minor rev, from ipath_revision */
-	u8 ipath_minrev;
-	/* board rev, from ipath_revision */
-	u8 ipath_boardrev;
-	/* saved for restore after reset */
-	u8 ipath_pci_cacheline;
-	/* LID mask control */
-	u8 ipath_lmc;
-	/* link width supported */
-	u8 ipath_link_width_supported;
-	/* link speed supported */
-	u8 ipath_link_speed_supported;
-	u8 ipath_link_width_enabled;
-	u8 ipath_link_speed_enabled;
-	u8 ipath_link_width_active;
-	u8 ipath_link_speed_active;
-	/* Rx Polarity inversion (compensate for ~tx on partner) */
-	u8 ipath_rx_pol_inv;
-
-	u8 ipath_r_portenable_shift;
-	u8 ipath_r_intravail_shift;
-	u8 ipath_r_tailupd_shift;
-	u8 ipath_r_portcfg_shift;
-
-	/* unit # of this chip, if present */
-	int ipath_unit;
-
-	/* local link integrity counter */
-	u32 ipath_lli_counter;
-	/* local link integrity errors */
-	u32 ipath_lli_errors;
-	/*
-	 * Above counts only cases where _successive_ LocalLinkIntegrity
-	 * errors were seen in the receive headers of kern-packets.
-	 * Below are the three (monotonically increasing) counters
-	 * maintained via GPIO interrupts on iba6120-rev2.
-	 */
-	u32 ipath_rxfc_unsupvl_errs;
-	u32 ipath_overrun_thresh_errs;
-	u32 ipath_lli_errs;
-
-	/*
-	 * Not all devices managed by a driver instance are the same
-	 * type, so these fields must be per-device.
-	 */
-	u64 ipath_i_bitsextant;
-	ipath_err_t ipath_e_bitsextant;
-	ipath_err_t ipath_hwe_bitsextant;
-
-	/*
-	 * Below should be computable from number of ports,
-	 * since they are never modified.
-	 */
-	u64 ipath_i_rcvavail_mask;
-	u64 ipath_i_rcvurg_mask;
-	u16 ipath_i_rcvurg_shift;
-	u16 ipath_i_rcvavail_shift;
-
-	/*
-	 * Register bits for selecting i2c direction and values, used for
-	 * I2C serial flash.
-	 */
-	u8 ipath_gpio_sda_num;
-	u8 ipath_gpio_scl_num;
-	u8 ipath_i2c_chain_type;
-	u64 ipath_gpio_sda;
-	u64 ipath_gpio_scl;
-
-	/* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
-	spinlock_t ipath_gpio_lock;
-
-	/*
-	 * IB link and linktraining states and masks that vary per chip in
-	 * some way.  Set at init, to avoid each IB status change interrupt
-	 */
-	u8 ibcs_ls_shift;
-	u8 ibcs_lts_mask;
-	u32 ibcs_mask;
-	u32 ib_init;
-	u32 ib_arm;
-	u32 ib_active;
-
-	u16 ipath_rhf_offset; /* offset of RHF within receive header entry */
-
-	/*
-	 * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontol
-	 * reg. Changes for IBA7220
-	 */
-	u8 ibcc_lic_mask; /* LinkInitCmd */
-	u8 ibcc_lc_shift; /* LinkCmd */
-	u8 ibcc_mpl_shift; /* Maxpktlen */
-
-	u8 delay_mult;
-
-	/* used to override LED behavior */
-	u8 ipath_led_override;  /* Substituted for normal value, if non-zero */
-	u16 ipath_led_override_timeoff; /* delta to next timer event */
-	u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
-	u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
-	atomic_t ipath_led_override_timer_active;
-	/* Used to flash LEDs in override mode */
-	struct timer_list ipath_led_override_timer;
-
-	/* Support (including locks) for EEPROM logging of errors and time */
-	/* control access to actual counters, timer */
-	spinlock_t ipath_eep_st_lock;
-	/* control high-level access to EEPROM */
-	struct mutex ipath_eep_lock;
-	/* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
-	uint64_t ipath_traffic_wds;
-	/* active time is kept in seconds, but logged in hours */
-	atomic_t ipath_active_time;
-	/* Below are nominal shadow of EEPROM, new since last EEPROM update */
-	uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
-	uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
-	uint16_t ipath_eep_hrs;
-	/*
-	 * masks for which bits of errs, hwerrs that cause
-	 * each of the counters to increment.
-	 */
-	struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
-
-	/* interrupt mitigation reload register info */
-	u16 ipath_jint_idle_ticks;	/* idle clock ticks */
-	u16 ipath_jint_max_packets;	/* max packets across all ports */
-
-	/*
-	 * lock for access to SerDes, and flags to sequence preset
-	 * versus steady-state. 7220-only at the moment.
-	 */
-	spinlock_t ipath_sdepb_lock;
-	u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */
-};
-
-/* ipath_hol_state values (stopping/starting user proc, send flushing) */
-#define IPATH_HOL_UP       0
-#define IPATH_HOL_DOWN     1
-/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */
-#define IPATH_HOL_DOWNSTOP 0
-#define IPATH_HOL_DOWNCONT 1
-
-/* bit positions for sdma_status */
-#define IPATH_SDMA_ABORTING  0
-#define IPATH_SDMA_DISARMED  1
-#define IPATH_SDMA_DISABLED  2
-#define IPATH_SDMA_LAYERBUF  3
-#define IPATH_SDMA_RUNNING  30
-#define IPATH_SDMA_SHUTDOWN 31
-
-/* bit combinations that correspond to abort states */
-#define IPATH_SDMA_ABORT_NONE 0
-#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING)
-#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \
-	(1UL << IPATH_SDMA_DISARMED))
-#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \
-	(1UL << IPATH_SDMA_DISABLED))
-#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \
-	(1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
-#define IPATH_SDMA_ABORT_MASK ((1UL<<IPATH_SDMA_ABORTING) | \
-	(1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
-
-#define IPATH_SDMA_BUF_NONE 0
-#define IPATH_SDMA_BUF_MASK (1UL<<IPATH_SDMA_LAYERBUF)
-
-/* Private data for file operations */
-struct ipath_filedata {
-	struct ipath_portdata *pd;
-	unsigned subport;
-	unsigned tidcursor;
-	struct ipath_user_sdma_queue *pq;
-};
-extern struct list_head ipath_dev_list;
-extern spinlock_t ipath_devs_lock;
-extern struct ipath_devdata *ipath_lookup(int unit);
-
-int ipath_init_chip(struct ipath_devdata *, int);
-int ipath_enable_wc(struct ipath_devdata *dd);
-void ipath_disable_wc(struct ipath_devdata *dd);
-int ipath_count_units(int *npresentp, int *nupp, int *maxportsp);
-void ipath_shutdown_device(struct ipath_devdata *);
-void ipath_clear_freeze(struct ipath_devdata *);
-
-struct file_operations;
-int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
-		    struct cdev **cdevp, struct device **devp);
-void ipath_cdev_cleanup(struct cdev **cdevp,
-			struct device **devp);
-
-int ipath_diag_add(struct ipath_devdata *);
-void ipath_diag_remove(struct ipath_devdata *);
-
-extern wait_queue_head_t ipath_state_wait;
-
-int ipath_user_add(struct ipath_devdata *dd);
-void ipath_user_remove(struct ipath_devdata *dd);
-
-struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
-
-extern int ipath_diag_inuse;
-
-irqreturn_t ipath_intr(int irq, void *devid);
-int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
-		     ipath_err_t err);
-#if __IPATH_INFO || __IPATH_DBG
-extern const char *ipath_ibcstatus_str[];
-#endif
-
-/* clean up any per-chip chip-specific stuff */
-void ipath_chip_cleanup(struct ipath_devdata *);
-/* clean up any chip type-specific stuff */
-void ipath_chip_done(void);
-
-void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
-			  unsigned cnt);
-void ipath_cancel_sends(struct ipath_devdata *, int);
-
-int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
-void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
-
-int ipath_parse_ushort(const char *str, unsigned short *valp);
-
-void ipath_kreceive(struct ipath_portdata *);
-int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
-int ipath_reset_device(int);
-void ipath_get_faststats(unsigned long);
-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-int ipath_set_linkstate(struct ipath_devdata *, u8);
-int ipath_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_lid(struct ipath_devdata *, u32, u8);
-int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
-void ipath_enable_armlaunch(struct ipath_devdata *);
-void ipath_disable_armlaunch(struct ipath_devdata *);
-void ipath_hol_down(struct ipath_devdata *);
-void ipath_hol_up(struct ipath_devdata *);
-void ipath_hol_event(unsigned long);
-void ipath_toggle_rclkrls(struct ipath_devdata *);
-void ipath_sd7220_clr_ibpar(struct ipath_devdata *);
-void ipath_set_relock_poll(struct ipath_devdata *, int);
-void ipath_shutdown_relock_poll(struct ipath_devdata *);
-
-/* for use in system calls, where we want to know device type, etc. */
-#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
-#define subport_fp(fp) \
-	((struct ipath_filedata *)(fp)->private_data)->subport
-#define tidcursor_fp(fp) \
-	((struct ipath_filedata *)(fp)->private_data)->tidcursor
-#define user_sdma_queue_fp(fp) \
-	((struct ipath_filedata *)(fp)->private_data)->pq
-
-/*
- * values for ipath_flags
- */
-		/* chip can report link latency (IB 1.2) */
-#define IPATH_HAS_LINK_LATENCY 0x1
-		/* The chip is up and initted */
-#define IPATH_INITTED       0x2
-		/* set if any user code has set kr_rcvhdrsize */
-#define IPATH_RCVHDRSZ_SET  0x4
-		/* The chip is present and valid for accesses */
-#define IPATH_PRESENT       0x8
-		/* HT link0 is only 8 bits wide, ignore upper byte crc
-		 * errors, etc. */
-#define IPATH_8BIT_IN_HT0   0x10
-		/* HT link1 is only 8 bits wide, ignore upper byte crc
-		 * errors, etc. */
-#define IPATH_8BIT_IN_HT1   0x20
-		/* The link is down */
-#define IPATH_LINKDOWN      0x40
-		/* The link level is up (0x11) */
-#define IPATH_LINKINIT      0x80
-		/* The link is in the armed (0x21) state */
-#define IPATH_LINKARMED     0x100
-		/* The link is in the active (0x31) state */
-#define IPATH_LINKACTIVE    0x200
-		/* link current state is unknown */
-#define IPATH_LINKUNK       0x400
-		/* Write combining flush needed for PIO */
-#define IPATH_PIO_FLUSH_WC  0x1000
-		/* DMA Receive tail pointer */
-#define IPATH_NODMA_RTAIL   0x2000
-		/* no IB cable, or no device on IB cable */
-#define IPATH_NOCABLE       0x4000
-		/* Supports port zero per packet receive interrupts via
-		 * GPIO */
-#define IPATH_GPIO_INTR     0x8000
-		/* uses the coded 4byte TID, not 8 byte */
-#define IPATH_4BYTE_TID     0x10000
-		/* packet/word counters are 32 bit, else those 4 counters
-		 * are 64bit */
-#define IPATH_32BITCOUNTERS 0x20000
-		/* Interrupt register is 64 bits */
-#define IPATH_INTREG_64     0x40000
-		/* can miss port0 rx interrupts */
-#define IPATH_DISABLED      0x80000 /* administratively disabled */
-		/* Use GPIO interrupts for new counters */
-#define IPATH_GPIO_ERRINTRS 0x100000
-#define IPATH_SWAP_PIOBUFS  0x200000
-		/* Supports Send DMA */
-#define IPATH_HAS_SEND_DMA  0x400000
-		/* Supports Send Count (not just word count) in PBC */
-#define IPATH_HAS_PBC_CNT   0x800000
-		/* Suppress heartbeat, even if turning off loopback */
-#define IPATH_NO_HRTBT      0x1000000
-#define IPATH_HAS_THRESH_UPDATE 0x4000000
-#define IPATH_HAS_MULT_IB_SPEED 0x8000000
-#define IPATH_IB_AUTONEG_INPROG 0x10000000
-#define IPATH_IB_AUTONEG_FAILED 0x20000000
-		/* Linkdown-disable intentionally, Do not attempt to bring up */
-#define IPATH_IB_LINK_DISABLED 0x40000000
-#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
-
-/* Bits in GPIO for the added interrupts */
-#define IPATH_GPIO_PORT0_BIT 2
-#define IPATH_GPIO_RXUVL_BIT 3
-#define IPATH_GPIO_OVRUN_BIT 4
-#define IPATH_GPIO_LLI_BIT 5
-#define IPATH_GPIO_ERRINTR_MASK 0x38
-
-/* portdata flag bit offsets */
-		/* waiting for a packet to arrive */
-#define IPATH_PORT_WAITING_RCV   2
-		/* master has not finished initializing */
-#define IPATH_PORT_MASTER_UNINIT 4
-		/* waiting for an urgent packet to arrive */
-#define IPATH_PORT_WAITING_URG 5
-
-/* free up any allocated data at closes */
-void ipath_free_data(struct ipath_portdata *dd);
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *);
-void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
-				unsigned len, int avail);
-void ipath_init_iba6110_funcs(struct ipath_devdata *);
-void ipath_get_eeprom_info(struct ipath_devdata *);
-int ipath_update_eeprom_log(struct ipath_devdata *dd);
-void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
-u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
-void ipath_disarm_senderrbufs(struct ipath_devdata *);
-void ipath_force_pio_avail_update(struct ipath_devdata *);
-void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
-
-/*
- * Set LED override, only the two LSBs have "public" meaning, but
- * any non-zero value substitutes them for the Link and LinkTrain
- * LED states.
- */
-#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
-#define IPATH_LED_LOG 2  /* Logical (link) YELLOW LED */
-void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
-
-/* send dma routines */
-int setup_sdma(struct ipath_devdata *);
-void teardown_sdma(struct ipath_devdata *);
-void ipath_restart_sdma(struct ipath_devdata *);
-void ipath_sdma_intr(struct ipath_devdata *);
-int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *,
-			  u32, struct ipath_verbs_txreq *);
-/* ipath_sdma_lock should be locked before calling this. */
-int ipath_sdma_make_progress(struct ipath_devdata *dd);
-
-/* must be called under ipath_sdma_lock */
-static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd)
-{
-	return dd->ipath_sdma_descq_cnt -
-		(dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) -
-		1 - dd->ipath_sdma_desc_nreserved;
-}
-
-static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt)
-{
-	dd->ipath_sdma_desc_nreserved += cnt;
-}
-
-static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt)
-{
-	dd->ipath_sdma_desc_nreserved -= cnt;
-}
-
-/*
- * number of words used for protocol header if not set by ipath_userinit();
- */
-#define IPATH_DFLT_RCVHDRSIZE 9
-
-int ipath_get_user_pages(unsigned long, size_t, struct page **);
-void ipath_release_user_pages(struct page **, size_t);
-void ipath_release_user_pages_on_close(struct page **, size_t);
-int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
-int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
-int ipath_tempsense_read(struct ipath_devdata *, u8 regnum);
-int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data);
-
-/* these are used for the registers that vary with port */
-void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
-			   unsigned, u64);
-
-/*
- * We could have a single register get/put routine, that takes a group type,
- * but this is somewhat clearer and cleaner.  It also gives us some error
- * checking.  64 bit register reads should always work, but are inefficient
- * on opteron (the northbridge always generates 2 separate HT 32 bit reads),
- * so we use kreg32 wherever possible.  User register and counter register
- * reads are always 32 bit reads, so only one form of those routines.
- */
-
-/*
- * At the moment, none of the s-registers are writable, so no
- * ipath_write_sreg().
- */
-
-/**
- * ipath_read_ureg32 - read 32-bit virtualized per-port register
- * @dd: device
- * @regno: register number
- * @port: port number
- *
- * Return the contents of a register that is virtualized to be per port.
- * Returns -1 on errors (not distinguishable from valid contents at
- * runtime; we may add a separate error variable at some point).
- */
-static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
-				    ipath_ureg regno, int port)
-{
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-		return 0;
-
-	return readl(regno + (u64 __iomem *)
-		     (dd->ipath_uregbase +
-		      (char __iomem *)dd->ipath_kregbase +
-		      dd->ipath_ureg_align * port));
-}
-
-/**
- * ipath_write_ureg - write 32-bit virtualized per-port register
- * @dd: device
- * @regno: register number
- * @value: value
- * @port: port
- *
- * Write the contents of a register that is virtualized to be per port.
- */
-static inline void ipath_write_ureg(const struct ipath_devdata *dd,
-				    ipath_ureg regno, u64 value, int port)
-{
-	u64 __iomem *ubase = (u64 __iomem *)
-		(dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase +
-		 dd->ipath_ureg_align * port);
-	if (dd->ipath_kregbase)
-		writeq(value, &ubase[regno]);
-}
-
-static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd,
-				    ipath_kreg regno)
-{
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-		return -1;
-	return readl((u32 __iomem *) & dd->ipath_kregbase[regno]);
-}
-
-static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd,
-				    ipath_kreg regno)
-{
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-		return -1;
-
-	return readq(&dd->ipath_kregbase[regno]);
-}
-
-static inline void ipath_write_kreg(const struct ipath_devdata *dd,
-				    ipath_kreg regno, u64 value)
-{
-	if (dd->ipath_kregbase)
-		writeq(value, &dd->ipath_kregbase[regno]);
-}
-
-static inline u64 ipath_read_creg(const struct ipath_devdata *dd,
-				  ipath_sreg regno)
-{
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-		return 0;
-
-	return readq(regno + (u64 __iomem *)
-		     (dd->ipath_cregbase +
-		      (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
-					 ipath_sreg regno)
-{
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
-		return 0;
-	return readl(regno + (u64 __iomem *)
-		     (dd->ipath_cregbase +
-		      (char __iomem *)dd->ipath_kregbase));
-}
-
-static inline void ipath_write_creg(const struct ipath_devdata *dd,
-				    ipath_creg regno, u64 value)
-{
-	if (dd->ipath_kregbase)
-		writeq(value, regno + (u64 __iomem *)
-		       (dd->ipath_cregbase +
-			(char __iomem *)dd->ipath_kregbase));
-}
-
-static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd)
-{
-	*((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL;
-}
-
-static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd)
-{
-	return (u32) le64_to_cpu(*((volatile __le64 *)
-				pd->port_rcvhdrtail_kvaddr));
-}
-
-static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd)
-{
-	const struct ipath_devdata *dd = pd->port_dd;
-	u32 hdrqtail;
-
-	if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
-		__le32 *rhf_addr;
-		u32 seq;
-
-		rhf_addr = (__le32 *) pd->port_rcvhdrq +
-			pd->port_head + dd->ipath_rhf_offset;
-		seq = ipath_hdrget_seq(rhf_addr);
-		hdrqtail = pd->port_head;
-		if (seq == pd->port_seq_cnt)
-			hdrqtail++;
-	} else
-		hdrqtail = ipath_get_rcvhdrtail(pd);
-
-	return hdrqtail;
-}
-
-static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r)
-{
-	return (dd->ipath_flags & IPATH_INTREG_64) ?
-		ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r);
-}
-
-/*
- * from contents of IBCStatus (or a saved copy), return linkstate
- * Report ACTIVE_DEFER as ACTIVE, because we treat them the same
- * everywhere, anyway (and should be, for almost all purposes).
- */
-static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
-{
-	u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) &
-		INFINIPATH_IBCS_LINKSTATE_MASK;
-	if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER)
-		state = INFINIPATH_IBCS_L_STATE_ACTIVE;
-	return state;
-}
-
-/* from contents of IBCStatus (or a saved copy), return linktrainingstate */
-static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
-{
-	return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-		dd->ibcs_lts_mask;
-}
-
-/*
- * from contents of IBCStatus (or a saved copy), return logical link state
- * combination of link state and linktraining state (down, active, init,
- * arm, etc.
- */
-static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
-{
-	u32 ibs;
-	ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-		dd->ibcs_lts_mask;
-	ibs |= (u32)(ibcs &
-		(INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
-	return ibs;
-}
-
-/*
- * sysfs interface.
- */
-
-struct device_driver;
-
-extern const char ib_ipath_version[];
-
-extern const struct attribute_group *ipath_driver_attr_groups[];
-
-int ipath_device_create_group(struct device *, struct ipath_devdata *);
-void ipath_device_remove_group(struct device *, struct ipath_devdata *);
-int ipath_expose_reset(struct device *);
-
-int ipath_init_ipathfs(void);
-void ipath_exit_ipathfs(void);
-int ipathfs_add_device(struct ipath_devdata *);
-int ipathfs_remove_device(struct ipath_devdata *);
-
-/*
- * dma_addr wrappers - all 0's invalid for hw
- */
-dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
-			  size_t, int);
-dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
-const char *ipath_get_unit_name(int unit);
-
-/*
- * Flush write combining store buffers (if present) and perform a write
- * barrier.
- */
-#if defined(CONFIG_X86_64)
-#define ipath_flush_wc() asm volatile("sfence" ::: "memory")
-#else
-#define ipath_flush_wc() wmb()
-#endif
-
-extern unsigned ipath_debug; /* debugging bit mask */
-extern unsigned ipath_linkrecovery;
-extern unsigned ipath_mtu4096;
-extern struct mutex ipath_mutex;
-
-#define IPATH_DRV_NAME		"ib_ipath"
-#define IPATH_MAJOR		233
-#define IPATH_USER_MINOR_BASE	0
-#define IPATH_DIAGPKT_MINOR	127
-#define IPATH_DIAG_MINOR_BASE	129
-#define IPATH_NMINORS		255
-
-#define ipath_dev_err(dd,fmt,...) \
-	do { \
-		const struct ipath_devdata *__dd = (dd); \
-		if (__dd->pcidev) \
-			dev_err(&__dd->pcidev->dev, "%s: " fmt, \
-				ipath_get_unit_name(__dd->ipath_unit), \
-				##__VA_ARGS__); \
-		else \
-			printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \
-			       ipath_get_unit_name(__dd->ipath_unit), \
-			       ##__VA_ARGS__); \
-	} while (0)
-
-#if _IPATH_DEBUGGING
-
-# define __IPATH_DBG_WHICH(which,fmt,...) \
-	do { \
-		if (unlikely(ipath_debug & (which))) \
-			printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \
-			       __func__,##__VA_ARGS__); \
-	} while(0)
-
-# define ipath_dbg(fmt,...) \
-	__IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__)
-# define ipath_cdbg(which,fmt,...) \
-	__IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__)
-
-#else /* ! _IPATH_DEBUGGING */
-
-# define ipath_dbg(fmt,...)
-# define ipath_cdbg(which,fmt,...)
-
-#endif /* _IPATH_DEBUGGING */
-
-/*
- * this is used for formatting hw error messages...
- */
-struct ipath_hwerror_msgs {
-	u64 mask;
-	const char *msg;
-};
-
-#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
-
-/* in ipath_intr.c... */
-void ipath_format_hwerrors(u64 hwerrs,
-			   const struct ipath_hwerror_msgs *hwerrmsgs,
-			   size_t nhwerrmsgs,
-			   char *msg, size_t lmsg);
-
-#endif				/* _IPATH_KERNEL_H */
diff --git a/drivers/staging/rdma/ipath/ipath_keys.c b/drivers/staging/rdma/ipath/ipath_keys.c
deleted file mode 100644
index c0e933fec218..000000000000
--- a/drivers/staging/rdma/ipath/ipath_keys.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <asm/io.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/**
- * ipath_alloc_lkey - allocate an lkey
- * @rkt: lkey table in which to allocate the lkey
- * @mr: memory region that this lkey protects
- *
- * Returns 1 if successful, otherwise returns 0.
- */
-
-int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
-{
-	unsigned long flags;
-	u32 r;
-	u32 n;
-	int ret;
-
-	spin_lock_irqsave(&rkt->lock, flags);
-
-	/* Find the next available LKEY */
-	r = n = rkt->next;
-	for (;;) {
-		if (rkt->table[r] == NULL)
-			break;
-		r = (r + 1) & (rkt->max - 1);
-		if (r == n) {
-			spin_unlock_irqrestore(&rkt->lock, flags);
-			ipath_dbg("LKEY table full\n");
-			ret = 0;
-			goto bail;
-		}
-	}
-	rkt->next = (r + 1) & (rkt->max - 1);
-	/*
-	 * Make sure lkey is never zero which is reserved to indicate an
-	 * unrestricted LKEY.
-	 */
-	rkt->gen++;
-	mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) |
-		((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen)
-		 << 8);
-	if (mr->lkey == 0) {
-		mr->lkey |= 1 << 8;
-		rkt->gen++;
-	}
-	rkt->table[r] = mr;
-	spin_unlock_irqrestore(&rkt->lock, flags);
-
-	ret = 1;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_free_lkey - free an lkey
- * @rkt: table from which to free the lkey
- * @lkey: lkey id to free
- */
-void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
-{
-	unsigned long flags;
-	u32 r;
-
-	if (lkey == 0)
-		return;
-	r = lkey >> (32 - ib_ipath_lkey_table_size);
-	spin_lock_irqsave(&rkt->lock, flags);
-	rkt->table[r] = NULL;
-	spin_unlock_irqrestore(&rkt->lock, flags);
-}
-
-/**
- * ipath_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
-		  struct ib_sge *sge, int acc)
-{
-	struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-	struct ipath_mregion *mr;
-	unsigned n, m;
-	size_t off;
-	int ret;
-
-	/*
-	 * We use LKEY == zero for kernel virtual addresses
-	 * (see ipath_get_dma_mr and ipath_dma.c).
-	 */
-	if (sge->lkey == 0) {
-		/* always a kernel port, no locking needed */
-		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
-
-		if (pd->user) {
-			ret = 0;
-			goto bail;
-		}
-		isge->mr = NULL;
-		isge->vaddr = (void *) sge->addr;
-		isge->length = sge->length;
-		isge->sge_length = sge->length;
-		ret = 1;
-		goto bail;
-	}
-	mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
-		     qp->ibqp.pd != mr->pd)) {
-		ret = 0;
-		goto bail;
-	}
-
-	off = sge->addr - mr->user_base;
-	if (unlikely(sge->addr < mr->user_base ||
-		     off + sge->length > mr->length ||
-		     (mr->access_flags & acc) != acc)) {
-		ret = 0;
-		goto bail;
-	}
-
-	off += mr->offset;
-	m = 0;
-	n = 0;
-	while (off >= mr->map[m]->segs[n].length) {
-		off -= mr->map[m]->segs[n].length;
-		n++;
-		if (n >= IPATH_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	isge->mr = mr;
-	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	isge->length = mr->map[m]->segs[n].length - off;
-	isge->sge_length = sge->length;
-	isge->m = m;
-	isge->n = n;
-
-	ret = 1;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
- * @len: length of data
- * @vaddr: virtual address to place data
- * @rkey: rkey to check
- * @acc: access flags
- *
- * Return 1 if successful, otherwise 0.
- */
-int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
-		  u32 len, u64 vaddr, u32 rkey, int acc)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_lkey_table *rkt = &dev->lk_table;
-	struct ipath_sge *sge = &ss->sge;
-	struct ipath_mregion *mr;
-	unsigned n, m;
-	size_t off;
-	int ret;
-
-	/*
-	 * We use RKEY == zero for kernel virtual addresses
-	 * (see ipath_get_dma_mr and ipath_dma.c).
-	 */
-	if (rkey == 0) {
-		/* always a kernel port, no locking needed */
-		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
-
-		if (pd->user) {
-			ret = 0;
-			goto bail;
-		}
-		sge->mr = NULL;
-		sge->vaddr = (void *) vaddr;
-		sge->length = len;
-		sge->sge_length = len;
-		ss->sg_list = NULL;
-		ss->num_sge = 1;
-		ret = 1;
-		goto bail;
-	}
-
-	mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != rkey ||
-		     qp->ibqp.pd != mr->pd)) {
-		ret = 0;
-		goto bail;
-	}
-
-	off = vaddr - mr->iova;
-	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
-		     (mr->access_flags & acc) == 0)) {
-		ret = 0;
-		goto bail;
-	}
-
-	off += mr->offset;
-	m = 0;
-	n = 0;
-	while (off >= mr->map[m]->segs[n].length) {
-		off -= mr->map[m]->segs[n].length;
-		n++;
-		if (n >= IPATH_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	sge->mr = mr;
-	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	sge->length = mr->map[m]->segs[n].length - off;
-	sge->sge_length = len;
-	sge->m = m;
-	sge->n = n;
-	ss->sg_list = NULL;
-	ss->num_sge = 1;
-
-	ret = 1;
-
-bail:
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mad.c b/drivers/staging/rdma/ipath/ipath_mad.c
deleted file mode 100644
index ad3a926ab3c5..000000000000
--- a/drivers/staging/rdma/ipath/ipath_mad.c
+++ /dev/null
@@ -1,1521 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_pma.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-#define IB_SMP_UNSUP_VERSION	cpu_to_be16(0x0004)
-#define IB_SMP_UNSUP_METHOD	cpu_to_be16(0x0008)
-#define IB_SMP_UNSUP_METH_ATTR	cpu_to_be16(0x000C)
-#define IB_SMP_INVALID_FIELD	cpu_to_be16(0x001C)
-
-static int reply(struct ib_smp *smp)
-{
-	/*
-	 * The verbs framework will handle the directed/LID route
-	 * packet changes.
-	 */
-	smp->method = IB_MGMT_METHOD_GET_RESP;
-	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
-		smp->status |= IB_SMP_DIRECTION;
-	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
-static int recv_subn_get_nodedescription(struct ib_smp *smp,
-					 struct ib_device *ibdev)
-{
-	if (smp->attr_mod)
-		smp->status |= IB_SMP_INVALID_FIELD;
-
-	memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
-
-	return reply(smp);
-}
-
-struct nodeinfo {
-	u8 base_version;
-	u8 class_version;
-	u8 node_type;
-	u8 num_ports;
-	__be64 sys_guid;
-	__be64 node_guid;
-	__be64 port_guid;
-	__be16 partition_cap;
-	__be16 device_id;
-	__be32 revision;
-	u8 local_port_num;
-	u8 vendor_id[3];
-} __attribute__ ((packed));
-
-static int recv_subn_get_nodeinfo(struct ib_smp *smp,
-				  struct ib_device *ibdev, u8 port)
-{
-	struct nodeinfo *nip = (struct nodeinfo *)&smp->data;
-	struct ipath_devdata *dd = to_idev(ibdev)->dd;
-	u32 vendor, majrev, minrev;
-
-	/* GUID 0 is illegal */
-	if (smp->attr_mod || (dd->ipath_guid == 0))
-		smp->status |= IB_SMP_INVALID_FIELD;
-
-	nip->base_version = 1;
-	nip->class_version = 1;
-	nip->node_type = 1;	/* channel adapter */
-	/*
-	 * XXX The num_ports value will need a layer function to get
-	 * the value if we ever have more than one IB port on a chip.
-	 * We will also need to get the GUID for the port.
-	 */
-	nip->num_ports = ibdev->phys_port_cnt;
-	/* This is already in network order */
-	nip->sys_guid = to_idev(ibdev)->sys_image_guid;
-	nip->node_guid = dd->ipath_guid;
-	nip->port_guid = dd->ipath_guid;
-	nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
-	nip->device_id = cpu_to_be16(dd->ipath_deviceid);
-	majrev = dd->ipath_majrev;
-	minrev = dd->ipath_minrev;
-	nip->revision = cpu_to_be32((majrev << 16) | minrev);
-	nip->local_port_num = port;
-	vendor = dd->ipath_vendorid;
-	nip->vendor_id[0] = IPATH_SRC_OUI_1;
-	nip->vendor_id[1] = IPATH_SRC_OUI_2;
-	nip->vendor_id[2] = IPATH_SRC_OUI_3;
-
-	return reply(smp);
-}
-
-static int recv_subn_get_guidinfo(struct ib_smp *smp,
-				  struct ib_device *ibdev)
-{
-	u32 startgx = 8 * be32_to_cpu(smp->attr_mod);
-	__be64 *p = (__be64 *) smp->data;
-
-	/* 32 blocks of 8 64-bit GUIDs per block */
-
-	memset(smp->data, 0, sizeof(smp->data));
-
-	/*
-	 * We only support one GUID for now.  If this changes, the
-	 * portinfo.guid_cap field needs to be updated too.
-	 */
-	if (startgx == 0) {
-		__be64 g = to_idev(ibdev)->dd->ipath_guid;
-		if (g == 0)
-			/* GUID 0 is illegal */
-			smp->status |= IB_SMP_INVALID_FIELD;
-		else
-			/* The first is a copy of the read-only HW GUID. */
-			*p = g;
-	} else
-		smp->status |= IB_SMP_INVALID_FIELD;
-
-	return reply(smp);
-}
-
-static void set_link_width_enabled(struct ipath_devdata *dd, u32 w)
-{
-	(void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w);
-}
-
-static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s)
-{
-	(void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s);
-}
-
-static int get_overrunthreshold(struct ipath_devdata *dd)
-{
-	return (dd->ipath_ibcctrl >>
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-}
-
-/**
- * set_overrunthreshold - set the overrun threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
-{
-	unsigned v;
-
-	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-	if (v != n) {
-		dd->ipath_ibcctrl &=
-			~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
-			  INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
-		dd->ipath_ibcctrl |=
-			(u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-	}
-	return 0;
-}
-
-static int get_phyerrthreshold(struct ipath_devdata *dd)
-{
-	return (dd->ipath_ibcctrl >>
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-}
-
-/**
- * set_phyerrthreshold - set the physical error threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
-{
-	unsigned v;
-
-	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-	if (v != n) {
-		dd->ipath_ibcctrl &=
-			~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
-			  INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
-		dd->ipath_ibcctrl |=
-			(u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-	}
-	return 0;
-}
-
-/**
- * get_linkdowndefaultstate - get the default linkdown state
- * @dd: the infinipath device
- *
- * Returns zero if the default is POLL, 1 if the default is SLEEP.
- */
-static int get_linkdowndefaultstate(struct ipath_devdata *dd)
-{
-	return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
-}
-
-static int recv_subn_get_portinfo(struct ib_smp *smp,
-				  struct ib_device *ibdev, u8 port)
-{
-	struct ipath_ibdev *dev;
-	struct ipath_devdata *dd;
-	struct ib_port_info *pip = (struct ib_port_info *)smp->data;
-	u16 lid;
-	u8 ibcstat;
-	u8 mtu;
-	int ret;
-
-	if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) {
-		smp->status |= IB_SMP_INVALID_FIELD;
-		ret = reply(smp);
-		goto bail;
-	}
-
-	dev = to_idev(ibdev);
-	dd = dev->dd;
-
-	/* Clear all fields.  Only set the non-zero fields. */
-	memset(smp->data, 0, sizeof(smp->data));
-
-	/* Only return the mkey if the protection field allows it. */
-	if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
-	    dev->mkeyprot == 0)
-		pip->mkey = dev->mkey;
-	pip->gid_prefix = dev->gid_prefix;
-	lid = dd->ipath_lid;
-	pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
-	pip->sm_lid = cpu_to_be16(dev->sm_lid);
-	pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
-	/* pip->diag_code; */
-	pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period);
-	pip->local_port_num = port;
-	pip->link_width_enabled = dd->ipath_link_width_enabled;
-	pip->link_width_supported = dd->ipath_link_width_supported;
-	pip->link_width_active = dd->ipath_link_width_active;
-	pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4;
-	ibcstat = dd->ipath_lastibcstat;
-	/* map LinkState to IB portinfo values.  */
-	pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1;
-
-	pip->portphysstate_linkdown =
-		(ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) |
-		(get_linkdowndefaultstate(dd) ? 1 : 2);
-	pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc;
-	pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) |
-		dd->ipath_link_speed_enabled;
-	switch (dd->ipath_ibmtu) {
-	case 4096:
-		mtu = IB_MTU_4096;
-		break;
-	case 2048:
-		mtu = IB_MTU_2048;
-		break;
-	case 1024:
-		mtu = IB_MTU_1024;
-		break;
-	case 512:
-		mtu = IB_MTU_512;
-		break;
-	case 256:
-		mtu = IB_MTU_256;
-		break;
-	default:		/* oops, something is wrong */
-		mtu = IB_MTU_2048;
-		break;
-	}
-	pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl;
-	pip->vlcap_inittype = 0x10;	/* VLCap = VL0, InitType = 0 */
-	pip->vl_high_limit = dev->vl_high_limit;
-	/* pip->vl_arb_high_cap; // only one VL */
-	/* pip->vl_arb_low_cap; // only one VL */
-	/* InitTypeReply = 0 */
-	/* our mtu cap depends on whether 4K MTU enabled or not */
-	pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
-	/* HCAs ignore VLStallCount and HOQLife */
-	/* pip->vlstallcnt_hoqlife; */
-	pip->operationalvl_pei_peo_fpi_fpo = 0x10;	/* OVLs = 1 */
-	pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
-	/* P_KeyViolations are counted by hardware. */
-	pip->pkey_violations =
-		cpu_to_be16((ipath_get_cr_errpkey(dd) -
-			     dev->z_pkey_violations) & 0xFFFF);
-	pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
-	/* Only the hardware GUID is supported for now */
-	pip->guid_cap = 1;
-	pip->clientrereg_resv_subnetto = dev->subnet_timeout;
-	/* 32.768 usec. response time (guessing) */
-	pip->resv_resptimevalue = 3;
-	pip->localphyerrors_overrunerrors =
-		(get_phyerrthreshold(dd) << 4) |
-		get_overrunthreshold(dd);
-	/* pip->max_credit_hint; */
-	if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
-		u32 v;
-
-		v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY);
-		pip->link_roundtrip_latency[0] = v >> 16;
-		pip->link_roundtrip_latency[1] = v >> 8;
-		pip->link_roundtrip_latency[2] = v;
-	}
-
-	ret = reply(smp);
-
-bail:
-	return ret;
-}
-
-/**
- * get_pkeys - return the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the pkey table is placed here
- */
-static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-	/* always a kernel port, no locking needed */
-	struct ipath_portdata *pd = dd->ipath_pd[0];
-
-	memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
-
-	return 0;
-}
-
-static int recv_subn_get_pkeytable(struct ib_smp *smp,
-				   struct ib_device *ibdev)
-{
-	u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
-	u16 *p = (u16 *) smp->data;
-	__be16 *q = (__be16 *) smp->data;
-
-	/* 64 blocks of 32 16-bit P_Key entries */
-
-	memset(smp->data, 0, sizeof(smp->data));
-	if (startpx == 0) {
-		struct ipath_ibdev *dev = to_idev(ibdev);
-		unsigned i, n = ipath_get_npkeys(dev->dd);
-
-		get_pkeys(dev->dd, p);
-
-		for (i = 0; i < n; i++)
-			q[i] = cpu_to_be16(p[i]);
-	} else
-		smp->status |= IB_SMP_INVALID_FIELD;
-
-	return reply(smp);
-}
-
-static int recv_subn_set_guidinfo(struct ib_smp *smp,
-				  struct ib_device *ibdev)
-{
-	/* The only GUID we support is the first read-only entry. */
-	return recv_subn_get_guidinfo(smp, ibdev);
-}
-
-/**
- * set_linkdowndefaultstate - set the default linkdown state
- * @dd: the infinipath device
- * @sleep: the new state
- *
- * Note that this will only take effect when the link state changes.
- */
-static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
-{
-	if (sleep)
-		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-	else
-		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-			 dd->ipath_ibcctrl);
-	return 0;
-}
-
-/**
- * recv_subn_set_portinfo - set port information
- * @smp: the incoming SM packet
- * @ibdev: the infiniband device
- * @port: the port on the device
- *
- * Set Portinfo (see ch. 14.2.5.6).
- */
-static int recv_subn_set_portinfo(struct ib_smp *smp,
-				  struct ib_device *ibdev, u8 port)
-{
-	struct ib_port_info *pip = (struct ib_port_info *)smp->data;
-	struct ib_event event;
-	struct ipath_ibdev *dev;
-	struct ipath_devdata *dd;
-	char clientrereg = 0;
-	u16 lid, smlid;
-	u8 lwe;
-	u8 lse;
-	u8 state;
-	u16 lstate;
-	u32 mtu;
-	int ret, ore;
-
-	if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
-		goto err;
-
-	dev = to_idev(ibdev);
-	dd = dev->dd;
-	event.device = ibdev;
-	event.element.port_num = port;
-
-	dev->mkey = pip->mkey;
-	dev->gid_prefix = pip->gid_prefix;
-	dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
-
-	lid = be16_to_cpu(pip->lid);
-	if (dd->ipath_lid != lid ||
-	    dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) {
-		/* Must be a valid unicast LID address. */
-		if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
-			goto err;
-		ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7);
-		event.event = IB_EVENT_LID_CHANGE;
-		ib_dispatch_event(&event);
-	}
-
-	smlid = be16_to_cpu(pip->sm_lid);
-	if (smlid != dev->sm_lid) {
-		/* Must be a valid unicast LID address. */
-		if (smlid == 0 || smlid >= IPATH_MULTICAST_LID_BASE)
-			goto err;
-		dev->sm_lid = smlid;
-		event.event = IB_EVENT_SM_CHANGE;
-		ib_dispatch_event(&event);
-	}
-
-	/* Allow 1x or 4x to be set (see 14.2.6.6). */
-	lwe = pip->link_width_enabled;
-	if (lwe) {
-		if (lwe == 0xFF)
-			lwe = dd->ipath_link_width_supported;
-		else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported))
-			goto err;
-		set_link_width_enabled(dd, lwe);
-	}
-
-	/* Allow 2.5 or 5.0 Gbs. */
-	lse = pip->linkspeedactive_enabled & 0xF;
-	if (lse) {
-		if (lse == 15)
-			lse = dd->ipath_link_speed_supported;
-		else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported))
-			goto err;
-		set_link_speed_enabled(dd, lse);
-	}
-
-	/* Set link down default state. */
-	switch (pip->portphysstate_linkdown & 0xF) {
-	case 0: /* NOP */
-		break;
-	case 1: /* SLEEP */
-		if (set_linkdowndefaultstate(dd, 1))
-			goto err;
-		break;
-	case 2: /* POLL */
-		if (set_linkdowndefaultstate(dd, 0))
-			goto err;
-		break;
-	default:
-		goto err;
-	}
-
-	dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
-	dev->vl_high_limit = pip->vl_high_limit;
-
-	switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
-	case IB_MTU_256:
-		mtu = 256;
-		break;
-	case IB_MTU_512:
-		mtu = 512;
-		break;
-	case IB_MTU_1024:
-		mtu = 1024;
-		break;
-	case IB_MTU_2048:
-		mtu = 2048;
-		break;
-	case IB_MTU_4096:
-		if (!ipath_mtu4096)
-			goto err;
-		mtu = 4096;
-		break;
-	default:
-		/* XXX We have already partially updated our state! */
-		goto err;
-	}
-	ipath_set_mtu(dd, mtu);
-
-	dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
-
-	/* We only support VL0 */
-	if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1)
-		goto err;
-
-	if (pip->mkey_violations == 0)
-		dev->mkey_violations = 0;
-
-	/*
-	 * Hardware counter can't be reset so snapshot and subtract
-	 * later.
-	 */
-	if (pip->pkey_violations == 0)
-		dev->z_pkey_violations = ipath_get_cr_errpkey(dd);
-
-	if (pip->qkey_violations == 0)
-		dev->qkey_violations = 0;
-
-	ore = pip->localphyerrors_overrunerrors;
-	if (set_phyerrthreshold(dd, (ore >> 4) & 0xF))
-		goto err;
-
-	if (set_overrunthreshold(dd, (ore & 0xF)))
-		goto err;
-
-	dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
-
-	if (pip->clientrereg_resv_subnetto & 0x80) {
-		clientrereg = 1;
-		event.event = IB_EVENT_CLIENT_REREGISTER;
-		ib_dispatch_event(&event);
-	}
-
-	/*
-	 * Do the port state change now that the other link parameters
-	 * have been set.
-	 * Changing the port physical state only makes sense if the link
-	 * is down or is being set to down.
-	 */
-	state = pip->linkspeed_portstate & 0xF;
-	lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
-	if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
-		goto err;
-
-	/*
-	 * Only state changes of DOWN, ARM, and ACTIVE are valid
-	 * and must be in the correct state to take effect (see 7.2.6).
-	 */
-	switch (state) {
-	case IB_PORT_NOP:
-		if (lstate == 0)
-			break;
-		/* FALLTHROUGH */
-	case IB_PORT_DOWN:
-		if (lstate == 0)
-			lstate = IPATH_IB_LINKDOWN_ONLY;
-		else if (lstate == 1)
-			lstate = IPATH_IB_LINKDOWN_SLEEP;
-		else if (lstate == 2)
-			lstate = IPATH_IB_LINKDOWN;
-		else if (lstate == 3)
-			lstate = IPATH_IB_LINKDOWN_DISABLE;
-		else
-			goto err;
-		ipath_set_linkstate(dd, lstate);
-		if (lstate == IPATH_IB_LINKDOWN_DISABLE) {
-			ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-			goto done;
-		}
-		ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED |
-				IPATH_LINKACTIVE, 1000);
-		break;
-	case IB_PORT_ARMED:
-		ipath_set_linkstate(dd, IPATH_IB_LINKARM);
-		break;
-	case IB_PORT_ACTIVE:
-		ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
-		break;
-	default:
-		/* XXX We have already partially updated our state! */
-		goto err;
-	}
-
-	ret = recv_subn_get_portinfo(smp, ibdev, port);
-
-	if (clientrereg)
-		pip->clientrereg_resv_subnetto |= 0x80;
-
-	goto done;
-
-err:
-	smp->status |= IB_SMP_INVALID_FIELD;
-	ret = recv_subn_get_portinfo(smp, ibdev, port);
-
-done:
-	return ret;
-}
-
-/**
- * rm_pkey - decrecment the reference count for the given PKEY
- * @dd: the infinipath device
- * @key: the PKEY index
- *
- * Return true if this was the last reference and the hardware table entry
- * needs to be changed.
- */
-static int rm_pkey(struct ipath_devdata *dd, u16 key)
-{
-	int i;
-	int ret;
-
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (dd->ipath_pkeys[i] != key)
-			continue;
-		if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
-			dd->ipath_pkeys[i] = 0;
-			ret = 1;
-			goto bail;
-		}
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * add_pkey - add the given PKEY to the hardware table
- * @dd: the infinipath device
- * @key: the PKEY
- *
- * Return an error code if unable to add the entry, zero if no change,
- * or 1 if the hardware PKEY register needs to be updated.
- */
-static int add_pkey(struct ipath_devdata *dd, u16 key)
-{
-	int i;
-	u16 lkey = key & 0x7FFF;
-	int any = 0;
-	int ret;
-
-	if (lkey == 0x7FFF) {
-		ret = 0;
-		goto bail;
-	}
-
-	/* Look for an empty slot or a matching PKEY. */
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i]) {
-			any++;
-			continue;
-		}
-		/* If it matches exactly, try to increment the ref count */
-		if (dd->ipath_pkeys[i] == key) {
-			if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
-				ret = 0;
-				goto bail;
-			}
-			/* Lost the race. Look for an empty slot below. */
-			atomic_dec(&dd->ipath_pkeyrefs[i]);
-			any++;
-		}
-		/*
-		 * It makes no sense to have both the limited and unlimited
-		 * PKEY set at the same time since the unlimited one will
-		 * disable the limited one.
-		 */
-		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-			ret = -EEXIST;
-			goto bail;
-		}
-	}
-	if (!any) {
-		ret = -EBUSY;
-		goto bail;
-	}
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i] &&
-		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-			/* for ipathstats, etc. */
-			ipath_stats.sps_pkeys[i] = lkey;
-			dd->ipath_pkeys[i] = key;
-			ret = 1;
-			goto bail;
-		}
-	}
-	ret = -EBUSY;
-
-bail:
-	return ret;
-}
-
-/**
- * set_pkeys - set the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the PKEY table
- */
-static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
-{
-	struct ipath_portdata *pd;
-	int i;
-	int changed = 0;
-
-	/* always a kernel port, no locking needed */
-	pd = dd->ipath_pd[0];
-
-	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-		u16 key = pkeys[i];
-		u16 okey = pd->port_pkeys[i];
-
-		if (key == okey)
-			continue;
-		/*
-		 * The value of this PKEY table entry is changing.
-		 * Remove the old entry in the hardware's array of PKEYs.
-		 */
-		if (okey & 0x7FFF)
-			changed |= rm_pkey(dd, okey);
-		if (key & 0x7FFF) {
-			int ret = add_pkey(dd, key);
-
-			if (ret < 0)
-				key = 0;
-			else
-				changed |= ret;
-		}
-		pd->port_pkeys[i] = key;
-	}
-	if (changed) {
-		u64 pkey;
-		struct ib_event event;
-
-		pkey = (u64) dd->ipath_pkeys[0] |
-			((u64) dd->ipath_pkeys[1] << 16) |
-			((u64) dd->ipath_pkeys[2] << 32) |
-			((u64) dd->ipath_pkeys[3] << 48);
-		ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
-			   (unsigned long long) pkey);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-				 pkey);
-
-		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev->ibdev;
-		event.element.port_num = port;
-		ib_dispatch_event(&event);
-	}
-	return 0;
-}
-
-static int recv_subn_set_pkeytable(struct ib_smp *smp,
-				   struct ib_device *ibdev, u8 port)
-{
-	u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
-	__be16 *p = (__be16 *) smp->data;
-	u16 *q = (u16 *) smp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	unsigned i, n = ipath_get_npkeys(dev->dd);
-
-	for (i = 0; i < n; i++)
-		q[i] = be16_to_cpu(p[i]);
-
-	if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
-		smp->status |= IB_SMP_INVALID_FIELD;
-
-	return recv_subn_get_pkeytable(smp, ibdev);
-}
-
-static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp)
-{
-	struct ib_class_port_info *p =
-		(struct ib_class_port_info *)pmp->data;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-
-	if (pmp->mad_hdr.attr_mod != 0)
-		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-	/* Indicate AllPortSelect is valid (only one port anyway) */
-	p->capability_mask = cpu_to_be16(1 << 8);
-	p->base_version = 1;
-	p->class_version = 1;
-	/*
-	 * Expected response time is 4.096 usec. * 2^18 == 1.073741824
-	 * sec.
-	 */
-	p->resp_time_value = 18;
-
-	return reply((struct ib_smp *) pmp);
-}
-
-/*
- * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
- * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
- * We support 5 counters which only count the mandatory quantities.
- */
-#define COUNTER_MASK(q, n) (q << ((9 - n) * 3))
-#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \
-				    COUNTER_MASK(1, 1) | \
-				    COUNTER_MASK(1, 2) | \
-				    COUNTER_MASK(1, 3) | \
-				    COUNTER_MASK(1, 4))
-
-static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
-					   struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portsamplescontrol *p =
-		(struct ib_pma_portsamplescontrol *)pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-	unsigned long flags;
-	u8 port_select = p->port_select;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-
-	p->port_select = port_select;
-	if (pmp->mad_hdr.attr_mod != 0 ||
-	    (port_select != port && port_select != 0xFF))
-		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-	/*
-	 * Ticks are 10x the link transfer period which for 2.5Gbs is 4
-	 * nsec.  0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec.  Sample
-	 * intervals are counted in ticks.  Since we use Linux timers, that
-	 * count in jiffies, we can't sample for less than 1000 ticks if HZ
-	 * == 1000 (4000 ticks if HZ is 250).  link_speed_active returns 2 for
-	 * DDR, 1 for SDR, set the tick to 1 for DDR, 0 for SDR on chips that
-	 * have hardware support for delaying packets.
-	 */
-	if (crp->cr_psstat)
-		p->tick = dev->dd->ipath_link_speed_active - 1;
-	else
-		p->tick = 250;		/* 1 usec. */
-	p->counter_width = 4;	/* 32 bit counters */
-	p->counter_mask0_9 = COUNTER_MASK0_9;
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	if (crp->cr_psstat)
-		p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-	else
-		p->sample_status = dev->pma_sample_status;
-	p->sample_start = cpu_to_be32(dev->pma_sample_start);
-	p->sample_interval = cpu_to_be32(dev->pma_sample_interval);
-	p->tag = cpu_to_be16(dev->pma_tag);
-	p->counter_select[0] = dev->pma_counter_select[0];
-	p->counter_select[1] = dev->pma_counter_select[1];
-	p->counter_select[2] = dev->pma_counter_select[2];
-	p->counter_select[3] = dev->pma_counter_select[3];
-	p->counter_select[4] = dev->pma_counter_select[4];
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-	return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
-					   struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portsamplescontrol *p =
-		(struct ib_pma_portsamplescontrol *)pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-	unsigned long flags;
-	u8 status;
-	int ret;
-
-	if (pmp->mad_hdr.attr_mod != 0 ||
-	    (p->port_select != port && p->port_select != 0xFF)) {
-		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-		ret = reply((struct ib_smp *) pmp);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	if (crp->cr_psstat)
-		status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-	else
-		status = dev->pma_sample_status;
-	if (status == IB_PMA_SAMPLE_STATUS_DONE) {
-		dev->pma_sample_start = be32_to_cpu(p->sample_start);
-		dev->pma_sample_interval = be32_to_cpu(p->sample_interval);
-		dev->pma_tag = be16_to_cpu(p->tag);
-		dev->pma_counter_select[0] = p->counter_select[0];
-		dev->pma_counter_select[1] = p->counter_select[1];
-		dev->pma_counter_select[2] = p->counter_select[2];
-		dev->pma_counter_select[3] = p->counter_select[3];
-		dev->pma_counter_select[4] = p->counter_select[4];
-		if (crp->cr_psstat) {
-			ipath_write_creg(dev->dd, crp->cr_psinterval,
-					 dev->pma_sample_interval);
-			ipath_write_creg(dev->dd, crp->cr_psstart,
-					 dev->pma_sample_start);
-		} else
-			dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED;
-	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-	ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port);
-
-bail:
-	return ret;
-}
-
-static u64 get_counter(struct ipath_ibdev *dev,
-		       struct ipath_cregs const *crp,
-		       __be16 sel)
-{
-	u64 ret;
-
-	switch (sel) {
-	case IB_PMA_PORT_XMIT_DATA:
-		ret = (crp->cr_psxmitdatacount) ?
-			ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) :
-			dev->ipath_sword;
-		break;
-	case IB_PMA_PORT_RCV_DATA:
-		ret = (crp->cr_psrcvdatacount) ?
-			ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) :
-			dev->ipath_rword;
-		break;
-	case IB_PMA_PORT_XMIT_PKTS:
-		ret = (crp->cr_psxmitpktscount) ?
-			ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) :
-			dev->ipath_spkts;
-		break;
-	case IB_PMA_PORT_RCV_PKTS:
-		ret = (crp->cr_psrcvpktscount) ?
-			ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) :
-			dev->ipath_rpkts;
-		break;
-	case IB_PMA_PORT_XMIT_WAIT:
-		ret = (crp->cr_psxmitwaitcount) ?
-			ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) :
-			dev->ipath_xmit_wait;
-		break;
-	default:
-		ret = 0;
-	}
-
-	return ret;
-}
-
-static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp,
-					  struct ib_device *ibdev)
-{
-	struct ib_pma_portsamplesresult *p =
-		(struct ib_pma_portsamplesresult *)pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-	u8 status;
-	int i;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-	p->tag = cpu_to_be16(dev->pma_tag);
-	if (crp->cr_psstat)
-		status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-	else
-		status = dev->pma_sample_status;
-	p->sample_status = cpu_to_be16(status);
-	for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
-		p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
-		    cpu_to_be32(
-			get_counter(dev, crp, dev->pma_counter_select[i]));
-
-	return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
-					      struct ib_device *ibdev)
-{
-	struct ib_pma_portsamplesresult_ext *p =
-		(struct ib_pma_portsamplesresult_ext *)pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_cregs const *crp = dev->dd->ipath_cregs;
-	u8 status;
-	int i;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-	p->tag = cpu_to_be16(dev->pma_tag);
-	if (crp->cr_psstat)
-		status = ipath_read_creg32(dev->dd, crp->cr_psstat);
-	else
-		status = dev->pma_sample_status;
-	p->sample_status = cpu_to_be16(status);
-	/* 64 bits */
-	p->extended_width = cpu_to_be32(0x80000000);
-	for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
-		p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
-		    cpu_to_be64(
-			get_counter(dev, crp, dev->pma_counter_select[i]));
-
-	return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portcounters(struct ib_pma_mad *pmp,
-				     struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-		pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_verbs_counters cntrs;
-	u8 port_select = p->port_select;
-
-	ipath_get_counters(dev->dd, &cntrs);
-
-	/* Adjust counters for any resets done. */
-	cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
-	cntrs.link_error_recovery_counter -=
-		dev->z_link_error_recovery_counter;
-	cntrs.link_downed_counter -= dev->z_link_downed_counter;
-	cntrs.port_rcv_errors += dev->rcv_errors;
-	cntrs.port_rcv_errors -= dev->z_port_rcv_errors;
-	cntrs.port_rcv_remphys_errors -= dev->z_port_rcv_remphys_errors;
-	cntrs.port_xmit_discards -= dev->z_port_xmit_discards;
-	cntrs.port_xmit_data -= dev->z_port_xmit_data;
-	cntrs.port_rcv_data -= dev->z_port_rcv_data;
-	cntrs.port_xmit_packets -= dev->z_port_xmit_packets;
-	cntrs.port_rcv_packets -= dev->z_port_rcv_packets;
-	cntrs.local_link_integrity_errors -=
-		dev->z_local_link_integrity_errors;
-	cntrs.excessive_buffer_overrun_errors -=
-		dev->z_excessive_buffer_overrun_errors;
-	cntrs.vl15_dropped -= dev->z_vl15_dropped;
-	cntrs.vl15_dropped += dev->n_vl15_dropped;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-
-	p->port_select = port_select;
-	if (pmp->mad_hdr.attr_mod != 0 ||
-	    (port_select != port && port_select != 0xFF))
-		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-	if (cntrs.symbol_error_counter > 0xFFFFUL)
-		p->symbol_error_counter = cpu_to_be16(0xFFFF);
-	else
-		p->symbol_error_counter =
-			cpu_to_be16((u16)cntrs.symbol_error_counter);
-	if (cntrs.link_error_recovery_counter > 0xFFUL)
-		p->link_error_recovery_counter = 0xFF;
-	else
-		p->link_error_recovery_counter =
-			(u8)cntrs.link_error_recovery_counter;
-	if (cntrs.link_downed_counter > 0xFFUL)
-		p->link_downed_counter = 0xFF;
-	else
-		p->link_downed_counter = (u8)cntrs.link_downed_counter;
-	if (cntrs.port_rcv_errors > 0xFFFFUL)
-		p->port_rcv_errors = cpu_to_be16(0xFFFF);
-	else
-		p->port_rcv_errors =
-			cpu_to_be16((u16) cntrs.port_rcv_errors);
-	if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
-		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
-	else
-		p->port_rcv_remphys_errors =
-			cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
-	if (cntrs.port_xmit_discards > 0xFFFFUL)
-		p->port_xmit_discards = cpu_to_be16(0xFFFF);
-	else
-		p->port_xmit_discards =
-			cpu_to_be16((u16)cntrs.port_xmit_discards);
-	if (cntrs.local_link_integrity_errors > 0xFUL)
-		cntrs.local_link_integrity_errors = 0xFUL;
-	if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
-		cntrs.excessive_buffer_overrun_errors = 0xFUL;
-	p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
-		cntrs.excessive_buffer_overrun_errors;
-	if (cntrs.vl15_dropped > 0xFFFFUL)
-		p->vl15_dropped = cpu_to_be16(0xFFFF);
-	else
-		p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
-	if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
-		p->port_xmit_data = cpu_to_be32(0xFFFFFFFF);
-	else
-		p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
-	if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
-		p->port_rcv_data = cpu_to_be32(0xFFFFFFFF);
-	else
-		p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
-	if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
-		p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF);
-	else
-		p->port_xmit_packets =
-			cpu_to_be32((u32)cntrs.port_xmit_packets);
-	if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
-		p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF);
-	else
-		p->port_rcv_packets =
-			cpu_to_be32((u32) cntrs.port_rcv_packets);
-
-	return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp,
-					 struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portcounters_ext *p =
-		(struct ib_pma_portcounters_ext *)pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	u64 swords, rwords, spkts, rpkts, xwait;
-	u8 port_select = p->port_select;
-
-	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				&rpkts, &xwait);
-
-	/* Adjust counters for any resets done. */
-	swords -= dev->z_port_xmit_data;
-	rwords -= dev->z_port_rcv_data;
-	spkts -= dev->z_port_xmit_packets;
-	rpkts -= dev->z_port_rcv_packets;
-
-	memset(pmp->data, 0, sizeof(pmp->data));
-
-	p->port_select = port_select;
-	if (pmp->mad_hdr.attr_mod != 0 ||
-	    (port_select != port && port_select != 0xFF))
-		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
-
-	p->port_xmit_data = cpu_to_be64(swords);
-	p->port_rcv_data = cpu_to_be64(rwords);
-	p->port_xmit_packets = cpu_to_be64(spkts);
-	p->port_rcv_packets = cpu_to_be64(rpkts);
-	p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit);
-	p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv);
-	p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit);
-	p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv);
-
-	return reply((struct ib_smp *) pmp);
-}
-
-static int recv_pma_set_portcounters(struct ib_pma_mad *pmp,
-				     struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-		pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_verbs_counters cntrs;
-
-	/*
-	 * Since the HW doesn't support clearing counters, we save the
-	 * current count and subtract it from future responses.
-	 */
-	ipath_get_counters(dev->dd, &cntrs);
-
-	if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
-		dev->z_symbol_error_counter = cntrs.symbol_error_counter;
-
-	if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY)
-		dev->z_link_error_recovery_counter =
-			cntrs.link_error_recovery_counter;
-
-	if (p->counter_select & IB_PMA_SEL_LINK_DOWNED)
-		dev->z_link_downed_counter = cntrs.link_downed_counter;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS)
-		dev->z_port_rcv_errors =
-			cntrs.port_rcv_errors + dev->rcv_errors;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS)
-		dev->z_port_rcv_remphys_errors =
-			cntrs.port_rcv_remphys_errors;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
-		dev->z_port_xmit_discards = cntrs.port_xmit_discards;
-
-	if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS)
-		dev->z_local_link_integrity_errors =
-			cntrs.local_link_integrity_errors;
-
-	if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS)
-		dev->z_excessive_buffer_overrun_errors =
-			cntrs.excessive_buffer_overrun_errors;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
-		dev->n_vl15_dropped = 0;
-		dev->z_vl15_dropped = cntrs.vl15_dropped;
-	}
-
-	if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
-		dev->z_port_xmit_data = cntrs.port_xmit_data;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA)
-		dev->z_port_rcv_data = cntrs.port_rcv_data;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS)
-		dev->z_port_xmit_packets = cntrs.port_xmit_packets;
-
-	if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS)
-		dev->z_port_rcv_packets = cntrs.port_rcv_packets;
-
-	return recv_pma_get_portcounters(pmp, ibdev, port);
-}
-
-static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp,
-					 struct ib_device *ibdev, u8 port)
-{
-	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
-		pmp->data;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	u64 swords, rwords, spkts, rpkts, xwait;
-
-	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				&rpkts, &xwait);
-
-	if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
-		dev->z_port_xmit_data = swords;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA)
-		dev->z_port_rcv_data = rwords;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS)
-		dev->z_port_xmit_packets = spkts;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
-		dev->z_port_rcv_packets = rpkts;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
-		dev->n_unicast_xmit = 0;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
-		dev->n_unicast_rcv = 0;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
-		dev->n_multicast_xmit = 0;
-
-	if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
-		dev->n_multicast_rcv = 0;
-
-	return recv_pma_get_portcounters_ext(pmp, ibdev, port);
-}
-
-static int process_subn(struct ib_device *ibdev, int mad_flags,
-			u8 port_num, const struct ib_mad *in_mad,
-			struct ib_mad *out_mad)
-{
-	struct ib_smp *smp = (struct ib_smp *)out_mad;
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	int ret;
-
-	*out_mad = *in_mad;
-	if (smp->class_version != 1) {
-		smp->status |= IB_SMP_UNSUP_VERSION;
-		ret = reply(smp);
-		goto bail;
-	}
-
-	/* Is the mkey in the process of expiring? */
-	if (dev->mkey_lease_timeout &&
-	    time_after_eq(jiffies, dev->mkey_lease_timeout)) {
-		/* Clear timeout and mkey protection field. */
-		dev->mkey_lease_timeout = 0;
-		dev->mkeyprot = 0;
-	}
-
-	/*
-	 * M_Key checking depends on
-	 * Portinfo:M_Key_protect_bits
-	 */
-	if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 &&
-	    dev->mkey != smp->mkey &&
-	    (smp->method == IB_MGMT_METHOD_SET ||
-	     (smp->method == IB_MGMT_METHOD_GET &&
-	      dev->mkeyprot >= 2))) {
-		if (dev->mkey_violations != 0xFFFF)
-			++dev->mkey_violations;
-		if (dev->mkey_lease_timeout ||
-		    dev->mkey_lease_period == 0) {
-			ret = IB_MAD_RESULT_SUCCESS |
-				IB_MAD_RESULT_CONSUMED;
-			goto bail;
-		}
-		dev->mkey_lease_timeout = jiffies +
-			dev->mkey_lease_period * HZ;
-		/* Future: Generate a trap notice. */
-		ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-		goto bail;
-	} else if (dev->mkey_lease_timeout)
-		dev->mkey_lease_timeout = 0;
-
-	switch (smp->method) {
-	case IB_MGMT_METHOD_GET:
-		switch (smp->attr_id) {
-		case IB_SMP_ATTR_NODE_DESC:
-			ret = recv_subn_get_nodedescription(smp, ibdev);
-			goto bail;
-		case IB_SMP_ATTR_NODE_INFO:
-			ret = recv_subn_get_nodeinfo(smp, ibdev, port_num);
-			goto bail;
-		case IB_SMP_ATTR_GUID_INFO:
-			ret = recv_subn_get_guidinfo(smp, ibdev);
-			goto bail;
-		case IB_SMP_ATTR_PORT_INFO:
-			ret = recv_subn_get_portinfo(smp, ibdev, port_num);
-			goto bail;
-		case IB_SMP_ATTR_PKEY_TABLE:
-			ret = recv_subn_get_pkeytable(smp, ibdev);
-			goto bail;
-		case IB_SMP_ATTR_SM_INFO:
-			if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
-				ret = IB_MAD_RESULT_SUCCESS |
-					IB_MAD_RESULT_CONSUMED;
-				goto bail;
-			}
-			if (dev->port_cap_flags & IB_PORT_SM) {
-				ret = IB_MAD_RESULT_SUCCESS;
-				goto bail;
-			}
-			/* FALLTHROUGH */
-		default:
-			smp->status |= IB_SMP_UNSUP_METH_ATTR;
-			ret = reply(smp);
-			goto bail;
-		}
-
-	case IB_MGMT_METHOD_SET:
-		switch (smp->attr_id) {
-		case IB_SMP_ATTR_GUID_INFO:
-			ret = recv_subn_set_guidinfo(smp, ibdev);
-			goto bail;
-		case IB_SMP_ATTR_PORT_INFO:
-			ret = recv_subn_set_portinfo(smp, ibdev, port_num);
-			goto bail;
-		case IB_SMP_ATTR_PKEY_TABLE:
-			ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
-			goto bail;
-		case IB_SMP_ATTR_SM_INFO:
-			if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
-				ret = IB_MAD_RESULT_SUCCESS |
-					IB_MAD_RESULT_CONSUMED;
-				goto bail;
-			}
-			if (dev->port_cap_flags & IB_PORT_SM) {
-				ret = IB_MAD_RESULT_SUCCESS;
-				goto bail;
-			}
-			/* FALLTHROUGH */
-		default:
-			smp->status |= IB_SMP_UNSUP_METH_ATTR;
-			ret = reply(smp);
-			goto bail;
-		}
-
-	case IB_MGMT_METHOD_TRAP:
-	case IB_MGMT_METHOD_REPORT:
-	case IB_MGMT_METHOD_REPORT_RESP:
-	case IB_MGMT_METHOD_TRAP_REPRESS:
-	case IB_MGMT_METHOD_GET_RESP:
-		/*
-		 * The ib_mad module will call us to process responses
-		 * before checking for other consumers.
-		 * Just tell the caller to process it normally.
-		 */
-		ret = IB_MAD_RESULT_SUCCESS;
-		goto bail;
-	default:
-		smp->status |= IB_SMP_UNSUP_METHOD;
-		ret = reply(smp);
-	}
-
-bail:
-	return ret;
-}
-
-static int process_perf(struct ib_device *ibdev, u8 port_num,
-			const struct ib_mad *in_mad,
-			struct ib_mad *out_mad)
-{
-	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
-	int ret;
-
-	*out_mad = *in_mad;
-	if (pmp->mad_hdr.class_version != 1) {
-		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
-		ret = reply((struct ib_smp *) pmp);
-		goto bail;
-	}
-
-	switch (pmp->mad_hdr.method) {
-	case IB_MGMT_METHOD_GET:
-		switch (pmp->mad_hdr.attr_id) {
-		case IB_PMA_CLASS_PORT_INFO:
-			ret = recv_pma_get_classportinfo(pmp);
-			goto bail;
-		case IB_PMA_PORT_SAMPLES_CONTROL:
-			ret = recv_pma_get_portsamplescontrol(pmp, ibdev,
-							      port_num);
-			goto bail;
-		case IB_PMA_PORT_SAMPLES_RESULT:
-			ret = recv_pma_get_portsamplesresult(pmp, ibdev);
-			goto bail;
-		case IB_PMA_PORT_SAMPLES_RESULT_EXT:
-			ret = recv_pma_get_portsamplesresult_ext(pmp,
-								 ibdev);
-			goto bail;
-		case IB_PMA_PORT_COUNTERS:
-			ret = recv_pma_get_portcounters(pmp, ibdev,
-							port_num);
-			goto bail;
-		case IB_PMA_PORT_COUNTERS_EXT:
-			ret = recv_pma_get_portcounters_ext(pmp, ibdev,
-							    port_num);
-			goto bail;
-		default:
-			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
-			ret = reply((struct ib_smp *) pmp);
-			goto bail;
-		}
-
-	case IB_MGMT_METHOD_SET:
-		switch (pmp->mad_hdr.attr_id) {
-		case IB_PMA_PORT_SAMPLES_CONTROL:
-			ret = recv_pma_set_portsamplescontrol(pmp, ibdev,
-							      port_num);
-			goto bail;
-		case IB_PMA_PORT_COUNTERS:
-			ret = recv_pma_set_portcounters(pmp, ibdev,
-							port_num);
-			goto bail;
-		case IB_PMA_PORT_COUNTERS_EXT:
-			ret = recv_pma_set_portcounters_ext(pmp, ibdev,
-							    port_num);
-			goto bail;
-		default:
-			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
-			ret = reply((struct ib_smp *) pmp);
-			goto bail;
-		}
-
-	case IB_MGMT_METHOD_GET_RESP:
-		/*
-		 * The ib_mad module will call us to process responses
-		 * before checking for other consumers.
-		 * Just tell the caller to process it normally.
-		 */
-		ret = IB_MAD_RESULT_SUCCESS;
-		goto bail;
-	default:
-		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
-		ret = reply((struct ib_smp *) pmp);
-	}
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_process_mad - process an incoming MAD packet
- * @ibdev: the infiniband device this packet came in on
- * @mad_flags: MAD flags
- * @port_num: the port number this packet came in on
- * @in_wc: the work completion entry for this packet
- * @in_grh: the global route header for this packet
- * @in_mad: the incoming MAD
- * @out_mad: any outgoing MAD reply
- *
- * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
- * interested in processing.
- *
- * Note that the verbs framework has already done the MAD sanity checks,
- * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
- * MADs.
- *
- * This is called by the ib_mad module.
- */
-int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-		      const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		      const struct ib_mad_hdr *in, size_t in_mad_size,
-		      struct ib_mad_hdr *out, size_t *out_mad_size,
-		      u16 *out_mad_pkey_index)
-{
-	int ret;
-	const struct ib_mad *in_mad = (const struct ib_mad *)in;
-	struct ib_mad *out_mad = (struct ib_mad *)out;
-
-	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-			 *out_mad_size != sizeof(*out_mad)))
-		return IB_MAD_RESULT_FAILURE;
-
-	switch (in_mad->mad_hdr.mgmt_class) {
-	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
-	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
-		ret = process_subn(ibdev, mad_flags, port_num,
-				   in_mad, out_mad);
-		goto bail;
-	case IB_MGMT_CLASS_PERF_MGMT:
-		ret = process_perf(ibdev, port_num, in_mad, out_mad);
-		goto bail;
-	default:
-		ret = IB_MAD_RESULT_SUCCESS;
-	}
-
-bail:
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mmap.c b/drivers/staging/rdma/ipath/ipath_mmap.c
deleted file mode 100644
index e73274229404..000000000000
--- a/drivers/staging/rdma/ipath/ipath_mmap.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <asm/pgtable.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct ipath_mmap_info
- */
-void ipath_release_mmap_info(struct kref *ref)
-{
-	struct ipath_mmap_info *ip =
-		container_of(ref, struct ipath_mmap_info, ref);
-	struct ipath_ibdev *dev = to_idev(ip->context->device);
-
-	spin_lock_irq(&dev->pending_lock);
-	list_del(&ip->pending_mmaps);
-	spin_unlock_irq(&dev->pending_lock);
-
-	vfree(ip->obj);
-	kfree(ip);
-}
-
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void ipath_vma_open(struct vm_area_struct *vma)
-{
-	struct ipath_mmap_info *ip = vma->vm_private_data;
-
-	kref_get(&ip->ref);
-}
-
-static void ipath_vma_close(struct vm_area_struct *vma)
-{
-	struct ipath_mmap_info *ip = vma->vm_private_data;
-
-	kref_put(&ip->ref, ipath_release_mmap_info);
-}
-
-static const struct vm_operations_struct ipath_vm_ops = {
-	.open =     ipath_vma_open,
-	.close =    ipath_vma_close,
-};
-
-/**
- * ipath_mmap - create a new mmap region
- * @context: the IB user context of the process making the mmap() call
- * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
- */
-int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	struct ipath_ibdev *dev = to_idev(context->device);
-	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long size = vma->vm_end - vma->vm_start;
-	struct ipath_mmap_info *ip, *pp;
-	int ret = -EINVAL;
-
-	/*
-	 * Search the device's list of objects waiting for a mmap call.
-	 * Normally, this list is very short since a call to create a
-	 * CQ, QP, or SRQ is soon followed by a call to mmap().
-	 */
-	spin_lock_irq(&dev->pending_lock);
-	list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
-				 pending_mmaps) {
-		/* Only the creator is allowed to mmap the object */
-		if (context != ip->context || (__u64) offset != ip->offset)
-			continue;
-		/* Don't allow a mmap larger than the object. */
-		if (size > ip->size)
-			break;
-
-		list_del_init(&ip->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-
-		ret = remap_vmalloc_range(vma, ip->obj, 0);
-		if (ret)
-			goto done;
-		vma->vm_ops = &ipath_vm_ops;
-		vma->vm_private_data = ip;
-		ipath_vma_open(vma);
-		goto done;
-	}
-	spin_unlock_irq(&dev->pending_lock);
-done:
-	return ret;
-}
-
-/*
- * Allocate information for ipath_mmap
- */
-struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
-					       u32 size,
-					       struct ib_ucontext *context,
-					       void *obj) {
-	struct ipath_mmap_info *ip;
-
-	ip = kmalloc(sizeof *ip, GFP_KERNEL);
-	if (!ip)
-		goto bail;
-
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	INIT_LIST_HEAD(&ip->pending_mmaps);
-	ip->size = size;
-	ip->context = context;
-	ip->obj = obj;
-	kref_init(&ip->ref);
-
-bail:
-	return ip;
-}
-
-void ipath_update_mmap_info(struct ipath_ibdev *dev,
-			    struct ipath_mmap_info *ip,
-			    u32 size, void *obj) {
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	ip->size = size;
-	ip->obj = obj;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_mr.c b/drivers/staging/rdma/ipath/ipath_mr.c
deleted file mode 100644
index b76b0ce66709..000000000000
--- a/drivers/staging/rdma/ipath/ipath_mr.c
+++ /dev/null
@@ -1,370 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/slab.h>
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_pack.h>
-#include <rdma/ib_smi.h>
-
-#include "ipath_verbs.h"
-
-/* Fast memory region */
-struct ipath_fmr {
-	struct ib_fmr ibfmr;
-	u8 page_shift;
-	struct ipath_mregion mr;        /* must be last */
-};
-
-static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-	return container_of(ibfmr, struct ipath_fmr, ibfmr);
-}
-
-/**
- * ipath_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see ipath_dma.c).
- */
-struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
-{
-	struct ipath_mr *mr;
-	struct ib_mr *ret;
-
-	mr = kzalloc(sizeof *mr, GFP_KERNEL);
-	if (!mr) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	mr->mr.access_flags = acc;
-	ret = &mr->ibmr;
-
-bail:
-	return ret;
-}
-
-static struct ipath_mr *alloc_mr(int count,
-				 struct ipath_lkey_table *lk_table)
-{
-	struct ipath_mr *mr;
-	int m, i = 0;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
-	mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
-	if (!mr)
-		goto done;
-
-	/* Allocate first level page tables. */
-	for (; i < m; i++) {
-		mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
-		if (!mr->mr.map[i])
-			goto bail;
-	}
-	mr->mr.mapsz = m;
-
-	if (!ipath_alloc_lkey(lk_table, &mr->mr))
-		goto bail;
-	mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
-
-	goto done;
-
-bail:
-	while (i) {
-		i--;
-		kfree(mr->mr.map[i]);
-	}
-	kfree(mr);
-	mr = NULL;
-
-done:
-	return mr;
-}
-
-/**
- * ipath_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @virt_addr: virtual address to use (from HCA's point of view)
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the InfiniPath driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-				u64 virt_addr, int mr_access_flags,
-				struct ib_udata *udata)
-{
-	struct ipath_mr *mr;
-	struct ib_umem *umem;
-	int n, m, entry;
-	struct scatterlist *sg;
-	struct ib_mr *ret;
-
-	if (length == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	umem = ib_umem_get(pd->uobject->context, start, length,
-			   mr_access_flags, 0);
-	if (IS_ERR(umem))
-		return (void *) umem;
-
-	n = umem->nmap;
-	mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
-	if (!mr) {
-		ret = ERR_PTR(-ENOMEM);
-		ib_umem_release(umem);
-		goto bail;
-	}
-
-	mr->mr.pd = pd;
-	mr->mr.user_base = start;
-	mr->mr.iova = virt_addr;
-	mr->mr.length = length;
-	mr->mr.offset = ib_umem_offset(umem);
-	mr->mr.access_flags = mr_access_flags;
-	mr->mr.max_segs = n;
-	mr->umem = umem;
-
-	m = 0;
-	n = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		void *vaddr;
-
-		vaddr = page_address(sg_page(sg));
-		if (!vaddr) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		mr->mr.map[m]->segs[n].vaddr = vaddr;
-		mr->mr.map[m]->segs[n].length = umem->page_size;
-		n++;
-		if (n == IPATH_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	ret = &mr->ibmr;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by ipath_get_dma_mr()
- * or ipath_reg_user_mr().
- */
-int ipath_dereg_mr(struct ib_mr *ibmr)
-{
-	struct ipath_mr *mr = to_imr(ibmr);
-	int i;
-
-	ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey);
-	i = mr->mr.mapsz;
-	while (i) {
-		i--;
-		kfree(mr->mr.map[i]);
-	}
-
-	if (mr->umem)
-		ib_umem_release(mr->umem);
-
-	kfree(mr);
-	return 0;
-}
-
-/**
- * ipath_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			       struct ib_fmr_attr *fmr_attr)
-{
-	struct ipath_fmr *fmr;
-	int m, i = 0;
-	struct ib_fmr *ret;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
-	fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
-	if (!fmr)
-		goto bail;
-
-	/* Allocate first level page tables. */
-	for (; i < m; i++) {
-		fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
-					 GFP_KERNEL);
-		if (!fmr->mr.map[i])
-			goto bail;
-	}
-	fmr->mr.mapsz = m;
-
-	/*
-	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
-	 * rkey.
-	 */
-	if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
-		goto bail;
-	fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey;
-	/*
-	 * Resources are allocated but no valid mapping (RKEY can't be
-	 * used).
-	 */
-	fmr->mr.pd = pd;
-	fmr->mr.user_base = 0;
-	fmr->mr.iova = 0;
-	fmr->mr.length = 0;
-	fmr->mr.offset = 0;
-	fmr->mr.access_flags = mr_access_flags;
-	fmr->mr.max_segs = fmr_attr->max_pages;
-	fmr->page_shift = fmr_attr->page_shift;
-
-	ret = &fmr->ibfmr;
-	goto done;
-
-bail:
-	while (i)
-		kfree(fmr->mr.map[--i]);
-	kfree(fmr);
-	ret = ERR_PTR(-ENOMEM);
-
-done:
-	return ret;
-}
-
-/**
- * ipath_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
-		       int list_len, u64 iova)
-{
-	struct ipath_fmr *fmr = to_ifmr(ibfmr);
-	struct ipath_lkey_table *rkt;
-	unsigned long flags;
-	int m, n, i;
-	u32 ps;
-	int ret;
-
-	if (list_len > fmr->mr.max_segs) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	rkt = &to_idev(ibfmr->device)->lk_table;
-	spin_lock_irqsave(&rkt->lock, flags);
-	fmr->mr.user_base = iova;
-	fmr->mr.iova = iova;
-	ps = 1 << fmr->page_shift;
-	fmr->mr.length = list_len * ps;
-	m = 0;
-	n = 0;
-	ps = 1 << fmr->page_shift;
-	for (i = 0; i < list_len; i++) {
-		fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
-		fmr->mr.map[m]->segs[n].length = ps;
-		if (++n == IPATH_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int ipath_unmap_fmr(struct list_head *fmr_list)
-{
-	struct ipath_fmr *fmr;
-	struct ipath_lkey_table *rkt;
-	unsigned long flags;
-
-	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
-		rkt = &to_idev(fmr->ibfmr.device)->lk_table;
-		spin_lock_irqsave(&rkt->lock, flags);
-		fmr->mr.user_base = 0;
-		fmr->mr.iova = 0;
-		fmr->mr.length = 0;
-		spin_unlock_irqrestore(&rkt->lock, flags);
-	}
-	return 0;
-}
-
-/**
- * ipath_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int ipath_dealloc_fmr(struct ib_fmr *ibfmr)
-{
-	struct ipath_fmr *fmr = to_ifmr(ibfmr);
-	int i;
-
-	ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey);
-	i = fmr->mr.mapsz;
-	while (i)
-		kfree(fmr->mr.map[--i]);
-	kfree(fmr);
-	return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_qp.c b/drivers/staging/rdma/ipath/ipath_qp.c
deleted file mode 100644
index 280cd2d638e4..000000000000
--- a/drivers/staging/rdma/ipath/ipath_qp.c
+++ /dev/null
@@ -1,1079 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-#define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
-#define mk_qpn(qpt, map, off)	(((map) - (qpt)->map) * BITS_PER_PAGE + \
-				 (off))
-#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
-						      BITS_PER_PAGE, off)
-
-/*
- * Convert the AETH credit code into the number of credits.
- */
-static u32 credit_table[31] = {
-	0,			/* 0 */
-	1,			/* 1 */
-	2,			/* 2 */
-	3,			/* 3 */
-	4,			/* 4 */
-	6,			/* 5 */
-	8,			/* 6 */
-	12,			/* 7 */
-	16,			/* 8 */
-	24,			/* 9 */
-	32,			/* A */
-	48,			/* B */
-	64,			/* C */
-	96,			/* D */
-	128,			/* E */
-	192,			/* F */
-	256,			/* 10 */
-	384,			/* 11 */
-	512,			/* 12 */
-	768,			/* 13 */
-	1024,			/* 14 */
-	1536,			/* 15 */
-	2048,			/* 16 */
-	3072,			/* 17 */
-	4096,			/* 18 */
-	6144,			/* 19 */
-	8192,			/* 1A */
-	12288,			/* 1B */
-	16384,			/* 1C */
-	24576,			/* 1D */
-	32768			/* 1E */
-};
-
-
-static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
-{
-	unsigned long page = get_zeroed_page(GFP_KERNEL);
-	unsigned long flags;
-
-	/*
-	 * Free the page if someone raced with us installing it.
-	 */
-
-	spin_lock_irqsave(&qpt->lock, flags);
-	if (map->page)
-		free_page(page);
-	else
-		map->page = (void *)page;
-	spin_unlock_irqrestore(&qpt->lock, flags);
-}
-
-
-static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
-{
-	u32 i, offset, max_scan, qpn;
-	struct qpn_map *map;
-	u32 ret = -1;
-
-	if (type == IB_QPT_SMI)
-		ret = 0;
-	else if (type == IB_QPT_GSI)
-		ret = 1;
-
-	if (ret != -1) {
-		map = &qpt->map[0];
-		if (unlikely(!map->page)) {
-			get_map_page(qpt, map);
-			if (unlikely(!map->page)) {
-				ret = -ENOMEM;
-				goto bail;
-			}
-		}
-		if (!test_and_set_bit(ret, map->page))
-			atomic_dec(&map->n_free);
-		else
-			ret = -EBUSY;
-		goto bail;
-	}
-
-	qpn = qpt->last + 1;
-	if (qpn >= QPN_MAX)
-		qpn = 2;
-	offset = qpn & BITS_PER_PAGE_MASK;
-	map = &qpt->map[qpn / BITS_PER_PAGE];
-	max_scan = qpt->nmaps - !offset;
-	for (i = 0;;) {
-		if (unlikely(!map->page)) {
-			get_map_page(qpt, map);
-			if (unlikely(!map->page))
-				break;
-		}
-		if (likely(atomic_read(&map->n_free))) {
-			do {
-				if (!test_and_set_bit(offset, map->page)) {
-					atomic_dec(&map->n_free);
-					qpt->last = qpn;
-					ret = qpn;
-					goto bail;
-				}
-				offset = find_next_offset(map, offset);
-				qpn = mk_qpn(qpt, map, offset);
-				/*
-				 * This test differs from alloc_pidmap().
-				 * If find_next_offset() does find a zero
-				 * bit, we don't need to check for QPN
-				 * wrapping around past our starting QPN.
-				 * We just need to be sure we don't loop
-				 * forever.
-				 */
-			} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
-		}
-		/*
-		 * In order to keep the number of pages allocated to a
-		 * minimum, we scan the all existing pages before increasing
-		 * the size of the bitmap table.
-		 */
-		if (++i > max_scan) {
-			if (qpt->nmaps == QPNMAP_ENTRIES)
-				break;
-			map = &qpt->map[qpt->nmaps++];
-			offset = 0;
-		} else if (map < &qpt->map[qpt->nmaps]) {
-			++map;
-			offset = 0;
-		} else {
-			map = &qpt->map[0];
-			offset = 2;
-		}
-		qpn = mk_qpn(qpt, map, offset);
-	}
-
-	ret = -ENOMEM;
-
-bail:
-	return ret;
-}
-
-static void free_qpn(struct ipath_qp_table *qpt, u32 qpn)
-{
-	struct qpn_map *map;
-
-	map = qpt->map + qpn / BITS_PER_PAGE;
-	if (map->page)
-		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-	atomic_inc(&map->n_free);
-}
-
-/**
- * ipath_alloc_qpn - allocate a QP number
- * @qpt: the QP table
- * @qp: the QP
- * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special)
- *
- * Allocate the next available QPN and put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
-			   enum ib_qp_type type)
-{
-	unsigned long flags;
-	int ret;
-
-	ret = alloc_qpn(qpt, type);
-	if (ret < 0)
-		goto bail;
-	qp->ibqp.qp_num = ret;
-
-	/* Add the QP to the hash table. */
-	spin_lock_irqsave(&qpt->lock, flags);
-
-	ret %= qpt->max;
-	qp->next = qpt->table[ret];
-	qpt->table[ret] = qp;
-	atomic_inc(&qp->refcount);
-
-	spin_unlock_irqrestore(&qpt->lock, flags);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_free_qp - remove a QP from the QP table
- * @qpt: the QP table
- * @qp: the QP to remove
- *
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
-{
-	struct ipath_qp *q, **qpp;
-	unsigned long flags;
-
-	spin_lock_irqsave(&qpt->lock, flags);
-
-	/* Remove QP from the hash table. */
-	qpp = &qpt->table[qp->ibqp.qp_num % qpt->max];
-	for (; (q = *qpp) != NULL; qpp = &q->next) {
-		if (q == qp) {
-			*qpp = qp->next;
-			qp->next = NULL;
-			atomic_dec(&qp->refcount);
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&qpt->lock, flags);
-}
-
-/**
- * ipath_free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
- */
-unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
-{
-	unsigned long flags;
-	struct ipath_qp *qp;
-	u32 n, qp_inuse = 0;
-
-	spin_lock_irqsave(&qpt->lock, flags);
-	for (n = 0; n < qpt->max; n++) {
-		qp = qpt->table[n];
-		qpt->table[n] = NULL;
-
-		for (; qp; qp = qp->next)
-			qp_inuse++;
-	}
-	spin_unlock_irqrestore(&qpt->lock, flags);
-
-	for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
-		if (qpt->map[n].page)
-			free_page((unsigned long) qpt->map[n].page);
-	return qp_inuse;
-}
-
-/**
- * ipath_lookup_qpn - return the QP with the given QPN
- * @qpt: the QP table
- * @qpn: the QP number to look up
- *
- * The caller is responsible for decrementing the QP reference count
- * when done.
- */
-struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn)
-{
-	unsigned long flags;
-	struct ipath_qp *qp;
-
-	spin_lock_irqsave(&qpt->lock, flags);
-
-	for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) {
-		if (qp->ibqp.qp_num == qpn) {
-			atomic_inc(&qp->refcount);
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&qpt->lock, flags);
-	return qp;
-}
-
-/**
- * ipath_reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
-{
-	qp->remote_qpn = 0;
-	qp->qkey = 0;
-	qp->qp_access_flags = 0;
-	atomic_set(&qp->s_dma_busy, 0);
-	qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
-	qp->s_hdrwords = 0;
-	qp->s_wqe = NULL;
-	qp->s_pkt_delay = 0;
-	qp->s_draining = 0;
-	qp->s_psn = 0;
-	qp->r_psn = 0;
-	qp->r_msn = 0;
-	if (type == IB_QPT_RC) {
-		qp->s_state = IB_OPCODE_RC_SEND_LAST;
-		qp->r_state = IB_OPCODE_RC_SEND_LAST;
-	} else {
-		qp->s_state = IB_OPCODE_UC_SEND_LAST;
-		qp->r_state = IB_OPCODE_UC_SEND_LAST;
-	}
-	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-	qp->r_nak_state = 0;
-	qp->r_aflags = 0;
-	qp->r_flags = 0;
-	qp->s_rnr_timeout = 0;
-	qp->s_head = 0;
-	qp->s_tail = 0;
-	qp->s_cur = 0;
-	qp->s_last = 0;
-	qp->s_ssn = 1;
-	qp->s_lsn = 0;
-	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
-	qp->r_head_ack_queue = 0;
-	qp->s_tail_ack_queue = 0;
-	qp->s_num_rd_atomic = 0;
-	if (qp->r_rq.wq) {
-		qp->r_rq.wq->head = 0;
-		qp->r_rq.wq->tail = 0;
-	}
-}
-
-/**
- * ipath_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-
-int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ib_wc wc;
-	int ret = 0;
-
-	if (qp->state == IB_QPS_ERR)
-		goto bail;
-
-	qp->state = IB_QPS_ERR;
-
-	spin_lock(&dev->pending_lock);
-	if (!list_empty(&qp->timerwait))
-		list_del_init(&qp->timerwait);
-	if (!list_empty(&qp->piowait))
-		list_del_init(&qp->piowait);
-	spin_unlock(&dev->pending_lock);
-
-	/* Schedule the sending tasklet to drain the send work queue. */
-	if (qp->s_last != qp->s_head)
-		ipath_schedule_send(qp);
-
-	memset(&wc, 0, sizeof(wc));
-	wc.qp = &qp->ibqp;
-	wc.opcode = IB_WC_RECV;
-
-	if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
-		wc.wr_id = qp->r_wr_id;
-		wc.status = err;
-		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-	}
-	wc.status = IB_WC_WR_FLUSH_ERR;
-
-	if (qp->r_rq.wq) {
-		struct ipath_rwq *wq;
-		u32 head;
-		u32 tail;
-
-		spin_lock(&qp->r_rq.lock);
-
-		/* sanity check pointers before trusting them */
-		wq = qp->r_rq.wq;
-		head = wq->head;
-		if (head >= qp->r_rq.size)
-			head = 0;
-		tail = wq->tail;
-		if (tail >= qp->r_rq.size)
-			tail = 0;
-		while (tail != head) {
-			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
-			if (++tail >= qp->r_rq.size)
-				tail = 0;
-			ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-		}
-		wq->tail = tail;
-
-		spin_unlock(&qp->r_rq.lock);
-	} else if (qp->ibqp.event_handler)
-		ret = 1;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for ipathverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask, struct ib_udata *udata)
-{
-	struct ipath_ibdev *dev = to_idev(ibqp->device);
-	struct ipath_qp *qp = to_iqp(ibqp);
-	enum ib_qp_state cur_state, new_state;
-	int lastwqe = 0;
-	int ret;
-
-	spin_lock_irq(&qp->s_lock);
-
-	cur_state = attr_mask & IB_QP_CUR_STATE ?
-		attr->cur_qp_state : qp->state;
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
-		goto inval;
-
-	if (attr_mask & IB_QP_AV) {
-		if (attr->ah_attr.dlid == 0 ||
-		    attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
-			goto inval;
-
-		if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
-		    (attr->ah_attr.grh.sgid_index > 1))
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
-			goto inval;
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		if (attr->min_rnr_timer > 31)
-			goto inval;
-
-	if (attr_mask & IB_QP_PORT)
-		if (attr->port_num == 0 ||
-		    attr->port_num > ibqp->device->phys_port_cnt)
-			goto inval;
-
-	/*
-	 * don't allow invalid Path MTU values or greater than 2048
-	 * unless we are configured for a 4KB MTU
-	 */
-	if ((attr_mask & IB_QP_PATH_MTU) &&
-		(ib_mtu_enum_to_int(attr->path_mtu) == -1 ||
-		(attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096)))
-		goto inval;
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE)
-		if (attr->path_mig_state != IB_MIG_MIGRATED &&
-		    attr->path_mig_state != IB_MIG_REARM)
-			goto inval;
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
-			goto inval;
-
-	switch (new_state) {
-	case IB_QPS_RESET:
-		if (qp->state != IB_QPS_RESET) {
-			qp->state = IB_QPS_RESET;
-			spin_lock(&dev->pending_lock);
-			if (!list_empty(&qp->timerwait))
-				list_del_init(&qp->timerwait);
-			if (!list_empty(&qp->piowait))
-				list_del_init(&qp->piowait);
-			spin_unlock(&dev->pending_lock);
-			qp->s_flags &= ~IPATH_S_ANY_WAIT;
-			spin_unlock_irq(&qp->s_lock);
-			/* Stop the sending tasklet */
-			tasklet_kill(&qp->s_task);
-			wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-			spin_lock_irq(&qp->s_lock);
-		}
-		ipath_reset_qp(qp, ibqp->qp_type);
-		break;
-
-	case IB_QPS_SQD:
-		qp->s_draining = qp->s_last != qp->s_cur;
-		qp->state = new_state;
-		break;
-
-	case IB_QPS_SQE:
-		if (qp->ibqp.qp_type == IB_QPT_RC)
-			goto inval;
-		qp->state = new_state;
-		break;
-
-	case IB_QPS_ERR:
-		lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-		break;
-
-	default:
-		qp->state = new_state;
-		break;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		qp->s_pkey_index = attr->pkey_index;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		qp->remote_qpn = attr->dest_qp_num;
-
-	if (attr_mask & IB_QP_SQ_PSN) {
-		qp->s_psn = qp->s_next_psn = attr->sq_psn;
-		qp->s_last_psn = qp->s_next_psn - 1;
-	}
-
-	if (attr_mask & IB_QP_RQ_PSN)
-		qp->r_psn = attr->rq_psn;
-
-	if (attr_mask & IB_QP_ACCESS_FLAGS)
-		qp->qp_access_flags = attr->qp_access_flags;
-
-	if (attr_mask & IB_QP_AV) {
-		qp->remote_ah_attr = attr->ah_attr;
-		qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate);
-	}
-
-	if (attr_mask & IB_QP_PATH_MTU)
-		qp->path_mtu = attr->path_mtu;
-
-	if (attr_mask & IB_QP_RETRY_CNT)
-		qp->s_retry = qp->s_retry_cnt = attr->retry_cnt;
-
-	if (attr_mask & IB_QP_RNR_RETRY) {
-		qp->s_rnr_retry = attr->rnr_retry;
-		if (qp->s_rnr_retry > 7)
-			qp->s_rnr_retry = 7;
-		qp->s_rnr_retry_cnt = qp->s_rnr_retry;
-	}
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		qp->r_min_rnr_timer = attr->min_rnr_timer;
-
-	if (attr_mask & IB_QP_TIMEOUT)
-		qp->timeout = attr->timeout;
-
-	if (attr_mask & IB_QP_QKEY)
-		qp->qkey = attr->qkey;
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-		qp->s_max_rd_atomic = attr->max_rd_atomic;
-
-	spin_unlock_irq(&qp->s_lock);
-
-	if (lastwqe) {
-		struct ib_event ev;
-
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	ret = 0;
-	goto bail;
-
-inval:
-	spin_unlock_irq(&qp->s_lock);
-	ret = -EINVAL;
-
-bail:
-	return ret;
-}
-
-int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		   int attr_mask, struct ib_qp_init_attr *init_attr)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-
-	attr->qp_state = qp->state;
-	attr->cur_qp_state = attr->qp_state;
-	attr->path_mtu = qp->path_mtu;
-	attr->path_mig_state = 0;
-	attr->qkey = qp->qkey;
-	attr->rq_psn = qp->r_psn;
-	attr->sq_psn = qp->s_next_psn;
-	attr->dest_qp_num = qp->remote_qpn;
-	attr->qp_access_flags = qp->qp_access_flags;
-	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
-	attr->cap.max_send_sge = qp->s_max_sge;
-	attr->cap.max_recv_sge = qp->r_rq.max_sge;
-	attr->cap.max_inline_data = 0;
-	attr->ah_attr = qp->remote_ah_attr;
-	memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr));
-	attr->pkey_index = qp->s_pkey_index;
-	attr->alt_pkey_index = 0;
-	attr->en_sqd_async_notify = 0;
-	attr->sq_draining = qp->s_draining;
-	attr->max_rd_atomic = qp->s_max_rd_atomic;
-	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
-	attr->min_rnr_timer = qp->r_min_rnr_timer;
-	attr->port_num = 1;
-	attr->timeout = qp->timeout;
-	attr->retry_cnt = qp->s_retry_cnt;
-	attr->rnr_retry = qp->s_rnr_retry_cnt;
-	attr->alt_port_num = 0;
-	attr->alt_timeout = 0;
-
-	init_attr->event_handler = qp->ibqp.event_handler;
-	init_attr->qp_context = qp->ibqp.qp_context;
-	init_attr->send_cq = qp->ibqp.send_cq;
-	init_attr->recv_cq = qp->ibqp.recv_cq;
-	init_attr->srq = qp->ibqp.srq;
-	init_attr->cap = attr->cap;
-	if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
-		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-	else
-		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-	init_attr->qp_type = qp->ibqp.qp_type;
-	init_attr->port_num = 1;
-	return 0;
-}
-
-/**
- * ipath_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 ipath_compute_aeth(struct ipath_qp *qp)
-{
-	u32 aeth = qp->r_msn & IPATH_MSN_MASK;
-
-	if (qp->ibqp.srq) {
-		/*
-		 * Shared receive queues don't generate credits.
-		 * Set the credit field to the invalid value.
-		 */
-		aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT;
-	} else {
-		u32 min, max, x;
-		u32 credits;
-		struct ipath_rwq *wq = qp->r_rq.wq;
-		u32 head;
-		u32 tail;
-
-		/* sanity check pointers before trusting them */
-		head = wq->head;
-		if (head >= qp->r_rq.size)
-			head = 0;
-		tail = wq->tail;
-		if (tail >= qp->r_rq.size)
-			tail = 0;
-		/*
-		 * Compute the number of credits available (RWQEs).
-		 * XXX Not holding the r_rq.lock here so there is a small
-		 * chance that the pair of reads are not atomic.
-		 */
-		credits = head - tail;
-		if ((int)credits < 0)
-			credits += qp->r_rq.size;
-		/*
-		 * Binary search the credit table to find the code to
-		 * use.
-		 */
-		min = 0;
-		max = 31;
-		for (;;) {
-			x = (min + max) / 2;
-			if (credit_table[x] == credits)
-				break;
-			if (credit_table[x] > credits)
-				max = x;
-			else if (min == x)
-				break;
-			else
-				min = x;
-		}
-		aeth |= x << IPATH_AETH_CREDIT_SHIFT;
-	}
-	return cpu_to_be32(aeth);
-}
-
-/**
- * ipath_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: unused by InfiniPath
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
-			      struct ib_qp_init_attr *init_attr,
-			      struct ib_udata *udata)
-{
-	struct ipath_qp *qp;
-	int err;
-	struct ipath_swqe *swq = NULL;
-	struct ipath_ibdev *dev;
-	size_t sz;
-	size_t sg_list_sz;
-	struct ib_qp *ret;
-
-	if (init_attr->create_flags) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
-	    init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	/* Check receive queue parameters if no SRQ is specified. */
-	if (!init_attr->srq) {
-		if (init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
-		    init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		if (init_attr->cap.max_send_sge +
-		    init_attr->cap.max_send_wr +
-		    init_attr->cap.max_recv_sge +
-		    init_attr->cap.max_recv_wr == 0) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-	}
-
-	switch (init_attr->qp_type) {
-	case IB_QPT_UC:
-	case IB_QPT_RC:
-	case IB_QPT_UD:
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		sz = sizeof(struct ipath_sge) *
-			init_attr->cap.max_send_sge +
-			sizeof(struct ipath_swqe);
-		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
-		if (swq == NULL) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail;
-		}
-		sz = sizeof(*qp);
-		sg_list_sz = 0;
-		if (init_attr->srq) {
-			struct ipath_srq *srq = to_isrq(init_attr->srq);
-
-			if (srq->rq.max_sge > 1)
-				sg_list_sz = sizeof(*qp->r_sg_list) *
-					(srq->rq.max_sge - 1);
-		} else if (init_attr->cap.max_recv_sge > 1)
-			sg_list_sz = sizeof(*qp->r_sg_list) *
-				(init_attr->cap.max_recv_sge - 1);
-		qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
-		if (!qp) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_swq;
-		}
-		if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
-		    init_attr->qp_type == IB_QPT_SMI ||
-		    init_attr->qp_type == IB_QPT_GSI)) {
-			qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
-			if (!qp->r_ud_sg_list) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_qp;
-			}
-		} else
-			qp->r_ud_sg_list = NULL;
-		if (init_attr->srq) {
-			sz = 0;
-			qp->r_rq.size = 0;
-			qp->r_rq.max_sge = 0;
-			qp->r_rq.wq = NULL;
-			init_attr->cap.max_recv_wr = 0;
-			init_attr->cap.max_recv_sge = 0;
-		} else {
-			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
-			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
-				sizeof(struct ipath_rwqe);
-			qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
-					      qp->r_rq.size * sz);
-			if (!qp->r_rq.wq) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_sg_list;
-			}
-		}
-
-		/*
-		 * ib_create_qp() will initialize qp->ibqp
-		 * except for qp->ibqp.qp_num.
-		 */
-		spin_lock_init(&qp->s_lock);
-		spin_lock_init(&qp->r_rq.lock);
-		atomic_set(&qp->refcount, 0);
-		init_waitqueue_head(&qp->wait);
-		init_waitqueue_head(&qp->wait_dma);
-		tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
-		INIT_LIST_HEAD(&qp->piowait);
-		INIT_LIST_HEAD(&qp->timerwait);
-		qp->state = IB_QPS_RESET;
-		qp->s_wq = swq;
-		qp->s_size = init_attr->cap.max_send_wr + 1;
-		qp->s_max_sge = init_attr->cap.max_send_sge;
-		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-			qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
-		else
-			qp->s_flags = 0;
-		dev = to_idev(ibpd->device);
-		err = ipath_alloc_qpn(&dev->qp_table, qp,
-				      init_attr->qp_type);
-		if (err) {
-			ret = ERR_PTR(err);
-			vfree(qp->r_rq.wq);
-			goto bail_sg_list;
-		}
-		qp->ip = NULL;
-		qp->s_tx = NULL;
-		ipath_reset_qp(qp, init_attr->qp_type);
-		break;
-
-	default:
-		/* Don't support raw QPs */
-		ret = ERR_PTR(-ENOSYS);
-		goto bail;
-	}
-
-	init_attr->cap.max_inline_data = 0;
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See ipath_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		if (!qp->r_rq.wq) {
-			__u64 offset = 0;
-
-			err = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		} else {
-			u32 s = sizeof(struct ipath_rwq) +
-				qp->r_rq.size * sz;
-
-			qp->ip =
-			    ipath_create_mmap_info(dev, s,
-						   ibpd->uobject->context,
-						   qp->r_rq.wq);
-			if (!qp->ip) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_ip;
-			}
-
-			err = ib_copy_to_udata(udata, &(qp->ip->offset),
-					       sizeof(qp->ip->offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		}
-	}
-
-	spin_lock(&dev->n_qps_lock);
-	if (dev->n_qps_allocated == ib_ipath_max_qps) {
-		spin_unlock(&dev->n_qps_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_qps_allocated++;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &qp->ibqp;
-	goto bail;
-
-bail_ip:
-	if (qp->ip)
-		kref_put(&qp->ip->ref, ipath_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	ipath_free_qp(&dev->qp_table, qp);
-	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
-bail_sg_list:
-	kfree(qp->r_ud_sg_list);
-bail_qp:
-	kfree(qp);
-bail_swq:
-	vfree(swq);
-bail:
-	return ret;
-}
-
-/**
- * ipath_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int ipath_destroy_qp(struct ib_qp *ibqp)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-	struct ipath_ibdev *dev = to_idev(ibqp->device);
-
-	/* Make sure HW and driver activity is stopped. */
-	spin_lock_irq(&qp->s_lock);
-	if (qp->state != IB_QPS_RESET) {
-		qp->state = IB_QPS_RESET;
-		spin_lock(&dev->pending_lock);
-		if (!list_empty(&qp->timerwait))
-			list_del_init(&qp->timerwait);
-		if (!list_empty(&qp->piowait))
-			list_del_init(&qp->piowait);
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~IPATH_S_ANY_WAIT;
-		spin_unlock_irq(&qp->s_lock);
-		/* Stop the sending tasklet */
-		tasklet_kill(&qp->s_task);
-		wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-	} else
-		spin_unlock_irq(&qp->s_lock);
-
-	ipath_free_qp(&dev->qp_table, qp);
-
-	if (qp->s_tx) {
-		atomic_dec(&qp->refcount);
-		if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
-			kfree(qp->s_tx->txreq.map_addr);
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
-		spin_unlock_irq(&dev->pending_lock);
-		qp->s_tx = NULL;
-	}
-
-	wait_event(qp->wait, !atomic_read(&qp->refcount));
-
-	/* all user's cleaned up, mark it available */
-	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
-	spin_lock(&dev->n_qps_lock);
-	dev->n_qps_allocated--;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip)
-		kref_put(&qp->ip->ref, ipath_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	kfree(qp->r_ud_sg_list);
-	vfree(qp->s_wq);
-	kfree(qp);
-	return 0;
-}
-
-/**
- * ipath_init_qp_table - initialize the QP table for a device
- * @idev: the device who's QP table we're initializing
- * @size: the size of the QP table
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int ipath_init_qp_table(struct ipath_ibdev *idev, int size)
-{
-	int i;
-	int ret;
-
-	idev->qp_table.last = 1;	/* QPN 0 and 1 are special. */
-	idev->qp_table.max = size;
-	idev->qp_table.nmaps = 1;
-	idev->qp_table.table = kcalloc(size, sizeof(*idev->qp_table.table),
-				       GFP_KERNEL);
-	if (idev->qp_table.table == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) {
-		atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE);
-		idev->qp_table.map[i].page = NULL;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
-{
-	u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK;
-
-	/*
-	 * If the credit is invalid, we can send
-	 * as many packets as we like.  Otherwise, we have to
-	 * honor the credit field.
-	 */
-	if (credit == IPATH_AETH_CREDIT_INVAL)
-		qp->s_lsn = (u32) -1;
-	else if (qp->s_lsn != (u32) -1) {
-		/* Compute new LSN (i.e., MSN + credit) */
-		credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK;
-		if (ipath_cmp24(credit, qp->s_lsn) > 0)
-			qp->s_lsn = credit;
-	}
-
-	/* Restart sending if it was blocked due to lack of credits. */
-	if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
-	    qp->s_cur != qp->s_head &&
-	    (qp->s_lsn == (u32) -1 ||
-	     ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
-			 qp->s_lsn + 1) <= 0))
-		ipath_schedule_send(qp);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_rc.c b/drivers/staging/rdma/ipath/ipath_rc.c
deleted file mode 100644
index d4aa53574e57..000000000000
--- a/drivers/staging/rdma/ipath/ipath_rc.c
+++ /dev/null
@@ -1,1969 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/io.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_RC_##x
-
-static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
-		       u32 psn, u32 pmtu)
-{
-	u32 len;
-
-	len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
-	ss->sge = wqe->sg_list[0];
-	ss->sg_list = wqe->sg_list + 1;
-	ss->num_sge = wqe->wr.num_sge;
-	ipath_skip_sge(ss, len);
-	return wqe->length - len;
-}
-
-/**
- * ipath_init_restart- initialize the qp->s_sge after a restart
- * @qp: the QP who's SGE we're restarting
- * @wqe: the work queue to initialize the QP's SGE from
- *
- * The QP s_lock should be held and interrupts disabled.
- */
-static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
-{
-	struct ipath_ibdev *dev;
-
-	qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
-				ib_mtu_enum_to_int(qp->path_mtu));
-	dev = to_idev(qp->ibqp.device);
-	spin_lock(&dev->pending_lock);
-	if (list_empty(&qp->timerwait))
-		list_add_tail(&qp->timerwait,
-			      &dev->pending[dev->pending_index]);
-	spin_unlock(&dev->pending_lock);
-}
-
-/**
- * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
- * @qp: a pointer to the QP
- * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
- *
- * Return 1 if constructed; otherwise, return 0.
- * Note that we are in the responder's side of the QP context.
- * Note the QP s_lock must be held.
- */
-static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
-			     struct ipath_other_headers *ohdr, u32 pmtu)
-{
-	struct ipath_ack_entry *e;
-	u32 hwords;
-	u32 len;
-	u32 bth0;
-	u32 bth2;
-
-	/* Don't send an ACK if we aren't supposed to. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-		goto bail;
-
-	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
-	hwords = 5;
-
-	switch (qp->s_ack_state) {
-	case OP(RDMA_READ_RESPONSE_LAST):
-	case OP(RDMA_READ_RESPONSE_ONLY):
-	case OP(ATOMIC_ACKNOWLEDGE):
-		/*
-		 * We can increment the tail pointer now that the last
-		 * response has been sent instead of only being
-		 * constructed.
-		 */
-		if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
-			qp->s_tail_ack_queue = 0;
-		/* FALLTHROUGH */
-	case OP(SEND_ONLY):
-	case OP(ACKNOWLEDGE):
-		/* Check for no next entry in the queue. */
-		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
-			if (qp->s_flags & IPATH_S_ACK_PENDING)
-				goto normal;
-			qp->s_ack_state = OP(ACKNOWLEDGE);
-			goto bail;
-		}
-
-		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
-		if (e->opcode == OP(RDMA_READ_REQUEST)) {
-			/* Copy SGE state in case we need to resend */
-			qp->s_ack_rdma_sge = e->rdma_sge;
-			qp->s_cur_sge = &qp->s_ack_rdma_sge;
-			len = e->rdma_sge.sge.sge_length;
-			if (len > pmtu) {
-				len = pmtu;
-				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
-			} else {
-				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
-				e->sent = 1;
-			}
-			ohdr->u.aeth = ipath_compute_aeth(qp);
-			hwords++;
-			qp->s_ack_rdma_psn = e->psn;
-			bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
-		} else {
-			/* COMPARE_SWAP or FETCH_ADD */
-			qp->s_cur_sge = NULL;
-			len = 0;
-			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
-			ohdr->u.at.aeth = ipath_compute_aeth(qp);
-			ohdr->u.at.atomic_ack_eth[0] =
-				cpu_to_be32(e->atomic_data >> 32);
-			ohdr->u.at.atomic_ack_eth[1] =
-				cpu_to_be32(e->atomic_data);
-			hwords += sizeof(ohdr->u.at) / sizeof(u32);
-			bth2 = e->psn;
-			e->sent = 1;
-		}
-		bth0 = qp->s_ack_state << 24;
-		break;
-
-	case OP(RDMA_READ_RESPONSE_FIRST):
-		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
-		/* FALLTHROUGH */
-	case OP(RDMA_READ_RESPONSE_MIDDLE):
-		len = qp->s_ack_rdma_sge.sge.sge_length;
-		if (len > pmtu)
-			len = pmtu;
-		else {
-			ohdr->u.aeth = ipath_compute_aeth(qp);
-			hwords++;
-			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-			qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
-		}
-		bth0 = qp->s_ack_state << 24;
-		bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
-		break;
-
-	default:
-	normal:
-		/*
-		 * Send a regular ACK.
-		 * Set the s_ack_state so we wait until after sending
-		 * the ACK before setting s_ack_state to ACKNOWLEDGE
-		 * (see above).
-		 */
-		qp->s_ack_state = OP(SEND_ONLY);
-		qp->s_flags &= ~IPATH_S_ACK_PENDING;
-		qp->s_cur_sge = NULL;
-		if (qp->s_nak_state)
-			ohdr->u.aeth =
-				cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-					    (qp->s_nak_state <<
-					     IPATH_AETH_CREDIT_SHIFT));
-		else
-			ohdr->u.aeth = ipath_compute_aeth(qp);
-		hwords++;
-		len = 0;
-		bth0 = OP(ACKNOWLEDGE) << 24;
-		bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
-	}
-	qp->s_hdrwords = hwords;
-	qp->s_cur_size = len;
-	ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
-	return 1;
-
-bail:
-	return 0;
-}
-
-/**
- * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
- * @qp: a pointer to the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_rc_req(struct ipath_qp *qp)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_other_headers *ohdr;
-	struct ipath_sge_state *ss;
-	struct ipath_swqe *wqe;
-	u32 hwords;
-	u32 len;
-	u32 bth0;
-	u32 bth2;
-	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-	char newreq;
-	unsigned long flags;
-	int ret = 0;
-
-	ohdr = &qp->s_hdr.u.oth;
-	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr.u.l.oth;
-
-	/*
-	 * The lock is needed to synchronize between the sending tasklet,
-	 * the receive interrupt handler, and timeout resends.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Sending responses has higher priority over sending requests. */
-	if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
-	     (qp->s_flags & IPATH_S_ACK_PENDING) ||
-	     qp->s_ack_state != OP(ACKNOWLEDGE)) &&
-	    ipath_make_rc_ack(dev, qp, ohdr, pmtu))
-		goto done;
-
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
-		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-			goto bail;
-		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
-			goto bail;
-		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= IPATH_S_WAIT_DMA;
-			goto bail;
-		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
-	}
-
-	/* Leave BUSY set until RNR timeout. */
-	if (qp->s_rnr_timeout) {
-		qp->s_flags |= IPATH_S_WAITING;
-		goto bail;
-	}
-
-	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
-	hwords = 5;
-	bth0 = 1 << 22; /* Set M bit */
-
-	/* Send a request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
-	switch (qp->s_state) {
-	default:
-		if (!(ib_ipath_state_ops[qp->state] &
-		    IPATH_PROCESS_NEXT_SEND_OK))
-			goto bail;
-		/*
-		 * Resend an old request or start a new one.
-		 *
-		 * We keep track of the current SWQE so that
-		 * we don't reset the "furthest progress" state
-		 * if we need to back up.
-		 */
-		newreq = 0;
-		if (qp->s_cur == qp->s_tail) {
-			/* Check if send work queue is empty. */
-			if (qp->s_tail == qp->s_head)
-				goto bail;
-			/*
-			 * If a fence is requested, wait for previous
-			 * RDMA read and atomic operations to finish.
-			 */
-			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
-			    qp->s_num_rd_atomic) {
-				qp->s_flags |= IPATH_S_FENCE_PENDING;
-				goto bail;
-			}
-			wqe->psn = qp->s_next_psn;
-			newreq = 1;
-		}
-		/*
-		 * Note that we have to be careful not to modify the
-		 * original work request since we may need to resend
-		 * it.
-		 */
-		len = wqe->length;
-		ss = &qp->s_sge;
-		bth2 = 0;
-		switch (wqe->wr.opcode) {
-		case IB_WR_SEND:
-		case IB_WR_SEND_WITH_IMM:
-			/* If no credit, return. */
-			if (qp->s_lsn != (u32) -1 &&
-			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
-				goto bail;
-			}
-			wqe->lpsn = wqe->psn;
-			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
-				qp->s_state = OP(SEND_FIRST);
-				len = pmtu;
-				break;
-			}
-			if (wqe->wr.opcode == IB_WR_SEND)
-				qp->s_state = OP(SEND_ONLY);
-			else {
-				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
-				/* Immediate data comes after the BTH */
-				ohdr->u.imm_data = wqe->wr.ex.imm_data;
-				hwords += 1;
-			}
-			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-				bth0 |= 1 << 23;
-			bth2 = 1 << 31;	/* Request ACK. */
-			if (++qp->s_cur == qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		case IB_WR_RDMA_WRITE:
-			if (newreq && qp->s_lsn != (u32) -1)
-				qp->s_lsn++;
-			/* FALLTHROUGH */
-		case IB_WR_RDMA_WRITE_WITH_IMM:
-			/* If no credit, return. */
-			if (qp->s_lsn != (u32) -1 &&
-			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
-				goto bail;
-			}
-			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->rdma_wr.remote_addr);
-			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->rdma_wr.rkey);
-			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			hwords += sizeof(struct ib_reth) / sizeof(u32);
-			wqe->lpsn = wqe->psn;
-			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
-				qp->s_state = OP(RDMA_WRITE_FIRST);
-				len = pmtu;
-				break;
-			}
-			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-				qp->s_state = OP(RDMA_WRITE_ONLY);
-			else {
-				qp->s_state =
-					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
-				/* Immediate data comes after RETH */
-				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
-				hwords += 1;
-				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-					bth0 |= 1 << 23;
-			}
-			bth2 = 1 << 31;	/* Request ACK. */
-			if (++qp->s_cur == qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		case IB_WR_RDMA_READ:
-			/*
-			 * Don't allow more operations to be started
-			 * than the QP limits allow.
-			 */
-			if (newreq) {
-				if (qp->s_num_rd_atomic >=
-				    qp->s_max_rd_atomic) {
-					qp->s_flags |= IPATH_S_RDMAR_PENDING;
-					goto bail;
-				}
-				qp->s_num_rd_atomic++;
-				if (qp->s_lsn != (u32) -1)
-					qp->s_lsn++;
-				/*
-				 * Adjust s_next_psn to count the
-				 * expected number of responses.
-				 */
-				if (len > pmtu)
-					qp->s_next_psn += (len - 1) / pmtu;
-				wqe->lpsn = qp->s_next_psn++;
-			}
-			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->rdma_wr.remote_addr);
-			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->rdma_wr.rkey);
-			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			qp->s_state = OP(RDMA_READ_REQUEST);
-			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-			ss = NULL;
-			len = 0;
-			if (++qp->s_cur == qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		case IB_WR_ATOMIC_CMP_AND_SWP:
-		case IB_WR_ATOMIC_FETCH_AND_ADD:
-			/*
-			 * Don't allow more operations to be started
-			 * than the QP limits allow.
-			 */
-			if (newreq) {
-				if (qp->s_num_rd_atomic >=
-				    qp->s_max_rd_atomic) {
-					qp->s_flags |= IPATH_S_RDMAR_PENDING;
-					goto bail;
-				}
-				qp->s_num_rd_atomic++;
-				if (qp->s_lsn != (u32) -1)
-					qp->s_lsn++;
-				wqe->lpsn = wqe->psn;
-			}
-			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-				qp->s_state = OP(COMPARE_SWAP);
-				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-					wqe->atomic_wr.swap);
-				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-					wqe->atomic_wr.compare_add);
-			} else {
-				qp->s_state = OP(FETCH_ADD);
-				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-					wqe->atomic_wr.compare_add);
-				ohdr->u.atomic_eth.compare_data = 0;
-			}
-			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
-				wqe->atomic_wr.remote_addr >> 32);
-			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
-				wqe->atomic_wr.remote_addr);
-			ohdr->u.atomic_eth.rkey = cpu_to_be32(
-				wqe->atomic_wr.rkey);
-			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
-			ss = NULL;
-			len = 0;
-			if (++qp->s_cur == qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		default:
-			goto bail;
-		}
-		qp->s_sge.sge = wqe->sg_list[0];
-		qp->s_sge.sg_list = wqe->sg_list + 1;
-		qp->s_sge.num_sge = wqe->wr.num_sge;
-		qp->s_len = wqe->length;
-		if (newreq) {
-			qp->s_tail++;
-			if (qp->s_tail >= qp->s_size)
-				qp->s_tail = 0;
-		}
-		bth2 |= qp->s_psn & IPATH_PSN_MASK;
-		if (wqe->wr.opcode == IB_WR_RDMA_READ)
-			qp->s_psn = wqe->lpsn + 1;
-		else {
-			qp->s_psn++;
-			if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-				qp->s_next_psn = qp->s_psn;
-		}
-		/*
-		 * Put the QP on the pending list so lost ACKs will cause
-		 * a retry.  More than one request can be pending so the
-		 * QP may already be on the dev->pending list.
-		 */
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->timerwait))
-			list_add_tail(&qp->timerwait,
-				      &dev->pending[dev->pending_index]);
-		spin_unlock(&dev->pending_lock);
-		break;
-
-	case OP(RDMA_READ_RESPONSE_FIRST):
-		/*
-		 * This case can only happen if a send is restarted.
-		 * See ipath_restart_rc().
-		 */
-		ipath_init_restart(qp, wqe);
-		/* FALLTHROUGH */
-	case OP(SEND_FIRST):
-		qp->s_state = OP(SEND_MIDDLE);
-		/* FALLTHROUGH */
-	case OP(SEND_MIDDLE):
-		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
-		ss = &qp->s_sge;
-		len = qp->s_len;
-		if (len > pmtu) {
-			len = pmtu;
-			break;
-		}
-		if (wqe->wr.opcode == IB_WR_SEND)
-			qp->s_state = OP(SEND_LAST);
-		else {
-			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
-			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.ex.imm_data;
-			hwords += 1;
-		}
-		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-			bth0 |= 1 << 23;
-		bth2 |= 1 << 31;	/* Request ACK. */
-		qp->s_cur++;
-		if (qp->s_cur >= qp->s_size)
-			qp->s_cur = 0;
-		break;
-
-	case OP(RDMA_READ_RESPONSE_LAST):
-		/*
-		 * This case can only happen if a RDMA write is restarted.
-		 * See ipath_restart_rc().
-		 */
-		ipath_init_restart(qp, wqe);
-		/* FALLTHROUGH */
-	case OP(RDMA_WRITE_FIRST):
-		qp->s_state = OP(RDMA_WRITE_MIDDLE);
-		/* FALLTHROUGH */
-	case OP(RDMA_WRITE_MIDDLE):
-		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
-		ss = &qp->s_sge;
-		len = qp->s_len;
-		if (len > pmtu) {
-			len = pmtu;
-			break;
-		}
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-			qp->s_state = OP(RDMA_WRITE_LAST);
-		else {
-			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
-			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.ex.imm_data;
-			hwords += 1;
-			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-				bth0 |= 1 << 23;
-		}
-		bth2 |= 1 << 31;	/* Request ACK. */
-		qp->s_cur++;
-		if (qp->s_cur >= qp->s_size)
-			qp->s_cur = 0;
-		break;
-
-	case OP(RDMA_READ_RESPONSE_MIDDLE):
-		/*
-		 * This case can only happen if a RDMA read is restarted.
-		 * See ipath_restart_rc().
-		 */
-		ipath_init_restart(qp, wqe);
-		len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
-		ohdr->u.rc.reth.vaddr =
-			cpu_to_be64(wqe->rdma_wr.remote_addr + len);
-		ohdr->u.rc.reth.rkey =
-			cpu_to_be32(wqe->rdma_wr.rkey);
-		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
-		qp->s_state = OP(RDMA_READ_REQUEST);
-		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-		bth2 = qp->s_psn & IPATH_PSN_MASK;
-		qp->s_psn = wqe->lpsn + 1;
-		ss = NULL;
-		len = 0;
-		qp->s_cur++;
-		if (qp->s_cur == qp->s_size)
-			qp->s_cur = 0;
-		break;
-	}
-	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
-		bth2 |= 1 << 31;	/* Request ACK. */
-	qp->s_len -= len;
-	qp->s_hdrwords = hwords;
-	qp->s_cur_sge = ss;
-	qp->s_cur_size = len;
-	ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
-done:
-	ret = 1;
-	goto unlock;
-
-bail:
-	qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
-}
-
-/**
- * send_rc_ack - Construct an ACK packet and send it
- * @qp: a pointer to the QP
- *
- * This is called from ipath_rc_rcv() and only uses the receive
- * side QP state.
- * Note that RDMA reads and atomics are handled in the
- * send side QP state and tasklet.
- */
-static void send_rc_ack(struct ipath_qp *qp)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_devdata *dd;
-	u16 lrh0;
-	u32 bth0;
-	u32 hwords;
-	u32 __iomem *piobuf;
-	struct ipath_ib_header hdr;
-	struct ipath_other_headers *ohdr;
-	unsigned long flags;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
-	if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
-	    (qp->s_flags & IPATH_S_ACK_PENDING) ||
-	    qp->s_ack_state != OP(ACKNOWLEDGE))
-		goto queue_ack;
-
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-	/* Don't try to send ACKs if the link isn't ACTIVE */
-	dd = dev->dd;
-	if (!(dd->ipath_flags & IPATH_LINKACTIVE))
-		goto done;
-
-	piobuf = ipath_getpiobuf(dd, 0, NULL);
-	if (!piobuf) {
-		/*
-		 * We are out of PIO buffers at the moment.
-		 * Pass responsibility for sending the ACK to the
-		 * send tasklet so that when a PIO buffer becomes
-		 * available, the ACK is sent ahead of other outgoing
-		 * packets.
-		 */
-		spin_lock_irqsave(&qp->s_lock, flags);
-		goto queue_ack;
-	}
-
-	/* Construct the header. */
-	ohdr = &hdr.u.oth;
-	lrh0 = IPATH_LRH_BTH;
-	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
-	hwords = 6;
-	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-		hwords += ipath_make_grh(dev, &hdr.u.l.grh,
-					 &qp->remote_ah_attr.grh,
-					 hwords, 0);
-		ohdr = &hdr.u.l.oth;
-		lrh0 = IPATH_LRH_GRH;
-	}
-	/* read pkey_index w/o lock (its atomic) */
-	bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
-		(OP(ACKNOWLEDGE) << 24) | (1 << 22);
-	if (qp->r_nak_state)
-		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
-					    (qp->r_nak_state <<
-					     IPATH_AETH_CREDIT_SHIFT));
-	else
-		ohdr->u.aeth = ipath_compute_aeth(qp);
-	lrh0 |= qp->remote_ah_attr.sl << 4;
-	hdr.lrh[0] = cpu_to_be16(lrh0);
-	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-	hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
-				 qp->remote_ah_attr.src_path_bits);
-	ohdr->bth[0] = cpu_to_be32(bth0);
-	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
-
-	writeq(hwords + 1, piobuf);
-
-	if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
-		u32 *hdrp = (u32 *) &hdr;
-
-		ipath_flush_wc();
-		__iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
-		ipath_flush_wc();
-		__raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
-	} else
-		__iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);
-
-	ipath_flush_wc();
-
-	dev->n_unicast_xmit++;
-	goto done;
-
-queue_ack:
-	if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
-		dev->n_rc_qacks++;
-		qp->s_flags |= IPATH_S_ACK_PENDING;
-		qp->s_nak_state = qp->r_nak_state;
-		qp->s_ack_psn = qp->r_ack_psn;
-
-		/* Schedule the send tasklet. */
-		ipath_schedule_send(qp);
-	}
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-	return;
-}
-
-/**
- * reset_psn - reset the QP state to send starting from PSN
- * @qp: the QP
- * @psn: the packet sequence number to restart at
- *
- * This is called from ipath_rc_rcv() to process an incoming RC ACK
- * for the given QP.
- * Called at interrupt level with the QP s_lock held.
- */
-static void reset_psn(struct ipath_qp *qp, u32 psn)
-{
-	u32 n = qp->s_last;
-	struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
-	u32 opcode;
-
-	qp->s_cur = n;
-
-	/*
-	 * If we are starting the request from the beginning,
-	 * let the normal send code handle initialization.
-	 */
-	if (ipath_cmp24(psn, wqe->psn) <= 0) {
-		qp->s_state = OP(SEND_LAST);
-		goto done;
-	}
-
-	/* Find the work request opcode corresponding to the given PSN. */
-	opcode = wqe->wr.opcode;
-	for (;;) {
-		int diff;
-
-		if (++n == qp->s_size)
-			n = 0;
-		if (n == qp->s_tail)
-			break;
-		wqe = get_swqe_ptr(qp, n);
-		diff = ipath_cmp24(psn, wqe->psn);
-		if (diff < 0)
-			break;
-		qp->s_cur = n;
-		/*
-		 * If we are starting the request from the beginning,
-		 * let the normal send code handle initialization.
-		 */
-		if (diff == 0) {
-			qp->s_state = OP(SEND_LAST);
-			goto done;
-		}
-		opcode = wqe->wr.opcode;
-	}
-
-	/*
-	 * Set the state to restart in the middle of a request.
-	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
-	 * See ipath_make_rc_req().
-	 */
-	switch (opcode) {
-	case IB_WR_SEND:
-	case IB_WR_SEND_WITH_IMM:
-		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
-		break;
-
-	case IB_WR_RDMA_WRITE:
-	case IB_WR_RDMA_WRITE_WITH_IMM:
-		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
-		break;
-
-	case IB_WR_RDMA_READ:
-		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
-		break;
-
-	default:
-		/*
-		 * This case shouldn't happen since its only
-		 * one PSN per req.
-		 */
-		qp->s_state = OP(SEND_LAST);
-	}
-done:
-	qp->s_psn = psn;
-}
-
-/**
- * ipath_restart_rc - back up requester to resend the last un-ACKed request
- * @qp: the QP to restart
- * @psn: packet sequence number for the request
- * @wc: the work completion request
- *
- * The QP s_lock should be held and interrupts disabled.
- */
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
-{
-	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-	struct ipath_ibdev *dev;
-
-	if (qp->s_retry == 0) {
-		ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-		ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-		goto bail;
-	}
-	qp->s_retry--;
-
-	/*
-	 * Remove the QP from the timeout queue.
-	 * Note: it may already have been removed by ipath_ib_timer().
-	 */
-	dev = to_idev(qp->ibqp.device);
-	spin_lock(&dev->pending_lock);
-	if (!list_empty(&qp->timerwait))
-		list_del_init(&qp->timerwait);
-	if (!list_empty(&qp->piowait))
-		list_del_init(&qp->piowait);
-	spin_unlock(&dev->pending_lock);
-
-	if (wqe->wr.opcode == IB_WR_RDMA_READ)
-		dev->n_rc_resends++;
-	else
-		dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
-
-	reset_psn(qp, psn);
-	ipath_schedule_send(qp);
-
-bail:
-	return;
-}
-
-static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
-{
-	qp->s_last_psn = psn;
-}
-
-/**
- * do_rc_ack - process an incoming RC ACK
- * @qp: the QP the ACK came in on
- * @psn: the packet sequence number of the ACK
- * @opcode: the opcode of the request that resulted in the ACK
- *
- * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
- * for the given QP.
- * Called at interrupt level with the QP s_lock held and interrupts disabled.
- * Returns 1 if OK, 0 if current operation should be aborted (NAK).
- */
-static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
-		     u64 val)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ib_wc wc;
-	enum ib_wc_status status;
-	struct ipath_swqe *wqe;
-	int ret = 0;
-	u32 ack_psn;
-	int diff;
-
-	/*
-	 * Remove the QP from the timeout queue (or RNR timeout queue).
-	 * If ipath_ib_timer() has already removed it,
-	 * it's OK since we hold the QP s_lock and ipath_restart_rc()
-	 * just won't find anything to restart if we ACK everything.
-	 */
-	spin_lock(&dev->pending_lock);
-	if (!list_empty(&qp->timerwait))
-		list_del_init(&qp->timerwait);
-	spin_unlock(&dev->pending_lock);
-
-	/*
-	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
-	 * requests and implicitly NAK RDMA read and atomic requests issued
-	 * before the NAK'ed request.  The MSN won't include the NAK'ed
-	 * request but will include an ACK'ed request(s).
-	 */
-	ack_psn = psn;
-	if (aeth >> 29)
-		ack_psn--;
-	wqe = get_swqe_ptr(qp, qp->s_last);
-
-	/*
-	 * The MSN might be for a later WQE than the PSN indicates so
-	 * only complete WQEs that the PSN finishes.
-	 */
-	while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
-		/*
-		 * RDMA_READ_RESPONSE_ONLY is a special case since
-		 * we want to generate completion events for everything
-		 * before the RDMA read, copy the data, then generate
-		 * the completion for the read.
-		 */
-		if (wqe->wr.opcode == IB_WR_RDMA_READ &&
-		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
-		    diff == 0) {
-			ret = 1;
-			goto bail;
-		}
-		/*
-		 * If this request is a RDMA read or atomic, and the ACK is
-		 * for a later operation, this ACK NAKs the RDMA read or
-		 * atomic.  In other words, only a RDMA_READ_LAST or ONLY
-		 * can ACK a RDMA read and likewise for atomic ops.  Note
-		 * that the NAK case can only happen if relaxed ordering is
-		 * used and requests are sent after an RDMA read or atomic
-		 * is sent but before the response is received.
-		 */
-		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
-		     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
-		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
-		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
-			/*
-			 * The last valid PSN seen is the previous
-			 * request's.
-			 */
-			update_last_psn(qp, wqe->psn - 1);
-			/* Retry this request. */
-			ipath_restart_rc(qp, wqe->psn);
-			/*
-			 * No need to process the ACK/NAK since we are
-			 * restarting an earlier request.
-			 */
-			goto bail;
-		}
-		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-			*(u64 *) wqe->sg_list[0].vaddr = val;
-		if (qp->s_num_rd_atomic &&
-		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
-		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
-			qp->s_num_rd_atomic--;
-			/* Restart sending task if fence is complete */
-			if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
-			     !qp->s_num_rd_atomic) ||
-			    qp->s_flags & IPATH_S_RDMAR_PENDING)
-				ipath_schedule_send(qp);
-		}
-		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-			memset(&wc, 0, sizeof wc);
-			wc.wr_id = wqe->wr.wr_id;
-			wc.status = IB_WC_SUCCESS;
-			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-			wc.byte_len = wqe->length;
-			wc.qp = &qp->ibqp;
-			wc.src_qp = qp->remote_qpn;
-			wc.slid = qp->remote_ah_attr.dlid;
-			wc.sl = qp->remote_ah_attr.sl;
-			ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
-		}
-		qp->s_retry = qp->s_retry_cnt;
-		/*
-		 * If we are completing a request which is in the process of
-		 * being resent, we can stop resending it since we know the
-		 * responder has already seen it.
-		 */
-		if (qp->s_last == qp->s_cur) {
-			if (++qp->s_cur >= qp->s_size)
-				qp->s_cur = 0;
-			qp->s_last = qp->s_cur;
-			if (qp->s_last == qp->s_tail)
-				break;
-			wqe = get_swqe_ptr(qp, qp->s_cur);
-			qp->s_state = OP(SEND_LAST);
-			qp->s_psn = wqe->psn;
-		} else {
-			if (++qp->s_last >= qp->s_size)
-				qp->s_last = 0;
-			if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
-				qp->s_draining = 0;
-			if (qp->s_last == qp->s_tail)
-				break;
-			wqe = get_swqe_ptr(qp, qp->s_last);
-		}
-	}
-
-	switch (aeth >> 29) {
-	case 0:		/* ACK */
-		dev->n_rc_acks++;
-		/* If this is a partial ACK, reset the retransmit timer. */
-		if (qp->s_last != qp->s_tail) {
-			spin_lock(&dev->pending_lock);
-			if (list_empty(&qp->timerwait))
-				list_add_tail(&qp->timerwait,
-					&dev->pending[dev->pending_index]);
-			spin_unlock(&dev->pending_lock);
-			/*
-			 * If we get a partial ACK for a resent operation,
-			 * we can stop resending the earlier packets and
-			 * continue with the next packet the receiver wants.
-			 */
-			if (ipath_cmp24(qp->s_psn, psn) <= 0) {
-				reset_psn(qp, psn + 1);
-				ipath_schedule_send(qp);
-			}
-		} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
-			qp->s_state = OP(SEND_LAST);
-			qp->s_psn = psn + 1;
-		}
-		ipath_get_credit(qp, aeth);
-		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-		qp->s_retry = qp->s_retry_cnt;
-		update_last_psn(qp, psn);
-		ret = 1;
-		goto bail;
-
-	case 1:		/* RNR NAK */
-		dev->n_rnr_naks++;
-		if (qp->s_last == qp->s_tail)
-			goto bail;
-		if (qp->s_rnr_retry == 0) {
-			status = IB_WC_RNR_RETRY_EXC_ERR;
-			goto class_b;
-		}
-		if (qp->s_rnr_retry_cnt < 7)
-			qp->s_rnr_retry--;
-
-		/* The last valid PSN is the previous PSN. */
-		update_last_psn(qp, psn - 1);
-
-		if (wqe->wr.opcode == IB_WR_RDMA_READ)
-			dev->n_rc_resends++;
-		else
-			dev->n_rc_resends +=
-				(qp->s_psn - psn) & IPATH_PSN_MASK;
-
-		reset_psn(qp, psn);
-
-		qp->s_rnr_timeout =
-			ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
-					   IPATH_AETH_CREDIT_MASK];
-		ipath_insert_rnr_queue(qp);
-		ipath_schedule_send(qp);
-		goto bail;
-
-	case 3:		/* NAK */
-		if (qp->s_last == qp->s_tail)
-			goto bail;
-		/* The last valid PSN is the previous PSN. */
-		update_last_psn(qp, psn - 1);
-		switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
-			IPATH_AETH_CREDIT_MASK) {
-		case 0:	/* PSN sequence error */
-			dev->n_seq_naks++;
-			/*
-			 * Back up to the responder's expected PSN.
-			 * Note that we might get a NAK in the middle of an
-			 * RDMA READ response which terminates the RDMA
-			 * READ.
-			 */
-			ipath_restart_rc(qp, psn);
-			break;
-
-		case 1:	/* Invalid Request */
-			status = IB_WC_REM_INV_REQ_ERR;
-			dev->n_other_naks++;
-			goto class_b;
-
-		case 2:	/* Remote Access Error */
-			status = IB_WC_REM_ACCESS_ERR;
-			dev->n_other_naks++;
-			goto class_b;
-
-		case 3:	/* Remote Operation Error */
-			status = IB_WC_REM_OP_ERR;
-			dev->n_other_naks++;
-		class_b:
-			ipath_send_complete(qp, wqe, status);
-			ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-			break;
-
-		default:
-			/* Ignore other reserved NAK error codes */
-			goto reserved;
-		}
-		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-		goto bail;
-
-	default:		/* 2: reserved */
-	reserved:
-		/* Ignore reserved NAK codes. */
-		goto bail;
-	}
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_rc_rcv_resp - process an incoming RC response packet
- * @dev: the device this packet came in on
- * @ohdr: the other headers for this packet
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
- * @opcode: the opcode for this packet
- * @psn: the packet sequence number for this packet
- * @hdrsize: the header length
- * @pmtu: the path MTU
- * @header_in_data: true if part of the header data is in the data buffer
- *
- * This is called from ipath_rc_rcv() to process an incoming RC response
- * packet for the given QP.
- * Called at interrupt level.
- */
-static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
-				     struct ipath_other_headers *ohdr,
-				     void *data, u32 tlen,
-				     struct ipath_qp *qp,
-				     u32 opcode,
-				     u32 psn, u32 hdrsize, u32 pmtu,
-				     int header_in_data)
-{
-	struct ipath_swqe *wqe;
-	enum ib_wc_status status;
-	unsigned long flags;
-	int diff;
-	u32 pad;
-	u32 aeth;
-	u64 val;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Double check we can process this now that we hold the s_lock. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-		goto ack_done;
-
-	/* Ignore invalid responses. */
-	if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
-		goto ack_done;
-
-	/* Ignore duplicate responses. */
-	diff = ipath_cmp24(psn, qp->s_last_psn);
-	if (unlikely(diff <= 0)) {
-		/* Update credits for "ghost" ACKs */
-		if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
-			if (!header_in_data)
-				aeth = be32_to_cpu(ohdr->u.aeth);
-			else {
-				aeth = be32_to_cpu(((__be32 *) data)[0]);
-				data += sizeof(__be32);
-			}
-			if ((aeth >> 29) == 0)
-				ipath_get_credit(qp, aeth);
-		}
-		goto ack_done;
-	}
-
-	if (unlikely(qp->s_last == qp->s_tail))
-		goto ack_done;
-	wqe = get_swqe_ptr(qp, qp->s_last);
-	status = IB_WC_SUCCESS;
-
-	switch (opcode) {
-	case OP(ACKNOWLEDGE):
-	case OP(ATOMIC_ACKNOWLEDGE):
-	case OP(RDMA_READ_RESPONSE_FIRST):
-		if (!header_in_data)
-			aeth = be32_to_cpu(ohdr->u.aeth);
-		else {
-			aeth = be32_to_cpu(((__be32 *) data)[0]);
-			data += sizeof(__be32);
-		}
-		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
-			if (!header_in_data) {
-				__be32 *p = ohdr->u.at.atomic_ack_eth;
-
-				val = ((u64) be32_to_cpu(p[0]) << 32) |
-					be32_to_cpu(p[1]);
-			} else
-				val = be64_to_cpu(((__be64 *) data)[0]);
-		} else
-			val = 0;
-		if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
-		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
-			goto ack_done;
-		hdrsize += 4;
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-			goto ack_op_err;
-		qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
-		/*
-		 * If this is a response to a resent RDMA read, we
-		 * have to be careful to copy the data to the right
-		 * location.
-		 */
-		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
-						  wqe, psn, pmtu);
-		goto read_middle;
-
-	case OP(RDMA_READ_RESPONSE_MIDDLE):
-		/* no AETH, no ACK */
-		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-			dev->n_rdma_seq++;
-			if (qp->r_flags & IPATH_R_RDMAR_SEQ)
-				goto ack_done;
-			qp->r_flags |= IPATH_R_RDMAR_SEQ;
-			ipath_restart_rc(qp, qp->s_last_psn + 1);
-			goto ack_done;
-		}
-		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-			goto ack_op_err;
-	read_middle:
-		if (unlikely(tlen != (hdrsize + pmtu + 4)))
-			goto ack_len_err;
-		if (unlikely(pmtu >= qp->s_rdma_read_len))
-			goto ack_len_err;
-
-		/* We got a response so update the timeout. */
-		spin_lock(&dev->pending_lock);
-		if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
-			list_move_tail(&qp->timerwait,
-				       &dev->pending[dev->pending_index]);
-		spin_unlock(&dev->pending_lock);
-
-		if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
-			qp->s_retry = qp->s_retry_cnt;
-
-		/*
-		 * Update the RDMA receive state but do the copy w/o
-		 * holding the locks and blocking interrupts.
-		 */
-		qp->s_rdma_read_len -= pmtu;
-		update_last_psn(qp, psn);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
-		goto bail;
-
-	case OP(RDMA_READ_RESPONSE_ONLY):
-		if (!header_in_data)
-			aeth = be32_to_cpu(ohdr->u.aeth);
-		else
-			aeth = be32_to_cpu(((__be32 *) data)[0]);
-		if (!do_rc_ack(qp, aeth, psn, opcode, 0))
-			goto ack_done;
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/*
-		 * Check that the data size is >= 0 && <= pmtu.
-		 * Remember to account for the AETH header (4) and
-		 * ICRC (4).
-		 */
-		if (unlikely(tlen < (hdrsize + pad + 8)))
-			goto ack_len_err;
-		/*
-		 * If this is a response to a resent RDMA read, we
-		 * have to be careful to copy the data to the right
-		 * location.
-		 */
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
-						  wqe, psn, pmtu);
-		goto read_last;
-
-	case OP(RDMA_READ_RESPONSE_LAST):
-		/* ACKs READ req. */
-		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-			dev->n_rdma_seq++;
-			if (qp->r_flags & IPATH_R_RDMAR_SEQ)
-				goto ack_done;
-			qp->r_flags |= IPATH_R_RDMAR_SEQ;
-			ipath_restart_rc(qp, qp->s_last_psn + 1);
-			goto ack_done;
-		}
-		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-			goto ack_op_err;
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/*
-		 * Check that the data size is >= 1 && <= pmtu.
-		 * Remember to account for the AETH header (4) and
-		 * ICRC (4).
-		 */
-		if (unlikely(tlen <= (hdrsize + pad + 8)))
-			goto ack_len_err;
-	read_last:
-		tlen -= hdrsize + pad + 8;
-		if (unlikely(tlen != qp->s_rdma_read_len))
-			goto ack_len_err;
-		if (!header_in_data)
-			aeth = be32_to_cpu(ohdr->u.aeth);
-		else {
-			aeth = be32_to_cpu(((__be32 *) data)[0]);
-			data += sizeof(__be32);
-		}
-		ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
-		(void) do_rc_ack(qp, aeth, psn,
-				 OP(RDMA_READ_RESPONSE_LAST), 0);
-		goto ack_done;
-	}
-
-ack_op_err:
-	status = IB_WC_LOC_QP_OP_ERR;
-	goto ack_err;
-
-ack_len_err:
-	status = IB_WC_LOC_LEN_ERR;
-ack_err:
-	ipath_send_complete(qp, wqe, status);
-	ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-ack_done:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-bail:
-	return;
-}
-
-/**
- * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
- * @dev: the device this packet came in on
- * @ohdr: the other headers for this packet
- * @data: the packet data
- * @qp: the QP for this packet
- * @opcode: the opcode for this packet
- * @psn: the packet sequence number for this packet
- * @diff: the difference between the PSN and the expected PSN
- * @header_in_data: true if part of the header data is in the data buffer
- *
- * This is called from ipath_rc_rcv() to process an unexpected
- * incoming RC packet for the given QP.
- * Called at interrupt level.
- * Return 1 if no more processing is needed; otherwise return 0 to
- * schedule a response to be sent.
- */
-static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
-				     struct ipath_other_headers *ohdr,
-				     void *data,
-				     struct ipath_qp *qp,
-				     u32 opcode,
-				     u32 psn,
-				     int diff,
-				     int header_in_data)
-{
-	struct ipath_ack_entry *e;
-	u8 i, prev;
-	int old_req;
-	unsigned long flags;
-
-	if (diff > 0) {
-		/*
-		 * Packet sequence error.
-		 * A NAK will ACK earlier sends and RDMA writes.
-		 * Don't queue the NAK if we already sent one.
-		 */
-		if (!qp->r_nak_state) {
-			qp->r_nak_state = IB_NAK_PSN_ERROR;
-			/* Use the expected PSN. */
-			qp->r_ack_psn = qp->r_psn;
-			goto send_ack;
-		}
-		goto done;
-	}
-
-	/*
-	 * Handle a duplicate request.  Don't re-execute SEND, RDMA
-	 * write or atomic op.  Don't NAK errors, just silently drop
-	 * the duplicate request.  Note that r_sge, r_len, and
-	 * r_rcv_len may be in use so don't modify them.
-	 *
-	 * We are supposed to ACK the earliest duplicate PSN but we
-	 * can coalesce an outstanding duplicate ACK.  We have to
-	 * send the earliest so that RDMA reads can be restarted at
-	 * the requester's expected PSN.
-	 *
-	 * First, find where this duplicate PSN falls within the
-	 * ACKs previously sent.
-	 */
-	psn &= IPATH_PSN_MASK;
-	e = NULL;
-	old_req = 1;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-	/* Double check we can process this now that we hold the s_lock. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-		goto unlock_done;
-
-	for (i = qp->r_head_ack_queue; ; i = prev) {
-		if (i == qp->s_tail_ack_queue)
-			old_req = 0;
-		if (i)
-			prev = i - 1;
-		else
-			prev = IPATH_MAX_RDMA_ATOMIC;
-		if (prev == qp->r_head_ack_queue) {
-			e = NULL;
-			break;
-		}
-		e = &qp->s_ack_queue[prev];
-		if (!e->opcode) {
-			e = NULL;
-			break;
-		}
-		if (ipath_cmp24(psn, e->psn) >= 0) {
-			if (prev == qp->s_tail_ack_queue)
-				old_req = 0;
-			break;
-		}
-	}
-	switch (opcode) {
-	case OP(RDMA_READ_REQUEST): {
-		struct ib_reth *reth;
-		u32 offset;
-		u32 len;
-
-		/*
-		 * If we didn't find the RDMA read request in the ack queue,
-		 * or the send tasklet is already backed up to send an
-		 * earlier entry, we can ignore this request.
-		 */
-		if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
-			goto unlock_done;
-		/* RETH comes after BTH */
-		if (!header_in_data)
-			reth = &ohdr->u.rc.reth;
-		else {
-			reth = (struct ib_reth *)data;
-			data += sizeof(*reth);
-		}
-		/*
-		 * Address range must be a subset of the original
-		 * request and start on pmtu boundaries.
-		 * We reuse the old ack_queue slot since the requester
-		 * should not back up and request an earlier PSN for the
-		 * same request.
-		 */
-		offset = ((psn - e->psn) & IPATH_PSN_MASK) *
-			ib_mtu_enum_to_int(qp->path_mtu);
-		len = be32_to_cpu(reth->length);
-		if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
-			goto unlock_done;
-		if (len != 0) {
-			u32 rkey = be32_to_cpu(reth->rkey);
-			u64 vaddr = be64_to_cpu(reth->vaddr);
-			int ok;
-
-			ok = ipath_rkey_ok(qp, &e->rdma_sge,
-					   len, vaddr, rkey,
-					   IB_ACCESS_REMOTE_READ);
-			if (unlikely(!ok))
-				goto unlock_done;
-		} else {
-			e->rdma_sge.sg_list = NULL;
-			e->rdma_sge.num_sge = 0;
-			e->rdma_sge.sge.mr = NULL;
-			e->rdma_sge.sge.vaddr = NULL;
-			e->rdma_sge.sge.length = 0;
-			e->rdma_sge.sge.sge_length = 0;
-		}
-		e->psn = psn;
-		qp->s_ack_state = OP(ACKNOWLEDGE);
-		qp->s_tail_ack_queue = prev;
-		break;
-	}
-
-	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD): {
-		/*
-		 * If we didn't find the atomic request in the ack queue
-		 * or the send tasklet is already backed up to send an
-		 * earlier entry, we can ignore this request.
-		 */
-		if (!e || e->opcode != (u8) opcode || old_req)
-			goto unlock_done;
-		qp->s_ack_state = OP(ACKNOWLEDGE);
-		qp->s_tail_ack_queue = prev;
-		break;
-	}
-
-	default:
-		if (old_req)
-			goto unlock_done;
-		/*
-		 * Resend the most recent ACK if this request is
-		 * after all the previous RDMA reads and atomics.
-		 */
-		if (i == qp->r_head_ack_queue) {
-			spin_unlock_irqrestore(&qp->s_lock, flags);
-			qp->r_nak_state = 0;
-			qp->r_ack_psn = qp->r_psn - 1;
-			goto send_ack;
-		}
-		/*
-		 * Try to send a simple ACK to work around a Mellanox bug
-		 * which doesn't accept a RDMA read response or atomic
-		 * response as an ACK for earlier SENDs or RDMA writes.
-		 */
-		if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
-		    !(qp->s_flags & IPATH_S_ACK_PENDING) &&
-		    qp->s_ack_state == OP(ACKNOWLEDGE)) {
-			spin_unlock_irqrestore(&qp->s_lock, flags);
-			qp->r_nak_state = 0;
-			qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
-			goto send_ack;
-		}
-		/*
-		 * Resend the RDMA read or atomic op which
-		 * ACKs this duplicate request.
-		 */
-		qp->s_ack_state = OP(ACKNOWLEDGE);
-		qp->s_tail_ack_queue = i;
-		break;
-	}
-	qp->r_nak_state = 0;
-	ipath_schedule_send(qp);
-
-unlock_done:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-	return 1;
-
-send_ack:
-	return 0;
-}
-
-void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
-{
-	unsigned long flags;
-	int lastwqe;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-	lastwqe = ipath_error_qp(qp, err);
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-	if (lastwqe) {
-		struct ib_event ev;
-
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-}
-
-static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
-{
-	unsigned next;
-
-	next = n + 1;
-	if (next > IPATH_MAX_RDMA_ATOMIC)
-		next = 0;
-	if (n == qp->s_tail_ack_queue) {
-		qp->s_tail_ack_queue = next;
-		qp->s_ack_state = OP(ACKNOWLEDGE);
-	}
-}
-
-/**
- * ipath_rc_rcv - process an incoming RC packet
- * @dev: the device this packet came in on
- * @hdr: the header of this packet
- * @has_grh: true if the header has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
- *
- * This is called from ipath_qp_rcv() to process an incoming RC packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-	struct ipath_other_headers *ohdr;
-	u32 opcode;
-	u32 hdrsize;
-	u32 psn;
-	u32 pad;
-	struct ib_wc wc;
-	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-	int diff;
-	struct ib_reth *reth;
-	int header_in_data;
-	unsigned long flags;
-
-	/* Validate the SLID. See Ch. 9.6.1.5 */
-	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
-		goto done;
-
-	/* Check for GRH */
-	if (!has_grh) {
-		ohdr = &hdr->u.oth;
-		hdrsize = 8 + 12;	/* LRH + BTH */
-		psn = be32_to_cpu(ohdr->bth[2]);
-		header_in_data = 0;
-	} else {
-		ohdr = &hdr->u.l.oth;
-		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
-		/*
-		 * The header with GRH is 60 bytes and the core driver sets
-		 * the eager header buffer size to 56 bytes so the last 4
-		 * bytes of the BTH header (PSN) is in the data buffer.
-		 */
-		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-		if (header_in_data) {
-			psn = be32_to_cpu(((__be32 *) data)[0]);
-			data += sizeof(__be32);
-		} else
-			psn = be32_to_cpu(ohdr->bth[2]);
-	}
-
-	/*
-	 * Process responses (ACKs) before anything else.  Note that the
-	 * packet sequence number will be for something in the send work
-	 * queue rather than the expected receive packet sequence number.
-	 * In other words, this QP is the requester.
-	 */
-	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
-	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
-		ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
-				  hdrsize, pmtu, header_in_data);
-		goto done;
-	}
-
-	/* Compute 24 bits worth of difference. */
-	diff = ipath_cmp24(psn, qp->r_psn);
-	if (unlikely(diff)) {
-		if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
-				       psn, diff, header_in_data))
-			goto done;
-		goto send_ack;
-	}
-
-	/* Check for opcode sequence errors. */
-	switch (qp->r_state) {
-	case OP(SEND_FIRST):
-	case OP(SEND_MIDDLE):
-		if (opcode == OP(SEND_MIDDLE) ||
-		    opcode == OP(SEND_LAST) ||
-		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
-			break;
-		goto nack_inv;
-
-	case OP(RDMA_WRITE_FIRST):
-	case OP(RDMA_WRITE_MIDDLE):
-		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
-		    opcode == OP(RDMA_WRITE_LAST) ||
-		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-			break;
-		goto nack_inv;
-
-	default:
-		if (opcode == OP(SEND_MIDDLE) ||
-		    opcode == OP(SEND_LAST) ||
-		    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
-		    opcode == OP(RDMA_WRITE_MIDDLE) ||
-		    opcode == OP(RDMA_WRITE_LAST) ||
-		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-			goto nack_inv;
-		/*
-		 * Note that it is up to the requester to not send a new
-		 * RDMA read or atomic operation before receiving an ACK
-		 * for the previous operation.
-		 */
-		break;
-	}
-
-	memset(&wc, 0, sizeof wc);
-
-	/* OK, process the packet. */
-	switch (opcode) {
-	case OP(SEND_FIRST):
-		if (!ipath_get_rwqe(qp, 0))
-			goto rnr_nak;
-		qp->r_rcv_len = 0;
-		/* FALLTHROUGH */
-	case OP(SEND_MIDDLE):
-	case OP(RDMA_WRITE_MIDDLE):
-	send_middle:
-		/* Check for invalid length PMTU or posted rwqe len. */
-		if (unlikely(tlen != (hdrsize + pmtu + 4)))
-			goto nack_inv;
-		qp->r_rcv_len += pmtu;
-		if (unlikely(qp->r_rcv_len > qp->r_len))
-			goto nack_inv;
-		ipath_copy_sge(&qp->r_sge, data, pmtu);
-		break;
-
-	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
-		/* consume RWQE */
-		if (!ipath_get_rwqe(qp, 1))
-			goto rnr_nak;
-		goto send_last_imm;
-
-	case OP(SEND_ONLY):
-	case OP(SEND_ONLY_WITH_IMMEDIATE):
-		if (!ipath_get_rwqe(qp, 0))
-			goto rnr_nak;
-		qp->r_rcv_len = 0;
-		if (opcode == OP(SEND_ONLY))
-			goto send_last;
-		/* FALLTHROUGH */
-	case OP(SEND_LAST_WITH_IMMEDIATE):
-	send_last_imm:
-		if (header_in_data) {
-			wc.ex.imm_data = *(__be32 *) data;
-			data += sizeof(__be32);
-		} else {
-			/* Immediate data comes after BTH */
-			wc.ex.imm_data = ohdr->u.imm_data;
-		}
-		hdrsize += 4;
-		wc.wc_flags = IB_WC_WITH_IMM;
-		/* FALLTHROUGH */
-	case OP(SEND_LAST):
-	case OP(RDMA_WRITE_LAST):
-	send_last:
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/* Check for invalid length. */
-		/* XXX LAST len should be >= 1 */
-		if (unlikely(tlen < (hdrsize + pad + 4)))
-			goto nack_inv;
-		/* Don't count the CRC. */
-		tlen -= (hdrsize + pad + 4);
-		wc.byte_len = tlen + qp->r_rcv_len;
-		if (unlikely(wc.byte_len > qp->r_len))
-			goto nack_inv;
-		ipath_copy_sge(&qp->r_sge, data, tlen);
-		qp->r_msn++;
-		if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-			break;
-		wc.wr_id = qp->r_wr_id;
-		wc.status = IB_WC_SUCCESS;
-		if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
-		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-			wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-		else
-			wc.opcode = IB_WC_RECV;
-		wc.qp = &qp->ibqp;
-		wc.src_qp = qp->remote_qpn;
-		wc.slid = qp->remote_ah_attr.dlid;
-		wc.sl = qp->remote_ah_attr.sl;
-		/* Signal completion event if the solicited bit is set. */
-		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-			       (ohdr->bth[0] &
-				cpu_to_be32(1 << 23)) != 0);
-		break;
-
-	case OP(RDMA_WRITE_FIRST):
-	case OP(RDMA_WRITE_ONLY):
-	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_WRITE)))
-			goto nack_inv;
-		/* consume RWQE */
-		/* RETH comes after BTH */
-		if (!header_in_data)
-			reth = &ohdr->u.rc.reth;
-		else {
-			reth = (struct ib_reth *)data;
-			data += sizeof(*reth);
-		}
-		hdrsize += sizeof(*reth);
-		qp->r_len = be32_to_cpu(reth->length);
-		qp->r_rcv_len = 0;
-		if (qp->r_len != 0) {
-			u32 rkey = be32_to_cpu(reth->rkey);
-			u64 vaddr = be64_to_cpu(reth->vaddr);
-			int ok;
-
-			/* Check rkey & NAK */
-			ok = ipath_rkey_ok(qp, &qp->r_sge,
-					   qp->r_len, vaddr, rkey,
-					   IB_ACCESS_REMOTE_WRITE);
-			if (unlikely(!ok))
-				goto nack_acc;
-		} else {
-			qp->r_sge.sg_list = NULL;
-			qp->r_sge.sge.mr = NULL;
-			qp->r_sge.sge.vaddr = NULL;
-			qp->r_sge.sge.length = 0;
-			qp->r_sge.sge.sge_length = 0;
-		}
-		if (opcode == OP(RDMA_WRITE_FIRST))
-			goto send_middle;
-		else if (opcode == OP(RDMA_WRITE_ONLY))
-			goto send_last;
-		if (!ipath_get_rwqe(qp, 1))
-			goto rnr_nak;
-		goto send_last_imm;
-
-	case OP(RDMA_READ_REQUEST): {
-		struct ipath_ack_entry *e;
-		u32 len;
-		u8 next;
-
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_READ)))
-			goto nack_inv;
-		next = qp->r_head_ack_queue + 1;
-		if (next > IPATH_MAX_RDMA_ATOMIC)
-			next = 0;
-		spin_lock_irqsave(&qp->s_lock, flags);
-		/* Double check we can process this while holding the s_lock. */
-		if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-			goto unlock;
-		if (unlikely(next == qp->s_tail_ack_queue)) {
-			if (!qp->s_ack_queue[next].sent)
-				goto nack_inv_unlck;
-			ipath_update_ack_queue(qp, next);
-		}
-		e = &qp->s_ack_queue[qp->r_head_ack_queue];
-		/* RETH comes after BTH */
-		if (!header_in_data)
-			reth = &ohdr->u.rc.reth;
-		else {
-			reth = (struct ib_reth *)data;
-			data += sizeof(*reth);
-		}
-		len = be32_to_cpu(reth->length);
-		if (len) {
-			u32 rkey = be32_to_cpu(reth->rkey);
-			u64 vaddr = be64_to_cpu(reth->vaddr);
-			int ok;
-
-			/* Check rkey & NAK */
-			ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
-					   rkey, IB_ACCESS_REMOTE_READ);
-			if (unlikely(!ok))
-				goto nack_acc_unlck;
-			/*
-			 * Update the next expected PSN.  We add 1 later
-			 * below, so only add the remainder here.
-			 */
-			if (len > pmtu)
-				qp->r_psn += (len - 1) / pmtu;
-		} else {
-			e->rdma_sge.sg_list = NULL;
-			e->rdma_sge.num_sge = 0;
-			e->rdma_sge.sge.mr = NULL;
-			e->rdma_sge.sge.vaddr = NULL;
-			e->rdma_sge.sge.length = 0;
-			e->rdma_sge.sge.sge_length = 0;
-		}
-		e->opcode = opcode;
-		e->sent = 0;
-		e->psn = psn;
-		/*
-		 * We need to increment the MSN here instead of when we
-		 * finish sending the result since a duplicate request would
-		 * increment it more than once.
-		 */
-		qp->r_msn++;
-		qp->r_psn++;
-		qp->r_state = opcode;
-		qp->r_nak_state = 0;
-		qp->r_head_ack_queue = next;
-
-		/* Schedule the send tasklet. */
-		ipath_schedule_send(qp);
-
-		goto unlock;
-	}
-
-	case OP(COMPARE_SWAP):
-	case OP(FETCH_ADD): {
-		struct ib_atomic_eth *ateth;
-		struct ipath_ack_entry *e;
-		u64 vaddr;
-		atomic64_t *maddr;
-		u64 sdata;
-		u32 rkey;
-		u8 next;
-
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_ATOMIC)))
-			goto nack_inv;
-		next = qp->r_head_ack_queue + 1;
-		if (next > IPATH_MAX_RDMA_ATOMIC)
-			next = 0;
-		spin_lock_irqsave(&qp->s_lock, flags);
-		/* Double check we can process this while holding the s_lock. */
-		if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
-			goto unlock;
-		if (unlikely(next == qp->s_tail_ack_queue)) {
-			if (!qp->s_ack_queue[next].sent)
-				goto nack_inv_unlck;
-			ipath_update_ack_queue(qp, next);
-		}
-		if (!header_in_data)
-			ateth = &ohdr->u.atomic_eth;
-		else
-			ateth = (struct ib_atomic_eth *)data;
-		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
-			be32_to_cpu(ateth->vaddr[1]);
-		if (unlikely(vaddr & (sizeof(u64) - 1)))
-			goto nack_inv_unlck;
-		rkey = be32_to_cpu(ateth->rkey);
-		/* Check rkey & NAK */
-		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
-					    sizeof(u64), vaddr, rkey,
-					    IB_ACCESS_REMOTE_ATOMIC)))
-			goto nack_acc_unlck;
-		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-		sdata = be64_to_cpu(ateth->swap_data);
-		e = &qp->s_ack_queue[qp->r_head_ack_queue];
-		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
-			(u64) atomic64_add_return(sdata, maddr) - sdata :
-			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-				      be64_to_cpu(ateth->compare_data),
-				      sdata);
-		e->opcode = opcode;
-		e->sent = 0;
-		e->psn = psn & IPATH_PSN_MASK;
-		qp->r_msn++;
-		qp->r_psn++;
-		qp->r_state = opcode;
-		qp->r_nak_state = 0;
-		qp->r_head_ack_queue = next;
-
-		/* Schedule the send tasklet. */
-		ipath_schedule_send(qp);
-
-		goto unlock;
-	}
-
-	default:
-		/* NAK unknown opcodes. */
-		goto nack_inv;
-	}
-	qp->r_psn++;
-	qp->r_state = opcode;
-	qp->r_ack_psn = psn;
-	qp->r_nak_state = 0;
-	/* Send an ACK if requested or required. */
-	if (psn & (1 << 31))
-		goto send_ack;
-	goto done;
-
-rnr_nak:
-	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
-	qp->r_ack_psn = qp->r_psn;
-	goto send_ack;
-
-nack_inv_unlck:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-nack_inv:
-	ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-	qp->r_nak_state = IB_NAK_INVALID_REQUEST;
-	qp->r_ack_psn = qp->r_psn;
-	goto send_ack;
-
-nack_acc_unlck:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-nack_acc:
-	ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
-	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
-	qp->r_ack_psn = qp->r_psn;
-send_ack:
-	send_rc_ack(qp);
-	goto done;
-
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-done:
-	return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_registers.h b/drivers/staging/rdma/ipath/ipath_registers.h
deleted file mode 100644
index 8f44d0cf3833..000000000000
--- a/drivers/staging/rdma/ipath/ipath_registers.h
+++ /dev/null
@@ -1,512 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_REGISTERS_H
-#define _IPATH_REGISTERS_H
-
-/*
- * This file should only be included by kernel source, and by the diags.  It
- * defines the registers, and their contents, for InfiniPath chips.
- */
-
-/*
- * These are the InfiniPath register and buffer bit definitions,
- * that are visible to software, and needed only by the kernel
- * and diag code.  A few, that are visible to protocol and user
- * code are in ipath_common.h.  Some bits are specific
- * to a given chip implementation, and have been moved to the
- * chip-specific source file
- */
-
-/* kr_revision bits */
-#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF
-#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0
-#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF
-#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8
-#define INFINIPATH_R_ARCH_MASK 0xFF
-#define INFINIPATH_R_ARCH_SHIFT 16
-#define INFINIPATH_R_SOFTWARE_MASK 0xFF
-#define INFINIPATH_R_SOFTWARE_SHIFT 24
-#define INFINIPATH_R_BOARDID_MASK 0xFF
-#define INFINIPATH_R_BOARDID_SHIFT 32
-
-/* kr_control bits */
-#define INFINIPATH_C_FREEZEMODE 0x00000002
-#define INFINIPATH_C_LINKENABLE 0x00000004
-
-/* kr_sendctrl bits */
-#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
-#define INFINIPATH_S_UPDTHRESH_SHIFT 24
-#define INFINIPATH_S_UPDTHRESH_MASK 0x1f
-
-#define IPATH_S_ABORT		0
-#define IPATH_S_PIOINTBUFAVAIL	1
-#define IPATH_S_PIOBUFAVAILUPD	2
-#define IPATH_S_PIOENABLE	3
-#define IPATH_S_SDMAINTENABLE	9
-#define IPATH_S_SDMASINGLEDESCRIPTOR	10
-#define IPATH_S_SDMAENABLE	11
-#define IPATH_S_SDMAHALT	12
-#define IPATH_S_DISARM		31
-
-#define INFINIPATH_S_ABORT		(1U << IPATH_S_ABORT)
-#define INFINIPATH_S_PIOINTBUFAVAIL	(1U << IPATH_S_PIOINTBUFAVAIL)
-#define INFINIPATH_S_PIOBUFAVAILUPD	(1U << IPATH_S_PIOBUFAVAILUPD)
-#define INFINIPATH_S_PIOENABLE		(1U << IPATH_S_PIOENABLE)
-#define INFINIPATH_S_SDMAINTENABLE	(1U << IPATH_S_SDMAINTENABLE)
-#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \
-					(1U << IPATH_S_SDMASINGLEDESCRIPTOR)
-#define INFINIPATH_S_SDMAENABLE		(1U << IPATH_S_SDMAENABLE)
-#define INFINIPATH_S_SDMAHALT		(1U << IPATH_S_SDMAHALT)
-#define INFINIPATH_S_DISARM		(1U << IPATH_S_DISARM)
-
-/* kr_rcvctrl bits that are the same on multiple chips */
-#define INFINIPATH_R_PORTENABLE_SHIFT 0
-#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38)
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_SDMAINT		0x8000000000000000ULL
-#define INFINIPATH_I_SDMADISABLED	0x4000000000000000ULL
-#define INFINIPATH_I_ERROR		0x0000000080000000ULL
-#define INFINIPATH_I_SPIOSENT		0x0000000040000000ULL
-#define INFINIPATH_I_SPIOBUFAVAIL	0x0000000020000000ULL
-#define INFINIPATH_I_GPIO		0x0000000010000000ULL
-#define INFINIPATH_I_JINT		0x0000000004000000ULL
-
-/* kr_errorstatus, kr_errorclear, kr_errormask bits */
-#define INFINIPATH_E_RFORMATERR			0x0000000000000001ULL
-#define INFINIPATH_E_RVCRC			0x0000000000000002ULL
-#define INFINIPATH_E_RICRC			0x0000000000000004ULL
-#define INFINIPATH_E_RMINPKTLEN			0x0000000000000008ULL
-#define INFINIPATH_E_RMAXPKTLEN			0x0000000000000010ULL
-#define INFINIPATH_E_RLONGPKTLEN		0x0000000000000020ULL
-#define INFINIPATH_E_RSHORTPKTLEN		0x0000000000000040ULL
-#define INFINIPATH_E_RUNEXPCHAR			0x0000000000000080ULL
-#define INFINIPATH_E_RUNSUPVL			0x0000000000000100ULL
-#define INFINIPATH_E_REBP			0x0000000000000200ULL
-#define INFINIPATH_E_RIBFLOW			0x0000000000000400ULL
-#define INFINIPATH_E_RBADVERSION		0x0000000000000800ULL
-#define INFINIPATH_E_RRCVEGRFULL		0x0000000000001000ULL
-#define INFINIPATH_E_RRCVHDRFULL		0x0000000000002000ULL
-#define INFINIPATH_E_RBADTID			0x0000000000004000ULL
-#define INFINIPATH_E_RHDRLEN			0x0000000000008000ULL
-#define INFINIPATH_E_RHDR			0x0000000000010000ULL
-#define INFINIPATH_E_RIBLOSTLINK		0x0000000000020000ULL
-#define INFINIPATH_E_SENDSPECIALTRIGGER		0x0000000008000000ULL
-#define INFINIPATH_E_SDMADISABLED		0x0000000010000000ULL
-#define INFINIPATH_E_SMINPKTLEN			0x0000000020000000ULL
-#define INFINIPATH_E_SMAXPKTLEN			0x0000000040000000ULL
-#define INFINIPATH_E_SUNDERRUN			0x0000000080000000ULL
-#define INFINIPATH_E_SPKTLEN			0x0000000100000000ULL
-#define INFINIPATH_E_SDROPPEDSMPPKT		0x0000000200000000ULL
-#define INFINIPATH_E_SDROPPEDDATAPKT		0x0000000400000000ULL
-#define INFINIPATH_E_SPIOARMLAUNCH		0x0000000800000000ULL
-#define INFINIPATH_E_SUNEXPERRPKTNUM		0x0000001000000000ULL
-#define INFINIPATH_E_SUNSUPVL			0x0000002000000000ULL
-#define INFINIPATH_E_SENDBUFMISUSE		0x0000004000000000ULL
-#define INFINIPATH_E_SDMAGENMISMATCH		0x0000008000000000ULL
-#define INFINIPATH_E_SDMAOUTOFBOUND		0x0000010000000000ULL
-#define INFINIPATH_E_SDMATAILOUTOFBOUND		0x0000020000000000ULL
-#define INFINIPATH_E_SDMABASE			0x0000040000000000ULL
-#define INFINIPATH_E_SDMA1STDESC		0x0000080000000000ULL
-#define INFINIPATH_E_SDMARPYTAG			0x0000100000000000ULL
-#define INFINIPATH_E_SDMADWEN			0x0000200000000000ULL
-#define INFINIPATH_E_SDMAMISSINGDW		0x0000400000000000ULL
-#define INFINIPATH_E_SDMAUNEXPDATA		0x0000800000000000ULL
-#define INFINIPATH_E_IBSTATUSCHANGED		0x0001000000000000ULL
-#define INFINIPATH_E_INVALIDADDR		0x0002000000000000ULL
-#define INFINIPATH_E_RESET			0x0004000000000000ULL
-#define INFINIPATH_E_HARDWARE			0x0008000000000000ULL
-#define INFINIPATH_E_SDMADESCADDRMISALIGN	0x0010000000000000ULL
-#define INFINIPATH_E_INVALIDEEPCMD		0x0020000000000000ULL
-
-/*
- * this is used to print "common" packet errors only when the
- * __IPATH_ERRPKTDBG bit is set in ipath_debug.
- */
-#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
-		| INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
-		| INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
-		| INFINIPATH_E_REBP )
-
-/* Convenience for decoding Send DMA errors */
-#define INFINIPATH_E_SDMAERRS ( \
-	INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \
-	INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \
-	INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \
-	INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \
-	INFINIPATH_E_SDMAUNEXPDATA | \
-	INFINIPATH_E_SDMADESCADDRMISALIGN | \
-	INFINIPATH_E_SDMADISABLED | \
-	INFINIPATH_E_SENDBUFMISUSE)
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
- * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2:  expTID, 3: eagerTID
- * 		bit 4: flag buffer, 5: datainfo, 6: header info */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
-#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
-#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
-/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF	0x1ULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC	0x2ULL
-#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
-/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
-#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF   0x01ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ  0x02ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID   0x04ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF  0x10ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO  0x40ULL
-/* waldo specific -- find the rest in ipath_6110.c */
-#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR  0x0000000400000000ULL
-/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */
-#define INFINIPATH_HWE_MEMBISTFAILED	0x0040000000000000ULL
-
-/* kr_hwdiagctrl bits */
-#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
-#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40
-#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL
-#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR  0x0000000400000000ULL
-#define INFINIPATH_DC_COUNTERDISABLE            0x1000000000000000ULL
-#define INFINIPATH_DC_COUNTERWREN               0x2000000000000000ULL
-#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL
-#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL
-
-/* kr_ibcctrl bits */
-#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL
-#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0
-#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL
-#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8
-#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1
-/* cycle through TS1/TS2 till OK */
-#define INFINIPATH_IBCC_LINKINITCMD_POLL 2
-/* wait for TS1, then go on */
-#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3
-#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
-#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKCMD_DOWN 1		/* move to 0x11 */
-#define INFINIPATH_IBCC_LINKCMD_ARMED 2		/* move to 0x21 */
-#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3	/* move to 0x31 */
-#define INFINIPATH_IBCC_LINKCMD_SHIFT 18
-#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
-#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20
-#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL
-#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32
-#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL
-#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36
-#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL
-#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40
-#define INFINIPATH_IBCC_LOOPBACK             0x8000000000000000ULL
-#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
-
-/* kr_ibcstatus bits */
-#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
-#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
-
-#define INFINIPATH_IBCS_TXREADY       0x40000000
-#define INFINIPATH_IBCS_TXCREDITOK    0x80000000
-/* link training states (shift by
-   INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
-#define INFINIPATH_IBCS_LT_STATE_DISABLED	0x00
-#define INFINIPATH_IBCS_LT_STATE_LINKUP		0x01
-#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE	0x02
-#define INFINIPATH_IBCS_LT_STATE_POLLQUIET	0x03
-#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY	0x04
-#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET	0x05
-#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE	0x08
-#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG	0x09
-#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT	0x0a
-#define INFINIPATH_IBCS_LT_STATE_CFGIDLE	0x0b
-#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN	0x0c
-#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT	0x0e
-#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE	0x0f
-/* link state machine states (shift by ibcs_ls_shift) */
-#define INFINIPATH_IBCS_L_STATE_DOWN		0x0
-#define INFINIPATH_IBCS_L_STATE_INIT		0x1
-#define INFINIPATH_IBCS_L_STATE_ARM		0x2
-#define INFINIPATH_IBCS_L_STATE_ACTIVE		0x3
-#define INFINIPATH_IBCS_L_STATE_ACT_DEFER	0x4
-
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
-#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL
-#define INFINIPATH_EXTS_GPIOIN_SHIFT 48
-
-/* kr_extctrl bits */
-#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL
-#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32
-#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL
-#define INFINIPATH_EXTC_GPIOOE_SHIFT 48
-#define INFINIPATH_EXTC_SERDESENABLE         0x80000000ULL
-#define INFINIPATH_EXTC_SERDESCONNECT        0x40000000ULL
-#define INFINIPATH_EXTC_SERDESENTRUNKING     0x20000000ULL
-#define INFINIPATH_EXTC_SERDESDISRXFIFO      0x10000000ULL
-#define INFINIPATH_EXTC_SERDESENPLPBK1       0x08000000ULL
-#define INFINIPATH_EXTC_SERDESENPLPBK2       0x04000000ULL
-#define INFINIPATH_EXTC_SERDESENENCDEC       0x02000000ULL
-#define INFINIPATH_EXTC_LED1SECPORT_ON       0x00000020ULL
-#define INFINIPATH_EXTC_LED2SECPORT_ON       0x00000010ULL
-#define INFINIPATH_EXTC_LED1PRIPORT_ON       0x00000008ULL
-#define INFINIPATH_EXTC_LED2PRIPORT_ON       0x00000004ULL
-#define INFINIPATH_EXTC_LEDGBLOK_ON          0x00000002ULL
-#define INFINIPATH_EXTC_LEDGBLERR_OFF        0x00000001ULL
-
-/* kr_partitionkey bits */
-#define INFINIPATH_PKEY_SIZE 16
-#define INFINIPATH_PKEY_MASK 0xFFFF
-#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF
-
-/* kr_serdesconfig0 bits */
-#define INFINIPATH_SERDC0_RESET_MASK  0xfULL	/* overal reset bits */
-#define INFINIPATH_SERDC0_RESET_PLL   0x10000000ULL	/* pll reset */
-/* tx idle enables (per lane) */
-#define INFINIPATH_SERDC0_TXIDLE      0xF000ULL
-/* rx detect enables (per lane) */
-#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL
-/* L1 Power down; use with RXDETECT, Otherwise not used on IB side */
-#define INFINIPATH_SERDC0_L1PWR_DN	 0xF0ULL
-
-/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */
-#define INFINIPATH_XGXS_RX_POL_SHIFT 19
-#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
-
-
-/*
- * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
- * PIO send buffers.  This is well beyond anything currently
- * defined in the InfiniBand spec.
- */
-#define IPATH_PIO_MAXIBHDR 128
-
-typedef u64 ipath_err_t;
-
-/* The following change with the type of device, so
- * need to be part of the ipath_devdata struct, or
- * we could have problems plugging in devices of
- * different types (e.g. one HT, one PCIE)
- * in one system, to be managed by one driver.
- * On the other hand, this file is may also be included
- * by other code, so leave the declarations here
- * temporarily. Minor footprint issue if common-model
- * linker used, none if C89+ linker used.
- */
-
-/* mask of defined bits for various registers */
-extern u64 infinipath_i_bitsextant;
-extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
-
-/* masks that are different in various chips, or only exist in some chips */
-extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
-
-/*
- * These are the infinipath general register numbers (not offsets).
- * The kernel registers are used directly, those beyond the kernel
- * registers are calculated from one of the base registers.  The use of
- * an integer type doesn't allow type-checking as thorough as, say,
- * an enum but allows for better hiding of chip differences.
- */
-typedef const u16 ipath_kreg,	/* infinipath general registers */
- ipath_creg,			/* infinipath counter registers */
- ipath_sreg;			/* kernel-only, infinipath send registers */
-
-/*
- * These are the chip registers common to all infinipath chips, and
- * used both by the kernel and the diagnostics or other user code.
- * They are all implemented such that 64 bit accesses work.
- * Some implement no more than 32 bits.  Because 64 bit reads
- * require 2 HT cmds on opteron, we access those with 32 bit
- * reads for efficiency (they are written as 64 bits, since
- * the extra 32 bits are nearly free on writes, and it slightly reduces
- * complexity).  The rest are all accessed as 64 bits.
- */
-struct ipath_kregs {
-	/* These are the 32 bit group */
-	ipath_kreg kr_control;
-	ipath_kreg kr_counterregbase;
-	ipath_kreg kr_intmask;
-	ipath_kreg kr_intstatus;
-	ipath_kreg kr_pagealign;
-	ipath_kreg kr_portcnt;
-	ipath_kreg kr_rcvtidbase;
-	ipath_kreg kr_rcvtidcnt;
-	ipath_kreg kr_rcvegrbase;
-	ipath_kreg kr_rcvegrcnt;
-	ipath_kreg kr_scratch;
-	ipath_kreg kr_sendctrl;
-	ipath_kreg kr_sendpiobufbase;
-	ipath_kreg kr_sendpiobufcnt;
-	ipath_kreg kr_sendpiosize;
-	ipath_kreg kr_sendregbase;
-	ipath_kreg kr_userregbase;
-	/* These are the 64 bit group */
-	ipath_kreg kr_debugport;
-	ipath_kreg kr_debugportselect;
-	ipath_kreg kr_errorclear;
-	ipath_kreg kr_errormask;
-	ipath_kreg kr_errorstatus;
-	ipath_kreg kr_extctrl;
-	ipath_kreg kr_extstatus;
-	ipath_kreg kr_gpio_clear;
-	ipath_kreg kr_gpio_mask;
-	ipath_kreg kr_gpio_out;
-	ipath_kreg kr_gpio_status;
-	ipath_kreg kr_hwdiagctrl;
-	ipath_kreg kr_hwerrclear;
-	ipath_kreg kr_hwerrmask;
-	ipath_kreg kr_hwerrstatus;
-	ipath_kreg kr_ibcctrl;
-	ipath_kreg kr_ibcstatus;
-	ipath_kreg kr_intblocked;
-	ipath_kreg kr_intclear;
-	ipath_kreg kr_interruptconfig;
-	ipath_kreg kr_mdio;
-	ipath_kreg kr_partitionkey;
-	ipath_kreg kr_rcvbthqp;
-	ipath_kreg kr_rcvbufbase;
-	ipath_kreg kr_rcvbufsize;
-	ipath_kreg kr_rcvctrl;
-	ipath_kreg kr_rcvhdrcnt;
-	ipath_kreg kr_rcvhdrentsize;
-	ipath_kreg kr_rcvhdrsize;
-	ipath_kreg kr_rcvintmembase;
-	ipath_kreg kr_rcvintmemsize;
-	ipath_kreg kr_revision;
-	ipath_kreg kr_sendbuffererror;
-	ipath_kreg kr_sendpioavailaddr;
-	ipath_kreg kr_serdesconfig0;
-	ipath_kreg kr_serdesconfig1;
-	ipath_kreg kr_serdesstatus;
-	ipath_kreg kr_txintmembase;
-	ipath_kreg kr_txintmemsize;
-	ipath_kreg kr_xgxsconfig;
-	ipath_kreg kr_ibpllcfg;
-	/* use these two (and the following N ports) only with
-	 * ipath_k*_kreg64_port(); not *kreg64() */
-	ipath_kreg kr_rcvhdraddr;
-	ipath_kreg kr_rcvhdrtailaddr;
-
-	/* remaining registers are not present on all types of infinipath
-	   chips  */
-	ipath_kreg kr_rcvpktledcnt;
-	ipath_kreg kr_pcierbuftestreg0;
-	ipath_kreg kr_pcierbuftestreg1;
-	ipath_kreg kr_pcieq0serdesconfig0;
-	ipath_kreg kr_pcieq0serdesconfig1;
-	ipath_kreg kr_pcieq0serdesstatus;
-	ipath_kreg kr_pcieq1serdesconfig0;
-	ipath_kreg kr_pcieq1serdesconfig1;
-	ipath_kreg kr_pcieq1serdesstatus;
-	ipath_kreg kr_hrtbt_guid;
-	ipath_kreg kr_ibcddrctrl;
-	ipath_kreg kr_ibcddrstatus;
-	ipath_kreg kr_jintreload;
-
-	/* send dma related regs */
-	ipath_kreg kr_senddmabase;
-	ipath_kreg kr_senddmalengen;
-	ipath_kreg kr_senddmatail;
-	ipath_kreg kr_senddmahead;
-	ipath_kreg kr_senddmaheadaddr;
-	ipath_kreg kr_senddmabufmask0;
-	ipath_kreg kr_senddmabufmask1;
-	ipath_kreg kr_senddmabufmask2;
-	ipath_kreg kr_senddmastatus;
-
-	/* SerDes related regs (IBA7220-only) */
-	ipath_kreg kr_ibserdesctrl;
-	ipath_kreg kr_ib_epbacc;
-	ipath_kreg kr_ib_epbtrans;
-	ipath_kreg kr_pcie_epbacc;
-	ipath_kreg kr_pcie_epbtrans;
-	ipath_kreg kr_ib_ddsrxeq;
-};
-
-struct ipath_cregs {
-	ipath_creg cr_badformatcnt;
-	ipath_creg cr_erricrccnt;
-	ipath_creg cr_errlinkcnt;
-	ipath_creg cr_errlpcrccnt;
-	ipath_creg cr_errpkey;
-	ipath_creg cr_errrcvflowctrlcnt;
-	ipath_creg cr_err_rlencnt;
-	ipath_creg cr_errslencnt;
-	ipath_creg cr_errtidfull;
-	ipath_creg cr_errtidvalid;
-	ipath_creg cr_errvcrccnt;
-	ipath_creg cr_ibstatuschange;
-	ipath_creg cr_intcnt;
-	ipath_creg cr_invalidrlencnt;
-	ipath_creg cr_invalidslencnt;
-	ipath_creg cr_lbflowstallcnt;
-	ipath_creg cr_iblinkdowncnt;
-	ipath_creg cr_iblinkerrrecovcnt;
-	ipath_creg cr_ibsymbolerrcnt;
-	ipath_creg cr_pktrcvcnt;
-	ipath_creg cr_pktrcvflowctrlcnt;
-	ipath_creg cr_pktsendcnt;
-	ipath_creg cr_pktsendflowcnt;
-	ipath_creg cr_portovflcnt;
-	ipath_creg cr_rcvebpcnt;
-	ipath_creg cr_rcvovflcnt;
-	ipath_creg cr_rxdroppktcnt;
-	ipath_creg cr_senddropped;
-	ipath_creg cr_sendstallcnt;
-	ipath_creg cr_sendunderruncnt;
-	ipath_creg cr_unsupvlcnt;
-	ipath_creg cr_wordrcvcnt;
-	ipath_creg cr_wordsendcnt;
-	ipath_creg cr_vl15droppedpktcnt;
-	ipath_creg cr_rxotherlocalphyerrcnt;
-	ipath_creg cr_excessbufferovflcnt;
-	ipath_creg cr_locallinkintegrityerrcnt;
-	ipath_creg cr_rxvlerrcnt;
-	ipath_creg cr_rxdlidfltrcnt;
-	ipath_creg cr_psstat;
-	ipath_creg cr_psstart;
-	ipath_creg cr_psinterval;
-	ipath_creg cr_psrcvdatacount;
-	ipath_creg cr_psrcvpktscount;
-	ipath_creg cr_psxmitdatacount;
-	ipath_creg cr_psxmitpktscount;
-	ipath_creg cr_psxmitwaitcount;
-};
-
-#endif				/* _IPATH_REGISTERS_H */
diff --git a/drivers/staging/rdma/ipath/ipath_ruc.c b/drivers/staging/rdma/ipath/ipath_ruc.c
deleted file mode 100644
index e541a01f1f61..000000000000
--- a/drivers/staging/rdma/ipath/ipath_ruc.c
+++ /dev/null
@@ -1,733 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/spinlock.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/*
- * Convert the AETH RNR timeout code into the number of milliseconds.
- */
-const u32 ib_ipath_rnr_table[32] = {
-	656,			/* 0 */
-	1,			/* 1 */
-	1,			/* 2 */
-	1,			/* 3 */
-	1,			/* 4 */
-	1,			/* 5 */
-	1,			/* 6 */
-	1,			/* 7 */
-	1,			/* 8 */
-	1,			/* 9 */
-	1,			/* A */
-	1,			/* B */
-	1,			/* C */
-	1,			/* D */
-	2,			/* E */
-	2,			/* F */
-	3,			/* 10 */
-	4,			/* 11 */
-	6,			/* 12 */
-	8,			/* 13 */
-	11,			/* 14 */
-	16,			/* 15 */
-	21,			/* 16 */
-	31,			/* 17 */
-	41,			/* 18 */
-	62,			/* 19 */
-	82,			/* 1A */
-	123,			/* 1B */
-	164,			/* 1C */
-	246,			/* 1D */
-	328,			/* 1E */
-	492			/* 1F */
-};
-
-/**
- * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
- * @qp: the QP
- *
- * Called with the QP s_lock held and interrupts disabled.
- * XXX Use a simple list for now.  We might need a priority
- * queue if we have lots of QPs waiting for RNR timeouts
- * but that should be rare.
- */
-void ipath_insert_rnr_queue(struct ipath_qp *qp)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-
-	/* We already did a spin_lock_irqsave(), so just use spin_lock */
-	spin_lock(&dev->pending_lock);
-	if (list_empty(&dev->rnrwait))
-		list_add(&qp->timerwait, &dev->rnrwait);
-	else {
-		struct list_head *l = &dev->rnrwait;
-		struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
-						  timerwait);
-
-		while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
-			qp->s_rnr_timeout -= nqp->s_rnr_timeout;
-			l = l->next;
-			if (l->next == &dev->rnrwait) {
-				nqp = NULL;
-				break;
-			}
-			nqp = list_entry(l->next, struct ipath_qp,
-					 timerwait);
-		}
-		if (nqp)
-			nqp->s_rnr_timeout -= qp->s_rnr_timeout;
-		list_add(&qp->timerwait, l);
-	}
-	spin_unlock(&dev->pending_lock);
-}
-
-/**
- * ipath_init_sge - Validate a RWQE and fill in the SGE state
- * @qp: the QP
- *
- * Return 1 if OK.
- */
-int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
-		   u32 *lengthp, struct ipath_sge_state *ss)
-{
-	int i, j, ret;
-	struct ib_wc wc;
-
-	*lengthp = 0;
-	for (i = j = 0; i < wqe->num_sge; i++) {
-		if (wqe->sg_list[i].length == 0)
-			continue;
-		/* Check LKEY */
-		if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
-				   &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
-			goto bad_lkey;
-		*lengthp += wqe->sg_list[i].length;
-		j++;
-	}
-	ss->num_sge = j;
-	ret = 1;
-	goto bail;
-
-bad_lkey:
-	memset(&wc, 0, sizeof(wc));
-	wc.wr_id = wqe->wr_id;
-	wc.status = IB_WC_LOC_PROT_ERR;
-	wc.opcode = IB_WC_RECV;
-	wc.qp = &qp->ibqp;
-	/* Signal solicited completion event. */
-	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-	ret = 0;
-bail:
-	return ret;
-}
-
-/**
- * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
- * @qp: the QP
- * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
- *
- * Return 0 if no RWQE is available, otherwise return 1.
- *
- * Can be called from interrupt level.
- */
-int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
-{
-	unsigned long flags;
-	struct ipath_rq *rq;
-	struct ipath_rwq *wq;
-	struct ipath_srq *srq;
-	struct ipath_rwqe *wqe;
-	void (*handler)(struct ib_event *, void *);
-	u32 tail;
-	int ret;
-
-	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
-		handler = srq->ibsrq.event_handler;
-		rq = &srq->rq;
-	} else {
-		srq = NULL;
-		handler = NULL;
-		rq = &qp->r_rq;
-	}
-
-	spin_lock_irqsave(&rq->lock, flags);
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-		ret = 0;
-		goto unlock;
-	}
-
-	wq = rq->wq;
-	tail = wq->tail;
-	/* Validate tail before using it since it is user writable. */
-	if (tail >= rq->size)
-		tail = 0;
-	do {
-		if (unlikely(tail == wq->head)) {
-			ret = 0;
-			goto unlock;
-		}
-		/* Make sure entry is read after head index is read. */
-		smp_rmb();
-		wqe = get_rwqe_ptr(rq, tail);
-		if (++tail >= rq->size)
-			tail = 0;
-		if (wr_id_only)
-			break;
-		qp->r_sge.sg_list = qp->r_sg_list;
-	} while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge));
-	qp->r_wr_id = wqe->wr_id;
-	wq->tail = tail;
-
-	ret = 1;
-	set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
-	if (handler) {
-		u32 n;
-
-		/*
-		 * validate head pointer value and compute
-		 * the number of remaining WQEs.
-		 */
-		n = wq->head;
-		if (n >= rq->size)
-			n = 0;
-		if (n < tail)
-			n += rq->size - tail;
-		else
-			n -= tail;
-		if (n < srq->limit) {
-			struct ib_event ev;
-
-			srq->limit = 0;
-			spin_unlock_irqrestore(&rq->lock, flags);
-			ev.device = qp->ibqp.device;
-			ev.element.srq = qp->ibqp.srq;
-			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			handler(&ev, srq->ibsrq.srq_context);
-			goto bail;
-		}
-	}
-unlock:
-	spin_unlock_irqrestore(&rq->lock, flags);
-bail:
-	return ret;
-}
-
-/**
- * ipath_ruc_loopback - handle UC and RC lookback requests
- * @sqp: the sending QP
- *
- * This is called from ipath_do_send() to
- * forward a WQE addressed to the same HCA.
- * Note that although we are single threaded due to the tasklet, we still
- * have to protect against post_send().  We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void ipath_ruc_loopback(struct ipath_qp *sqp)
-{
-	struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
-	struct ipath_qp *qp;
-	struct ipath_swqe *wqe;
-	struct ipath_sge *sge;
-	unsigned long flags;
-	struct ib_wc wc;
-	u64 sdata;
-	atomic64_t *maddr;
-	enum ib_wc_status send_status;
-
-	/*
-	 * Note that we check the responder QP state after
-	 * checking the requester's state.
-	 */
-	qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
-
-	spin_lock_irqsave(&sqp->s_lock, flags);
-
-	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
-	    !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
-		goto unlock;
-
-	sqp->s_flags |= IPATH_S_BUSY;
-
-again:
-	if (sqp->s_last == sqp->s_head)
-		goto clr_busy;
-	wqe = get_swqe_ptr(sqp, sqp->s_last);
-
-	/* Return if it is not OK to start a new work reqeust. */
-	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
-			goto clr_busy;
-		/* We are in the error state, flush the work request. */
-		send_status = IB_WC_WR_FLUSH_ERR;
-		goto flush_send;
-	}
-
-	/*
-	 * We can rely on the entry not changing without the s_lock
-	 * being held until we update s_last.
-	 * We increment s_cur to indicate s_last is in progress.
-	 */
-	if (sqp->s_last == sqp->s_cur) {
-		if (++sqp->s_cur >= sqp->s_size)
-			sqp->s_cur = 0;
-	}
-	spin_unlock_irqrestore(&sqp->s_lock, flags);
-
-	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-		dev->n_pkt_drops++;
-		/*
-		 * For RC, the requester would timeout and retry so
-		 * shortcut the timeouts and just signal too many retries.
-		 */
-		if (sqp->ibqp.qp_type == IB_QPT_RC)
-			send_status = IB_WC_RETRY_EXC_ERR;
-		else
-			send_status = IB_WC_SUCCESS;
-		goto serr;
-	}
-
-	memset(&wc, 0, sizeof wc);
-	send_status = IB_WC_SUCCESS;
-
-	sqp->s_sge.sge = wqe->sg_list[0];
-	sqp->s_sge.sg_list = wqe->sg_list + 1;
-	sqp->s_sge.num_sge = wqe->wr.num_sge;
-	sqp->s_len = wqe->length;
-	switch (wqe->wr.opcode) {
-	case IB_WR_SEND_WITH_IMM:
-		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		/* FALLTHROUGH */
-	case IB_WR_SEND:
-		if (!ipath_get_rwqe(qp, 0))
-			goto rnr_nak;
-		break;
-
-	case IB_WR_RDMA_WRITE_WITH_IMM:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-			goto inv_err;
-		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		if (!ipath_get_rwqe(qp, 1))
-			goto rnr_nak;
-		/* FALLTHROUGH */
-	case IB_WR_RDMA_WRITE:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
-			goto inv_err;
-		if (wqe->length == 0)
-			break;
-		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
-					    wqe->rdma_wr.remote_addr,
-					    wqe->rdma_wr.rkey,
-					    IB_ACCESS_REMOTE_WRITE)))
-			goto acc_err;
-		break;
-
-	case IB_WR_RDMA_READ:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
-			goto inv_err;
-		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
-					    wqe->rdma_wr.remote_addr,
-					    wqe->rdma_wr.rkey,
-					    IB_ACCESS_REMOTE_READ)))
-			goto acc_err;
-		qp->r_sge.sge = wqe->sg_list[0];
-		qp->r_sge.sg_list = wqe->sg_list + 1;
-		qp->r_sge.num_sge = wqe->wr.num_sge;
-		break;
-
-	case IB_WR_ATOMIC_CMP_AND_SWP:
-	case IB_WR_ATOMIC_FETCH_AND_ADD:
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
-			goto inv_err;
-		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
-					    wqe->atomic_wr.remote_addr,
-					    wqe->atomic_wr.rkey,
-					    IB_ACCESS_REMOTE_ATOMIC)))
-			goto acc_err;
-		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-		sdata = wqe->atomic_wr.compare_add;
-		*(u64 *) sqp->s_sge.sge.vaddr =
-			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
-			(u64) atomic64_add_return(sdata, maddr) - sdata :
-			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-				      sdata, wqe->atomic_wr.swap);
-		goto send_comp;
-
-	default:
-		send_status = IB_WC_LOC_QP_OP_ERR;
-		goto serr;
-	}
-
-	sge = &sqp->s_sge.sge;
-	while (sqp->s_len) {
-		u32 len = sqp->s_len;
-
-		if (len > sge->length)
-			len = sge->length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--sqp->s_sge.num_sge)
-				*sge = *sqp->s_sge.sg_list++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		sqp->s_len -= len;
-	}
-
-	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-		goto send_comp;
-
-	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-	else
-		wc.opcode = IB_WC_RECV;
-	wc.wr_id = qp->r_wr_id;
-	wc.status = IB_WC_SUCCESS;
-	wc.byte_len = wqe->length;
-	wc.qp = &qp->ibqp;
-	wc.src_qp = qp->remote_qpn;
-	wc.slid = qp->remote_ah_attr.dlid;
-	wc.sl = qp->remote_ah_attr.sl;
-	wc.port_num = 1;
-	/* Signal completion event if the solicited bit is set. */
-	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		       wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
-	spin_lock_irqsave(&sqp->s_lock, flags);
-flush_send:
-	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
-	ipath_send_complete(sqp, wqe, send_status);
-	goto again;
-
-rnr_nak:
-	/* Handle RNR NAK */
-	if (qp->ibqp.qp_type == IB_QPT_UC)
-		goto send_comp;
-	/*
-	 * Note: we don't need the s_lock held since the BUSY flag
-	 * makes this single threaded.
-	 */
-	if (sqp->s_rnr_retry == 0) {
-		send_status = IB_WC_RNR_RETRY_EXC_ERR;
-		goto serr;
-	}
-	if (sqp->s_rnr_retry_cnt < 7)
-		sqp->s_rnr_retry--;
-	spin_lock_irqsave(&sqp->s_lock, flags);
-	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
-		goto clr_busy;
-	sqp->s_flags |= IPATH_S_WAITING;
-	dev->n_rnr_naks++;
-	sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
-	ipath_insert_rnr_queue(sqp);
-	goto clr_busy;
-
-inv_err:
-	send_status = IB_WC_REM_INV_REQ_ERR;
-	wc.status = IB_WC_LOC_QP_OP_ERR;
-	goto err;
-
-acc_err:
-	send_status = IB_WC_REM_ACCESS_ERR;
-	wc.status = IB_WC_LOC_PROT_ERR;
-err:
-	/* responder goes to error state */
-	ipath_rc_error(qp, wc.status);
-
-serr:
-	spin_lock_irqsave(&sqp->s_lock, flags);
-	ipath_send_complete(sqp, wqe, send_status);
-	if (sqp->ibqp.qp_type == IB_QPT_RC) {
-		int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
-		sqp->s_flags &= ~IPATH_S_BUSY;
-		spin_unlock_irqrestore(&sqp->s_lock, flags);
-		if (lastwqe) {
-			struct ib_event ev;
-
-			ev.device = sqp->ibqp.device;
-			ev.element.qp = &sqp->ibqp;
-			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
-		}
-		goto done;
-	}
-clr_busy:
-	sqp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
-	if (qp && atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-}
-
-static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
-{
-	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
-	    qp->ibqp.qp_type == IB_QPT_SMI) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-				 dd->ipath_sendctrl);
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-	}
-}
-
-/**
- * ipath_no_bufs_available - tell the layer driver we need buffers
- * @qp: the QP that caused the problem
- * @dev: the device we ran out of buffers on
- *
- * Called when we run out of PIO buffers.
- * If we are now in the error state, return zero to flush the
- * send work request.
- */
-static int ipath_no_bufs_available(struct ipath_qp *qp,
-				    struct ipath_ibdev *dev)
-{
-	unsigned long flags;
-	int ret = 1;
-
-	/*
-	 * Note that as soon as want_buffer() is called and
-	 * possibly before it returns, ipath_ib_piobufavail()
-	 * could be called. Therefore, put QP on the piowait list before
-	 * enabling the PIO avail interrupt.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
-		dev->n_piowait++;
-		qp->s_flags |= IPATH_S_WAITING;
-		qp->s_flags &= ~IPATH_S_BUSY;
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->piowait))
-			list_add_tail(&qp->piowait, &dev->piowait);
-		spin_unlock(&dev->pending_lock);
-	} else
-		ret = 0;
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	if (ret)
-		want_buffer(dev->dd, qp);
-	return ret;
-}
-
-/**
- * ipath_make_grh - construct a GRH header
- * @dev: a pointer to the ipath device
- * @hdr: a pointer to the GRH header being constructed
- * @grh: the global route address to send to
- * @hwords: the number of 32 bit words of header being sent
- * @nwords: the number of 32 bit words of data being sent
- *
- * Return the size of the header in 32 bit words.
- */
-u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
-		   struct ib_global_route *grh, u32 hwords, u32 nwords)
-{
-	hdr->version_tclass_flow =
-		cpu_to_be32((6 << 28) |
-			    (grh->traffic_class << 20) |
-			    grh->flow_label);
-	hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
-	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
-	hdr->next_hdr = 0x1B;
-	hdr->hop_limit = grh->hop_limit;
-	/* The SGID is 32-bit aligned. */
-	hdr->sgid.global.subnet_prefix = dev->gid_prefix;
-	hdr->sgid.global.interface_id = dev->dd->ipath_guid;
-	hdr->dgid = grh->dgid;
-
-	/* GRH header size in 32-bit words. */
-	return sizeof(struct ib_grh) / sizeof(u32);
-}
-
-void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
-			   struct ipath_other_headers *ohdr,
-			   u32 bth0, u32 bth2)
-{
-	u16 lrh0;
-	u32 nwords;
-	u32 extra_bytes;
-
-	/* Construct the header. */
-	extra_bytes = -qp->s_cur_size & 3;
-	nwords = (qp->s_cur_size + extra_bytes) >> 2;
-	lrh0 = IPATH_LRH_BTH;
-	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-		qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
-						 &qp->remote_ah_attr.grh,
-						 qp->s_hdrwords, nwords);
-		lrh0 = IPATH_LRH_GRH;
-	}
-	lrh0 |= qp->remote_ah_attr.sl << 4;
-	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid |
-				       qp->remote_ah_attr.src_path_bits);
-	bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
-	bth0 |= extra_bytes << 20;
-	ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
-	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(bth2);
-}
-
-/**
- * ipath_do_send - perform a send on a QP
- * @data: contains a pointer to the QP
- *
- * Process entries in the send work queue until credit or queue is
- * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
- * Otherwise, two threads could send packets out of order.
- */
-void ipath_do_send(unsigned long data)
-{
-	struct ipath_qp *qp = (struct ipath_qp *)data;
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	int (*make_req)(struct ipath_qp *qp);
-	unsigned long flags;
-
-	if ((qp->ibqp.qp_type == IB_QPT_RC ||
-	     qp->ibqp.qp_type == IB_QPT_UC) &&
-	    qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
-		ipath_ruc_loopback(qp);
-		goto bail;
-	}
-
-	if (qp->ibqp.qp_type == IB_QPT_RC)
-	       make_req = ipath_make_rc_req;
-	else if (qp->ibqp.qp_type == IB_QPT_UC)
-	       make_req = ipath_make_uc_req;
-	else
-	       make_req = ipath_make_ud_req;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Return if we are already busy processing a work request. */
-	if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
-	    !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		goto bail;
-	}
-
-	qp->s_flags |= IPATH_S_BUSY;
-
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-again:
-	/* Check for a constructed packet to be sent. */
-	if (qp->s_hdrwords != 0) {
-		/*
-		 * If no PIO bufs are available, return.  An interrupt will
-		 * call ipath_ib_piobufavail() when one is available.
-		 */
-		if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
-				     qp->s_cur_sge, qp->s_cur_size)) {
-			if (ipath_no_bufs_available(qp, dev))
-				goto bail;
-		}
-		dev->n_unicast_xmit++;
-		/* Record that we sent the packet and s_hdr is empty. */
-		qp->s_hdrwords = 0;
-	}
-
-	if (make_req(qp))
-		goto again;
-
-bail:;
-}
-
-/*
- * This should be called with s_lock held.
- */
-void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
-			 enum ib_wc_status status)
-{
-	u32 old_last, last;
-
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
-		return;
-
-	/* See ch. 11.2.4.1 and 10.7.3.1 */
-	if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
-	    status != IB_WC_SUCCESS) {
-		struct ib_wc wc;
-
-		memset(&wc, 0, sizeof wc);
-		wc.wr_id = wqe->wr.wr_id;
-		wc.status = status;
-		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-		wc.qp = &qp->ibqp;
-		if (status == IB_WC_SUCCESS)
-			wc.byte_len = wqe->length;
-		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
-			       status != IB_WC_SUCCESS);
-	}
-
-	old_last = last = qp->s_last;
-	if (++last >= qp->s_size)
-		last = 0;
-	qp->s_last = last;
-	if (qp->s_cur == old_last)
-		qp->s_cur = last;
-	if (qp->s_tail == old_last)
-		qp->s_tail = last;
-	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
-		qp->s_draining = 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_sdma.c b/drivers/staging/rdma/ipath/ipath_sdma.c
deleted file mode 100644
index 1ffc06abf9da..000000000000
--- a/drivers/staging/rdma/ipath/ipath_sdma.c
+++ /dev/null
@@ -1,818 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/spinlock.h>
-#include <linux/gfp.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */
-
-static void vl15_watchdog_enq(struct ipath_devdata *dd)
-{
-	/* ipath_sdma_lock must already be held */
-	if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) {
-		unsigned long interval = (HZ + 19) / 20;
-		dd->ipath_sdma_vl15_timer.expires = jiffies + interval;
-		add_timer(&dd->ipath_sdma_vl15_timer);
-	}
-}
-
-static void vl15_watchdog_deq(struct ipath_devdata *dd)
-{
-	/* ipath_sdma_lock must already be held */
-	if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) {
-		unsigned long interval = (HZ + 19) / 20;
-		mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval);
-	} else {
-		del_timer(&dd->ipath_sdma_vl15_timer);
-	}
-}
-
-static void vl15_watchdog_timeout(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-	if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) {
-		ipath_dbg("vl15 watchdog timeout - clearing\n");
-		ipath_cancel_sends(dd, 1);
-		ipath_hol_down(dd);
-	} else {
-		ipath_dbg("vl15 watchdog timeout - "
-			  "condition already cleared\n");
-	}
-}
-
-static void unmap_desc(struct ipath_devdata *dd, unsigned head)
-{
-	__le64 *descqp = &dd->ipath_sdma_descq[head].qw[0];
-	u64 desc[2];
-	dma_addr_t addr;
-	size_t len;
-
-	desc[0] = le64_to_cpu(descqp[0]);
-	desc[1] = le64_to_cpu(descqp[1]);
-
-	addr = (desc[1] << 32) | (desc[0] >> 32);
-	len = (desc[0] >> 14) & (0x7ffULL << 2);
-	dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
-}
-
-/*
- * ipath_sdma_lock should be locked before calling this.
- */
-int ipath_sdma_make_progress(struct ipath_devdata *dd)
-{
-	struct list_head *lp = NULL;
-	struct ipath_sdma_txreq *txp = NULL;
-	u16 dmahead;
-	u16 start_idx = 0;
-	int progress = 0;
-
-	if (!list_empty(&dd->ipath_sdma_activelist)) {
-		lp = dd->ipath_sdma_activelist.next;
-		txp = list_entry(lp, struct ipath_sdma_txreq, list);
-		start_idx = txp->start_idx;
-	}
-
-	/*
-	 * Read the SDMA head register in order to know that the
-	 * interrupt clear has been written to the chip.
-	 * Otherwise, we may not get an interrupt for the last
-	 * descriptor in the queue.
-	 */
-	dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead);
-	/* sanity check return value for error handling (chip reset, etc.) */
-	if (dmahead >= dd->ipath_sdma_descq_cnt)
-		goto done;
-
-	while (dd->ipath_sdma_descq_head != dmahead) {
-		if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC &&
-		    dd->ipath_sdma_descq_head == start_idx) {
-			unmap_desc(dd, dd->ipath_sdma_descq_head);
-			start_idx++;
-			if (start_idx == dd->ipath_sdma_descq_cnt)
-				start_idx = 0;
-		}
-
-		/* increment free count and head */
-		dd->ipath_sdma_descq_removed++;
-		if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt)
-			dd->ipath_sdma_descq_head = 0;
-
-		if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) {
-			/* move to notify list */
-			if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-				vl15_watchdog_deq(dd);
-			list_move_tail(lp, &dd->ipath_sdma_notifylist);
-			if (!list_empty(&dd->ipath_sdma_activelist)) {
-				lp = dd->ipath_sdma_activelist.next;
-				txp = list_entry(lp, struct ipath_sdma_txreq,
-						 list);
-				start_idx = txp->start_idx;
-			} else {
-				lp = NULL;
-				txp = NULL;
-			}
-		}
-		progress = 1;
-	}
-
-	if (progress)
-		tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
-
-done:
-	return progress;
-}
-
-static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list)
-{
-	struct ipath_sdma_txreq *txp, *txp_next;
-
-	list_for_each_entry_safe(txp, txp_next, list, list) {
-		list_del_init(&txp->list);
-
-		if (txp->callback)
-			(*txp->callback)(txp->callback_cookie,
-					 txp->callback_status);
-	}
-}
-
-static void sdma_notify_taskbody(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-	struct list_head list;
-
-	INIT_LIST_HEAD(&list);
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-	list_splice_init(&dd->ipath_sdma_notifylist, &list);
-
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	ipath_sdma_notify(dd, &list);
-
-	/*
-	 * The IB verbs layer needs to see the callback before getting
-	 * the call to ipath_ib_piobufavail() because the callback
-	 * handles releasing resources the next send will need.
-	 * Otherwise, we could do these calls in
-	 * ipath_sdma_make_progress().
-	 */
-	ipath_ib_piobufavail(dd->verbs_dev);
-}
-
-static void sdma_notify_task(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
-
-	if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-		sdma_notify_taskbody(dd);
-}
-
-static void dump_sdma_state(struct ipath_devdata *dd)
-{
-	unsigned long reg;
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus);
-	ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl);
-	ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0);
-	ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1);
-	ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2);
-	ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
-	ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg);
-
-	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
-	ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg);
-}
-
-static void sdma_abort_task(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-	u64 status;
-	unsigned long flags;
-
-	if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-		return;
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-	status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK;
-
-	/* nothing to do */
-	if (status == IPATH_SDMA_ABORT_NONE)
-		goto unlock;
-
-	/* ipath_sdma_abort() is done, waiting for interrupt */
-	if (status == IPATH_SDMA_ABORT_DISARMED) {
-		if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
-			goto resched_noprint;
-		/* give up, intr got lost somewhere */
-		ipath_dbg("give up waiting for SDMADISABLED intr\n");
-		__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-		status = IPATH_SDMA_ABORT_ABORTED;
-	}
-
-	/* everything is stopped, time to clean up and restart */
-	if (status == IPATH_SDMA_ABORT_ABORTED) {
-		struct ipath_sdma_txreq *txp, *txpnext;
-		u64 hwstatus;
-		int notify = 0;
-
-		hwstatus = ipath_read_kreg64(dd,
-				dd->ipath_kregs->kr_senddmastatus);
-
-		if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG |
-				 IPATH_SDMA_STATUS_ABORT_IN_PROG	     |
-				 IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) ||
-		    !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) {
-			if (dd->ipath_sdma_reset_wait > 0) {
-				/* not done shutting down sdma */
-				--dd->ipath_sdma_reset_wait;
-				goto resched;
-			}
-			ipath_cdbg(VERBOSE, "gave up waiting for quiescent "
-				"status after SDMA reset, continuing\n");
-			dump_sdma_state(dd);
-		}
-
-		/* dequeue all "sent" requests */
-		list_for_each_entry_safe(txp, txpnext,
-					 &dd->ipath_sdma_activelist, list) {
-			txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED;
-			if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-				vl15_watchdog_deq(dd);
-			list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
-			notify = 1;
-		}
-		if (notify)
-			tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
-
-		/* reset our notion of head and tail */
-		dd->ipath_sdma_descq_tail = 0;
-		dd->ipath_sdma_descq_head = 0;
-		dd->ipath_sdma_head_dma[0] = 0;
-		dd->ipath_sdma_generation = 0;
-		dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added;
-
-		/* Reset SendDmaLenGen */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen,
-			(u64) dd->ipath_sdma_descq_cnt | (1ULL << 18));
-
-		/* done with sdma state for a bit */
-		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-		/*
-		 * Don't restart sdma here (with the exception
-		 * below). Wait until link is up to ACTIVE.  VL15 MADs
-		 * used to bring the link up use PIO, and multiple link
-		 * transitions otherwise cause the sdma engine to be
-		 * stopped and started multiple times.
-		 * The disable is done here, including the shadow,
-		 * so the state is kept consistent.
-		 * See ipath_restart_sdma() for the actual starting
-		 * of sdma.
-		 */
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-				 dd->ipath_sendctrl);
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-		/* make sure I see next message */
-		dd->ipath_sdma_abort_jiffies = 0;
-
-		/*
-		 * Not everything that takes SDMA offline is a link
-		 * status change.  If the link was up, restart SDMA.
-		 */
-		if (dd->ipath_flags & IPATH_LINKACTIVE)
-			ipath_restart_sdma(dd);
-
-		goto done;
-	}
-
-resched:
-	/*
-	 * for now, keep spinning
-	 * JAG - this is bad to just have default be a loop without
-	 * state change
-	 */
-	if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
-		ipath_dbg("looping with status 0x%08lx\n",
-			  dd->ipath_sdma_status);
-		dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
-	}
-resched_noprint:
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-	if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-		tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
-	return;
-
-unlock:
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-done:
-	return;
-}
-
-/*
- * This is called from interrupt context.
- */
-void ipath_sdma_intr(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-	(void) ipath_sdma_make_progress(dd);
-
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-}
-
-static int alloc_sdma(struct ipath_devdata *dd)
-{
-	int ret = 0;
-
-	/* Allocate memory for SendDMA descriptor FIFO */
-	dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev,
-		SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL);
-
-	if (!dd->ipath_sdma_descq) {
-		ipath_dev_err(dd, "failed to allocate SendDMA descriptor "
-			"FIFO memory\n");
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	dd->ipath_sdma_descq_cnt =
-		SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc);
-
-	/* Allocate memory for DMA of head register to memory */
-	dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev,
-		PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL);
-	if (!dd->ipath_sdma_head_dma) {
-		ipath_dev_err(dd, "failed to allocate SendDMA head memory\n");
-		ret = -ENOMEM;
-		goto cleanup_descq;
-	}
-	dd->ipath_sdma_head_dma[0] = 0;
-
-	setup_timer(&dd->ipath_sdma_vl15_timer, vl15_watchdog_timeout,
-			(unsigned long)dd);
-
-	atomic_set(&dd->ipath_sdma_vl15_count, 0);
-
-	goto done;
-
-cleanup_descq:
-	dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
-		(void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys);
-	dd->ipath_sdma_descq = NULL;
-	dd->ipath_sdma_descq_phys = 0;
-done:
-	return ret;
-}
-
-int setup_sdma(struct ipath_devdata *dd)
-{
-	int ret = 0;
-	unsigned i, n;
-	u64 tmp64;
-	u64 senddmabufmask[3] = { 0 };
-	unsigned long flags;
-
-	ret = alloc_sdma(dd);
-	if (ret)
-		goto done;
-
-	if (!dd->ipath_sdma_descq) {
-		ipath_dev_err(dd, "SendDMA memory not allocated\n");
-		goto done;
-	}
-
-	/*
-	 * Set initial status as if we had been up, then gone down.
-	 * This lets initial start on transition to ACTIVE be the
-	 * same as restart after link flap.
-	 */
-	dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
-	dd->ipath_sdma_abort_jiffies = 0;
-	dd->ipath_sdma_generation = 0;
-	dd->ipath_sdma_descq_tail = 0;
-	dd->ipath_sdma_descq_head = 0;
-	dd->ipath_sdma_descq_removed = 0;
-	dd->ipath_sdma_descq_added = 0;
-
-	/* Set SendDmaBase */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase,
-			 dd->ipath_sdma_descq_phys);
-	/* Set SendDmaLenGen */
-	tmp64 = dd->ipath_sdma_descq_cnt;
-	tmp64 |= 1<<18; /* enable generation checking */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64);
-	/* Set SendDmaTail */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail,
-			 dd->ipath_sdma_descq_tail);
-	/* Set SendDmaHeadAddr */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
-			 dd->ipath_sdma_head_phys);
-
-	/*
-	 * Reserve all the former "kernel" piobufs, using high number range
-	 * so we get as many 4K buffers as possible
-	 */
-	n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-	i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
-	ipath_chg_pioavailkernel(dd, i, n - i , 0);
-	for (; i < n; ++i) {
-		unsigned word = i / 64;
-		unsigned bit = i & 63;
-		BUG_ON(word >= 3);
-		senddmabufmask[word] |= 1ULL << bit;
-	}
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
-			 senddmabufmask[0]);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
-			 senddmabufmask[1]);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2,
-			 senddmabufmask[2]);
-
-	INIT_LIST_HEAD(&dd->ipath_sdma_activelist);
-	INIT_LIST_HEAD(&dd->ipath_sdma_notifylist);
-
-	tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task,
-		     (unsigned long) dd);
-	tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task,
-		     (unsigned long) dd);
-
-	/*
-	 * No use to turn on SDMA here, as link is probably not ACTIVE
-	 * Just mark it RUNNING and enable the interrupt, and let the
-	 * ipath_restart_sdma() on link transition to ACTIVE actually
-	 * enable it.
-	 */
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	__set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-done:
-	return ret;
-}
-
-void teardown_sdma(struct ipath_devdata *dd)
-{
-	struct ipath_sdma_txreq *txp, *txpnext;
-	unsigned long flags;
-	dma_addr_t sdma_head_phys = 0;
-	dma_addr_t sdma_descq_phys = 0;
-	void *sdma_descq = NULL;
-	void *sdma_head_dma = NULL;
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-	__clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
-	__set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-	__set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status);
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	tasklet_kill(&dd->ipath_sdma_abort_task);
-	tasklet_kill(&dd->ipath_sdma_notify_task);
-
-	/* turn off sdma */
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-		dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-	/* dequeue all "sent" requests */
-	list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist,
-				 list) {
-		txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN;
-		if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
-			vl15_watchdog_deq(dd);
-		list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
-	}
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	sdma_notify_taskbody(dd);
-
-	del_timer_sync(&dd->ipath_sdma_vl15_timer);
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-	dd->ipath_sdma_abort_jiffies = 0;
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0);
-
-	if (dd->ipath_sdma_head_dma) {
-		sdma_head_dma = (void *) dd->ipath_sdma_head_dma;
-		sdma_head_phys = dd->ipath_sdma_head_phys;
-		dd->ipath_sdma_head_dma = NULL;
-		dd->ipath_sdma_head_phys = 0;
-	}
-
-	if (dd->ipath_sdma_descq) {
-		sdma_descq = dd->ipath_sdma_descq;
-		sdma_descq_phys = dd->ipath_sdma_descq_phys;
-		dd->ipath_sdma_descq = NULL;
-		dd->ipath_sdma_descq_phys = 0;
-	}
-
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	if (sdma_head_dma)
-		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-				  sdma_head_dma, sdma_head_phys);
-
-	if (sdma_descq)
-		dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
-				  sdma_descq, sdma_descq_phys);
-}
-
-/*
- * [Re]start SDMA, if we use it, and it's not already OK.
- * This is called on transition to link ACTIVE, either the first or
- * subsequent times.
- */
-void ipath_restart_sdma(struct ipath_devdata *dd)
-{
-	unsigned long flags;
-	int needed = 1;
-
-	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
-		goto bail;
-
-	/*
-	 * First, make sure we should, which is to say,
-	 * check that we are "RUNNING" (not in teardown)
-	 * and not "SHUTDOWN"
-	 */
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-	if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)
-		|| test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
-			needed = 0;
-	else {
-		__clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
-		__clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
-		__clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-	}
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-	if (!needed) {
-		ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n",
-			dd->ipath_sdma_status);
-		goto bail;
-	}
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	/*
-	 * First clear, just to be safe. Enable is only done
-	 * in chip on 0->1 transition
-	 */
-	dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
-	/* notify upper layers */
-	ipath_ib_piobufavail(dd->verbs_dev);
-
-bail:
-	return;
-}
-
-static inline void make_sdma_desc(struct ipath_devdata *dd,
-	u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset)
-{
-	WARN_ON(addr & 3);
-	/* SDmaPhyAddr[47:32] */
-	sdmadesc[1] = addr >> 32;
-	/* SDmaPhyAddr[31:0] */
-	sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
-	/* SDmaGeneration[1:0] */
-	sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30;
-	/* SDmaDwordCount[10:0] */
-	sdmadesc[0] |= (dwlen & 0x7ffULL) << 16;
-	/* SDmaBufOffset[12:2] */
-	sdmadesc[0] |= dwoffset & 0x7ffULL;
-}
-
-/*
- * This function queues one IB packet onto the send DMA queue per call.
- * The caller is responsible for checking:
- * 1) The number of send DMA descriptor entries is less than the size of
- *    the descriptor queue.
- * 2) The IB SGE addresses and lengths are 32-bit aligned
- *    (except possibly the last SGE's length)
- * 3) The SGE addresses are suitable for passing to dma_map_single().
- */
-int ipath_sdma_verbs_send(struct ipath_devdata *dd,
-	struct ipath_sge_state *ss, u32 dwords,
-	struct ipath_verbs_txreq *tx)
-{
-
-	unsigned long flags;
-	struct ipath_sge *sge;
-	int ret = 0;
-	u16 tail;
-	__le64 *descqp;
-	u64 sdmadesc[2];
-	u32 dwoffset;
-	dma_addr_t addr;
-
-	if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) {
-		ipath_dbg("packet size %X > ibmax %X, fail\n",
-			tx->map_len + (dwords<<2), dd->ipath_ibmaxlen);
-		ret = -EMSGSIZE;
-		goto fail;
-	}
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-retry:
-	if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) {
-		ret = -EBUSY;
-		goto unlock;
-	}
-
-	if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) {
-		if (ipath_sdma_make_progress(dd))
-			goto retry;
-		ret = -ENOBUFS;
-		goto unlock;
-	}
-
-	addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
-			      tx->map_len, DMA_TO_DEVICE);
-	if (dma_mapping_error(&dd->pcidev->dev, addr))
-		goto ioerr;
-
-	dwoffset = tx->map_len >> 2;
-	make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
-
-	/* SDmaFirstDesc */
-	sdmadesc[0] |= 1ULL << 12;
-	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
-		sdmadesc[0] |= 1ULL << 14;	/* SDmaUseLargeBuf */
-
-	/* write to the descq */
-	tail = dd->ipath_sdma_descq_tail;
-	descqp = &dd->ipath_sdma_descq[tail].qw[0];
-	*descqp++ = cpu_to_le64(sdmadesc[0]);
-	*descqp++ = cpu_to_le64(sdmadesc[1]);
-
-	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC)
-		tx->txreq.start_idx = tail;
-
-	/* increment the tail */
-	if (++tail == dd->ipath_sdma_descq_cnt) {
-		tail = 0;
-		descqp = &dd->ipath_sdma_descq[0].qw[0];
-		++dd->ipath_sdma_generation;
-	}
-
-	sge = &ss->sge;
-	while (dwords) {
-		u32 dw;
-		u32 len;
-
-		len = dwords << 2;
-		if (len > sge->length)
-			len = sge->length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		dw = (len + 3) >> 2;
-		addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
-				      DMA_TO_DEVICE);
-		if (dma_mapping_error(&dd->pcidev->dev, addr))
-			goto unmap;
-		make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
-		/* SDmaUseLargeBuf has to be set in every descriptor */
-		if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
-			sdmadesc[0] |= 1ULL << 14;
-		/* write to the descq */
-		*descqp++ = cpu_to_le64(sdmadesc[0]);
-		*descqp++ = cpu_to_le64(sdmadesc[1]);
-
-		/* increment the tail */
-		if (++tail == dd->ipath_sdma_descq_cnt) {
-			tail = 0;
-			descqp = &dd->ipath_sdma_descq[0].qw[0];
-			++dd->ipath_sdma_generation;
-		}
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--ss->num_sge)
-				*sge = *ss->sg_list++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-
-		dwoffset += dw;
-		dwords -= dw;
-	}
-
-	if (!tail)
-		descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0];
-	descqp -= 2;
-	/* SDmaLastDesc */
-	descqp[0] |= cpu_to_le64(1ULL << 11);
-	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) {
-		/* SDmaIntReq */
-		descqp[0] |= cpu_to_le64(1ULL << 15);
-	}
-
-	/* Commit writes to memory and advance the tail on the chip */
-	wmb();
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
-
-	tx->txreq.next_descq_idx = tail;
-	tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK;
-	dd->ipath_sdma_descq_tail = tail;
-	dd->ipath_sdma_descq_added += tx->txreq.sg_count;
-	list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
-	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
-		vl15_watchdog_enq(dd);
-	goto unlock;
-
-unmap:
-	while (tail != dd->ipath_sdma_descq_tail) {
-		if (!tail)
-			tail = dd->ipath_sdma_descq_cnt - 1;
-		else
-			tail--;
-		unmap_desc(dd, tail);
-	}
-ioerr:
-	ret = -EIO;
-unlock:
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-fail:
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_srq.c b/drivers/staging/rdma/ipath/ipath_srq.c
deleted file mode 100644
index 26271984b717..000000000000
--- a/drivers/staging/rdma/ipath/ipath_srq.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ipath_verbs.h"
-
-/**
- * ipath_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: the first WR to cause a problem is put here
- *
- * This may be called from interrupt context.
- */
-int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			   struct ib_recv_wr **bad_wr)
-{
-	struct ipath_srq *srq = to_isrq(ibsrq);
-	struct ipath_rwq *wq;
-	unsigned long flags;
-	int ret;
-
-	for (; wr; wr = wr->next) {
-		struct ipath_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		wq = srq->rq.wq;
-		next = wq->head + 1;
-		if (next >= srq->rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&srq->rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_create_srq - create a shared receive queue
- * @ibpd: the protection domain of the SRQ to create
- * @srq_init_attr: the attributes of the SRQ
- * @udata: data from libipathverbs when creating a user SRQ
- */
-struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
-				struct ib_srq_init_attr *srq_init_attr,
-				struct ib_udata *udata)
-{
-	struct ipath_ibdev *dev = to_idev(ibpd->device);
-	struct ipath_srq *srq;
-	u32 sz;
-	struct ib_srq *ret;
-
-	if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-		ret = ERR_PTR(-ENOSYS);
-		goto done;
-	}
-
-	if (srq_init_attr->attr.max_wr == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
-	    (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
-	if (!srq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	srq->rq.size = srq_init_attr->attr.max_wr + 1;
-	srq->rq.max_sge = srq_init_attr->attr.max_sge;
-	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
-		sizeof(struct ipath_rwqe);
-	srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
-	if (!srq->rq.wq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_srq;
-	}
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See ipath_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-		u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz;
-
-		srq->ip =
-		    ipath_create_mmap_info(dev, s,
-					   ibpd->uobject->context,
-					   srq->rq.wq);
-		if (!srq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wq;
-		}
-
-		err = ib_copy_to_udata(udata, &srq->ip->offset,
-				       sizeof(srq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		srq->ip = NULL;
-
-	/*
-	 * ib_create_srq() will initialize srq->ibsrq.
-	 */
-	spin_lock_init(&srq->rq.lock);
-	srq->rq.wq->head = 0;
-	srq->rq.wq->tail = 0;
-	srq->limit = srq_init_attr->attr.srq_limit;
-
-	spin_lock(&dev->n_srqs_lock);
-	if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
-		spin_unlock(&dev->n_srqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
- 	dev->n_srqs_allocated++;
-	spin_unlock(&dev->n_srqs_lock);
-
-	if (srq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &srq->ibsrq;
-	goto done;
-
-bail_ip:
-	kfree(srq->ip);
-bail_wq:
-	vfree(srq->rq.wq);
-bail_srq:
-	kfree(srq);
-done:
-	return ret;
-}
-
-/**
- * ipath_modify_srq - modify a shared receive queue
- * @ibsrq: the SRQ to modify
- * @attr: the new attributes of the SRQ
- * @attr_mask: indicates which attributes to modify
- * @udata: user data for ipathverbs.so
- */
-int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask,
-		     struct ib_udata *udata)
-{
-	struct ipath_srq *srq = to_isrq(ibsrq);
-	struct ipath_rwq *wq;
-	int ret = 0;
-
-	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct ipath_rwq *owq;
-		struct ipath_rwqe *p;
-		u32 sz, size, n, head, tail;
-
-		/* Check that the requested sizes are below the limits. */
-		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
-		    ((attr_mask & IB_SRQ_LIMIT) ?
-		     attr->srq_limit : srq->limit) > attr->max_wr) {
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		sz = sizeof(struct ipath_rwqe) +
-			srq->rq.max_sge * sizeof(struct ib_sge);
-		size = attr->max_wr + 1;
-		wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
-		if (!wq) {
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		/* Check that we can write the offset to mmap. */
-		if (udata && udata->inlen >= sizeof(__u64)) {
-			__u64 offset_addr;
-			__u64 offset = 0;
-
-			ret = ib_copy_from_udata(&offset_addr, udata,
-						 sizeof(offset_addr));
-			if (ret)
-				goto bail_free;
-			udata->outbuf =
-				(void __user *) (unsigned long) offset_addr;
-			ret = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (ret)
-				goto bail_free;
-		}
-
-		spin_lock_irq(&srq->rq.lock);
-		/*
-		 * validate head pointer value and compute
-		 * the number of remaining WQEs.
-		 */
-		owq = srq->rq.wq;
-		head = owq->head;
-		if (head >= srq->rq.size)
-			head = 0;
-		tail = owq->tail;
-		if (tail >= srq->rq.size)
-			tail = 0;
-		n = head;
-		if (n < tail)
-			n += srq->rq.size - tail;
-		else
-			n -= tail;
-		if (size <= n) {
-			ret = -EINVAL;
-			goto bail_unlock;
-		}
-		n = 0;
-		p = wq->wq;
-		while (tail != head) {
-			struct ipath_rwqe *wqe;
-			int i;
-
-			wqe = get_rwqe_ptr(&srq->rq, tail);
-			p->wr_id = wqe->wr_id;
-			p->num_sge = wqe->num_sge;
-			for (i = 0; i < wqe->num_sge; i++)
-				p->sg_list[i] = wqe->sg_list[i];
-			n++;
-			p = (struct ipath_rwqe *)((char *) p + sz);
-			if (++tail >= srq->rq.size)
-				tail = 0;
-		}
-		srq->rq.wq = wq;
-		srq->rq.size = size;
-		wq->head = n;
-		wq->tail = 0;
-		if (attr_mask & IB_SRQ_LIMIT)
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-
-		vfree(owq);
-
-		if (srq->ip) {
-			struct ipath_mmap_info *ip = srq->ip;
-			struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
-			u32 s = sizeof(struct ipath_rwq) + size * sz;
-
-			ipath_update_mmap_info(dev, ip, s, wq);
-
-			/*
-			 * Return the offset to mmap.
-			 * See ipath_mmap() for details.
-			 */
-			if (udata && udata->inlen >= sizeof(__u64)) {
-				ret = ib_copy_to_udata(udata, &ip->offset,
-						       sizeof(ip->offset));
-				if (ret)
-					goto bail;
-			}
-
-			spin_lock_irq(&dev->pending_lock);
-			if (list_empty(&ip->pending_mmaps))
-				list_add(&ip->pending_mmaps,
-					 &dev->pending_mmaps);
-			spin_unlock_irq(&dev->pending_lock);
-		}
-	} else if (attr_mask & IB_SRQ_LIMIT) {
-		spin_lock_irq(&srq->rq.lock);
-		if (attr->srq_limit >= srq->rq.size)
-			ret = -EINVAL;
-		else
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-	}
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&srq->rq.lock);
-bail_free:
-	vfree(wq);
-bail:
-	return ret;
-}
-
-int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
-	struct ipath_srq *srq = to_isrq(ibsrq);
-
-	attr->max_wr = srq->rq.size - 1;
-	attr->max_sge = srq->rq.max_sge;
-	attr->srq_limit = srq->limit;
-	return 0;
-}
-
-/**
- * ipath_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
- */
-int ipath_destroy_srq(struct ib_srq *ibsrq)
-{
-	struct ipath_srq *srq = to_isrq(ibsrq);
-	struct ipath_ibdev *dev = to_idev(ibsrq->device);
-
-	spin_lock(&dev->n_srqs_lock);
-	dev->n_srqs_allocated--;
-	spin_unlock(&dev->n_srqs_lock);
-	if (srq->ip)
-		kref_put(&srq->ip->ref, ipath_release_mmap_info);
-	else
-		vfree(srq->rq.wq);
-	kfree(srq);
-
-	return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_stats.c b/drivers/staging/rdma/ipath/ipath_stats.c
deleted file mode 100644
index f63e143e3292..000000000000
--- a/drivers/staging/rdma/ipath/ipath_stats.c
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ipath_kernel.h"
-
-struct infinipath_stats ipath_stats;
-
-/**
- * ipath_snap_cntr - snapshot a chip counter
- * @dd: the infinipath device
- * @creg: the counter to snapshot
- *
- * called from add_timer and user counter read calls, to deal with
- * counters that wrap in "human time".  The words sent and received, and
- * the packets sent and received are all that we worry about.  For now,
- * at least, we don't worry about error counters, because if they wrap
- * that quickly, we probably don't care.  We may eventually just make this
- * handle all the counters.  word counters can wrap in about 20 seconds
- * of full bandwidth traffic, packet counters in a few hours.
- */
-
-u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
-{
-	u32 val, reg64 = 0;
-	u64 val64;
-	unsigned long t0, t1;
-	u64 ret;
-
-	t0 = jiffies;
-	/* If fast increment counters are only 32 bits, snapshot them,
-	 * and maintain them as 64bit values in the driver */
-	if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
-	    (creg == dd->ipath_cregs->cr_wordsendcnt ||
-	     creg == dd->ipath_cregs->cr_wordrcvcnt ||
-	     creg == dd->ipath_cregs->cr_pktsendcnt ||
-	     creg == dd->ipath_cregs->cr_pktrcvcnt)) {
-		val64 = ipath_read_creg(dd, creg);
-		val = val64 == ~0ULL ? ~0U : 0;
-		reg64 = 1;
-	} else			/* val64 just to keep gcc quiet... */
-		val64 = val = ipath_read_creg32(dd, creg);
-	/*
-	 * See if a second has passed.  This is just a way to detect things
-	 * that are quite broken.  Normally this should take just a few
-	 * cycles (the check is for long enough that we don't care if we get
-	 * pre-empted.)  An Opteron HT O read timeout is 4 seconds with
-	 * normal NB values
-	 */
-	t1 = jiffies;
-	if (time_before(t0 + HZ, t1) && val == -1) {
-		ipath_dev_err(dd, "Error!  Read counter 0x%x timed out\n",
-			      creg);
-		ret = 0ULL;
-		goto bail;
-	}
-	if (reg64) {
-		ret = val64;
-		goto bail;
-	}
-
-	if (creg == dd->ipath_cregs->cr_wordsendcnt) {
-		if (val != dd->ipath_lastsword) {
-			dd->ipath_sword += val - dd->ipath_lastsword;
-			dd->ipath_lastsword = val;
-		}
-		val64 = dd->ipath_sword;
-	} else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
-		if (val != dd->ipath_lastrword) {
-			dd->ipath_rword += val - dd->ipath_lastrword;
-			dd->ipath_lastrword = val;
-		}
-		val64 = dd->ipath_rword;
-	} else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
-		if (val != dd->ipath_lastspkts) {
-			dd->ipath_spkts += val - dd->ipath_lastspkts;
-			dd->ipath_lastspkts = val;
-		}
-		val64 = dd->ipath_spkts;
-	} else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
-		if (val != dd->ipath_lastrpkts) {
-			dd->ipath_rpkts += val - dd->ipath_lastrpkts;
-			dd->ipath_lastrpkts = val;
-		}
-		val64 = dd->ipath_rpkts;
-	} else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
-		if (dd->ibdeltainprog)
-			val64 -= val64 - dd->ibsymsnap;
-		val64 -= dd->ibsymdelta;
-	} else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
-		if (dd->ibdeltainprog)
-			val64 -= val64 - dd->iblnkerrsnap;
-		val64 -= dd->iblnkerrdelta;
-	} else
-		val64 = (u64) val;
-
-	ret = val64;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
- * @dd: the infinipath device
- *
- * print the delta of egrfull/hdrqfull errors for kernel ports no more than
- * every 5 seconds.  User processes are printed at close, but kernel doesn't
- * close, so...  Separate routine so may call from other places someday, and
- * so function name when printed by _IPATH_INFO is meaningfull
- */
-static void ipath_qcheck(struct ipath_devdata *dd)
-{
-	static u64 last_tot_hdrqfull;
-	struct ipath_portdata *pd = dd->ipath_pd[0];
-	size_t blen = 0;
-	char buf[128];
-	u32 hdrqtail;
-
-	*buf = 0;
-	if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
-		blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
-				pd->port_hdrqfull -
-				dd->ipath_p0_hdrqfull);
-		dd->ipath_p0_hdrqfull = pd->port_hdrqfull;
-	}
-	if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
-		blen += snprintf(buf + blen, sizeof buf - blen,
-				 "%srcvegrfull %llu",
-				 blen ? ", " : "",
-				 (unsigned long long)
-				 (ipath_stats.sps_etidfull -
-				  dd->ipath_last_tidfull));
-		dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
-	}
-
-	/*
-	 * this is actually the number of hdrq full interrupts, not actual
-	 * events, but at the moment that's mostly what I'm interested in.
-	 * Actual count, etc. is in the counters, if needed.  For production
-	 * users this won't ordinarily be printed.
-	 */
-
-	if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
-	    ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
-		blen += snprintf(buf + blen, sizeof buf - blen,
-				 "%shdrqfull %llu (all ports)",
-				 blen ? ", " : "",
-				 (unsigned long long)
-				 (ipath_stats.sps_hdrqfull -
-				  last_tot_hdrqfull));
-		last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
-	}
-	if (blen)
-		ipath_dbg("%s\n", buf);
-
-	hdrqtail = ipath_get_hdrqtail(pd);
-	if (pd->port_head != hdrqtail) {
-		if (dd->ipath_lastport0rcv_cnt ==
-		    ipath_stats.sps_port0pkts) {
-			ipath_cdbg(PKT, "missing rcv interrupts? "
-				   "port0 hd=%x tl=%x; port0pkts %llx; write"
-				   " hd (w/intr)\n",
-				   pd->port_head, hdrqtail,
-				   (unsigned long long)
-				   ipath_stats.sps_port0pkts);
-			ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail |
-				dd->ipath_rhdrhead_intr_off, pd->port_port);
-		}
-		dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
-	}
-}
-
-static void ipath_chk_errormask(struct ipath_devdata *dd)
-{
-	static u32 fixed;
-	u32 ctrl;
-	unsigned long errormask;
-	unsigned long hwerrs;
-
-	if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
-		return;
-
-	errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
-
-	if (errormask == dd->ipath_errormask)
-		return;
-	fixed++;
-
-	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
-	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-		dd->ipath_errormask);
-
-	if ((hwerrs & dd->ipath_hwerrmask) ||
-		(ctrl & INFINIPATH_C_FREEZEMODE)) {
-		/* force re-interrupt of pending events, just in case */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
-		dev_info(&dd->pcidev->dev,
-			"errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
-			fixed, errormask, (unsigned long)dd->ipath_errormask,
-			ctrl, hwerrs);
-	} else
-		ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
-			fixed, errormask,
-			(unsigned long)dd->ipath_errormask);
-}
-
-
-/**
- * ipath_get_faststats - get word counters from chip before they overflow
- * @opaque - contains a pointer to the infinipath device ipath_devdata
- *
- * called from add_timer
- */
-void ipath_get_faststats(unsigned long opaque)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-	int i;
-	static unsigned cnt;
-	unsigned long flags;
-	u64 traffic_wds;
-
-	/*
-	 * don't access the chip while running diags, or memory diags can
-	 * fail
-	 */
-	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
-	    ipath_diag_inuse)
-		/* but re-arm the timer, for diags case; won't hurt other */
-		goto done;
-
-	/*
-	 * We now try to maintain a "active timer", based on traffic
-	 * exceeding a threshold, so we need to check the word-counts
-	 * even if they are 64-bit.
-	 */
-	traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-	spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-	traffic_wds -= dd->ipath_traffic_wds;
-	dd->ipath_traffic_wds += traffic_wds;
-	if (traffic_wds  >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
-		atomic_add(5, &dd->ipath_active_time); /* S/B #define */
-	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
-
-	if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	}
-
-	ipath_qcheck(dd);
-
-	/*
-	 * deal with repeat error suppression.  Doesn't really matter if
-	 * last error was almost a full interval ago, or just a few usecs
-	 * ago; still won't get more than 2 per interval.  We may want
-	 * longer intervals for this eventually, could do with mod, counter
-	 * or separate timer.  Also see code in ipath_handle_errors() and
-	 * ipath_handle_hwerrors().
-	 */
-
-	if (dd->ipath_lasterror)
-		dd->ipath_lasterror = 0;
-	if (dd->ipath_lasthwerror)
-		dd->ipath_lasthwerror = 0;
-	if (dd->ipath_maskederrs
-	    && time_after(jiffies, dd->ipath_unmasktime)) {
-		char ebuf[256];
-		int iserr;
-		iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
-					 dd->ipath_maskederrs);
-		if (dd->ipath_maskederrs &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-		      INFINIPATH_E_PKTERRS))
-			ipath_dev_err(dd, "Re-enabling masked errors "
-				      "(%s)\n", ebuf);
-		else {
-			/*
-			 * rcvegrfull and rcvhdrqfull are "normal", for some
-			 * types of processes (mostly benchmarks) that send
-			 * huge numbers of messages, while not processing
-			 * them.  So only complain about these at debug
-			 * level.
-			 */
-			if (iserr)
-				ipath_dbg(
-					"Re-enabling queue full errors (%s)\n",
-					ebuf);
-			else
-				ipath_cdbg(ERRPKT, "Re-enabling packet"
-					" problem interrupt (%s)\n", ebuf);
-		}
-
-		/* re-enable masked errors */
-		dd->ipath_errormask |= dd->ipath_maskederrs;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-				 dd->ipath_errormask);
-		dd->ipath_maskederrs = 0;
-	}
-
-	/* limit qfull messages to ~one per minute per port */
-	if ((++cnt & 0x10)) {
-		for (i = (int) dd->ipath_cfgports; --i >= 0; ) {
-			struct ipath_portdata *pd = dd->ipath_pd[i];
-
-			if (pd && pd->port_lastrcvhdrqtail != -1)
-				pd->port_lastrcvhdrqtail = -1;
-		}
-	}
-
-	ipath_chk_errormask(dd);
-done:
-	mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_sysfs.c b/drivers/staging/rdma/ipath/ipath_sysfs.c
deleted file mode 100644
index b12b1f6caf59..000000000000
--- a/drivers/staging/rdma/ipath/ipath_sysfs.c
+++ /dev/null
@@ -1,1237 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/ctype.h>
-#include <linux/stat.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-/**
- * ipath_parse_ushort - parse an unsigned short value in an arbitrary base
- * @str: the string containing the number
- * @valp: where to put the result
- *
- * returns the number of bytes consumed, or negative value on error
- */
-int ipath_parse_ushort(const char *str, unsigned short *valp)
-{
-	unsigned long val;
-	char *end;
-	int ret;
-
-	if (!isdigit(str[0])) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	val = simple_strtoul(str, &end, 0);
-
-	if (val > 0xffff) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	*valp = val;
-
-	ret = end + 1 - str;
-	if (ret == 0)
-		ret = -EINVAL;
-
-bail:
-	return ret;
-}
-
-static ssize_t show_version(struct device_driver *dev, char *buf)
-{
-	/* The string printed here is already newline-terminated. */
-	return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
-}
-
-static ssize_t show_num_units(struct device_driver *dev, char *buf)
-{
-	return scnprintf(buf, PAGE_SIZE, "%d\n",
-			 ipath_count_units(NULL, NULL, NULL));
-}
-
-static ssize_t show_status(struct device *dev,
-			   struct device_attribute *attr,
-			   char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	ssize_t ret;
-
-	if (!dd->ipath_statusp) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
-			(unsigned long long) *(dd->ipath_statusp));
-
-bail:
-	return ret;
-}
-
-static const char *ipath_status_str[] = {
-	"Initted",
-	"Disabled",
-	"Admin_Disabled",
-	"", /* This used to be the old "OIB_SMA" status. */
-	"", /* This used to be the old "SMA" status. */
-	"Present",
-	"IB_link_up",
-	"IB_configured",
-	"NoIBcable",
-	"Fatal_Hardware_Error",
-	NULL,
-};
-
-static ssize_t show_status_str(struct device *dev,
-			       struct device_attribute *attr,
-			       char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int i, any;
-	u64 s;
-	ssize_t ret;
-
-	if (!dd->ipath_statusp) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	s = *(dd->ipath_statusp);
-	*buf = '\0';
-	for (any = i = 0; s && ipath_status_str[i]; i++) {
-		if (s & 1) {
-			if (any && strlcat(buf, " ", PAGE_SIZE) >=
-			    PAGE_SIZE)
-				/* overflow */
-				break;
-			if (strlcat(buf, ipath_status_str[i],
-				    PAGE_SIZE) >= PAGE_SIZE)
-				break;
-			any = 1;
-		}
-		s >>= 1;
-	}
-	if (any)
-		strlcat(buf, "\n", PAGE_SIZE);
-
-	ret = strlen(buf);
-
-bail:
-	return ret;
-}
-
-static ssize_t show_boardversion(struct device *dev,
-			       struct device_attribute *attr,
-			       char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	/* The string printed here is already newline-terminated. */
-	return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
-}
-
-static ssize_t show_localbus_info(struct device *dev,
-			       struct device_attribute *attr,
-			       char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	/* The string printed here is already newline-terminated. */
-	return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info);
-}
-
-static ssize_t show_lmc(struct device *dev,
-			struct device_attribute *attr,
-			char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc);
-}
-
-static ssize_t store_lmc(struct device *dev,
-			 struct device_attribute *attr,
-			 const char *buf,
-			 size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u16 lmc = 0;
-	int ret;
-
-	ret = ipath_parse_ushort(buf, &lmc);
-	if (ret < 0)
-		goto invalid;
-
-	if (lmc > 7) {
-		ret = -EINVAL;
-		goto invalid;
-	}
-
-	ipath_set_lid(dd, dd->ipath_lid, lmc);
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc);
-bail:
-	return ret;
-}
-
-static ssize_t show_lid(struct device *dev,
-			struct device_attribute *attr,
-			char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid);
-}
-
-static ssize_t store_lid(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u16 lid = 0;
-	int ret;
-
-	ret = ipath_parse_ushort(buf, &lid);
-	if (ret < 0)
-		goto invalid;
-
-	if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) {
-		ret = -EINVAL;
-		goto invalid;
-	}
-
-	ipath_set_lid(dd, lid, dd->ipath_lmc);
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid LID 0x%x\n", lid);
-bail:
-	return ret;
-}
-
-static ssize_t show_mlid(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid);
-}
-
-static ssize_t store_mlid(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u16 mlid;
-	int ret;
-
-	ret = ipath_parse_ushort(buf, &mlid);
-	if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE)
-		goto invalid;
-
-	dd->ipath_mlid = mlid;
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid MLID\n");
-bail:
-	return ret;
-}
-
-static ssize_t show_guid(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u8 *guid;
-
-	guid = (u8 *) & (dd->ipath_guid);
-
-	return scnprintf(buf, PAGE_SIZE,
-			 "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
-			 guid[0], guid[1], guid[2], guid[3],
-			 guid[4], guid[5], guid[6], guid[7]);
-}
-
-static ssize_t store_guid(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	ssize_t ret;
-	unsigned short guid[8];
-	__be64 new_guid;
-	u8 *ng;
-	int i;
-
-	if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
-		   &guid[0], &guid[1], &guid[2], &guid[3],
-		   &guid[4], &guid[5], &guid[6], &guid[7]) != 8)
-		goto invalid;
-
-	ng = (u8 *) &new_guid;
-
-	for (i = 0; i < 8; i++) {
-		if (guid[i] > 0xff)
-			goto invalid;
-		ng[i] = guid[i];
-	}
-
-	if (new_guid == 0)
-		goto invalid;
-
-	dd->ipath_guid = new_guid;
-	dd->ipath_nguid = 1;
-	if (dd->verbs_dev)
-		dd->verbs_dev->ibdev.node_guid = new_guid;
-
-	ret = strlen(buf);
-	goto bail;
-
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid GUID\n");
-	ret = -EINVAL;
-
-bail:
-	return ret;
-}
-
-static ssize_t show_nguid(struct device *dev,
-			  struct device_attribute *attr,
-			  char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
-}
-
-static ssize_t show_nports(struct device *dev,
-			   struct device_attribute *attr,
-			   char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	/* Return the number of user ports available. */
-	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
-}
-
-static ssize_t show_serial(struct device *dev,
-			   struct device_attribute *attr,
-			   char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	buf[sizeof dd->ipath_serial] = '\0';
-	memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial);
-	strcat(buf, "\n");
-	return strlen(buf);
-}
-
-static ssize_t show_unit(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit);
-}
-
-static ssize_t show_jint_max_packets(struct device *dev,
-				     struct device_attribute *attr,
-				     char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets);
-}
-
-static ssize_t store_jint_max_packets(struct device *dev,
-				      struct device_attribute *attr,
-				      const char *buf,
-				      size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u16 v = 0;
-	int ret;
-
-	ret = ipath_parse_ushort(buf, &v);
-	if (ret < 0)
-		ipath_dev_err(dd, "invalid jint_max_packets.\n");
-	else
-		dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v);
-
-	return ret;
-}
-
-static ssize_t show_jint_idle_ticks(struct device *dev,
-				    struct device_attribute *attr,
-				    char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks);
-}
-
-static ssize_t store_jint_idle_ticks(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf,
-				     size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	u16 v = 0;
-	int ret;
-
-	ret = ipath_parse_ushort(buf, &v);
-	if (ret < 0)
-		ipath_dev_err(dd, "invalid jint_idle_ticks.\n");
-	else
-		dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets);
-
-	return ret;
-}
-
-#define DEVICE_COUNTER(name, attr) \
-	static ssize_t show_counter_##name(struct device *dev, \
-					   struct device_attribute *attr, \
-					   char *buf) \
-	{ \
-		struct ipath_devdata *dd = dev_get_drvdata(dev); \
-		return scnprintf(\
-			buf, PAGE_SIZE, "%llu\n", (unsigned long long) \
-			ipath_snap_cntr( \
-				dd, offsetof(struct infinipath_counters, \
-					     attr) / sizeof(u64)));	\
-	} \
-	static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL);
-
-DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt);
-DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt);
-DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt);
-DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt);
-DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt);
-DEVICE_COUNTER(lb_ints, LBIntCnt);
-DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt);
-DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt);
-DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt);
-DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt);
-DEVICE_COUNTER(rx_dwords, RxDwordCnt);
-DEVICE_COUNTER(rx_ebps, RxEBPCnt);
-DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt);
-DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt);
-DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt);
-DEVICE_COUNTER(rx_len_errs, RxLenErrCnt);
-DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt);
-DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt);
-DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt);
-DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt);
-DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt);
-DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt);
-DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt);
-DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt);
-DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt);
-DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt);
-DEVICE_COUNTER(tx_dwords, TxDwordCnt);
-DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt);
-DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt);
-DEVICE_COUNTER(tx_len_errs, TxLenErrCnt);
-DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt);
-DEVICE_COUNTER(tx_underruns, TxUnderrunCnt);
-DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt);
-
-static struct attribute *dev_counter_attributes[] = {
-	&dev_attr_ib_link_downeds.attr,
-	&dev_attr_ib_link_err_recoveries.attr,
-	&dev_attr_ib_status_changes.attr,
-	&dev_attr_ib_symbol_errs.attr,
-	&dev_attr_lb_flow_stalls.attr,
-	&dev_attr_lb_ints.attr,
-	&dev_attr_rx_bad_formats.attr,
-	&dev_attr_rx_buf_ovfls.attr,
-	&dev_attr_rx_data_pkts.attr,
-	&dev_attr_rx_dropped_pkts.attr,
-	&dev_attr_rx_dwords.attr,
-	&dev_attr_rx_ebps.attr,
-	&dev_attr_rx_flow_ctrl_errs.attr,
-	&dev_attr_rx_flow_pkts.attr,
-	&dev_attr_rx_icrc_errs.attr,
-	&dev_attr_rx_len_errs.attr,
-	&dev_attr_rx_link_problems.attr,
-	&dev_attr_rx_lpcrc_errs.attr,
-	&dev_attr_rx_max_min_len_errs.attr,
-	&dev_attr_rx_p0_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p1_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p2_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p3_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p4_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p5_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p6_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p7_hdr_egr_ovfls.attr,
-	&dev_attr_rx_p8_hdr_egr_ovfls.attr,
-	&dev_attr_rx_pkey_mismatches.attr,
-	&dev_attr_rx_tid_full_errs.attr,
-	&dev_attr_rx_tid_valid_errs.attr,
-	&dev_attr_rx_vcrc_errs.attr,
-	&dev_attr_tx_data_pkts.attr,
-	&dev_attr_tx_dropped_pkts.attr,
-	&dev_attr_tx_dwords.attr,
-	&dev_attr_tx_flow_pkts.attr,
-	&dev_attr_tx_flow_stalls.attr,
-	&dev_attr_tx_len_errs.attr,
-	&dev_attr_tx_max_min_len_errs.attr,
-	&dev_attr_tx_underruns.attr,
-	&dev_attr_tx_unsup_vl_errs.attr,
-	NULL
-};
-
-static struct attribute_group dev_counter_attr_group = {
-	.name = "counters",
-	.attrs = dev_counter_attributes
-};
-
-static ssize_t store_reset(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	if (count < 5 || memcmp(buf, "reset", 5)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	if (dd->ipath_flags & IPATH_DISABLED) {
-		/*
-		 * post-reset init would re-enable interrupts, etc.
-		 * so don't allow reset on disabled devices.  Not
-		 * perfect error, but about the best choice.
-		 */
-		dev_info(dev,"Unit %d is disabled, can't reset\n",
-			 dd->ipath_unit);
-		ret = -EINVAL;
-		goto bail;
-	}
-	ret = ipath_reset_device(dd->ipath_unit);
-bail:
-	return ret<0 ? ret : count;
-}
-
-static ssize_t store_link_state(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 state;
-
-	ret = ipath_parse_ushort(buf, &state);
-	if (ret < 0)
-		goto invalid;
-
-	r = ipath_set_linkstate(dd, state);
-	if (r < 0) {
-		ret = r;
-		goto bail;
-	}
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid link state\n");
-bail:
-	return ret;
-}
-
-static ssize_t show_mtu(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu);
-}
-
-static ssize_t store_mtu(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	ssize_t ret;
-	u16 mtu = 0;
-	int r;
-
-	ret = ipath_parse_ushort(buf, &mtu);
-	if (ret < 0)
-		goto invalid;
-
-	r = ipath_set_mtu(dd, mtu);
-	if (r < 0)
-		ret = r;
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid MTU\n");
-bail:
-	return ret;
-}
-
-static ssize_t show_enabled(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	return scnprintf(buf, PAGE_SIZE, "%u\n",
-			 (dd->ipath_flags & IPATH_DISABLED) ? 0 : 1);
-}
-
-static ssize_t store_enabled(struct device *dev,
-			 struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	ssize_t ret;
-	u16 enable = 0;
-
-	ret = ipath_parse_ushort(buf, &enable);
-	if (ret < 0) {
-		ipath_dev_err(dd, "attempt to use non-numeric on enable\n");
-		goto bail;
-	}
-
-	if (enable) {
-		if (!(dd->ipath_flags & IPATH_DISABLED))
-			goto bail;
-
-		dev_info(dev, "Enabling unit %d\n", dd->ipath_unit);
-		/* same as post-reset */
-		ret = ipath_init_chip(dd, 1);
-		if (ret)
-			ipath_dev_err(dd, "Failed to enable unit %d\n",
-				      dd->ipath_unit);
-		else {
-			dd->ipath_flags &= ~IPATH_DISABLED;
-			*dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED;
-		}
-	} else if (!(dd->ipath_flags & IPATH_DISABLED)) {
-		dev_info(dev, "Disabling unit %d\n", dd->ipath_unit);
-		ipath_shutdown_device(dd);
-		dd->ipath_flags |= IPATH_DISABLED;
-		*dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED;
-	}
-
-bail:
-	return ret;
-}
-
-static ssize_t store_rx_pol_inv(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret < 0)
-		goto invalid;
-
-	r = ipath_set_rx_pol_inv(dd, val);
-	if (r < 0) {
-		ret = r;
-		goto bail;
-	}
-
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
-bail:
-	return ret;
-}
-
-static ssize_t store_led_override(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret > 0)
-		ipath_set_led_override(dd, val);
-	else
-		ipath_dev_err(dd, "attempt to set invalid LED override\n");
-	return ret;
-}
-
-static ssize_t show_logged_errs(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int idx, count;
-
-	/* force consistency with actual EEPROM */
-	if (ipath_update_eeprom_log(dd) != 0)
-		return -ENXIO;
-
-	count = 0;
-	for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
-		count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
-			dd->ipath_eep_st_errs[idx],
-			idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
-	}
-
-	return count;
-}
-
-/*
- * New sysfs entries to control various IB config. These all turn into
- * accesses via ipath_f_get/set_ib_cfg.
- *
- * Get/Set heartbeat enable. Or of 1=enabled, 2=auto
- */
-static ssize_t show_hrtbt_enb(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-static ssize_t store_hrtbt_enb(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret >= 0 && val > 3)
-		ret = -EINVAL;
-	if (ret < 0) {
-		ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
-		goto bail;
-	}
-
-	/*
-	 * Set the "intentional" heartbeat enable per either of
-	 * "Enable" and "Auto", as these are normally set together.
-	 * This bit is consulted when leaving loopback mode,
-	 * because entering loopback mode overrides it and automatically
-	 * disables heartbeat.
-	 */
-	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val);
-	if (r < 0)
-		ret = r;
-	else if (val == IPATH_IB_HRTBT_OFF)
-		dd->ipath_flags |= IPATH_NO_HRTBT;
-	else
-		dd->ipath_flags &= ~IPATH_NO_HRTBT;
-
-bail:
-	return ret;
-}
-
-/*
- * Get/Set Link-widths enabled. Or of 1=1x, 2=4x (this is human/IB centric,
- * _not_ the particular encoding of any given chip)
- */
-static ssize_t show_lwid_enb(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-static ssize_t store_lwid_enb(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret >= 0 && (val == 0 || val > 3))
-		ret = -EINVAL;
-	if (ret < 0) {
-		ipath_dev_err(dd,
-			"attempt to set invalid Link Width (enable)\n");
-		goto bail;
-	}
-
-	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val);
-	if (r < 0)
-		ret = r;
-
-bail:
-	return ret;
-}
-
-/* Get current link width */
-static ssize_t show_lwid(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-/*
- * Get/Set Link-speeds enabled. Or of 1=SDR 2=DDR.
- */
-static ssize_t show_spd_enb(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-static ssize_t store_spd_enb(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR)))
-		ret = -EINVAL;
-	if (ret < 0) {
-		ipath_dev_err(dd,
-			"attempt to set invalid Link Speed (enable)\n");
-		goto bail;
-	}
-
-	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val);
-	if (r < 0)
-		ret = r;
-
-bail:
-	return ret;
-}
-
-/* Get current link speed */
-static ssize_t show_spd(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-/*
- * Get/Set RX polarity-invert enable. 0=no, 1=yes.
- */
-static ssize_t show_rx_polinv_enb(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-static ssize_t store_rx_polinv_enb(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret >= 0 && val > 1) {
-		ipath_dev_err(dd,
-			"attempt to set invalid Rx Polarity (enable)\n");
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val);
-	if (r < 0)
-		ret = r;
-
-bail:
-	return ret;
-}
-
-/*
- * Get/Set RX lane-reversal enable. 0=no, 1=yes.
- */
-static ssize_t show_lanerev_enb(struct device *dev,
-			 struct device_attribute *attr,
-			 char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-
-	ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB);
-	if (ret >= 0)
-		ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
-	return ret;
-}
-
-static ssize_t store_lanerev_enb(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf,
-			  size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, r;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret >= 0 && val > 1) {
-		ret = -EINVAL;
-		ipath_dev_err(dd,
-			"attempt to set invalid Lane reversal (enable)\n");
-		goto bail;
-	}
-
-	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val);
-	if (r < 0)
-		ret = r;
-
-bail:
-	return ret;
-}
-
-static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
-static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
-
-static struct attribute *driver_attributes[] = {
-	&driver_attr_num_units.attr,
-	&driver_attr_version.attr,
-	NULL
-};
-
-static struct attribute_group driver_attr_group = {
-	.attrs = driver_attributes
-};
-
-static ssize_t store_tempsense(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf,
-			       size_t count)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret, stat;
-	u16 val;
-
-	ret = ipath_parse_ushort(buf, &val);
-	if (ret <= 0) {
-		ipath_dev_err(dd, "attempt to set invalid tempsense config\n");
-		goto bail;
-	}
-	/* If anything but the highest limit, enable T_CRIT_A "interrupt" */
-	stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0);
-	if (stat) {
-		ipath_dev_err(dd, "Unable to set tempsense config\n");
-		ret = -1;
-		goto bail;
-	}
-	stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF));
-	if (stat) {
-		ipath_dev_err(dd, "Unable to set local Tcrit\n");
-		ret = -1;
-		goto bail;
-	}
-	stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8));
-	if (stat) {
-		ipath_dev_err(dd, "Unable to set remote Tcrit\n");
-		ret = -1;
-		goto bail;
-	}
-
-bail:
-	return ret;
-}
-
-/*
- * dump tempsense regs. in decimal, to ease shell-scripts.
- */
-static ssize_t show_tempsense(struct device *dev,
-			      struct device_attribute *attr,
-			      char *buf)
-{
-	struct ipath_devdata *dd = dev_get_drvdata(dev);
-	int ret;
-	int idx;
-	u8 regvals[8];
-
-	ret = -ENXIO;
-	for (idx = 0; idx < 8; ++idx) {
-		if (idx == 6)
-			continue;
-		ret = ipath_tempsense_read(dd, idx);
-		if (ret < 0)
-			break;
-		regvals[idx] = ret;
-	}
-	if (idx == 8)
-		ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
-			*(signed char *)(regvals),
-			*(signed char *)(regvals + 1),
-			regvals[2], regvals[3],
-			*(signed char *)(regvals + 5),
-			*(signed char *)(regvals + 7));
-	return ret;
-}
-
-const struct attribute_group *ipath_driver_attr_groups[] = {
-	&driver_attr_group,
-	NULL,
-};
-
-static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
-static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc);
-static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
-static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
-static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
-static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
-static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
-static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
-static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
-static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
-static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
-static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
-static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
-static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
-static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
-static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO,
-		   show_jint_max_packets, store_jint_max_packets);
-static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO,
-		   show_jint_idle_ticks, store_jint_idle_ticks);
-static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO,
-		   show_tempsense, store_tempsense);
-
-static struct attribute *dev_attributes[] = {
-	&dev_attr_guid.attr,
-	&dev_attr_lmc.attr,
-	&dev_attr_lid.attr,
-	&dev_attr_link_state.attr,
-	&dev_attr_mlid.attr,
-	&dev_attr_mtu.attr,
-	&dev_attr_nguid.attr,
-	&dev_attr_nports.attr,
-	&dev_attr_serial.attr,
-	&dev_attr_status.attr,
-	&dev_attr_status_str.attr,
-	&dev_attr_boardversion.attr,
-	&dev_attr_unit.attr,
-	&dev_attr_enabled.attr,
-	&dev_attr_rx_pol_inv.attr,
-	&dev_attr_led_override.attr,
-	&dev_attr_logged_errors.attr,
-	&dev_attr_tempsense.attr,
-	&dev_attr_localbus_info.attr,
-	NULL
-};
-
-static struct attribute_group dev_attr_group = {
-	.attrs = dev_attributes
-};
-
-static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
-		   store_hrtbt_enb);
-static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb,
-		   store_lwid_enb);
-static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL);
-static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb,
-		   store_spd_enb);
-static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL);
-static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb,
-		   store_rx_polinv_enb);
-static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb,
-		   store_lanerev_enb);
-
-static struct attribute *dev_ibcfg_attributes[] = {
-	&dev_attr_hrtbt_enable.attr,
-	&dev_attr_link_width_enable.attr,
-	&dev_attr_link_width.attr,
-	&dev_attr_link_speed_enable.attr,
-	&dev_attr_link_speed.attr,
-	&dev_attr_rx_pol_inv_enable.attr,
-	&dev_attr_rx_lane_rev_enable.attr,
-	NULL
-};
-
-static struct attribute_group dev_ibcfg_attr_group = {
-	.attrs = dev_ibcfg_attributes
-};
-
-/**
- * ipath_expose_reset - create a device reset file
- * @dev: the device structure
- *
- * Only expose a file that lets us reset the device after someone
- * enters diag mode.  A device reset is quite likely to crash the
- * machine entirely, so we don't want to normally make it
- * available.
- *
- * Called with ipath_mutex held.
- */
-int ipath_expose_reset(struct device *dev)
-{
-	static int exposed;
-	int ret;
-
-	if (!exposed) {
-		ret = device_create_file(dev, &dev_attr_reset);
-		exposed = 1;
-	} else {
-		ret = 0;
-	}
-
-	return ret;
-}
-
-int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
-{
-	int ret;
-
-	ret = sysfs_create_group(&dev->kobj, &dev_attr_group);
-	if (ret)
-		goto bail;
-
-	ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group);
-	if (ret)
-		goto bail_attrs;
-
-	if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
-		ret = device_create_file(dev, &dev_attr_jint_idle_ticks);
-		if (ret)
-			goto bail_counter;
-		ret = device_create_file(dev, &dev_attr_jint_max_packets);
-		if (ret)
-			goto bail_idle;
-
-		ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group);
-		if (ret)
-			goto bail_max;
-	}
-
-	return 0;
-
-bail_max:
-	device_remove_file(dev, &dev_attr_jint_max_packets);
-bail_idle:
-	device_remove_file(dev, &dev_attr_jint_idle_ticks);
-bail_counter:
-	sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
-bail_attrs:
-	sysfs_remove_group(&dev->kobj, &dev_attr_group);
-bail:
-	return ret;
-}
-
-void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd)
-{
-	sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
-
-	if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
-		sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group);
-		device_remove_file(dev, &dev_attr_jint_idle_ticks);
-		device_remove_file(dev, &dev_attr_jint_max_packets);
-	}
-
-	sysfs_remove_group(&dev->kobj, &dev_attr_group);
-
-	device_remove_file(dev, &dev_attr_reset);
-}
diff --git a/drivers/staging/rdma/ipath/ipath_uc.c b/drivers/staging/rdma/ipath/ipath_uc.c
deleted file mode 100644
index 0246b30280b9..000000000000
--- a/drivers/staging/rdma/ipath/ipath_uc.c
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_UC_##x
-
-/**
- * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
- * @qp: a pointer to the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_uc_req(struct ipath_qp *qp)
-{
-	struct ipath_other_headers *ohdr;
-	struct ipath_swqe *wqe;
-	unsigned long flags;
-	u32 hwords;
-	u32 bth0;
-	u32 len;
-	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-	int ret = 0;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
-		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-			goto bail;
-		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
-			goto bail;
-		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= IPATH_S_WAIT_DMA;
-			goto bail;
-		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
-	}
-
-	ohdr = &qp->s_hdr.u.oth;
-	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr.u.l.oth;
-
-	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
-	hwords = 5;
-	bth0 = 1 << 22; /* Set M bit */
-
-	/* Get the next send request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
-	qp->s_wqe = NULL;
-	switch (qp->s_state) {
-	default:
-		if (!(ib_ipath_state_ops[qp->state] &
-		    IPATH_PROCESS_NEXT_SEND_OK))
-			goto bail;
-		/* Check if send work queue is empty. */
-		if (qp->s_cur == qp->s_head)
-			goto bail;
-		/*
-		 * Start a new request.
-		 */
-		qp->s_psn = wqe->psn = qp->s_next_psn;
-		qp->s_sge.sge = wqe->sg_list[0];
-		qp->s_sge.sg_list = wqe->sg_list + 1;
-		qp->s_sge.num_sge = wqe->wr.num_sge;
-		qp->s_len = len = wqe->length;
-		switch (wqe->wr.opcode) {
-		case IB_WR_SEND:
-		case IB_WR_SEND_WITH_IMM:
-			if (len > pmtu) {
-				qp->s_state = OP(SEND_FIRST);
-				len = pmtu;
-				break;
-			}
-			if (wqe->wr.opcode == IB_WR_SEND)
-				qp->s_state = OP(SEND_ONLY);
-			else {
-				qp->s_state =
-					OP(SEND_ONLY_WITH_IMMEDIATE);
-				/* Immediate data comes after the BTH */
-				ohdr->u.imm_data = wqe->wr.ex.imm_data;
-				hwords += 1;
-			}
-			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-				bth0 |= 1 << 23;
-			qp->s_wqe = wqe;
-			if (++qp->s_cur >= qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		case IB_WR_RDMA_WRITE:
-		case IB_WR_RDMA_WRITE_WITH_IMM:
-			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->rdma_wr.remote_addr);
-			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->rdma_wr.rkey);
-			ohdr->u.rc.reth.length = cpu_to_be32(len);
-			hwords += sizeof(struct ib_reth) / 4;
-			if (len > pmtu) {
-				qp->s_state = OP(RDMA_WRITE_FIRST);
-				len = pmtu;
-				break;
-			}
-			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-				qp->s_state = OP(RDMA_WRITE_ONLY);
-			else {
-				qp->s_state =
-					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
-				/* Immediate data comes after the RETH */
-				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
-				hwords += 1;
-				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-					bth0 |= 1 << 23;
-			}
-			qp->s_wqe = wqe;
-			if (++qp->s_cur >= qp->s_size)
-				qp->s_cur = 0;
-			break;
-
-		default:
-			goto bail;
-		}
-		break;
-
-	case OP(SEND_FIRST):
-		qp->s_state = OP(SEND_MIDDLE);
-		/* FALLTHROUGH */
-	case OP(SEND_MIDDLE):
-		len = qp->s_len;
-		if (len > pmtu) {
-			len = pmtu;
-			break;
-		}
-		if (wqe->wr.opcode == IB_WR_SEND)
-			qp->s_state = OP(SEND_LAST);
-		else {
-			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
-			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.ex.imm_data;
-			hwords += 1;
-		}
-		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-			bth0 |= 1 << 23;
-		qp->s_wqe = wqe;
-		if (++qp->s_cur >= qp->s_size)
-			qp->s_cur = 0;
-		break;
-
-	case OP(RDMA_WRITE_FIRST):
-		qp->s_state = OP(RDMA_WRITE_MIDDLE);
-		/* FALLTHROUGH */
-	case OP(RDMA_WRITE_MIDDLE):
-		len = qp->s_len;
-		if (len > pmtu) {
-			len = pmtu;
-			break;
-		}
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
-			qp->s_state = OP(RDMA_WRITE_LAST);
-		else {
-			qp->s_state =
-				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
-			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.ex.imm_data;
-			hwords += 1;
-			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
-				bth0 |= 1 << 23;
-		}
-		qp->s_wqe = wqe;
-		if (++qp->s_cur >= qp->s_size)
-			qp->s_cur = 0;
-		break;
-	}
-	qp->s_len -= len;
-	qp->s_hdrwords = hwords;
-	qp->s_cur_sge = &qp->s_sge;
-	qp->s_cur_size = len;
-	ipath_make_ruc_header(to_idev(qp->ibqp.device),
-			      qp, ohdr, bth0 | (qp->s_state << 24),
-			      qp->s_next_psn++ & IPATH_PSN_MASK);
-done:
-	ret = 1;
-	goto unlock;
-
-bail:
-	qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
-}
-
-/**
- * ipath_uc_rcv - handle an incoming UC packet
- * @dev: the device the packet came in on
- * @hdr: the header of the packet
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the length of the packet
- * @qp: the QP for this packet.
- *
- * This is called from ipath_qp_rcv() to process an incoming UC packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-	struct ipath_other_headers *ohdr;
-	int opcode;
-	u32 hdrsize;
-	u32 psn;
-	u32 pad;
-	struct ib_wc wc;
-	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-	struct ib_reth *reth;
-	int header_in_data;
-
-	/* Validate the SLID. See Ch. 9.6.1.5 */
-	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
-		goto done;
-
-	/* Check for GRH */
-	if (!has_grh) {
-		ohdr = &hdr->u.oth;
-		hdrsize = 8 + 12;	/* LRH + BTH */
-		psn = be32_to_cpu(ohdr->bth[2]);
-		header_in_data = 0;
-	} else {
-		ohdr = &hdr->u.l.oth;
-		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
-		/*
-		 * The header with GRH is 60 bytes and the
-		 * core driver sets the eager header buffer
-		 * size to 56 bytes so the last 4 bytes of
-		 * the BTH header (PSN) is in the data buffer.
-		 */
-		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-		if (header_in_data) {
-			psn = be32_to_cpu(((__be32 *) data)[0]);
-			data += sizeof(__be32);
-		} else
-			psn = be32_to_cpu(ohdr->bth[2]);
-	}
-	/*
-	 * The opcode is in the low byte when its in network order
-	 * (top byte when in host order).
-	 */
-	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-
-	memset(&wc, 0, sizeof wc);
-
-	/* Compare the PSN verses the expected PSN. */
-	if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
-		/*
-		 * Handle a sequence error.
-		 * Silently drop any current message.
-		 */
-		qp->r_psn = psn;
-	inv:
-		qp->r_state = OP(SEND_LAST);
-		switch (opcode) {
-		case OP(SEND_FIRST):
-		case OP(SEND_ONLY):
-		case OP(SEND_ONLY_WITH_IMMEDIATE):
-			goto send_first;
-
-		case OP(RDMA_WRITE_FIRST):
-		case OP(RDMA_WRITE_ONLY):
-		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
-			goto rdma_first;
-
-		default:
-			dev->n_pkt_drops++;
-			goto done;
-		}
-	}
-
-	/* Check for opcode sequence errors. */
-	switch (qp->r_state) {
-	case OP(SEND_FIRST):
-	case OP(SEND_MIDDLE):
-		if (opcode == OP(SEND_MIDDLE) ||
-		    opcode == OP(SEND_LAST) ||
-		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
-			break;
-		goto inv;
-
-	case OP(RDMA_WRITE_FIRST):
-	case OP(RDMA_WRITE_MIDDLE):
-		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
-		    opcode == OP(RDMA_WRITE_LAST) ||
-		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
-			break;
-		goto inv;
-
-	default:
-		if (opcode == OP(SEND_FIRST) ||
-		    opcode == OP(SEND_ONLY) ||
-		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
-		    opcode == OP(RDMA_WRITE_FIRST) ||
-		    opcode == OP(RDMA_WRITE_ONLY) ||
-		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-			break;
-		goto inv;
-	}
-
-	/* OK, process the packet. */
-	switch (opcode) {
-	case OP(SEND_FIRST):
-	case OP(SEND_ONLY):
-	case OP(SEND_ONLY_WITH_IMMEDIATE):
-	send_first:
-		if (qp->r_flags & IPATH_R_REUSE_SGE) {
-			qp->r_flags &= ~IPATH_R_REUSE_SGE;
-			qp->r_sge = qp->s_rdma_read_sge;
-		} else if (!ipath_get_rwqe(qp, 0)) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		/* Save the WQE so we can reuse it in case of an error. */
-		qp->s_rdma_read_sge = qp->r_sge;
-		qp->r_rcv_len = 0;
-		if (opcode == OP(SEND_ONLY))
-			goto send_last;
-		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
-			goto send_last_imm;
-		/* FALLTHROUGH */
-	case OP(SEND_MIDDLE):
-		/* Check for invalid length PMTU or posted rwqe len. */
-		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
-			qp->r_flags |= IPATH_R_REUSE_SGE;
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		qp->r_rcv_len += pmtu;
-		if (unlikely(qp->r_rcv_len > qp->r_len)) {
-			qp->r_flags |= IPATH_R_REUSE_SGE;
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		ipath_copy_sge(&qp->r_sge, data, pmtu);
-		break;
-
-	case OP(SEND_LAST_WITH_IMMEDIATE):
-	send_last_imm:
-		if (header_in_data) {
-			wc.ex.imm_data = *(__be32 *) data;
-			data += sizeof(__be32);
-		} else {
-			/* Immediate data comes after BTH */
-			wc.ex.imm_data = ohdr->u.imm_data;
-		}
-		hdrsize += 4;
-		wc.wc_flags = IB_WC_WITH_IMM;
-		/* FALLTHROUGH */
-	case OP(SEND_LAST):
-	send_last:
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/* Check for invalid length. */
-		/* XXX LAST len should be >= 1 */
-		if (unlikely(tlen < (hdrsize + pad + 4))) {
-			qp->r_flags |= IPATH_R_REUSE_SGE;
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		/* Don't count the CRC. */
-		tlen -= (hdrsize + pad + 4);
-		wc.byte_len = tlen + qp->r_rcv_len;
-		if (unlikely(wc.byte_len > qp->r_len)) {
-			qp->r_flags |= IPATH_R_REUSE_SGE;
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		wc.opcode = IB_WC_RECV;
-	last_imm:
-		ipath_copy_sge(&qp->r_sge, data, tlen);
-		wc.wr_id = qp->r_wr_id;
-		wc.status = IB_WC_SUCCESS;
-		wc.qp = &qp->ibqp;
-		wc.src_qp = qp->remote_qpn;
-		wc.slid = qp->remote_ah_attr.dlid;
-		wc.sl = qp->remote_ah_attr.sl;
-		/* Signal completion event if the solicited bit is set. */
-		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-			       (ohdr->bth[0] &
-				cpu_to_be32(1 << 23)) != 0);
-		break;
-
-	case OP(RDMA_WRITE_FIRST):
-	case OP(RDMA_WRITE_ONLY):
-	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
-	rdma_first:
-		/* RETH comes after BTH */
-		if (!header_in_data)
-			reth = &ohdr->u.rc.reth;
-		else {
-			reth = (struct ib_reth *)data;
-			data += sizeof(*reth);
-		}
-		hdrsize += sizeof(*reth);
-		qp->r_len = be32_to_cpu(reth->length);
-		qp->r_rcv_len = 0;
-		if (qp->r_len != 0) {
-			u32 rkey = be32_to_cpu(reth->rkey);
-			u64 vaddr = be64_to_cpu(reth->vaddr);
-			int ok;
-
-			/* Check rkey */
-			ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
-					   vaddr, rkey,
-					   IB_ACCESS_REMOTE_WRITE);
-			if (unlikely(!ok)) {
-				dev->n_pkt_drops++;
-				goto done;
-			}
-		} else {
-			qp->r_sge.sg_list = NULL;
-			qp->r_sge.sge.mr = NULL;
-			qp->r_sge.sge.vaddr = NULL;
-			qp->r_sge.sge.length = 0;
-			qp->r_sge.sge.sge_length = 0;
-		}
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_WRITE))) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		if (opcode == OP(RDMA_WRITE_ONLY))
-			goto rdma_last;
-		else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
-			goto rdma_last_imm;
-		/* FALLTHROUGH */
-	case OP(RDMA_WRITE_MIDDLE):
-		/* Check for invalid length PMTU or posted rwqe len. */
-		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		qp->r_rcv_len += pmtu;
-		if (unlikely(qp->r_rcv_len > qp->r_len)) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		ipath_copy_sge(&qp->r_sge, data, pmtu);
-		break;
-
-	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
-	rdma_last_imm:
-		if (header_in_data) {
-			wc.ex.imm_data = *(__be32 *) data;
-			data += sizeof(__be32);
-		} else {
-			/* Immediate data comes after BTH */
-			wc.ex.imm_data = ohdr->u.imm_data;
-		}
-		hdrsize += 4;
-		wc.wc_flags = IB_WC_WITH_IMM;
-
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/* Check for invalid length. */
-		/* XXX LAST len should be >= 1 */
-		if (unlikely(tlen < (hdrsize + pad + 4))) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		/* Don't count the CRC. */
-		tlen -= (hdrsize + pad + 4);
-		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		if (qp->r_flags & IPATH_R_REUSE_SGE)
-			qp->r_flags &= ~IPATH_R_REUSE_SGE;
-		else if (!ipath_get_rwqe(qp, 1)) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		wc.byte_len = qp->r_len;
-		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-		goto last_imm;
-
-	case OP(RDMA_WRITE_LAST):
-	rdma_last:
-		/* Get the number of bytes the message was padded by. */
-		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		/* Check for invalid length. */
-		/* XXX LAST len should be >= 1 */
-		if (unlikely(tlen < (hdrsize + pad + 4))) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		/* Don't count the CRC. */
-		tlen -= (hdrsize + pad + 4);
-		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
-			dev->n_pkt_drops++;
-			goto done;
-		}
-		ipath_copy_sge(&qp->r_sge, data, tlen);
-		break;
-
-	default:
-		/* Drop packet for unknown opcodes. */
-		dev->n_pkt_drops++;
-		goto done;
-	}
-	qp->r_psn++;
-	qp->r_state = opcode;
-done:
-	return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_ud.c b/drivers/staging/rdma/ipath/ipath_ud.c
deleted file mode 100644
index 385d9410a51e..000000000000
--- a/drivers/staging/rdma/ipath/ipath_ud.c
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_smi.h>
-
-#include "ipath_verbs.h"
-#include "ipath_kernel.h"
-
-/**
- * ipath_ud_loopback - handle send on loopback QPs
- * @sqp: the sending QP
- * @swqe: the send work request
- *
- * This is called from ipath_make_ud_req() to forward a WQE addressed
- * to the same HCA.
- * Note that the receive interrupt handler may be calling ipath_ud_rcv()
- * while this is being called.
- */
-static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
-{
-	struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
-	struct ipath_qp *qp;
-	struct ib_ah_attr *ah_attr;
-	unsigned long flags;
-	struct ipath_rq *rq;
-	struct ipath_srq *srq;
-	struct ipath_sge_state rsge;
-	struct ipath_sge *sge;
-	struct ipath_rwq *wq;
-	struct ipath_rwqe *wqe;
-	void (*handler)(struct ib_event *, void *);
-	struct ib_wc wc;
-	u32 tail;
-	u32 rlen;
-	u32 length;
-
-	qp = ipath_lookup_qpn(&dev->qp_table, swqe->ud_wr.remote_qpn);
-	if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-		dev->n_pkt_drops++;
-		goto done;
-	}
-
-	/*
-	 * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
-	 * Qkeys with the high order bit set mean use the
-	 * qkey from the QP context instead of the WR (see 10.2.5).
-	 */
-	if (unlikely(qp->ibqp.qp_num &&
-		     ((int) swqe->ud_wr.remote_qkey < 0 ?
-		      sqp->qkey : swqe->ud_wr.remote_qkey) != qp->qkey)) {
-		/* XXX OK to lose a count once in a while. */
-		dev->qkey_violations++;
-		dev->n_pkt_drops++;
-		goto drop;
-	}
-
-	/*
-	 * A GRH is expected to precede the data even if not
-	 * present on the wire.
-	 */
-	length = swqe->length;
-	memset(&wc, 0, sizeof wc);
-	wc.byte_len = length + sizeof(struct ib_grh);
-
-	if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
-		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.ex.imm_data = swqe->wr.ex.imm_data;
-	}
-
-	/*
-	 * This would be a lot simpler if we could call ipath_get_rwqe()
-	 * but that uses state that the receive interrupt handler uses
-	 * so we would need to lock out receive interrupts while doing
-	 * local loopback.
-	 */
-	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
-		handler = srq->ibsrq.event_handler;
-		rq = &srq->rq;
-	} else {
-		srq = NULL;
-		handler = NULL;
-		rq = &qp->r_rq;
-	}
-
-	/*
-	 * Get the next work request entry to find where to put the data.
-	 * Note that it is safe to drop the lock after changing rq->tail
-	 * since ipath_post_receive() won't fill the empty slot.
-	 */
-	spin_lock_irqsave(&rq->lock, flags);
-	wq = rq->wq;
-	tail = wq->tail;
-	/* Validate tail before using it since it is user writable. */
-	if (tail >= rq->size)
-		tail = 0;
-	if (unlikely(tail == wq->head)) {
-		spin_unlock_irqrestore(&rq->lock, flags);
-		dev->n_pkt_drops++;
-		goto drop;
-	}
-	wqe = get_rwqe_ptr(rq, tail);
-	rsge.sg_list = qp->r_ud_sg_list;
-	if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
-		spin_unlock_irqrestore(&rq->lock, flags);
-		dev->n_pkt_drops++;
-		goto drop;
-	}
-	/* Silently drop packets which are too big. */
-	if (wc.byte_len > rlen) {
-		spin_unlock_irqrestore(&rq->lock, flags);
-		dev->n_pkt_drops++;
-		goto drop;
-	}
-	if (++tail >= rq->size)
-		tail = 0;
-	wq->tail = tail;
-	wc.wr_id = wqe->wr_id;
-	if (handler) {
-		u32 n;
-
-		/*
-		 * validate head pointer value and compute
-		 * the number of remaining WQEs.
-		 */
-		n = wq->head;
-		if (n >= rq->size)
-			n = 0;
-		if (n < tail)
-			n += rq->size - tail;
-		else
-			n -= tail;
-		if (n < srq->limit) {
-			struct ib_event ev;
-
-			srq->limit = 0;
-			spin_unlock_irqrestore(&rq->lock, flags);
-			ev.device = qp->ibqp.device;
-			ev.element.srq = qp->ibqp.srq;
-			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			handler(&ev, srq->ibsrq.srq_context);
-		} else
-			spin_unlock_irqrestore(&rq->lock, flags);
-	} else
-		spin_unlock_irqrestore(&rq->lock, flags);
-
-	ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
-	if (ah_attr->ah_flags & IB_AH_GRH) {
-		ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
-		wc.wc_flags |= IB_WC_GRH;
-	} else
-		ipath_skip_sge(&rsge, sizeof(struct ib_grh));
-	sge = swqe->sg_list;
-	while (length) {
-		u32 len = sge->length;
-
-		if (len > length)
-			len = length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		ipath_copy_sge(&rsge, sge->vaddr, len);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--swqe->wr.num_sge)
-				sge++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		length -= len;
-	}
-	wc.status = IB_WC_SUCCESS;
-	wc.opcode = IB_WC_RECV;
-	wc.qp = &qp->ibqp;
-	wc.src_qp = sqp->ibqp.qp_num;
-	/* XXX do we know which pkey matched? Only needed for GSI. */
-	wc.pkey_index = 0;
-	wc.slid = dev->dd->ipath_lid |
-		(ah_attr->src_path_bits &
-		 ((1 << dev->dd->ipath_lmc) - 1));
-	wc.sl = ah_attr->sl;
-	wc.dlid_path_bits =
-		ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1);
-	wc.port_num = 1;
-	/* Signal completion event if the solicited bit is set. */
-	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		       swqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED);
-drop:
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-done:;
-}
-
-/**
- * ipath_make_ud_req - construct a UD request packet
- * @qp: the QP
- *
- * Return 1 if constructed; otherwise, return 0.
- */
-int ipath_make_ud_req(struct ipath_qp *qp)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_other_headers *ohdr;
-	struct ib_ah_attr *ah_attr;
-	struct ipath_swqe *wqe;
-	unsigned long flags;
-	u32 nwords;
-	u32 extra_bytes;
-	u32 bth0;
-	u16 lrh0;
-	u16 lid;
-	int ret = 0;
-	int next_cur;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
-			goto bail;
-		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
-			goto bail;
-		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= IPATH_S_WAIT_DMA;
-			goto bail;
-		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
-		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
-	}
-
-	if (qp->s_cur == qp->s_head)
-		goto bail;
-
-	wqe = get_swqe_ptr(qp, qp->s_cur);
-	next_cur = qp->s_cur + 1;
-	if (next_cur >= qp->s_size)
-		next_cur = 0;
-
-	/* Construct the header. */
-	ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
-	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
-		if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
-			dev->n_multicast_xmit++;
-		else
-			dev->n_unicast_xmit++;
-	} else {
-		dev->n_unicast_xmit++;
-		lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
-		if (unlikely(lid == dev->dd->ipath_lid)) {
-			/*
-			 * If DMAs are in progress, we can't generate
-			 * a completion for the loopback packet since
-			 * it would be out of order.
-			 * XXX Instead of waiting, we could queue a
-			 * zero length descriptor so we get a callback.
-			 */
-			if (atomic_read(&qp->s_dma_busy)) {
-				qp->s_flags |= IPATH_S_WAIT_DMA;
-				goto bail;
-			}
-			qp->s_cur = next_cur;
-			spin_unlock_irqrestore(&qp->s_lock, flags);
-			ipath_ud_loopback(qp, wqe);
-			spin_lock_irqsave(&qp->s_lock, flags);
-			ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
-			goto done;
-		}
-	}
-
-	qp->s_cur = next_cur;
-	extra_bytes = -wqe->length & 3;
-	nwords = (wqe->length + extra_bytes) >> 2;
-
-	/* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
-	qp->s_hdrwords = 7;
-	qp->s_cur_size = wqe->length;
-	qp->s_cur_sge = &qp->s_sge;
-	qp->s_dmult = ah_attr->static_rate;
-	qp->s_wqe = wqe;
-	qp->s_sge.sge = wqe->sg_list[0];
-	qp->s_sge.sg_list = wqe->sg_list + 1;
-	qp->s_sge.num_sge = wqe->ud_wr.wr.num_sge;
-
-	if (ah_attr->ah_flags & IB_AH_GRH) {
-		/* Header size in 32-bit words. */
-		qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
-						 &ah_attr->grh,
-						 qp->s_hdrwords, nwords);
-		lrh0 = IPATH_LRH_GRH;
-		ohdr = &qp->s_hdr.u.l.oth;
-		/*
-		 * Don't worry about sending to locally attached multicast
-		 * QPs.  It is unspecified by the spec. what happens.
-		 */
-	} else {
-		/* Header size in 32-bit words. */
-		lrh0 = IPATH_LRH_BTH;
-		ohdr = &qp->s_hdr.u.oth;
-	}
-	if (wqe->ud_wr.wr.opcode == IB_WR_SEND_WITH_IMM) {
-		qp->s_hdrwords++;
-		ohdr->u.ud.imm_data = wqe->ud_wr.wr.ex.imm_data;
-		bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
-	} else
-		bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
-	lrh0 |= ah_attr->sl << 4;
-	if (qp->ibqp.qp_type == IB_QPT_SMI)
-		lrh0 |= 0xF000;	/* Set VL (see ch. 13.5.3.1) */
-	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);	/* DEST LID */
-	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
-					   SIZE_OF_CRC);
-	lid = dev->dd->ipath_lid;
-	if (lid) {
-		lid |= ah_attr->src_path_bits &
-			((1 << dev->dd->ipath_lmc) - 1);
-		qp->s_hdr.lrh[3] = cpu_to_be16(lid);
-	} else
-		qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
-	if (wqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED)
-		bth0 |= 1 << 23;
-	bth0 |= extra_bytes << 20;
-	bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
-		ipath_get_pkey(dev->dd, qp->s_pkey_index);
-	ohdr->bth[0] = cpu_to_be32(bth0);
-	/*
-	 * Use the multicast QP if the destination LID is a multicast LID.
-	 */
-	ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
-		ah_attr->dlid != IPATH_PERMISSIVE_LID ?
-		cpu_to_be32(IPATH_MULTICAST_QPN) :
-		cpu_to_be32(wqe->ud_wr.remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
-	/*
-	 * Qkeys with the high order bit set mean use the
-	 * qkey from the QP context instead of the WR (see 10.2.5).
-	 */
-	ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
-					 qp->qkey : wqe->ud_wr.remote_qkey);
-	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
-
-done:
-	ret = 1;
-	goto unlock;
-
-bail:
-	qp->s_flags &= ~IPATH_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
-}
-
-/**
- * ipath_ud_rcv - receive an incoming UD packet
- * @dev: the device the packet came in on
- * @hdr: the packet header
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
- *
- * This is called from ipath_qp_rcv() to process an incoming UD packet
- * for the given QP.
- * Called at interrupt level.
- */
-void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
-{
-	struct ipath_other_headers *ohdr;
-	int opcode;
-	u32 hdrsize;
-	u32 pad;
-	struct ib_wc wc;
-	u32 qkey;
-	u32 src_qp;
-	u16 dlid;
-	int header_in_data;
-
-	/* Check for GRH */
-	if (!has_grh) {
-		ohdr = &hdr->u.oth;
-		hdrsize = 8 + 12 + 8;	/* LRH + BTH + DETH */
-		qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-		src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
-		header_in_data = 0;
-	} else {
-		ohdr = &hdr->u.l.oth;
-		hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
-		/*
-		 * The header with GRH is 68 bytes and the core driver sets
-		 * the eager header buffer size to 56 bytes so the last 12
-		 * bytes of the IB header is in the data buffer.
-		 */
-		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
-		if (header_in_data) {
-			qkey = be32_to_cpu(((__be32 *) data)[1]);
-			src_qp = be32_to_cpu(((__be32 *) data)[2]);
-			data += 12;
-		} else {
-			qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-			src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
-		}
-	}
-	src_qp &= IPATH_QPN_MASK;
-
-	/*
-	 * Check that the permissive LID is only used on QP0
-	 * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
-	 */
-	if (qp->ibqp.qp_num) {
-		if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
-			     hdr->lrh[3] == IB_LID_PERMISSIVE)) {
-			dev->n_pkt_drops++;
-			goto bail;
-		}
-		if (unlikely(qkey != qp->qkey)) {
-			/* XXX OK to lose a count once in a while. */
-			dev->qkey_violations++;
-			dev->n_pkt_drops++;
-			goto bail;
-		}
-	} else if (hdr->lrh[1] == IB_LID_PERMISSIVE ||
-		   hdr->lrh[3] == IB_LID_PERMISSIVE) {
-		struct ib_smp *smp = (struct ib_smp *) data;
-
-		if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-			dev->n_pkt_drops++;
-			goto bail;
-		}
-	}
-
-	/*
-	 * The opcode is in the low byte when its in network order
-	 * (top byte when in host order).
-	 */
-	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-	if (qp->ibqp.qp_num > 1 &&
-	    opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
-		if (header_in_data) {
-			wc.ex.imm_data = *(__be32 *) data;
-			data += sizeof(__be32);
-		} else
-			wc.ex.imm_data = ohdr->u.ud.imm_data;
-		wc.wc_flags = IB_WC_WITH_IMM;
-		hdrsize += sizeof(u32);
-	} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
-		wc.ex.imm_data = 0;
-		wc.wc_flags = 0;
-	} else {
-		dev->n_pkt_drops++;
-		goto bail;
-	}
-
-	/* Get the number of bytes the message was padded by. */
-	pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-	if (unlikely(tlen < (hdrsize + pad + 4))) {
-		/* Drop incomplete packets. */
-		dev->n_pkt_drops++;
-		goto bail;
-	}
-	tlen -= hdrsize + pad + 4;
-
-	/* Drop invalid MAD packets (see 13.5.3.1). */
-	if (unlikely((qp->ibqp.qp_num == 0 &&
-		      (tlen != 256 ||
-		       (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) ||
-		     (qp->ibqp.qp_num == 1 &&
-		      (tlen != 256 ||
-		       (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) {
-		dev->n_pkt_drops++;
-		goto bail;
-	}
-
-	/*
-	 * A GRH is expected to precede the data even if not
-	 * present on the wire.
-	 */
-	wc.byte_len = tlen + sizeof(struct ib_grh);
-
-	/*
-	 * Get the next work request entry to find where to put the data.
-	 */
-	if (qp->r_flags & IPATH_R_REUSE_SGE)
-		qp->r_flags &= ~IPATH_R_REUSE_SGE;
-	else if (!ipath_get_rwqe(qp, 0)) {
-		/*
-		 * Count VL15 packets dropped due to no receive buffer.
-		 * Otherwise, count them as buffer overruns since usually,
-		 * the HW will be able to receive packets even if there are
-		 * no QPs with posted receive buffers.
-		 */
-		if (qp->ibqp.qp_num == 0)
-			dev->n_vl15_dropped++;
-		else
-			dev->rcv_errors++;
-		goto bail;
-	}
-	/* Silently drop packets which are too big. */
-	if (wc.byte_len > qp->r_len) {
-		qp->r_flags |= IPATH_R_REUSE_SGE;
-		dev->n_pkt_drops++;
-		goto bail;
-	}
-	if (has_grh) {
-		ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
-			       sizeof(struct ib_grh));
-		wc.wc_flags |= IB_WC_GRH;
-	} else
-		ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
-	ipath_copy_sge(&qp->r_sge, data,
-		       wc.byte_len - sizeof(struct ib_grh));
-	if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
-		goto bail;
-	wc.wr_id = qp->r_wr_id;
-	wc.status = IB_WC_SUCCESS;
-	wc.opcode = IB_WC_RECV;
-	wc.vendor_err = 0;
-	wc.qp = &qp->ibqp;
-	wc.src_qp = src_qp;
-	/* XXX do we know which pkey matched? Only needed for GSI. */
-	wc.pkey_index = 0;
-	wc.slid = be16_to_cpu(hdr->lrh[3]);
-	wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
-	dlid = be16_to_cpu(hdr->lrh[1]);
-	/*
-	 * Save the LMC lower bits if the destination LID is a unicast LID.
-	 */
-	wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
-		dlid & ((1 << dev->dd->ipath_lmc) - 1);
-	wc.port_num = 1;
-	/* Signal completion event if the solicited bit is set. */
-	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		       (ohdr->bth[0] &
-			cpu_to_be32(1 << 23)) != 0);
-
-bail:;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_user_pages.c b/drivers/staging/rdma/ipath/ipath_user_pages.c
deleted file mode 100644
index d29b4daf61f8..000000000000
--- a/drivers/staging/rdma/ipath/ipath_user_pages.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/mm.h>
-#include <linux/device.h>
-#include <linux/slab.h>
-
-#include "ipath_kernel.h"
-
-static void __ipath_release_user_pages(struct page **p, size_t num_pages,
-				   int dirty)
-{
-	size_t i;
-
-	for (i = 0; i < num_pages; i++) {
-		ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i,
-			   (unsigned long) num_pages, p[i]);
-		if (dirty)
-			set_page_dirty_lock(p[i]);
-		put_page(p[i]);
-	}
-}
-
-/* call with current->mm->mmap_sem held */
-static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
-				  struct page **p)
-{
-	unsigned long lock_limit;
-	size_t got;
-	int ret;
-
-	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-	if (num_pages > lock_limit) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n",
-		   (unsigned long) num_pages, start_page);
-
-	for (got = 0; got < num_pages; got += ret) {
-		ret = get_user_pages(current, current->mm,
-				     start_page + got * PAGE_SIZE,
-				     num_pages - got, 1, 1,
-				     p + got, NULL);
-		if (ret < 0)
-			goto bail_release;
-	}
-
-	current->mm->pinned_vm += num_pages;
-
-	ret = 0;
-	goto bail;
-
-bail_release:
-	__ipath_release_user_pages(p, got, 0);
-bail:
-	return ret;
-}
-
-/**
- * ipath_map_page - a safety wrapper around pci_map_page()
- *
- * A dma_addr of all 0's is interpreted by the chip as "disabled".
- * Unfortunately, it can also be a valid dma_addr returned on some
- * architectures.
- *
- * The powerpc iommu assigns dma_addrs in ascending order, so we don't
- * have to bother with retries or mapping a dummy page to insure we
- * don't just get the same mapping again.
- *
- * I'm sure we won't be so lucky with other iommu's, so FIXME.
- */
-dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
-	unsigned long offset, size_t size, int direction)
-{
-	dma_addr_t phys;
-
-	phys = pci_map_page(hwdev, page, offset, size, direction);
-
-	if (phys == 0) {
-		pci_unmap_page(hwdev, phys, size, direction);
-		phys = pci_map_page(hwdev, page, offset, size, direction);
-		/*
-		 * FIXME: If we get 0 again, we should keep this page,
-		 * map another, then free the 0 page.
-		 */
-	}
-
-	return phys;
-}
-
-/**
- * ipath_map_single - a safety wrapper around pci_map_single()
- *
- * Same idea as ipath_map_page().
- */
-dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
-	int direction)
-{
-	dma_addr_t phys;
-
-	phys = pci_map_single(hwdev, ptr, size, direction);
-
-	if (phys == 0) {
-		pci_unmap_single(hwdev, phys, size, direction);
-		phys = pci_map_single(hwdev, ptr, size, direction);
-		/*
-		 * FIXME: If we get 0 again, we should keep this page,
-		 * map another, then free the 0 page.
-		 */
-	}
-
-	return phys;
-}
-
-/**
- * ipath_get_user_pages - lock user pages into memory
- * @start_page: the start page
- * @num_pages: the number of pages
- * @p: the output page structures
- *
- * This function takes a given start page (page aligned user virtual
- * address) and pins it and the following specified number of pages.  For
- * now, num_pages is always 1, but that will probably change at some point
- * (because caller is doing expected sends on a single virtually contiguous
- * buffer, so we can do all pages at once).
- */
-int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
-			 struct page **p)
-{
-	int ret;
-
-	down_write(&current->mm->mmap_sem);
-
-	ret = __ipath_get_user_pages(start_page, num_pages, p);
-
-	up_write(&current->mm->mmap_sem);
-
-	return ret;
-}
-
-void ipath_release_user_pages(struct page **p, size_t num_pages)
-{
-	down_write(&current->mm->mmap_sem);
-
-	__ipath_release_user_pages(p, num_pages, 1);
-
-	current->mm->pinned_vm -= num_pages;
-
-	up_write(&current->mm->mmap_sem);
-}
-
-struct ipath_user_pages_work {
-	struct work_struct work;
-	struct mm_struct *mm;
-	unsigned long num_pages;
-};
-
-static void user_pages_account(struct work_struct *_work)
-{
-	struct ipath_user_pages_work *work =
-		container_of(_work, struct ipath_user_pages_work, work);
-
-	down_write(&work->mm->mmap_sem);
-	work->mm->pinned_vm -= work->num_pages;
-	up_write(&work->mm->mmap_sem);
-	mmput(work->mm);
-	kfree(work);
-}
-
-void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
-{
-	struct ipath_user_pages_work *work;
-	struct mm_struct *mm;
-
-	__ipath_release_user_pages(p, num_pages, 1);
-
-	mm = get_task_mm(current);
-	if (!mm)
-		return;
-
-	work = kmalloc(sizeof(*work), GFP_KERNEL);
-	if (!work)
-		goto bail_mm;
-
-	INIT_WORK(&work->work, user_pages_account);
-	work->mm = mm;
-	work->num_pages = num_pages;
-
-	queue_work(ib_wq, &work->work);
-	return;
-
-bail_mm:
-	mmput(mm);
-	return;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.c b/drivers/staging/rdma/ipath/ipath_user_sdma.c
deleted file mode 100644
index 8c12e3cccc58..000000000000
--- a/drivers/staging/rdma/ipath/ipath_user_sdma.c
+++ /dev/null
@@ -1,874 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <linux/device.h>
-#include <linux/dmapool.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/highmem.h>
-#include <linux/io.h>
-#include <linux/uio.h>
-#include <linux/rbtree.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_user_sdma.h"
-
-/* minimum size of header */
-#define IPATH_USER_SDMA_MIN_HEADER_LENGTH	64
-/* expected size of headers (for dma_pool) */
-#define IPATH_USER_SDMA_EXP_HEADER_LENGTH	64
-/* length mask in PBC (lower 11 bits) */
-#define IPATH_PBC_LENGTH_MASK			((1 << 11) - 1)
-
-struct ipath_user_sdma_pkt {
-	u8 naddr;		/* dimension of addr (1..3) ... */
-	u32 counter;		/* sdma pkts queued counter for this entry */
-	u64 added;		/* global descq number of entries */
-
-	struct {
-		u32 offset;			/* offset for kvaddr, addr */
-		u32 length;			/* length in page */
-		u8  put_page;			/* should we put_page? */
-		u8  dma_mapped;			/* is page dma_mapped? */
-		struct page *page;		/* may be NULL (coherent mem) */
-		void *kvaddr;			/* FIXME: only for pio hack */
-		dma_addr_t addr;
-	} addr[4];   /* max pages, any more and we coalesce */
-	struct list_head list;	/* list element */
-};
-
-struct ipath_user_sdma_queue {
-	/*
-	 * pkts sent to dma engine are queued on this
-	 * list head.  the type of the elements of this
-	 * list are struct ipath_user_sdma_pkt...
-	 */
-	struct list_head sent;
-
-	/* headers with expected length are allocated from here... */
-	char header_cache_name[64];
-	struct dma_pool *header_cache;
-
-	/* packets are allocated from the slab cache... */
-	char pkt_slab_name[64];
-	struct kmem_cache *pkt_slab;
-
-	/* as packets go on the queued queue, they are counted... */
-	u32 counter;
-	u32 sent_counter;
-
-	/* dma page table */
-	struct rb_root dma_pages_root;
-
-	/* protect everything above... */
-	struct mutex lock;
-};
-
-struct ipath_user_sdma_queue *
-ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport)
-{
-	struct ipath_user_sdma_queue *pq =
-		kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL);
-
-	if (!pq)
-		goto done;
-
-	pq->counter = 0;
-	pq->sent_counter = 0;
-	INIT_LIST_HEAD(&pq->sent);
-
-	mutex_init(&pq->lock);
-
-	snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
-		 "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport);
-	pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
-					 sizeof(struct ipath_user_sdma_pkt),
-					 0, 0, NULL);
-
-	if (!pq->pkt_slab)
-		goto err_kfree;
-
-	snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
-		 "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport);
-	pq->header_cache = dma_pool_create(pq->header_cache_name,
-					   dev,
-					   IPATH_USER_SDMA_EXP_HEADER_LENGTH,
-					   4, 0);
-	if (!pq->header_cache)
-		goto err_slab;
-
-	pq->dma_pages_root = RB_ROOT;
-
-	goto done;
-
-err_slab:
-	kmem_cache_destroy(pq->pkt_slab);
-err_kfree:
-	kfree(pq);
-	pq = NULL;
-
-done:
-	return pq;
-}
-
-static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt,
-				      int i, size_t offset, size_t len,
-				      int put_page, int dma_mapped,
-				      struct page *page,
-				      void *kvaddr, dma_addr_t dma_addr)
-{
-	pkt->addr[i].offset = offset;
-	pkt->addr[i].length = len;
-	pkt->addr[i].put_page = put_page;
-	pkt->addr[i].dma_mapped = dma_mapped;
-	pkt->addr[i].page = page;
-	pkt->addr[i].kvaddr = kvaddr;
-	pkt->addr[i].addr = dma_addr;
-}
-
-static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt,
-					u32 counter, size_t offset,
-					size_t len, int dma_mapped,
-					struct page *page,
-					void *kvaddr, dma_addr_t dma_addr)
-{
-	pkt->naddr = 1;
-	pkt->counter = counter;
-	ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
-				  kvaddr, dma_addr);
-}
-
-/* we've too many pages in the iovec, coalesce to a single page */
-static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
-				    struct ipath_user_sdma_pkt *pkt,
-				    const struct iovec *iov,
-				    unsigned long niov) {
-	int ret = 0;
-	struct page *page = alloc_page(GFP_KERNEL);
-	void *mpage_save;
-	char *mpage;
-	int i;
-	int len = 0;
-	dma_addr_t dma_addr;
-
-	if (!page) {
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	mpage = kmap(page);
-	mpage_save = mpage;
-	for (i = 0; i < niov; i++) {
-		int cfur;
-
-		cfur = copy_from_user(mpage,
-				      iov[i].iov_base, iov[i].iov_len);
-		if (cfur) {
-			ret = -EFAULT;
-			goto free_unmap;
-		}
-
-		mpage += iov[i].iov_len;
-		len += iov[i].iov_len;
-	}
-
-	dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
-				DMA_TO_DEVICE);
-	if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-		ret = -ENOMEM;
-		goto free_unmap;
-	}
-
-	ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
-				  dma_addr);
-	pkt->naddr = 2;
-
-	goto done;
-
-free_unmap:
-	kunmap(page);
-	__free_page(page);
-done:
-	return ret;
-}
-
-/* how many pages in this iovec element? */
-static int ipath_user_sdma_num_pages(const struct iovec *iov)
-{
-	const unsigned long addr  = (unsigned long) iov->iov_base;
-	const unsigned long  len  = iov->iov_len;
-	const unsigned long spage = addr & PAGE_MASK;
-	const unsigned long epage = (addr + len - 1) & PAGE_MASK;
-
-	return 1 + ((epage - spage) >> PAGE_SHIFT);
-}
-
-/* truncate length to page boundary */
-static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
-{
-	const unsigned long offset = offset_in_page(addr);
-
-	return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
-}
-
-static void ipath_user_sdma_free_pkt_frag(struct device *dev,
-					  struct ipath_user_sdma_queue *pq,
-					  struct ipath_user_sdma_pkt *pkt,
-					  int frag)
-{
-	const int i = frag;
-
-	if (pkt->addr[i].page) {
-		if (pkt->addr[i].dma_mapped)
-			dma_unmap_page(dev,
-				       pkt->addr[i].addr,
-				       pkt->addr[i].length,
-				       DMA_TO_DEVICE);
-
-		if (pkt->addr[i].kvaddr)
-			kunmap(pkt->addr[i].page);
-
-		if (pkt->addr[i].put_page)
-			put_page(pkt->addr[i].page);
-		else
-			__free_page(pkt->addr[i].page);
-	} else if (pkt->addr[i].kvaddr)
-		/* free coherent mem from cache... */
-		dma_pool_free(pq->header_cache,
-			      pkt->addr[i].kvaddr, pkt->addr[i].addr);
-}
-
-/* return number of pages pinned... */
-static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
-				     struct ipath_user_sdma_pkt *pkt,
-				     unsigned long addr, int tlen, int npages)
-{
-	struct page *pages[2];
-	int j;
-	int ret;
-
-	ret = get_user_pages_fast(addr, npages, 0, pages);
-	if (ret != npages) {
-		int i;
-
-		for (i = 0; i < ret; i++)
-			put_page(pages[i]);
-
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	for (j = 0; j < npages; j++) {
-		/* map the pages... */
-		const int flen =
-			ipath_user_sdma_page_length(addr, tlen);
-		dma_addr_t dma_addr =
-			dma_map_page(&dd->pcidev->dev,
-				     pages[j], 0, flen, DMA_TO_DEVICE);
-		unsigned long fofs = offset_in_page(addr);
-
-		if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-			ret = -ENOMEM;
-			goto done;
-		}
-
-		ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
-					  pages[j], kmap(pages[j]),
-					  dma_addr);
-
-		pkt->naddr++;
-		addr += flen;
-		tlen -= flen;
-	}
-
-done:
-	return ret;
-}
-
-static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd,
-				   struct ipath_user_sdma_queue *pq,
-				   struct ipath_user_sdma_pkt *pkt,
-				   const struct iovec *iov,
-				   unsigned long niov)
-{
-	int ret = 0;
-	unsigned long idx;
-
-	for (idx = 0; idx < niov; idx++) {
-		const int npages = ipath_user_sdma_num_pages(iov + idx);
-		const unsigned long addr = (unsigned long) iov[idx].iov_base;
-
-		ret = ipath_user_sdma_pin_pages(dd, pkt,
-						addr, iov[idx].iov_len,
-						npages);
-		if (ret < 0)
-			goto free_pkt;
-	}
-
-	goto done;
-
-free_pkt:
-	for (idx = 0; idx < pkt->naddr; idx++)
-		ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
-
-done:
-	return ret;
-}
-
-static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd,
-					struct ipath_user_sdma_queue *pq,
-					struct ipath_user_sdma_pkt *pkt,
-					const struct iovec *iov,
-					unsigned long niov, int npages)
-{
-	int ret = 0;
-
-	if (npages >= ARRAY_SIZE(pkt->addr))
-		ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov);
-	else
-		ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
-
-	return ret;
-}
-
-/* free a packet list -- return counter value of last packet */
-static void ipath_user_sdma_free_pkt_list(struct device *dev,
-					  struct ipath_user_sdma_queue *pq,
-					  struct list_head *list)
-{
-	struct ipath_user_sdma_pkt *pkt, *pkt_next;
-
-	list_for_each_entry_safe(pkt, pkt_next, list, list) {
-		int i;
-
-		for (i = 0; i < pkt->naddr; i++)
-			ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i);
-
-		kmem_cache_free(pq->pkt_slab, pkt);
-	}
-}
-
-/*
- * copy headers, coalesce etc -- pq->lock must be held
- *
- * we queue all the packets to list, returning the
- * number of bytes total.  list must be empty initially,
- * as, if there is an error we clean it...
- */
-static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
-				      struct ipath_user_sdma_queue *pq,
-				      struct list_head *list,
-				      const struct iovec *iov,
-				      unsigned long niov,
-				      int maxpkts)
-{
-	unsigned long idx = 0;
-	int ret = 0;
-	int npkts = 0;
-	struct page *page = NULL;
-	__le32 *pbc;
-	dma_addr_t dma_addr;
-	struct ipath_user_sdma_pkt *pkt = NULL;
-	size_t len;
-	size_t nw;
-	u32 counter = pq->counter;
-	int dma_mapped = 0;
-
-	while (idx < niov && npkts < maxpkts) {
-		const unsigned long addr = (unsigned long) iov[idx].iov_base;
-		const unsigned long idx_save = idx;
-		unsigned pktnw;
-		unsigned pktnwc;
-		int nfrags = 0;
-		int npages = 0;
-		int cfur;
-
-		dma_mapped = 0;
-		len = iov[idx].iov_len;
-		nw = len >> 2;
-		page = NULL;
-
-		pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
-		if (!pkt) {
-			ret = -ENOMEM;
-			goto free_list;
-		}
-
-		if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH ||
-		    len > PAGE_SIZE || len & 3 || addr & 3) {
-			ret = -EINVAL;
-			goto free_pkt;
-		}
-
-		if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH)
-			pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
-					     &dma_addr);
-		else
-			pbc = NULL;
-
-		if (!pbc) {
-			page = alloc_page(GFP_KERNEL);
-			if (!page) {
-				ret = -ENOMEM;
-				goto free_pkt;
-			}
-			pbc = kmap(page);
-		}
-
-		cfur = copy_from_user(pbc, iov[idx].iov_base, len);
-		if (cfur) {
-			ret = -EFAULT;
-			goto free_pbc;
-		}
-
-		/*
-		 * this assignment is a bit strange.  it's because the
-		 * the pbc counts the number of 32 bit words in the full
-		 * packet _except_ the first word of the pbc itself...
-		 */
-		pktnwc = nw - 1;
-
-		/*
-		 * pktnw computation yields the number of 32 bit words
-		 * that the caller has indicated in the PBC.  note that
-		 * this is one less than the total number of words that
-		 * goes to the send DMA engine as the first 32 bit word
-		 * of the PBC itself is not counted.  Armed with this count,
-		 * we can verify that the packet is consistent with the
-		 * iovec lengths.
-		 */
-		pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK;
-		if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
-			ret = -EINVAL;
-			goto free_pbc;
-		}
-
-
-		idx++;
-		while (pktnwc < pktnw && idx < niov) {
-			const size_t slen = iov[idx].iov_len;
-			const unsigned long faddr =
-				(unsigned long) iov[idx].iov_base;
-
-			if (slen & 3 || faddr & 3 || !slen ||
-			    slen > PAGE_SIZE) {
-				ret = -EINVAL;
-				goto free_pbc;
-			}
-
-			npages++;
-			if ((faddr & PAGE_MASK) !=
-			    ((faddr + slen - 1) & PAGE_MASK))
-				npages++;
-
-			pktnwc += slen >> 2;
-			idx++;
-			nfrags++;
-		}
-
-		if (pktnwc != pktnw) {
-			ret = -EINVAL;
-			goto free_pbc;
-		}
-
-		if (page) {
-			dma_addr = dma_map_page(&dd->pcidev->dev,
-						page, 0, len, DMA_TO_DEVICE);
-			if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-				ret = -ENOMEM;
-				goto free_pbc;
-			}
-
-			dma_mapped = 1;
-		}
-
-		ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
-					    page, pbc, dma_addr);
-
-		if (nfrags) {
-			ret = ipath_user_sdma_init_payload(dd, pq, pkt,
-							   iov + idx_save + 1,
-							   nfrags, npages);
-			if (ret < 0)
-				goto free_pbc_dma;
-		}
-
-		counter++;
-		npkts++;
-
-		list_add_tail(&pkt->list, list);
-	}
-
-	ret = idx;
-	goto done;
-
-free_pbc_dma:
-	if (dma_mapped)
-		dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
-free_pbc:
-	if (page) {
-		kunmap(page);
-		__free_page(page);
-	} else
-		dma_pool_free(pq->header_cache, pbc, dma_addr);
-free_pkt:
-	kmem_cache_free(pq->pkt_slab, pkt);
-free_list:
-	ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
-done:
-	return ret;
-}
-
-static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq,
-						 u32 c)
-{
-	pq->sent_counter = c;
-}
-
-/* try to clean out queue -- needs pq->lock */
-static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd,
-				       struct ipath_user_sdma_queue *pq)
-{
-	struct list_head free_list;
-	struct ipath_user_sdma_pkt *pkt;
-	struct ipath_user_sdma_pkt *pkt_prev;
-	int ret = 0;
-
-	INIT_LIST_HEAD(&free_list);
-
-	list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
-		s64 descd = dd->ipath_sdma_descq_removed - pkt->added;
-
-		if (descd < 0)
-			break;
-
-		list_move_tail(&pkt->list, &free_list);
-
-		/* one more packet cleaned */
-		ret++;
-	}
-
-	if (!list_empty(&free_list)) {
-		u32 counter;
-
-		pkt = list_entry(free_list.prev,
-				 struct ipath_user_sdma_pkt, list);
-		counter = pkt->counter;
-
-		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
-		ipath_user_sdma_set_complete_counter(pq, counter);
-	}
-
-	return ret;
-}
-
-void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq)
-{
-	if (!pq)
-		return;
-
-	kmem_cache_destroy(pq->pkt_slab);
-	dma_pool_destroy(pq->header_cache);
-	kfree(pq);
-}
-
-/* clean descriptor queue, returns > 0 if some elements cleaned */
-static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd)
-{
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-	ret = ipath_sdma_make_progress(dd);
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	return ret;
-}
-
-/* we're in close, drain packets so that we can cleanup successfully... */
-void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
-				 struct ipath_user_sdma_queue *pq)
-{
-	int i;
-
-	if (!pq)
-		return;
-
-	for (i = 0; i < 100; i++) {
-		mutex_lock(&pq->lock);
-		if (list_empty(&pq->sent)) {
-			mutex_unlock(&pq->lock);
-			break;
-		}
-		ipath_user_sdma_hwqueue_clean(dd);
-		ipath_user_sdma_queue_clean(dd, pq);
-		mutex_unlock(&pq->lock);
-		msleep(10);
-	}
-
-	if (!list_empty(&pq->sent)) {
-		struct list_head free_list;
-
-		printk(KERN_INFO "drain: lists not empty: forcing!\n");
-		INIT_LIST_HEAD(&free_list);
-		mutex_lock(&pq->lock);
-		list_splice_init(&pq->sent, &free_list);
-		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
-		mutex_unlock(&pq->lock);
-	}
-}
-
-static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
-					   u64 addr, u64 dwlen, u64 dwoffset)
-{
-	return cpu_to_le64(/* SDmaPhyAddr[31:0] */
-			   ((addr & 0xfffffffcULL) << 32) |
-			   /* SDmaGeneration[1:0] */
-			   ((dd->ipath_sdma_generation & 3ULL) << 30) |
-			   /* SDmaDwordCount[10:0] */
-			   ((dwlen & 0x7ffULL) << 16) |
-			   /* SDmaBufOffset[12:2] */
-			   (dwoffset & 0x7ffULL));
-}
-
-static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
-{
-	return descq | cpu_to_le64(1ULL << 12);
-}
-
-static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
-{
-					      /* last */  /* dma head */
-	return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
-}
-
-static inline __le64 ipath_sdma_make_desc1(u64 addr)
-{
-	/* SDmaPhyAddr[47:32] */
-	return cpu_to_le64(addr >> 32);
-}
-
-static void ipath_user_sdma_send_frag(struct ipath_devdata *dd,
-				      struct ipath_user_sdma_pkt *pkt, int idx,
-				      unsigned ofs, u16 tail)
-{
-	const u64 addr = (u64) pkt->addr[idx].addr +
-		(u64) pkt->addr[idx].offset;
-	const u64 dwlen = (u64) pkt->addr[idx].length / 4;
-	__le64 *descqp;
-	__le64 descq0;
-
-	descqp = &dd->ipath_sdma_descq[tail].qw[0];
-
-	descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs);
-	if (idx == 0)
-		descq0 = ipath_sdma_make_first_desc0(descq0);
-	if (idx == pkt->naddr - 1)
-		descq0 = ipath_sdma_make_last_desc0(descq0);
-
-	descqp[0] = descq0;
-	descqp[1] = ipath_sdma_make_desc1(addr);
-}
-
-/* pq->lock must be held, get packets on the wire... */
-static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
-				     struct ipath_user_sdma_queue *pq,
-				     struct list_head *pktlist)
-{
-	int ret = 0;
-	unsigned long flags;
-	u16 tail;
-
-	if (list_empty(pktlist))
-		return 0;
-
-	if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE)))
-		return -ECOMM;
-
-	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-
-	if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) {
-		ret = -ECOMM;
-		goto unlock;
-	}
-
-	tail = dd->ipath_sdma_descq_tail;
-	while (!list_empty(pktlist)) {
-		struct ipath_user_sdma_pkt *pkt =
-			list_entry(pktlist->next, struct ipath_user_sdma_pkt,
-				   list);
-		int i;
-		unsigned ofs = 0;
-		u16 dtail = tail;
-
-		if (pkt->naddr > ipath_sdma_descq_freecnt(dd))
-			goto unlock_check_tail;
-
-		for (i = 0; i < pkt->naddr; i++) {
-			ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail);
-			ofs += pkt->addr[i].length >> 2;
-
-			if (++tail == dd->ipath_sdma_descq_cnt) {
-				tail = 0;
-				++dd->ipath_sdma_generation;
-			}
-		}
-
-		if ((ofs<<2) > dd->ipath_ibmaxlen) {
-			ipath_dbg("packet size %X > ibmax %X, fail\n",
-				ofs<<2, dd->ipath_ibmaxlen);
-			ret = -EMSGSIZE;
-			goto unlock;
-		}
-
-		/*
-		 * if the packet is >= 2KB mtu equivalent, we have to use
-		 * the large buffers, and have to mark each descriptor as
-		 * part of a large buffer packet.
-		 */
-		if (ofs >= IPATH_SMALLBUF_DWORDS) {
-			for (i = 0; i < pkt->naddr; i++) {
-				dd->ipath_sdma_descq[dtail].qw[0] |=
-					cpu_to_le64(1ULL << 14);
-				if (++dtail == dd->ipath_sdma_descq_cnt)
-					dtail = 0;
-			}
-		}
-
-		dd->ipath_sdma_descq_added += pkt->naddr;
-		pkt->added = dd->ipath_sdma_descq_added;
-		list_move_tail(&pkt->list, &pq->sent);
-		ret++;
-	}
-
-unlock_check_tail:
-	/* advance the tail on the chip if necessary */
-	if (dd->ipath_sdma_descq_tail != tail) {
-		wmb();
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
-		dd->ipath_sdma_descq_tail = tail;
-	}
-
-unlock:
-	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	return ret;
-}
-
-int ipath_user_sdma_writev(struct ipath_devdata *dd,
-			   struct ipath_user_sdma_queue *pq,
-			   const struct iovec *iov,
-			   unsigned long dim)
-{
-	int ret = 0;
-	struct list_head list;
-	int npkts = 0;
-
-	INIT_LIST_HEAD(&list);
-
-	mutex_lock(&pq->lock);
-
-	if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) {
-		ipath_user_sdma_hwqueue_clean(dd);
-		ipath_user_sdma_queue_clean(dd, pq);
-	}
-
-	while (dim) {
-		const int mxp = 8;
-
-		ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
-		if (ret <= 0)
-			goto done_unlock;
-		else {
-			dim -= ret;
-			iov += ret;
-		}
-
-		/* force packets onto the sdma hw queue... */
-		if (!list_empty(&list)) {
-			/*
-			 * lazily clean hw queue.  the 4 is a guess of about
-			 * how many sdma descriptors a packet will take (it
-			 * doesn't have to be perfect).
-			 */
-			if (ipath_sdma_descq_freecnt(dd) < ret * 4) {
-				ipath_user_sdma_hwqueue_clean(dd);
-				ipath_user_sdma_queue_clean(dd, pq);
-			}
-
-			ret = ipath_user_sdma_push_pkts(dd, pq, &list);
-			if (ret < 0)
-				goto done_unlock;
-			else {
-				npkts += ret;
-				pq->counter += ret;
-
-				if (!list_empty(&list))
-					goto done_unlock;
-			}
-		}
-	}
-
-done_unlock:
-	if (!list_empty(&list))
-		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
-	mutex_unlock(&pq->lock);
-
-	return (ret < 0) ? ret : npkts;
-}
-
-int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
-				  struct ipath_user_sdma_queue *pq)
-{
-	int ret = 0;
-
-	mutex_lock(&pq->lock);
-	ipath_user_sdma_hwqueue_clean(dd);
-	ret = ipath_user_sdma_queue_clean(dd, pq);
-	mutex_unlock(&pq->lock);
-
-	return ret;
-}
-
-u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq)
-{
-	return pq->sent_counter;
-}
-
-u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq)
-{
-	return pq->counter;
-}
-
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.h b/drivers/staging/rdma/ipath/ipath_user_sdma.h
deleted file mode 100644
index fc76316c4a58..000000000000
--- a/drivers/staging/rdma/ipath/ipath_user_sdma.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/device.h>
-
-struct ipath_user_sdma_queue;
-
-struct ipath_user_sdma_queue *
-ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
-void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq);
-
-int ipath_user_sdma_writev(struct ipath_devdata *dd,
-			   struct ipath_user_sdma_queue *pq,
-			   const struct iovec *iov,
-			   unsigned long dim);
-
-int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
-				  struct ipath_user_sdma_queue *pq);
-
-void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
-				 struct ipath_user_sdma_queue *pq);
-
-u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq);
-u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq);
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
deleted file mode 100644
index 53f9dcab180d..000000000000
--- a/drivers/staging/rdma/ipath/ipath_verbs.c
+++ /dev/null
@@ -1,2376 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_mad.h>
-#include <rdma/ib_user_verbs.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/utsname.h>
-#include <linux/rculist.h>
-
-#include "ipath_kernel.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-static unsigned int ib_ipath_qp_table_size = 251;
-module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
-MODULE_PARM_DESC(qp_table_size, "QP table size");
-
-unsigned int ib_ipath_lkey_table_size = 12;
-module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
-		   S_IRUGO);
-MODULE_PARM_DESC(lkey_table_size,
-		 "LKEY table size in bits (2^n, 1 <= n <= 23)");
-
-static unsigned int ib_ipath_max_pds = 0xFFFF;
-module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_pds,
-		 "Maximum number of protection domains to support");
-
-static unsigned int ib_ipath_max_ahs = 0xFFFF;
-module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
-
-unsigned int ib_ipath_max_cqes = 0x2FFFF;
-module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_cqes,
-		 "Maximum number of completion queue entries to support");
-
-unsigned int ib_ipath_max_cqs = 0x1FFFF;
-module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
-
-unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
-module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
-		   S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
-
-unsigned int ib_ipath_max_qps = 16384;
-module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
-
-unsigned int ib_ipath_max_sges = 0x60;
-module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
-
-unsigned int ib_ipath_max_mcast_grps = 16384;
-module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
-		   S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_mcast_grps,
-		 "Maximum number of multicast groups to support");
-
-unsigned int ib_ipath_max_mcast_qp_attached = 16;
-module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
-		   uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_mcast_qp_attached,
-		 "Maximum number of attached QPs to support");
-
-unsigned int ib_ipath_max_srqs = 1024;
-module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
-
-unsigned int ib_ipath_max_srq_sges = 128;
-module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
-		   uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
-
-unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
-module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
-		   uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
-
-static unsigned int ib_ipath_disable_sma;
-module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(disable_sma, "Disable the SMA");
-
-/*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; ipath_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
- */
-const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
-	[IB_QPS_RESET] = 0,
-	[IB_QPS_INIT] = IPATH_POST_RECV_OK,
-	[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
-	[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
-	    IPATH_PROCESS_NEXT_SEND_OK,
-	[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
-	[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
-	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
-	[IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
-	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
-};
-
-struct ipath_ucontext {
-	struct ib_ucontext ibucontext;
-};
-
-static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
-						  *ibucontext)
-{
-	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
-}
-
-/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
-	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
-	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
-	[IB_WR_SEND] = IB_WC_SEND,
-	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
-	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
-	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
-	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
-};
-
-/*
- * System image GUID.
- */
-static __be64 sys_image_guid;
-
-/**
- * ipath_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- */
-void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
-{
-	struct ipath_sge *sge = &ss->sge;
-
-	while (length) {
-		u32 len = sge->length;
-
-		if (len > length)
-			len = length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		memcpy(sge->vaddr, data, len);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--ss->num_sge)
-				*sge = *ss->sg_list++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		data += len;
-		length -= len;
-	}
-}
-
-/**
- * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
-{
-	struct ipath_sge *sge = &ss->sge;
-
-	while (length) {
-		u32 len = sge->length;
-
-		if (len > length)
-			len = length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--ss->num_sge)
-				*sge = *ss->sg_list++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		length -= len;
-	}
-}
-
-/*
- * Count the number of DMA descriptors needed to send length bytes of data.
- * Don't modify the ipath_sge_state to get the count.
- * Return zero if any of the segments is not aligned.
- */
-static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
-{
-	struct ipath_sge *sg_list = ss->sg_list;
-	struct ipath_sge sge = ss->sge;
-	u8 num_sge = ss->num_sge;
-	u32 ndesc = 1;	/* count the header */
-
-	while (length) {
-		u32 len = sge.length;
-
-		if (len > length)
-			len = length;
-		if (len > sge.sge_length)
-			len = sge.sge_length;
-		BUG_ON(len == 0);
-		if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
-		    (len != length && (len & (sizeof(u32) - 1)))) {
-			ndesc = 0;
-			break;
-		}
-		ndesc++;
-		sge.vaddr += len;
-		sge.length -= len;
-		sge.sge_length -= len;
-		if (sge.sge_length == 0) {
-			if (--num_sge)
-				sge = *sg_list++;
-		} else if (sge.length == 0 && sge.mr != NULL) {
-			if (++sge.n >= IPATH_SEGSZ) {
-				if (++sge.m >= sge.mr->mapsz)
-					break;
-				sge.n = 0;
-			}
-			sge.vaddr =
-				sge.mr->map[sge.m]->segs[sge.n].vaddr;
-			sge.length =
-				sge.mr->map[sge.m]->segs[sge.n].length;
-		}
-		length -= len;
-	}
-	return ndesc;
-}
-
-/*
- * Copy from the SGEs to the data buffer.
- */
-static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
-				u32 length)
-{
-	struct ipath_sge *sge = &ss->sge;
-
-	while (length) {
-		u32 len = sge->length;
-
-		if (len > length)
-			len = length;
-		if (len > sge->sge_length)
-			len = sge->sge_length;
-		BUG_ON(len == 0);
-		memcpy(data, sge->vaddr, len);
-		sge->vaddr += len;
-		sge->length -= len;
-		sge->sge_length -= len;
-		if (sge->sge_length == 0) {
-			if (--ss->num_sge)
-				*sge = *ss->sg_list++;
-		} else if (sge->length == 0 && sge->mr != NULL) {
-			if (++sge->n >= IPATH_SEGSZ) {
-				if (++sge->m >= sge->mr->mapsz)
-					break;
-				sge->n = 0;
-			}
-			sge->vaddr =
-				sge->mr->map[sge->m]->segs[sge->n].vaddr;
-			sge->length =
-				sge->mr->map[sge->m]->segs[sge->n].length;
-		}
-		data += len;
-		length -= len;
-	}
-}
-
-/**
- * ipath_post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
-{
-	struct ipath_swqe *wqe;
-	u32 next;
-	int i;
-	int j;
-	int acc;
-	int ret;
-	unsigned long flags;
-	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (qp->ibqp.qp_type != IB_QPT_SMI &&
-	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
-		ret = -ENETDOWN;
-		goto bail;
-	}
-
-	/* Check that state is OK to post send. */
-	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
-		goto bail_inval;
-
-	/* IB spec says that num_sge == 0 is OK. */
-	if (wr->num_sge > qp->s_max_sge)
-		goto bail_inval;
-
-	/*
-	 * Don't allow RDMA reads or atomic operations on UC or
-	 * undefined operations.
-	 * Make sure buffer is large enough to hold the result for atomics.
-	 */
-	if (qp->ibqp.qp_type == IB_QPT_UC) {
-		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
-			goto bail_inval;
-	} else if (qp->ibqp.qp_type == IB_QPT_UD) {
-		/* Check UD opcode */
-		if (wr->opcode != IB_WR_SEND &&
-		    wr->opcode != IB_WR_SEND_WITH_IMM)
-			goto bail_inval;
-		/* Check UD destination address PD */
-		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
-			goto bail_inval;
-	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
-		goto bail_inval;
-	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-		   (wr->num_sge == 0 ||
-		    wr->sg_list[0].length < sizeof(u64) ||
-		    wr->sg_list[0].addr & (sizeof(u64) - 1)))
-		goto bail_inval;
-	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
-		goto bail_inval;
-
-	next = qp->s_head + 1;
-	if (next >= qp->s_size)
-		next = 0;
-	if (next == qp->s_last) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	wqe = get_swqe_ptr(qp, qp->s_head);
-
-	if (qp->ibqp.qp_type != IB_QPT_UC &&
-	    qp->ibqp.qp_type != IB_QPT_RC)
-		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
-	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-		 wr->opcode == IB_WR_RDMA_WRITE ||
-		 wr->opcode == IB_WR_RDMA_READ)
-		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
-	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
-	else
-		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
-	wqe->length = 0;
-	if (wr->num_sge) {
-		acc = wr->opcode >= IB_WR_RDMA_READ ?
-			IB_ACCESS_LOCAL_WRITE : 0;
-		for (i = 0, j = 0; i < wr->num_sge; i++) {
-			u32 length = wr->sg_list[i].length;
-			int ok;
-
-			if (length == 0)
-				continue;
-			ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
-					   &wr->sg_list[i], acc);
-			if (!ok)
-				goto bail_inval;
-			wqe->length += length;
-			j++;
-		}
-		wqe->wr.num_sge = j;
-	}
-	if (qp->ibqp.qp_type == IB_QPT_UC ||
-	    qp->ibqp.qp_type == IB_QPT_RC) {
-		if (wqe->length > 0x80000000U)
-			goto bail_inval;
-	} else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
-		goto bail_inval;
-	wqe->ssn = qp->s_ssn++;
-	qp->s_head = next;
-
-	ret = 0;
-	goto bail;
-
-bail_inval:
-	ret = -EINVAL;
-bail:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
-}
-
-/**
- * ipath_post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			   struct ib_send_wr **bad_wr)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-	int err = 0;
-
-	for (; wr; wr = wr->next) {
-		err = ipath_post_one_send(qp, wr);
-		if (err) {
-			*bad_wr = wr;
-			goto bail;
-		}
-	}
-
-	/* Try to do the send work in the caller's context. */
-	ipath_do_send((unsigned long) qp);
-
-bail:
-	return err;
-}
-
-/**
- * ipath_post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			      struct ib_recv_wr **bad_wr)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-	struct ipath_rwq *wq = qp->r_rq.wq;
-	unsigned long flags;
-	int ret;
-
-	/* Check that state is OK to post receive. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
-		*bad_wr = wr;
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	for (; wr; wr = wr->next) {
-		struct ipath_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = wq->head + 1;
-		if (next >= qp->r_rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_qp_rcv - processing an incoming packet on a QP
- * @dev: the device the packet came on
- * @hdr: the packet header
- * @has_grh: true if the packet has a GRH
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
- *
- * This is called from ipath_ib_rcv() to process an incoming packet
- * for the given QP.
- * Called at interrupt level.
- */
-static void ipath_qp_rcv(struct ipath_ibdev *dev,
-			 struct ipath_ib_header *hdr, int has_grh,
-			 void *data, u32 tlen, struct ipath_qp *qp)
-{
-	/* Check for valid receive state. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
-		dev->n_pkt_drops++;
-		return;
-	}
-
-	switch (qp->ibqp.qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		if (ib_ipath_disable_sma)
-			break;
-		/* FALLTHROUGH */
-	case IB_QPT_UD:
-		ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
-		break;
-
-	case IB_QPT_RC:
-		ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
-		break;
-
-	case IB_QPT_UC:
-		ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
-		break;
-
-	default:
-		break;
-	}
-}
-
-/**
- * ipath_ib_rcv - process an incoming packet
- * @arg: the device pointer
- * @rhdr: the header of the packet
- * @data: the packet data
- * @tlen: the packet length
- *
- * This is called from ipath_kreceive() to process an incoming packet at
- * interrupt level. Tlen is the length of the header + data + CRC in bytes.
- */
-void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
-		  u32 tlen)
-{
-	struct ipath_ib_header *hdr = rhdr;
-	struct ipath_other_headers *ohdr;
-	struct ipath_qp *qp;
-	u32 qp_num;
-	int lnh;
-	u8 opcode;
-	u16 lid;
-
-	if (unlikely(dev == NULL))
-		goto bail;
-
-	if (unlikely(tlen < 24)) {	/* LRH+BTH+CRC */
-		dev->rcv_errors++;
-		goto bail;
-	}
-
-	/* Check for a valid destination LID (see ch. 7.11.1). */
-	lid = be16_to_cpu(hdr->lrh[1]);
-	if (lid < IPATH_MULTICAST_LID_BASE) {
-		lid &= ~((1 << dev->dd->ipath_lmc) - 1);
-		if (unlikely(lid != dev->dd->ipath_lid)) {
-			dev->rcv_errors++;
-			goto bail;
-		}
-	}
-
-	/* Check for GRH */
-	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
-	if (lnh == IPATH_LRH_BTH)
-		ohdr = &hdr->u.oth;
-	else if (lnh == IPATH_LRH_GRH)
-		ohdr = &hdr->u.l.oth;
-	else {
-		dev->rcv_errors++;
-		goto bail;
-	}
-
-	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
-	dev->opstats[opcode].n_bytes += tlen;
-	dev->opstats[opcode].n_packets++;
-
-	/* Get the destination QP number. */
-	qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
-	if (qp_num == IPATH_MULTICAST_QPN) {
-		struct ipath_mcast *mcast;
-		struct ipath_mcast_qp *p;
-
-		if (lnh != IPATH_LRH_GRH) {
-			dev->n_pkt_drops++;
-			goto bail;
-		}
-		mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
-		if (mcast == NULL) {
-			dev->n_pkt_drops++;
-			goto bail;
-		}
-		dev->n_multicast_rcv++;
-		list_for_each_entry_rcu(p, &mcast->qp_list, list)
-			ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
-		/*
-		 * Notify ipath_multicast_detach() if it is waiting for us
-		 * to finish.
-		 */
-		if (atomic_dec_return(&mcast->refcount) <= 1)
-			wake_up(&mcast->wait);
-	} else {
-		qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
-		if (qp) {
-			dev->n_unicast_rcv++;
-			ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
-				     tlen, qp);
-			/*
-			 * Notify ipath_destroy_qp() if it is waiting
-			 * for us to finish.
-			 */
-			if (atomic_dec_and_test(&qp->refcount))
-				wake_up(&qp->wait);
-		} else
-			dev->n_pkt_drops++;
-	}
-
-bail:;
-}
-
-/**
- * ipath_ib_timer - verbs timer
- * @arg: the device pointer
- *
- * This is called from ipath_do_rcv_timer() at interrupt level to check for
- * QPs which need retransmits and to collect performance numbers.
- */
-static void ipath_ib_timer(struct ipath_ibdev *dev)
-{
-	struct ipath_qp *resend = NULL;
-	struct ipath_qp *rnr = NULL;
-	struct list_head *last;
-	struct ipath_qp *qp;
-	unsigned long flags;
-
-	if (dev == NULL)
-		return;
-
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	/* Start filling the next pending queue. */
-	if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
-		dev->pending_index = 0;
-	/* Save any requests still in the new queue, they have timed out. */
-	last = &dev->pending[dev->pending_index];
-	while (!list_empty(last)) {
-		qp = list_entry(last->next, struct ipath_qp, timerwait);
-		list_del_init(&qp->timerwait);
-		qp->timer_next = resend;
-		resend = qp;
-		atomic_inc(&qp->refcount);
-	}
-	last = &dev->rnrwait;
-	if (!list_empty(last)) {
-		qp = list_entry(last->next, struct ipath_qp, timerwait);
-		if (--qp->s_rnr_timeout == 0) {
-			do {
-				list_del_init(&qp->timerwait);
-				qp->timer_next = rnr;
-				rnr = qp;
-				atomic_inc(&qp->refcount);
-				if (list_empty(last))
-					break;
-				qp = list_entry(last->next, struct ipath_qp,
-						timerwait);
-			} while (qp->s_rnr_timeout == 0);
-		}
-	}
-	/*
-	 * We should only be in the started state if pma_sample_start != 0
-	 */
-	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
-	    --dev->pma_sample_start == 0) {
-		dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
-		ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
-					&dev->ipath_rword,
-					&dev->ipath_spkts,
-					&dev->ipath_rpkts,
-					&dev->ipath_xmit_wait);
-	}
-	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
-		if (dev->pma_sample_interval == 0) {
-			u64 ta, tb, tc, td, te;
-
-			dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
-			ipath_snapshot_counters(dev->dd, &ta, &tb,
-						&tc, &td, &te);
-
-			dev->ipath_sword = ta - dev->ipath_sword;
-			dev->ipath_rword = tb - dev->ipath_rword;
-			dev->ipath_spkts = tc - dev->ipath_spkts;
-			dev->ipath_rpkts = td - dev->ipath_rpkts;
-			dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
-		} else {
-			dev->pma_sample_interval--;
-		}
-	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-	/* XXX What if timer fires again while this is running? */
-	while (resend != NULL) {
-		qp = resend;
-		resend = qp->timer_next;
-
-		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_last != qp->s_tail &&
-		    ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
-			dev->n_timeouts++;
-			ipath_restart_rc(qp, qp->s_last_psn + 1);
-		}
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-
-		/* Notify ipath_destroy_qp() if it is waiting. */
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-	while (rnr != NULL) {
-		qp = rnr;
-		rnr = qp->timer_next;
-
-		spin_lock_irqsave(&qp->s_lock, flags);
-		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
-			ipath_schedule_send(qp);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-
-		/* Notify ipath_destroy_qp() if it is waiting. */
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-}
-
-static void update_sge(struct ipath_sge_state *ss, u32 length)
-{
-	struct ipath_sge *sge = &ss->sge;
-
-	sge->vaddr += length;
-	sge->length -= length;
-	sge->sge_length -= length;
-	if (sge->sge_length == 0) {
-		if (--ss->num_sge)
-			*sge = *ss->sg_list++;
-	} else if (sge->length == 0 && sge->mr != NULL) {
-		if (++sge->n >= IPATH_SEGSZ) {
-			if (++sge->m >= sge->mr->mapsz)
-				return;
-			sge->n = 0;
-		}
-		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
-		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
-	}
-}
-
-#ifdef __LITTLE_ENDIAN
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-	return data >> shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-	return data << shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
-	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-	return data;
-}
-#else
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-	return data << shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-	return data >> shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
-	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-	return data;
-}
-#endif
-
-static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
-		    u32 length, unsigned flush_wc)
-{
-	u32 extra = 0;
-	u32 data = 0;
-	u32 last;
-
-	while (1) {
-		u32 len = ss->sge.length;
-		u32 off;
-
-		if (len > length)
-			len = length;
-		if (len > ss->sge.sge_length)
-			len = ss->sge.sge_length;
-		BUG_ON(len == 0);
-		/* If the source address is not aligned, try to align it. */
-		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
-		if (off) {
-			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
-					    ~(sizeof(u32) - 1));
-			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
-			u32 y;
-
-			y = sizeof(u32) - off;
-			if (len > y)
-				len = y;
-			if (len + extra >= sizeof(u32)) {
-				data |= set_upper_bits(v, extra *
-						       BITS_PER_BYTE);
-				len = sizeof(u32) - extra;
-				if (len == length) {
-					last = data;
-					break;
-				}
-				__raw_writel(data, piobuf);
-				piobuf++;
-				extra = 0;
-				data = 0;
-			} else {
-				/* Clear unused upper bytes */
-				data |= clear_upper_bytes(v, len, extra);
-				if (len == length) {
-					last = data;
-					break;
-				}
-				extra += len;
-			}
-		} else if (extra) {
-			/* Source address is aligned. */
-			u32 *addr = (u32 *) ss->sge.vaddr;
-			int shift = extra * BITS_PER_BYTE;
-			int ushift = 32 - shift;
-			u32 l = len;
-
-			while (l >= sizeof(u32)) {
-				u32 v = *addr;
-
-				data |= set_upper_bits(v, shift);
-				__raw_writel(data, piobuf);
-				data = get_upper_bits(v, ushift);
-				piobuf++;
-				addr++;
-				l -= sizeof(u32);
-			}
-			/*
-			 * We still have 'extra' number of bytes leftover.
-			 */
-			if (l) {
-				u32 v = *addr;
-
-				if (l + extra >= sizeof(u32)) {
-					data |= set_upper_bits(v, shift);
-					len -= l + extra - sizeof(u32);
-					if (len == length) {
-						last = data;
-						break;
-					}
-					__raw_writel(data, piobuf);
-					piobuf++;
-					extra = 0;
-					data = 0;
-				} else {
-					/* Clear unused upper bytes */
-					data |= clear_upper_bytes(v, l,
-								  extra);
-					if (len == length) {
-						last = data;
-						break;
-					}
-					extra += l;
-				}
-			} else if (len == length) {
-				last = data;
-				break;
-			}
-		} else if (len == length) {
-			u32 w;
-
-			/*
-			 * Need to round up for the last dword in the
-			 * packet.
-			 */
-			w = (len + 3) >> 2;
-			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
-			piobuf += w - 1;
-			last = ((u32 *) ss->sge.vaddr)[w - 1];
-			break;
-		} else {
-			u32 w = len >> 2;
-
-			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
-			piobuf += w;
-
-			extra = len & (sizeof(u32) - 1);
-			if (extra) {
-				u32 v = ((u32 *) ss->sge.vaddr)[w];
-
-				/* Clear unused upper bytes */
-				data = clear_upper_bytes(v, extra, 0);
-			}
-		}
-		update_sge(ss, len);
-		length -= len;
-	}
-	/* Update address before sending packet. */
-	update_sge(ss, length);
-	if (flush_wc) {
-		/* must flush early everything before trigger word */
-		ipath_flush_wc();
-		__raw_writel(last, piobuf);
-		/* be sure trigger word is written */
-		ipath_flush_wc();
-	} else
-		__raw_writel(last, piobuf);
-}
-
-/*
- * Convert IB rate to delay multiplier.
- */
-unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
-{
-	switch (rate) {
-	case IB_RATE_2_5_GBPS: return 8;
-	case IB_RATE_5_GBPS:   return 4;
-	case IB_RATE_10_GBPS:  return 2;
-	case IB_RATE_20_GBPS:  return 1;
-	default:	       return 0;
-	}
-}
-
-/*
- * Convert delay multiplier to IB rate
- */
-static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
-{
-	switch (mult) {
-	case 8:  return IB_RATE_2_5_GBPS;
-	case 4:  return IB_RATE_5_GBPS;
-	case 2:  return IB_RATE_10_GBPS;
-	case 1:  return IB_RATE_20_GBPS;
-	default: return IB_RATE_PORT_CURRENT;
-	}
-}
-
-static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
-{
-	struct ipath_verbs_txreq *tx = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	if (!list_empty(&dev->txreq_free)) {
-		struct list_head *l = dev->txreq_free.next;
-
-		list_del(l);
-		tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
-	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-	return tx;
-}
-
-static inline void put_txreq(struct ipath_ibdev *dev,
-			     struct ipath_verbs_txreq *tx)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	list_add(&tx->txreq.list, &dev->txreq_free);
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-}
-
-static void sdma_complete(void *cookie, int status)
-{
-	struct ipath_verbs_txreq *tx = cookie;
-	struct ipath_qp *qp = tx->qp;
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	unsigned long flags;
-	enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
-		IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
-
-	if (atomic_dec_and_test(&qp->s_dma_busy)) {
-		spin_lock_irqsave(&qp->s_lock, flags);
-		if (tx->wqe)
-			ipath_send_complete(qp, tx->wqe, ibs);
-		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
-		     qp->s_last != qp->s_head) ||
-		    (qp->s_flags & IPATH_S_WAIT_DMA))
-			ipath_schedule_send(qp);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		wake_up(&qp->wait_dma);
-	} else if (tx->wqe) {
-		spin_lock_irqsave(&qp->s_lock, flags);
-		ipath_send_complete(qp, tx->wqe, ibs);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-	}
-
-	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
-		kfree(tx->txreq.map_addr);
-	put_txreq(dev, tx);
-
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-}
-
-static void decrement_dma_busy(struct ipath_qp *qp)
-{
-	unsigned long flags;
-
-	if (atomic_dec_and_test(&qp->s_dma_busy)) {
-		spin_lock_irqsave(&qp->s_lock, flags);
-		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
-		     qp->s_last != qp->s_head) ||
-		    (qp->s_flags & IPATH_S_WAIT_DMA))
-			ipath_schedule_send(qp);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		wake_up(&qp->wait_dma);
-	}
-}
-
-/*
- * Compute the number of clock cycles of delay before sending the next packet.
- * The multipliers reflect the number of clocks for the fastest rate so
- * one tick at 4xDDR is 8 ticks at 1xSDR.
- * If the destination port will take longer to receive a packet than
- * the outgoing link can send it, we need to delay sending the next packet
- * by the difference in time it takes the receiver to receive and the sender
- * to send this packet.
- * Note that this delay is always correct for UC and RC but not always
- * optimal for UD. For UD, the destination HCA can be different for each
- * packet, in which case, we could send packets to a different destination
- * while "waiting" for the delay. The overhead for doing this without
- * HW support is more than just paying the cost of delaying some packets
- * unnecessarily.
- */
-static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
-{
-	return (rcv_mult > snd_mult) ?
-		(plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
-}
-
-static int ipath_verbs_send_dma(struct ipath_qp *qp,
-				struct ipath_ib_header *hdr, u32 hdrwords,
-				struct ipath_sge_state *ss, u32 len,
-				u32 plen, u32 dwords)
-{
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ipath_devdata *dd = dev->dd;
-	struct ipath_verbs_txreq *tx;
-	u32 *piobuf;
-	u32 control;
-	u32 ndesc;
-	int ret;
-
-	tx = qp->s_tx;
-	if (tx) {
-		qp->s_tx = NULL;
-		/* resend previously constructed packet */
-		atomic_inc(&qp->s_dma_busy);
-		ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
-		if (ret) {
-			qp->s_tx = tx;
-			decrement_dma_busy(qp);
-		}
-		goto bail;
-	}
-
-	tx = get_txreq(dev);
-	if (!tx) {
-		ret = -EBUSY;
-		goto bail;
-	}
-
-	/*
-	 * Get the saved delay count we computed for the previous packet
-	 * and save the delay count for this packet to be used next time
-	 * we get here.
-	 */
-	control = qp->s_pkt_delay;
-	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
-
-	tx->qp = qp;
-	atomic_inc(&qp->refcount);
-	tx->wqe = qp->s_wqe;
-	tx->txreq.callback = sdma_complete;
-	tx->txreq.callback_cookie = tx;
-	tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
-		IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
-	if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
-		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
-
-	/* VL15 packets bypass credit check */
-	if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
-		control |= 1ULL << 31;
-		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
-	}
-
-	if (len) {
-		/*
-		 * Don't try to DMA if it takes more descriptors than
-		 * the queue holds.
-		 */
-		ndesc = ipath_count_sge(ss, len);
-		if (ndesc >= dd->ipath_sdma_descq_cnt)
-			ndesc = 0;
-	} else
-		ndesc = 1;
-	if (ndesc) {
-		tx->hdr.pbc[0] = cpu_to_le32(plen);
-		tx->hdr.pbc[1] = cpu_to_le32(control);
-		memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
-		tx->txreq.sg_count = ndesc;
-		tx->map_len = (hdrwords + 2) << 2;
-		tx->txreq.map_addr = &tx->hdr;
-		atomic_inc(&qp->s_dma_busy);
-		ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
-		if (ret) {
-			/* save ss and length in dwords */
-			tx->ss = ss;
-			tx->len = dwords;
-			qp->s_tx = tx;
-			decrement_dma_busy(qp);
-		}
-		goto bail;
-	}
-
-	/* Allocate a buffer and copy the header and payload to it. */
-	tx->map_len = (plen + 1) << 2;
-	piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
-	if (unlikely(piobuf == NULL)) {
-		ret = -EBUSY;
-		goto err_tx;
-	}
-	tx->txreq.map_addr = piobuf;
-	tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
-	tx->txreq.sg_count = 1;
-
-	*piobuf++ = (__force u32) cpu_to_le32(plen);
-	*piobuf++ = (__force u32) cpu_to_le32(control);
-	memcpy(piobuf, hdr, hdrwords << 2);
-	ipath_copy_from_sge(piobuf + hdrwords, ss, len);
-
-	atomic_inc(&qp->s_dma_busy);
-	ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
-	/*
-	 * If we couldn't queue the DMA request, save the info
-	 * and try again later rather than destroying the
-	 * buffer and undoing the side effects of the copy.
-	 */
-	if (ret) {
-		tx->ss = NULL;
-		tx->len = 0;
-		qp->s_tx = tx;
-		decrement_dma_busy(qp);
-	}
-	dev->n_unaligned++;
-	goto bail;
-
-err_tx:
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-	put_txreq(dev, tx);
-bail:
-	return ret;
-}
-
-static int ipath_verbs_send_pio(struct ipath_qp *qp,
-				struct ipath_ib_header *ibhdr, u32 hdrwords,
-				struct ipath_sge_state *ss, u32 len,
-				u32 plen, u32 dwords)
-{
-	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-	u32 *hdr = (u32 *) ibhdr;
-	u32 __iomem *piobuf;
-	unsigned flush_wc;
-	u32 control;
-	int ret;
-	unsigned long flags;
-
-	piobuf = ipath_getpiobuf(dd, plen, NULL);
-	if (unlikely(piobuf == NULL)) {
-		ret = -EBUSY;
-		goto bail;
-	}
-
-	/*
-	 * Get the saved delay count we computed for the previous packet
-	 * and save the delay count for this packet to be used next time
-	 * we get here.
-	 */
-	control = qp->s_pkt_delay;
-	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
-
-	/* VL15 packets bypass credit check */
-	if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
-		control |= 1ULL << 31;
-
-	/*
-	 * Write the length to the control qword plus any needed flags.
-	 * We have to flush after the PBC for correctness on some cpus
-	 * or WC buffer can be written out of order.
-	 */
-	writeq(((u64) control << 32) | plen, piobuf);
-	piobuf += 2;
-
-	flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
-	if (len == 0) {
-		/*
-		 * If there is just the header portion, must flush before
-		 * writing last word of header for correctness, and after
-		 * the last header word (trigger word).
-		 */
-		if (flush_wc) {
-			ipath_flush_wc();
-			__iowrite32_copy(piobuf, hdr, hdrwords - 1);
-			ipath_flush_wc();
-			__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
-			ipath_flush_wc();
-		} else
-			__iowrite32_copy(piobuf, hdr, hdrwords);
-		goto done;
-	}
-
-	if (flush_wc)
-		ipath_flush_wc();
-	__iowrite32_copy(piobuf, hdr, hdrwords);
-	piobuf += hdrwords;
-
-	/* The common case is aligned and contained in one segment. */
-	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
-		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
-		u32 *addr = (u32 *) ss->sge.vaddr;
-
-		/* Update address before sending packet. */
-		update_sge(ss, len);
-		if (flush_wc) {
-			__iowrite32_copy(piobuf, addr, dwords - 1);
-			/* must flush early everything before trigger word */
-			ipath_flush_wc();
-			__raw_writel(addr[dwords - 1], piobuf + dwords - 1);
-			/* be sure trigger word is written */
-			ipath_flush_wc();
-		} else
-			__iowrite32_copy(piobuf, addr, dwords);
-		goto done;
-	}
-	copy_io(piobuf, ss, len, flush_wc);
-done:
-	if (qp->s_wqe) {
-		spin_lock_irqsave(&qp->s_lock, flags);
-		ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-	}
-	ret = 0;
-bail:
-	return ret;
-}
-
-/**
- * ipath_verbs_send - send a packet
- * @qp: the QP to send on
- * @hdr: the packet header
- * @hdrwords: the number of 32-bit words in the header
- * @ss: the SGE to send
- * @len: the length of the packet in bytes
- */
-int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
-		     u32 hdrwords, struct ipath_sge_state *ss, u32 len)
-{
-	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
-	u32 plen;
-	int ret;
-	u32 dwords = (len + 3) >> 2;
-
-	/*
-	 * Calculate the send buffer trigger address.
-	 * The +1 counts for the pbc control dword following the pbc length.
-	 */
-	plen = hdrwords + dwords + 1;
-
-	/*
-	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
-	 * can defer SDMA restart until link goes ACTIVE without
-	 * worrying about just how we got there.
-	 */
-	if (qp->ibqp.qp_type == IB_QPT_SMI ||
-	    !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
-		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
-					   plen, dwords);
-	else
-		ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
-					   plen, dwords);
-
-	return ret;
-}
-
-int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-			    u64 *rwords, u64 *spkts, u64 *rpkts,
-			    u64 *xmit_wait)
-{
-	int ret;
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ret = -EINVAL;
-		goto bail;
-	}
-	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_get_counters - get various chip counters
- * @dd: the infinipath device
- * @cntrs: counters are placed here
- *
- * Return the counters needed by recv_pma_get_portcounters().
- */
-int ipath_get_counters(struct ipath_devdata *dd,
-		       struct ipath_verbs_counters *cntrs)
-{
-	struct ipath_cregs const *crp = dd->ipath_cregs;
-	int ret;
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ret = -EINVAL;
-		goto bail;
-	}
-	cntrs->symbol_error_counter =
-		ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
-	cntrs->link_error_recovery_counter =
-		ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
-	/*
-	 * The link downed counter counts when the other side downs the
-	 * connection.  We add in the number of times we downed the link
-	 * due to local link integrity errors to compensate.
-	 */
-	cntrs->link_downed_counter =
-		ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
-	cntrs->port_rcv_errors =
-		ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
-		ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
-		ipath_snap_cntr(dd, crp->cr_portovflcnt) +
-		ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
-		ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
-		ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
-		ipath_snap_cntr(dd, crp->cr_erricrccnt) +
-		ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
-		ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
-		ipath_snap_cntr(dd, crp->cr_badformatcnt) +
-		dd->ipath_rxfc_unsupvl_errs;
-	if (crp->cr_rxotherlocalphyerrcnt)
-		cntrs->port_rcv_errors +=
-			ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
-	if (crp->cr_rxvlerrcnt)
-		cntrs->port_rcv_errors +=
-			ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
-	cntrs->port_rcv_remphys_errors =
-		ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
-	cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
-	cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
-	cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
-	cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
-	cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
-	cntrs->local_link_integrity_errors =
-		crp->cr_locallinkintegrityerrcnt ?
-		ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
-		((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-		 dd->ipath_lli_errs : dd->ipath_lli_errors);
-	cntrs->excessive_buffer_overrun_errors =
-		crp->cr_excessbufferovflcnt ?
-		ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
-		dd->ipath_overrun_thresh_errs;
-	cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
-		ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_ib_piobufavail - callback when a PIO buffer is available
- * @arg: the device pointer
- *
- * This is called from ipath_intr() at interrupt level when a PIO buffer is
- * available after ipath_verbs_send() returned an error that no buffers were
- * available.  Return 1 if we consumed all the PIO buffers and we still have
- * QPs waiting for buffers (for now, just restart the send tasklet and
- * return zero).
- */
-int ipath_ib_piobufavail(struct ipath_ibdev *dev)
-{
-	struct list_head *list;
-	struct ipath_qp *qplist;
-	struct ipath_qp *qp;
-	unsigned long flags;
-
-	if (dev == NULL)
-		goto bail;
-
-	list = &dev->piowait;
-	qplist = NULL;
-
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	while (!list_empty(list)) {
-		qp = list_entry(list->next, struct ipath_qp, piowait);
-		list_del_init(&qp->piowait);
-		qp->pio_next = qplist;
-		qplist = qp;
-		atomic_inc(&qp->refcount);
-	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
-
-	while (qplist != NULL) {
-		qp = qplist;
-		qplist = qp->pio_next;
-
-		spin_lock_irqsave(&qp->s_lock, flags);
-		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
-			ipath_schedule_send(qp);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-
-		/* Notify ipath_destroy_qp() if it is waiting. */
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-
-bail:
-	return 0;
-}
-
-static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-			      struct ib_udata *uhw)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-
-	memset(props, 0, sizeof(*props));
-
-	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
-		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
-		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
-		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-	props->page_size_cap = PAGE_SIZE;
-	props->vendor_id =
-		IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
-	props->vendor_part_id = dev->dd->ipath_deviceid;
-	props->hw_ver = dev->dd->ipath_pcirev;
-
-	props->sys_image_guid = dev->sys_image_guid;
-
-	props->max_mr_size = ~0ull;
-	props->max_qp = ib_ipath_max_qps;
-	props->max_qp_wr = ib_ipath_max_qp_wrs;
-	props->max_sge = ib_ipath_max_sges;
-	props->max_sge_rd = ib_ipath_max_sges;
-	props->max_cq = ib_ipath_max_cqs;
-	props->max_ah = ib_ipath_max_ahs;
-	props->max_cqe = ib_ipath_max_cqes;
-	props->max_mr = dev->lk_table.max;
-	props->max_fmr = dev->lk_table.max;
-	props->max_map_per_fmr = 32767;
-	props->max_pd = ib_ipath_max_pds;
-	props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
-	props->max_qp_init_rd_atom = 255;
-	/* props->max_res_rd_atom */
-	props->max_srq = ib_ipath_max_srqs;
-	props->max_srq_wr = ib_ipath_max_srq_wrs;
-	props->max_srq_sge = ib_ipath_max_srq_sges;
-	/* props->local_ca_ack_delay */
-	props->atomic_cap = IB_ATOMIC_GLOB;
-	props->max_pkeys = ipath_get_npkeys(dev->dd);
-	props->max_mcast_grp = ib_ipath_max_mcast_grps;
-	props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
-	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
-		props->max_mcast_grp;
-
-	return 0;
-}
-
-const u8 ipath_cvt_physportstate[32] = {
-	[INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
-	[INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
-	[INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
-	[INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
-	[INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
-	[INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
-	[INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
-		IB_PHYSPORTSTATE_CFG_TRAIN,
-	[INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
-		IB_PHYSPORTSTATE_CFG_TRAIN,
-	[INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
-		IB_PHYSPORTSTATE_CFG_TRAIN,
-	[INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
-		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-	[INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
-		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-	[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
-		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
-	[0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
-};
-
-u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
-{
-	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-}
-
-static int ipath_query_port(struct ib_device *ibdev,
-			    u8 port, struct ib_port_attr *props)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_devdata *dd = dev->dd;
-	enum ib_mtu mtu;
-	u16 lid = dd->ipath_lid;
-	u64 ibcstat;
-
-	memset(props, 0, sizeof(*props));
-	props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
-	props->lmc = dd->ipath_lmc;
-	props->sm_lid = dev->sm_lid;
-	props->sm_sl = dev->sm_sl;
-	ibcstat = dd->ipath_lastibcstat;
-	/* map LinkState to IB portinfo values.  */
-	props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
-
-	/* See phys_state_show() */
-	props->phys_state = /* MEA: assumes shift == 0 */
-		ipath_cvt_physportstate[dd->ipath_lastibcstat &
-		dd->ibcs_lts_mask];
-	props->port_cap_flags = dev->port_cap_flags;
-	props->gid_tbl_len = 1;
-	props->max_msg_sz = 0x80000000;
-	props->pkey_tbl_len = ipath_get_npkeys(dd);
-	props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
-		dev->z_pkey_violations;
-	props->qkey_viol_cntr = dev->qkey_violations;
-	props->active_width = dd->ipath_link_width_active;
-	/* See rate_show() */
-	props->active_speed = dd->ipath_link_speed_active;
-	props->max_vl_num = 1;		/* VLCap = VL0 */
-	props->init_type_reply = 0;
-
-	props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
-	switch (dd->ipath_ibmtu) {
-	case 4096:
-		mtu = IB_MTU_4096;
-		break;
-	case 2048:
-		mtu = IB_MTU_2048;
-		break;
-	case 1024:
-		mtu = IB_MTU_1024;
-		break;
-	case 512:
-		mtu = IB_MTU_512;
-		break;
-	case 256:
-		mtu = IB_MTU_256;
-		break;
-	default:
-		mtu = IB_MTU_2048;
-	}
-	props->active_mtu = mtu;
-	props->subnet_timeout = dev->subnet_timeout;
-
-	return 0;
-}
-
-static int ipath_modify_device(struct ib_device *device,
-			       int device_modify_mask,
-			       struct ib_device_modify *device_modify)
-{
-	int ret;
-
-	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
-				   IB_DEVICE_MODIFY_NODE_DESC)) {
-		ret = -EOPNOTSUPP;
-		goto bail;
-	}
-
-	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
-		memcpy(device->node_desc, device_modify->node_desc, 64);
-
-	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
-		to_idev(device)->sys_image_guid =
-			cpu_to_be64(device_modify->sys_image_guid);
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-static int ipath_modify_port(struct ib_device *ibdev,
-			     u8 port, int port_modify_mask,
-			     struct ib_port_modify *props)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-
-	dev->port_cap_flags |= props->set_port_cap_mask;
-	dev->port_cap_flags &= ~props->clr_port_cap_mask;
-	if (port_modify_mask & IB_PORT_SHUTDOWN)
-		ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
-	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
-		dev->qkey_violations = 0;
-	return 0;
-}
-
-static int ipath_query_gid(struct ib_device *ibdev, u8 port,
-			   int index, union ib_gid *gid)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	int ret;
-
-	if (index >= 1) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	gid->global.subnet_prefix = dev->gid_prefix;
-	gid->global.interface_id = dev->dd->ipath_guid;
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
-				    struct ib_ucontext *context,
-				    struct ib_udata *udata)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_pd *pd;
-	struct ib_pd *ret;
-
-	/*
-	 * This is actually totally arbitrary.	Some correctness tests
-	 * assume there's a maximum number of PDs that can be allocated.
-	 * We don't actually have this limit, but we fail the test if
-	 * we allow allocations of more than we report for this value.
-	 */
-
-	pd = kmalloc(sizeof *pd, GFP_KERNEL);
-	if (!pd) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock(&dev->n_pds_lock);
-	if (dev->n_pds_allocated == ib_ipath_max_pds) {
-		spin_unlock(&dev->n_pds_lock);
-		kfree(pd);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_pds_allocated++;
-	spin_unlock(&dev->n_pds_lock);
-
-	/* ib_alloc_pd() will initialize pd->ibpd. */
-	pd->user = udata != NULL;
-
-	ret = &pd->ibpd;
-
-bail:
-	return ret;
-}
-
-static int ipath_dealloc_pd(struct ib_pd *ibpd)
-{
-	struct ipath_pd *pd = to_ipd(ibpd);
-	struct ipath_ibdev *dev = to_idev(ibpd->device);
-
-	spin_lock(&dev->n_pds_lock);
-	dev->n_pds_allocated--;
-	spin_unlock(&dev->n_pds_lock);
-
-	kfree(pd);
-
-	return 0;
-}
-
-/**
- * ipath_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
-				     struct ib_ah_attr *ah_attr)
-{
-	struct ipath_ah *ah;
-	struct ib_ah *ret;
-	struct ipath_ibdev *dev = to_idev(pd->device);
-	unsigned long flags;
-
-	/* A multicast address requires a GRH (see ch. 8.4.1). */
-	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
-	    ah_attr->dlid != IPATH_PERMISSIVE_LID &&
-	    !(ah_attr->ah_flags & IB_AH_GRH)) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	if (ah_attr->dlid == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	if (ah_attr->port_num < 1 ||
-	    ah_attr->port_num > pd->device->phys_port_cnt) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	ah = kmalloc(sizeof *ah, GFP_ATOMIC);
-	if (!ah) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
-		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-		kfree(ah);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_ahs_allocated++;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	/* ib_create_ah() will initialize ah->ibah. */
-	ah->attr = *ah_attr;
-	ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
-
-	ret = &ah->ibah;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int ipath_destroy_ah(struct ib_ah *ibah)
-{
-	struct ipath_ibdev *dev = to_idev(ibah->device);
-	struct ipath_ah *ah = to_iah(ibah);
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	dev->n_ahs_allocated--;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	kfree(ah);
-
-	return 0;
-}
-
-static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct ipath_ah *ah = to_iah(ibah);
-
-	*ah_attr = ah->attr;
-	ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
-
-	return 0;
-}
-
-/**
- * ipath_get_npkeys - return the size of the PKEY table for port 0
- * @dd: the infinipath device
- */
-unsigned ipath_get_npkeys(struct ipath_devdata *dd)
-{
-	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
-}
-
-/**
- * ipath_get_pkey - return the indexed PKEY from the port PKEY table
- * @dd: the infinipath device
- * @index: the PKEY index
- */
-unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
-{
-	unsigned ret;
-
-	/* always a kernel port, no locking needed */
-	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
-		ret = 0;
-	else
-		ret = dd->ipath_pd[0]->port_pkeys[index];
-
-	return ret;
-}
-
-static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-			    u16 *pkey)
-{
-	struct ipath_ibdev *dev = to_idev(ibdev);
-	int ret;
-
-	if (index >= ipath_get_npkeys(dev->dd)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	*pkey = ipath_get_pkey(dev->dd, index);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the InfiniPath driver
- */
-
-static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
-						struct ib_udata *udata)
-{
-	struct ipath_ucontext *context;
-	struct ib_ucontext *ret;
-
-	context = kmalloc(sizeof *context, GFP_KERNEL);
-	if (!context) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	ret = &context->ibucontext;
-
-bail:
-	return ret;
-}
-
-static int ipath_dealloc_ucontext(struct ib_ucontext *context)
-{
-	kfree(to_iucontext(context));
-	return 0;
-}
-
-static int ipath_verbs_register_sysfs(struct ib_device *dev);
-
-static void __verbs_timer(unsigned long arg)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *) arg;
-
-	/* Handle verbs layer timeouts. */
-	ipath_ib_timer(dd->verbs_dev);
-
-	mod_timer(&dd->verbs_timer, jiffies + 1);
-}
-
-static int enable_timer(struct ipath_devdata *dd)
-{
-	/*
-	 * Early chips had a design flaw where the chip and kernel idea
-	 * of the tail register don't always agree, and therefore we won't
-	 * get an interrupt on the next packet received.
-	 * If the board supports per packet receive interrupts, use it.
-	 * Otherwise, the timer function periodically checks for packets
-	 * to cover this case.
-	 * Either way, the timer is needed for verbs layer related
-	 * processing.
-	 */
-	if (dd->ipath_flags & IPATH_GPIO_INTR) {
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
-				 0x2074076542310ULL);
-		/* Enable GPIO bit 2 interrupt */
-		dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 dd->ipath_gpio_mask);
-	}
-
-	setup_timer(&dd->verbs_timer, __verbs_timer, (unsigned long)dd);
-
-	dd->verbs_timer.expires = jiffies + 1;
-	add_timer(&dd->verbs_timer);
-
-	return 0;
-}
-
-static int disable_timer(struct ipath_devdata *dd)
-{
-	/* Disable GPIO bit 2 interrupt */
-	if (dd->ipath_flags & IPATH_GPIO_INTR) {
-                /* Disable GPIO bit 2 interrupt */
-		dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 dd->ipath_gpio_mask);
-		/*
-		 * We might want to undo changes to debugportselect,
-		 * but how?
-		 */
-	}
-
-	del_timer_sync(&dd->verbs_timer);
-
-	return 0;
-}
-
-static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num,
-			        struct ib_port_immutable *immutable)
-{
-	struct ib_port_attr attr;
-	int err;
-
-	err = ipath_query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
-
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
-	return 0;
-}
-
-/**
- * ipath_register_ib_device - register our device with the infiniband core
- * @dd: the device data structure
- * Return the allocated ipath_ibdev pointer or NULL on error.
- */
-int ipath_register_ib_device(struct ipath_devdata *dd)
-{
-	struct ipath_verbs_counters cntrs;
-	struct ipath_ibdev *idev;
-	struct ib_device *dev;
-	struct ipath_verbs_txreq *tx;
-	unsigned i;
-	int ret;
-
-	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
-	if (idev == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	dev = &idev->ibdev;
-
-	if (dd->ipath_sdma_descq_cnt) {
-		tx = kmalloc_array(dd->ipath_sdma_descq_cnt, sizeof *tx,
-				   GFP_KERNEL);
-		if (tx == NULL) {
-			ret = -ENOMEM;
-			goto err_tx;
-		}
-	} else
-		tx = NULL;
-	idev->txreq_bufs = tx;
-
-	/* Only need to initialize non-zero fields. */
-	spin_lock_init(&idev->n_pds_lock);
-	spin_lock_init(&idev->n_ahs_lock);
-	spin_lock_init(&idev->n_cqs_lock);
-	spin_lock_init(&idev->n_qps_lock);
-	spin_lock_init(&idev->n_srqs_lock);
-	spin_lock_init(&idev->n_mcast_grps_lock);
-
-	spin_lock_init(&idev->qp_table.lock);
-	spin_lock_init(&idev->lk_table.lock);
-	idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
-	/* Set the prefix to the default value (see ch. 4.1.1) */
-	idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL);
-
-	ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
-	if (ret)
-		goto err_qp;
-
-	/*
-	 * The top ib_ipath_lkey_table_size bits are used to index the
-	 * table.  The lower 8 bits can be owned by the user (copied from
-	 * the LKEY).  The remaining bits act as a generation number or tag.
-	 */
-	idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
-	idev->lk_table.table = kcalloc(idev->lk_table.max,
-				       sizeof(*idev->lk_table.table),
-				       GFP_KERNEL);
-	if (idev->lk_table.table == NULL) {
-		ret = -ENOMEM;
-		goto err_lk;
-	}
-	INIT_LIST_HEAD(&idev->pending_mmaps);
-	spin_lock_init(&idev->pending_lock);
-	idev->mmap_offset = PAGE_SIZE;
-	spin_lock_init(&idev->mmap_offset_lock);
-	INIT_LIST_HEAD(&idev->pending[0]);
-	INIT_LIST_HEAD(&idev->pending[1]);
-	INIT_LIST_HEAD(&idev->pending[2]);
-	INIT_LIST_HEAD(&idev->piowait);
-	INIT_LIST_HEAD(&idev->rnrwait);
-	INIT_LIST_HEAD(&idev->txreq_free);
-	idev->pending_index = 0;
-	idev->port_cap_flags =
-		IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
-	if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
-		idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
-	idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
-	idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
-	idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
-	idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-	idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
-
-	/* Snapshot current HW counters to "clear" them. */
-	ipath_get_counters(dd, &cntrs);
-	idev->z_symbol_error_counter = cntrs.symbol_error_counter;
-	idev->z_link_error_recovery_counter =
-		cntrs.link_error_recovery_counter;
-	idev->z_link_downed_counter = cntrs.link_downed_counter;
-	idev->z_port_rcv_errors = cntrs.port_rcv_errors;
-	idev->z_port_rcv_remphys_errors =
-		cntrs.port_rcv_remphys_errors;
-	idev->z_port_xmit_discards = cntrs.port_xmit_discards;
-	idev->z_port_xmit_data = cntrs.port_xmit_data;
-	idev->z_port_rcv_data = cntrs.port_rcv_data;
-	idev->z_port_xmit_packets = cntrs.port_xmit_packets;
-	idev->z_port_rcv_packets = cntrs.port_rcv_packets;
-	idev->z_local_link_integrity_errors =
-		cntrs.local_link_integrity_errors;
-	idev->z_excessive_buffer_overrun_errors =
-		cntrs.excessive_buffer_overrun_errors;
-	idev->z_vl15_dropped = cntrs.vl15_dropped;
-
-	for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
-		list_add(&tx->txreq.list, &idev->txreq_free);
-
-	/*
-	 * The system image GUID is supposed to be the same for all
-	 * IB HCAs in a single system but since there can be other
-	 * device types in the system, we can't be sure this is unique.
-	 */
-	if (!sys_image_guid)
-		sys_image_guid = dd->ipath_guid;
-	idev->sys_image_guid = sys_image_guid;
-	idev->ib_unit = dd->ipath_unit;
-	idev->dd = dd;
-
-	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
-	dev->owner = THIS_MODULE;
-	dev->node_guid = dd->ipath_guid;
-	dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
-	dev->uverbs_cmd_mask =
-		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
-		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
-		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
-		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
-		(1ull << IB_USER_VERBS_CMD_CREATE_AH)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_AH)		|
-		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
-		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
-		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_POLL_CQ)		|
-		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)	|
-		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
-		(1ull << IB_USER_VERBS_CMD_POST_SEND)		|
-		(1ull << IB_USER_VERBS_CMD_POST_RECV)		|
-		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
-		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
-		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	dev->node_type = RDMA_NODE_IB_CA;
-	dev->phys_port_cnt = 1;
-	dev->num_comp_vectors = 1;
-	dev->dma_device = &dd->pcidev->dev;
-	dev->query_device = ipath_query_device;
-	dev->modify_device = ipath_modify_device;
-	dev->query_port = ipath_query_port;
-	dev->modify_port = ipath_modify_port;
-	dev->query_pkey = ipath_query_pkey;
-	dev->query_gid = ipath_query_gid;
-	dev->alloc_ucontext = ipath_alloc_ucontext;
-	dev->dealloc_ucontext = ipath_dealloc_ucontext;
-	dev->alloc_pd = ipath_alloc_pd;
-	dev->dealloc_pd = ipath_dealloc_pd;
-	dev->create_ah = ipath_create_ah;
-	dev->destroy_ah = ipath_destroy_ah;
-	dev->query_ah = ipath_query_ah;
-	dev->create_srq = ipath_create_srq;
-	dev->modify_srq = ipath_modify_srq;
-	dev->query_srq = ipath_query_srq;
-	dev->destroy_srq = ipath_destroy_srq;
-	dev->create_qp = ipath_create_qp;
-	dev->modify_qp = ipath_modify_qp;
-	dev->query_qp = ipath_query_qp;
-	dev->destroy_qp = ipath_destroy_qp;
-	dev->post_send = ipath_post_send;
-	dev->post_recv = ipath_post_receive;
-	dev->post_srq_recv = ipath_post_srq_receive;
-	dev->create_cq = ipath_create_cq;
-	dev->destroy_cq = ipath_destroy_cq;
-	dev->resize_cq = ipath_resize_cq;
-	dev->poll_cq = ipath_poll_cq;
-	dev->req_notify_cq = ipath_req_notify_cq;
-	dev->get_dma_mr = ipath_get_dma_mr;
-	dev->reg_user_mr = ipath_reg_user_mr;
-	dev->dereg_mr = ipath_dereg_mr;
-	dev->alloc_fmr = ipath_alloc_fmr;
-	dev->map_phys_fmr = ipath_map_phys_fmr;
-	dev->unmap_fmr = ipath_unmap_fmr;
-	dev->dealloc_fmr = ipath_dealloc_fmr;
-	dev->attach_mcast = ipath_multicast_attach;
-	dev->detach_mcast = ipath_multicast_detach;
-	dev->process_mad = ipath_process_mad;
-	dev->mmap = ipath_mmap;
-	dev->dma_ops = &ipath_dma_mapping_ops;
-	dev->get_port_immutable = ipath_port_immutable;
-
-	snprintf(dev->node_desc, sizeof(dev->node_desc),
-		 IPATH_IDSTR " %s", init_utsname()->nodename);
-
-	ret = ib_register_device(dev, NULL);
-	if (ret)
-		goto err_reg;
-
-	ret = ipath_verbs_register_sysfs(dev);
-	if (ret)
-		goto err_class;
-
-	enable_timer(dd);
-
-	goto bail;
-
-err_class:
-	ib_unregister_device(dev);
-err_reg:
-	kfree(idev->lk_table.table);
-err_lk:
-	kfree(idev->qp_table.table);
-err_qp:
-	kfree(idev->txreq_bufs);
-err_tx:
-	ib_dealloc_device(dev);
-	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-	idev = NULL;
-
-bail:
-	dd->verbs_dev = idev;
-	return ret;
-}
-
-void ipath_unregister_ib_device(struct ipath_ibdev *dev)
-{
-	struct ib_device *ibdev = &dev->ibdev;
-	u32 qps_inuse;
-
-	ib_unregister_device(ibdev);
-
-	disable_timer(dev->dd);
-
-	if (!list_empty(&dev->pending[0]) ||
-	    !list_empty(&dev->pending[1]) ||
-	    !list_empty(&dev->pending[2]))
-		ipath_dev_err(dev->dd, "pending list not empty!\n");
-	if (!list_empty(&dev->piowait))
-		ipath_dev_err(dev->dd, "piowait list not empty!\n");
-	if (!list_empty(&dev->rnrwait))
-		ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
-	if (!ipath_mcast_tree_empty())
-		ipath_dev_err(dev->dd, "multicast table memory leak!\n");
-	/*
-	 * Note that ipath_unregister_ib_device() can be called before all
-	 * the QPs are destroyed!
-	 */
-	qps_inuse = ipath_free_all_qps(&dev->qp_table);
-	if (qps_inuse)
-		ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
-			qps_inuse);
-	kfree(dev->qp_table.table);
-	kfree(dev->lk_table.table);
-	kfree(dev->txreq_bufs);
-	ib_dealloc_device(ibdev);
-}
-
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
-{
-	struct ipath_ibdev *dev =
-		container_of(device, struct ipath_ibdev, ibdev.dev);
-
-	return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
-}
-
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-			char *buf)
-{
-	struct ipath_ibdev *dev =
-		container_of(device, struct ipath_ibdev, ibdev.dev);
-	int ret;
-
-	ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
-	if (ret < 0)
-		goto bail;
-	strcat(buf, "\n");
-	ret = strlen(buf);
-
-bail:
-	return ret;
-}
-
-static ssize_t show_stats(struct device *device, struct device_attribute *attr,
-			  char *buf)
-{
-	struct ipath_ibdev *dev =
-		container_of(device, struct ipath_ibdev, ibdev.dev);
-	int i;
-	int len;
-
-	len = sprintf(buf,
-		      "RC resends  %d\n"
-		      "RC no QACK  %d\n"
-		      "RC ACKs     %d\n"
-		      "RC SEQ NAKs %d\n"
-		      "RC RDMA seq %d\n"
-		      "RC RNR NAKs %d\n"
-		      "RC OTH NAKs %d\n"
-		      "RC timeouts %d\n"
-		      "RC RDMA dup %d\n"
-		      "piobuf wait %d\n"
-		      "unaligned   %d\n"
-		      "PKT drops   %d\n"
-		      "WQE errs    %d\n",
-		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
-		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
-		      dev->n_other_naks, dev->n_timeouts,
-		      dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
-		      dev->n_pkt_drops, dev->n_wqe_errs);
-	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
-		const struct ipath_opcode_stats *si = &dev->opstats[i];
-
-		if (!si->n_packets && !si->n_bytes)
-			continue;
-		len += sprintf(buf + len, "%02x %llu/%llu\n", i,
-			       (unsigned long long) si->n_packets,
-			       (unsigned long long) si->n_bytes);
-	}
-	return len;
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
-
-static struct device_attribute *ipath_class_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id,
-	&dev_attr_stats
-};
-
-static int ipath_verbs_register_sysfs(struct ib_device *dev)
-{
-	int i;
-	int ret;
-
-	for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
-		ret = device_create_file(&dev->dev,
-				       ipath_class_attributes[i]);
-		if (ret)
-			goto bail;
-	}
-	return 0;
-bail:
-	for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
-		device_remove_file(&dev->dev, ipath_class_attributes[i]);
-	return ret;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
deleted file mode 100644
index 6c70a89667a9..000000000000
--- a/drivers/staging/rdma/ipath/ipath_verbs.h
+++ /dev/null
@@ -1,941 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef IPATH_VERBS_H
-#define IPATH_VERBS_H
-
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/kref.h>
-#include <rdma/ib_pack.h>
-#include <rdma/ib_user_verbs.h>
-
-#include "ipath_kernel.h"
-
-#define IPATH_MAX_RDMA_ATOMIC	4
-
-#define QPN_MAX                 (1 << 24)
-#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
-
-/*
- * Increment this value if any changes that break userspace ABI
- * compatibility are made.
- */
-#define IPATH_UVERBS_ABI_VERSION       2
-
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE	(IB_CQ_NEXT_COMP + 1)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK			0x20
-#define IB_NAK_PSN_ERROR		0x60
-#define IB_NAK_INVALID_REQUEST		0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR	0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST	0x64
-
-/* Flags for checking QP state (see ib_ipath_state_ops[]) */
-#define IPATH_POST_SEND_OK		0x01
-#define IPATH_POST_RECV_OK		0x02
-#define IPATH_PROCESS_RECV_OK		0x04
-#define IPATH_PROCESS_SEND_OK		0x08
-#define IPATH_PROCESS_NEXT_SEND_OK	0x10
-#define IPATH_FLUSH_SEND		0x20
-#define IPATH_FLUSH_RECV		0x40
-#define IPATH_PROCESS_OR_FLUSH_SEND \
-	(IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
-
-/* IB Performance Manager status values */
-#define IB_PMA_SAMPLE_STATUS_DONE	0x00
-#define IB_PMA_SAMPLE_STATUS_STARTED	0x01
-#define IB_PMA_SAMPLE_STATUS_RUNNING	0x02
-
-/* Mandatory IB performance counter select values. */
-#define IB_PMA_PORT_XMIT_DATA	cpu_to_be16(0x0001)
-#define IB_PMA_PORT_RCV_DATA	cpu_to_be16(0x0002)
-#define IB_PMA_PORT_XMIT_PKTS	cpu_to_be16(0x0003)
-#define IB_PMA_PORT_RCV_PKTS	cpu_to_be16(0x0004)
-#define IB_PMA_PORT_XMIT_WAIT	cpu_to_be16(0x0005)
-
-struct ib_reth {
-	__be64 vaddr;
-	__be32 rkey;
-	__be32 length;
-} __attribute__ ((packed));
-
-struct ib_atomic_eth {
-	__be32 vaddr[2];	/* unaligned so access as 2 32-bit words */
-	__be32 rkey;
-	__be64 swap_data;
-	__be64 compare_data;
-} __attribute__ ((packed));
-
-struct ipath_other_headers {
-	__be32 bth[3];
-	union {
-		struct {
-			__be32 deth[2];
-			__be32 imm_data;
-		} ud;
-		struct {
-			struct ib_reth reth;
-			__be32 imm_data;
-		} rc;
-		struct {
-			__be32 aeth;
-			__be32 atomic_ack_eth[2];
-		} at;
-		__be32 imm_data;
-		__be32 aeth;
-		struct ib_atomic_eth atomic_eth;
-	} u;
-} __attribute__ ((packed));
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data).  Only the first 56 bytes of the IB header
- * will be in the eager header buffer.  The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct ipath_ib_header {
-	__be16 lrh[4];
-	union {
-		struct {
-			struct ib_grh grh;
-			struct ipath_other_headers oth;
-		} l;
-		struct ipath_other_headers oth;
-	} u;
-} __attribute__ ((packed));
-
-struct ipath_pio_header {
-	__le32 pbc[2];
-	struct ipath_ib_header hdr;
-} __attribute__ ((packed));
-
-/*
- * There is one struct ipath_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct ipath_mcast_qp.
- */
-struct ipath_mcast_qp {
-	struct list_head list;
-	struct ipath_qp *qp;
-};
-
-struct ipath_mcast {
-	struct rb_node rb_node;
-	union ib_gid mgid;
-	struct list_head qp_list;
-	wait_queue_head_t wait;
-	atomic_t refcount;
-	int n_attached;
-};
-
-/* Protection domain */
-struct ipath_pd {
-	struct ib_pd ibpd;
-	int user;		/* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct ipath_ah {
-	struct ib_ah ibah;
-	struct ib_ah_attr attr;
-};
-
-/*
- * This structure is used by ipath_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct ipath_mmap_info {
-	struct list_head pending_mmaps;
-	struct ib_ucontext *context;
-	void *obj;
-	__u64 offset;
-	struct kref ref;
-	unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct ipath_cq_wc {
-	u32 head;		/* index of next entry to fill */
-	u32 tail;		/* index of next ib_poll_cq() entry */
-	union {
-		/* these are actually size ibcq.cqe + 1 */
-		struct ib_uverbs_wc uqueue[0];
-		struct ib_wc kqueue[0];
-	};
-};
-
-/*
- * The completion queue structure.
- */
-struct ipath_cq {
-	struct ib_cq ibcq;
-	struct tasklet_struct comptask;
-	spinlock_t lock;
-	u8 notify;
-	u8 triggered;
-	struct ipath_cq_wc *queue;
-	struct ipath_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
-	struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
-	struct ib_pd *pd;	/* shares refcnt of ibmr.pd */
-	u64 user_base;		/* User's address for this region */
-	u64 iova;		/* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;		/* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;		/* number of ipath_segs in all the arrays */
-	u32 mapsz;		/* size of the map array */
-	struct ipath_segarray *map[0];	/* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
-	struct ipath_mregion *mr;
-	void *vaddr;		/* kernel virtual address of segment */
-	u32 sge_length;		/* length of the SGE */
-	u32 length;		/* remaining length of the segment */
-	u16 m;			/* current index: mr->map[m] */
-	u16 n;			/* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct ipath_mr {
-	struct ib_mr ibmr;
-	struct ib_umem *umem;
-	struct ipath_mregion mr;	/* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct ipath_swqe {
-	union {
-		struct ib_send_wr wr;   /* don't use wr.sg_list */
-		struct ib_ud_wr ud_wr;
-		struct ib_rdma_wr rdma_wr;
-		struct ib_atomic_wr atomic_wr;
-	};
-
-	u32 psn;		/* first packet sequence number */
-	u32 lpsn;		/* last packet sequence number */
-	u32 ssn;		/* send sequence number */
-	u32 length;		/* total length of data in sg_list */
-	struct ipath_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct ipath_rwqe {
-	u64 wr_id;
-	u8 num_sge;
-	struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
- */
-struct ipath_rwq {
-	u32 head;		/* new work requests posted to the head */
-	u32 tail;		/* receives pull requests from here. */
-	struct ipath_rwqe wq[0];
-};
-
-struct ipath_rq {
-	struct ipath_rwq *wq;
-	spinlock_t lock;
-	u32 size;		/* size of RWQE array */
-	u8 max_sge;
-};
-
-struct ipath_srq {
-	struct ib_srq ibsrq;
-	struct ipath_rq rq;
-	struct ipath_mmap_info *ip;
-	/* send signal when number of RWQEs < limit */
-	u32 limit;
-};
-
-struct ipath_sge_state {
-	struct ipath_sge *sg_list;      /* next SGE to be used if any */
-	struct ipath_sge sge;   /* progress state for the current SGE */
-	u8 num_sge;
-	u8 static_rate;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct ipath_ack_entry {
-	u8 opcode;
-	u8 sent;
-	u32 psn;
-	union {
-		struct ipath_sge_state rdma_sge;
-		u64 atomic_data;
-	};
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct ipath_qp {
-	struct ib_qp ibqp;
-	struct ipath_qp *next;		/* link list for QPN hash table */
-	struct ipath_qp *timer_next;	/* link list for ipath_ib_timer() */
-	struct ipath_qp *pio_next;	/* link for ipath_ib_piobufavail() */
-	struct list_head piowait;	/* link for wait PIO buf */
-	struct list_head timerwait;	/* link for waiting for timeouts */
-	struct ib_ah_attr remote_ah_attr;
-	struct ipath_ib_header s_hdr;	/* next packet header to send */
-	atomic_t refcount;
-	wait_queue_head_t wait;
-	wait_queue_head_t wait_dma;
-	struct tasklet_struct s_task;
-	struct ipath_mmap_info *ip;
-	struct ipath_sge_state *s_cur_sge;
-	struct ipath_verbs_txreq *s_tx;
-	struct ipath_sge_state s_sge;	/* current send request data */
-	struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
-	struct ipath_sge_state s_ack_rdma_sge;
-	struct ipath_sge_state s_rdma_read_sge;
-	struct ipath_sge_state r_sge;	/* current receive data */
-	spinlock_t s_lock;
-	atomic_t s_dma_busy;
-	u16 s_pkt_delay;
-	u16 s_hdrwords;		/* size of s_hdr in 32 bit words */
-	u32 s_cur_size;		/* size of send packet in bytes */
-	u32 s_len;		/* total length of s_sge */
-	u32 s_rdma_read_len;	/* total length of s_rdma_read_sge */
-	u32 s_next_psn;		/* PSN for next request */
-	u32 s_last_psn;		/* last response PSN processed */
-	u32 s_psn;		/* current packet sequence number */
-	u32 s_ack_rdma_psn;	/* PSN for sending RDMA read responses */
-	u32 s_ack_psn;		/* PSN for acking sends and RDMA writes */
-	u32 s_rnr_timeout;	/* number of milliseconds for RNR timeout */
-	u32 r_ack_psn;		/* PSN for next ACK or atomic ACK */
-	u64 r_wr_id;		/* ID for current receive WQE */
-	unsigned long r_aflags;
-	u32 r_len;		/* total length of r_sge */
-	u32 r_rcv_len;		/* receive data len processed */
-	u32 r_psn;		/* expected rcv packet sequence number */
-	u32 r_msn;		/* message sequence number */
-	u8 state;		/* QP state */
-	u8 s_state;		/* opcode of last packet sent */
-	u8 s_ack_state;		/* opcode of packet to ACK */
-	u8 s_nak_state;		/* non-zero if NAK is pending */
-	u8 r_state;		/* opcode of last packet received */
-	u8 r_nak_state;		/* non-zero if NAK is pending */
-	u8 r_min_rnr_timer;	/* retry timeout value for RNR NAKs */
-	u8 r_flags;
-	u8 r_max_rd_atomic;	/* max number of RDMA read/atomic to receive */
-	u8 r_head_ack_queue;	/* index into s_ack_queue[] */
-	u8 qp_access_flags;
-	u8 s_max_sge;		/* size of s_wq->sg_list */
-	u8 s_retry_cnt;		/* number of times to retry */
-	u8 s_rnr_retry_cnt;
-	u8 s_retry;		/* requester retry counter */
-	u8 s_rnr_retry;		/* requester RNR retry counter */
-	u8 s_pkey_index;	/* PKEY index to use */
-	u8 s_max_rd_atomic;	/* max number of RDMA read/atomic to send */
-	u8 s_num_rd_atomic;	/* number of RDMA read/atomic pending */
-	u8 s_tail_ack_queue;	/* index into s_ack_queue[] */
-	u8 s_flags;
-	u8 s_dmult;
-	u8 s_draining;
-	u8 timeout;		/* Timeout for this QP */
-	enum ib_mtu path_mtu;
-	u32 remote_qpn;
-	u32 qkey;		/* QKEY for this QP (for UD or RD) */
-	u32 s_size;		/* send work queue size */
-	u32 s_head;		/* new entries added here */
-	u32 s_tail;		/* next entry to process */
-	u32 s_cur;		/* current work queue entry */
-	u32 s_last;		/* last un-ACK'ed entry */
-	u32 s_ssn;		/* SSN of tail entry */
-	u32 s_lsn;		/* limit sequence number (credit) */
-	struct ipath_swqe *s_wq;	/* send work queue */
-	struct ipath_swqe *s_wqe;
-	struct ipath_sge *r_ud_sg_list;
-	struct ipath_rq r_rq;		/* receive work queue */
-	struct ipath_sge r_sg_list[0];	/* verified SGEs */
-};
-
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define IPATH_R_WRID_VALID	0
-
-/*
- * Bit definitions for r_flags.
- */
-#define IPATH_R_REUSE_SGE	0x01
-#define IPATH_R_RDMAR_SEQ	0x02
-
-/*
- * Bit definitions for s_flags.
- *
- * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
- *			   before processing the next SWQE
- * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
- *			   before processing the next SWQE
- * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
- * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- *		      next send completion entry not via send DMA.
- */
-#define IPATH_S_SIGNAL_REQ_WR	0x01
-#define IPATH_S_FENCE_PENDING	0x02
-#define IPATH_S_RDMAR_PENDING	0x04
-#define IPATH_S_ACK_PENDING	0x08
-#define IPATH_S_BUSY		0x10
-#define IPATH_S_WAITING		0x20
-#define IPATH_S_WAIT_SSN_CREDIT	0x40
-#define IPATH_S_WAIT_DMA	0x80
-
-#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
-	IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
-
-#define IPATH_PSN_CREDIT	512
-
-/*
- * Since struct ipath_swqe is not a fixed size, we can't simply index into
- * struct ipath_qp.s_wq.  This function does the array index computation.
- */
-static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
-					      unsigned n)
-{
-	return (struct ipath_swqe *)((char *)qp->s_wq +
-				     (sizeof(struct ipath_swqe) +
-				      qp->s_max_sge *
-				      sizeof(struct ipath_sge)) * n);
-}
-
-/*
- * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rwq.wq.  This function does the array index computation.
- */
-static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
-					      unsigned n)
-{
-	return (struct ipath_rwqe *)
-		((char *) rq->wq->wq +
-		 (sizeof(struct ipath_rwqe) +
-		  rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-/*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
-	atomic_t n_free;
-	void *page;
-};
-
-struct ipath_qp_table {
-	spinlock_t lock;
-	u32 last;		/* last QP number allocated */
-	u32 max;		/* size of the hash table */
-	u32 nmaps;		/* size of the map table */
-	struct ipath_qp **table;
-	/* bit map of free numbers */
-	struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-struct ipath_lkey_table {
-	spinlock_t lock;
-	u32 next;		/* next unused index (speeds search) */
-	u32 gen;		/* generation count */
-	u32 max;		/* size of the table */
-	struct ipath_mregion **table;
-};
-
-struct ipath_opcode_stats {
-	u64 n_packets;		/* number of packets */
-	u64 n_bytes;		/* total number of bytes */
-};
-
-struct ipath_ibdev {
-	struct ib_device ibdev;
-	struct ipath_devdata *dd;
-	struct list_head pending_mmaps;
-	spinlock_t mmap_offset_lock;
-	u32 mmap_offset;
-	int ib_unit;		/* This is the device number */
-	u16 sm_lid;		/* in host order */
-	u8 sm_sl;
-	u8 mkeyprot;
-	/* non-zero when timer is set */
-	unsigned long mkey_lease_timeout;
-
-	/* The following fields are really per port. */
-	struct ipath_qp_table qp_table;
-	struct ipath_lkey_table lk_table;
-	struct list_head pending[3];	/* FIFO of QPs waiting for ACKs */
-	struct list_head piowait;	/* list for wait PIO buf */
-	struct list_head txreq_free;
-	void *txreq_bufs;
-	/* list of QPs waiting for RNR timer */
-	struct list_head rnrwait;
-	spinlock_t pending_lock;
-	__be64 sys_image_guid;	/* in network order */
-	__be64 gid_prefix;	/* in network order */
-	__be64 mkey;
-
-	u32 n_pds_allocated;	/* number of PDs allocated for device */
-	spinlock_t n_pds_lock;
-	u32 n_ahs_allocated;	/* number of AHs allocated for device */
-	spinlock_t n_ahs_lock;
-	u32 n_cqs_allocated;	/* number of CQs allocated for device */
-	spinlock_t n_cqs_lock;
-	u32 n_qps_allocated;	/* number of QPs allocated for device */
-	spinlock_t n_qps_lock;
-	u32 n_srqs_allocated;	/* number of SRQs allocated for device */
-	spinlock_t n_srqs_lock;
-	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-	spinlock_t n_mcast_grps_lock;
-
-	u64 ipath_sword;	/* total dwords sent (sample result) */
-	u64 ipath_rword;	/* total dwords received (sample result) */
-	u64 ipath_spkts;	/* total packets sent (sample result) */
-	u64 ipath_rpkts;	/* total packets received (sample result) */
-	/* # of ticks no data sent (sample result) */
-	u64 ipath_xmit_wait;
-	u64 rcv_errors;		/* # of packets with SW detected rcv errs */
-	u64 n_unicast_xmit;	/* total unicast packets sent */
-	u64 n_unicast_rcv;	/* total unicast packets received */
-	u64 n_multicast_xmit;	/* total multicast packets sent */
-	u64 n_multicast_rcv;	/* total multicast packets received */
-	u64 z_symbol_error_counter;		/* starting count for PMA */
-	u64 z_link_error_recovery_counter;	/* starting count for PMA */
-	u64 z_link_downed_counter;		/* starting count for PMA */
-	u64 z_port_rcv_errors;			/* starting count for PMA */
-	u64 z_port_rcv_remphys_errors;		/* starting count for PMA */
-	u64 z_port_xmit_discards;		/* starting count for PMA */
-	u64 z_port_xmit_data;			/* starting count for PMA */
-	u64 z_port_rcv_data;			/* starting count for PMA */
-	u64 z_port_xmit_packets;		/* starting count for PMA */
-	u64 z_port_rcv_packets;			/* starting count for PMA */
-	u32 z_pkey_violations;			/* starting count for PMA */
-	u32 z_local_link_integrity_errors;	/* starting count for PMA */
-	u32 z_excessive_buffer_overrun_errors;	/* starting count for PMA */
-	u32 z_vl15_dropped;			/* starting count for PMA */
-	u32 n_rc_resends;
-	u32 n_rc_acks;
-	u32 n_rc_qacks;
-	u32 n_seq_naks;
-	u32 n_rdma_seq;
-	u32 n_rnr_naks;
-	u32 n_other_naks;
-	u32 n_timeouts;
-	u32 n_pkt_drops;
-	u32 n_vl15_dropped;
-	u32 n_wqe_errs;
-	u32 n_rdma_dup_busy;
-	u32 n_piowait;
-	u32 n_unaligned;
-	u32 port_cap_flags;
-	u32 pma_sample_start;
-	u32 pma_sample_interval;
-	__be16 pma_counter_select[5];
-	u16 pma_tag;
-	u16 qkey_violations;
-	u16 mkey_violations;
-	u16 mkey_lease_period;
-	u16 pending_index;	/* which pending queue is active */
-	u8 pma_sample_status;
-	u8 subnet_timeout;
-	u8 vl_high_limit;
-	struct ipath_opcode_stats opstats[128];
-};
-
-struct ipath_verbs_counters {
-	u64 symbol_error_counter;
-	u64 link_error_recovery_counter;
-	u64 link_downed_counter;
-	u64 port_rcv_errors;
-	u64 port_rcv_remphys_errors;
-	u64 port_xmit_discards;
-	u64 port_xmit_data;
-	u64 port_rcv_data;
-	u64 port_xmit_packets;
-	u64 port_rcv_packets;
-	u32 local_link_integrity_errors;
-	u32 excessive_buffer_overrun_errors;
-	u32 vl15_dropped;
-};
-
-struct ipath_verbs_txreq {
-	struct ipath_qp         *qp;
-	struct ipath_swqe       *wqe;
-	u32                      map_len;
-	u32                      len;
-	struct ipath_sge_state  *ss;
-	struct ipath_pio_header  hdr;
-	struct ipath_sdma_txreq  txreq;
-};
-
-static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct ipath_mr, ibmr);
-}
-
-static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct ipath_pd, ibpd);
-}
-
-static inline struct ipath_ah *to_iah(struct ib_ah *ibah)
-{
-	return container_of(ibah, struct ipath_ah, ibah);
-}
-
-static inline struct ipath_cq *to_icq(struct ib_cq *ibcq)
-{
-	return container_of(ibcq, struct ipath_cq, ibcq);
-}
-
-static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq)
-{
-	return container_of(ibsrq, struct ipath_srq, ibsrq);
-}
-
-static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp)
-{
-	return container_of(ibqp, struct ipath_qp, ibqp);
-}
-
-static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
-{
-	return container_of(ibdev, struct ipath_ibdev, ibdev);
-}
-
-/*
- * This must be called with s_lock held.
- */
-static inline void ipath_schedule_send(struct ipath_qp *qp)
-{
-	if (qp->s_flags & IPATH_S_ANY_WAIT)
-		qp->s_flags &= ~IPATH_S_ANY_WAIT;
-	if (!(qp->s_flags & IPATH_S_BUSY))
-		tasklet_hi_schedule(&qp->s_task);
-}
-
-int ipath_process_mad(struct ib_device *ibdev,
-		      int mad_flags,
-		      u8 port_num,
-		      const struct ib_wc *in_wc,
-		      const struct ib_grh *in_grh,
-		      const struct ib_mad_hdr *in, size_t in_mad_size,
-		      struct ib_mad_hdr *out, size_t *out_mad_size,
-		      u16 *out_mad_pkey_index);
-
-/*
- * Compare the lower 24 bits of the two values.
- * Returns an integer <, ==, or > than zero.
- */
-static inline int ipath_cmp24(u32 a, u32 b)
-{
-	return (((int) a) - ((int) b)) << 8;
-}
-
-struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
-
-int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-			    u64 *rwords, u64 *spkts, u64 *rpkts,
-			    u64 *xmit_wait);
-
-int ipath_get_counters(struct ipath_devdata *dd,
-		       struct ipath_verbs_counters *cntrs);
-
-int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int ipath_mcast_tree_empty(void);
-
-__be32 ipath_compute_aeth(struct ipath_qp *qp);
-
-struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn);
-
-struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
-			      struct ib_qp_init_attr *init_attr,
-			      struct ib_udata *udata);
-
-int ipath_destroy_qp(struct ib_qp *ibqp);
-
-int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
-
-int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask, struct ib_udata *udata);
-
-int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		   int attr_mask, struct ib_qp_init_attr *init_attr);
-
-unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
-
-int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
-
-void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
-
-unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
-
-int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
-		     u32 hdrwords, struct ipath_sge_state *ss, u32 len);
-
-void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
-
-void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
-
-void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
-
-void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
-
-int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
-
-void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
-		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-
-int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
-		     struct ipath_mregion *mr);
-
-void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
-
-int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
-		  struct ib_sge *sge, int acc);
-
-int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
-		  u32 len, u64 vaddr, u32 rkey, int acc);
-
-int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			   struct ib_recv_wr **bad_wr);
-
-struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
-				struct ib_srq_init_attr *srq_init_attr,
-				struct ib_udata *udata);
-
-int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask,
-		     struct ib_udata *udata);
-
-int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int ipath_destroy_srq(struct ib_srq *ibsrq);
-
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
-int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
-			      const struct ib_cq_init_attr *attr,
-			      struct ib_ucontext *context,
-			      struct ib_udata *udata);
-
-int ipath_destroy_cq(struct ib_cq *ibcq);
-
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
-
-int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-				u64 virt_addr, int mr_access_flags,
-				struct ib_udata *udata);
-
-int ipath_dereg_mr(struct ib_mr *ibmr);
-
-struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			       struct ib_fmr_attr *fmr_attr);
-
-int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
-		       int list_len, u64 iova);
-
-int ipath_unmap_fmr(struct list_head *fmr_list);
-
-int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
-
-void ipath_release_mmap_info(struct kref *ref);
-
-struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
-					       u32 size,
-					       struct ib_ucontext *context,
-					       void *obj);
-
-void ipath_update_mmap_info(struct ipath_ibdev *dev,
-			    struct ipath_mmap_info *ip,
-			    u32 size, void *obj);
-
-int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-void ipath_insert_rnr_queue(struct ipath_qp *qp);
-
-int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
-		   u32 *lengthp, struct ipath_sge_state *ss);
-
-int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
-
-u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
-		   struct ib_global_route *grh, u32 hwords, u32 nwords);
-
-void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
-			   struct ipath_other_headers *ohdr,
-			   u32 bth0, u32 bth2);
-
-void ipath_do_send(unsigned long data);
-
-void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
-			 enum ib_wc_status status);
-
-int ipath_make_rc_req(struct ipath_qp *qp);
-
-int ipath_make_uc_req(struct ipath_qp *qp);
-
-int ipath_make_ud_req(struct ipath_qp *qp);
-
-int ipath_register_ib_device(struct ipath_devdata *);
-
-void ipath_unregister_ib_device(struct ipath_ibdev *);
-
-void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
-
-int ipath_ib_piobufavail(struct ipath_ibdev *);
-
-unsigned ipath_get_npkeys(struct ipath_devdata *);
-
-u32 ipath_get_cr_errpkey(struct ipath_devdata *);
-
-unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
-
-extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
-
-/*
- * Below converts HCA-specific LinkTrainingState to IB PhysPortState
- * values.
- */
-extern const u8 ipath_cvt_physportstate[];
-#define IB_PHYSPORTSTATE_SLEEP 1
-#define IB_PHYSPORTSTATE_POLL 2
-#define IB_PHYSPORTSTATE_DISABLED 3
-#define IB_PHYSPORTSTATE_CFG_TRAIN 4
-#define IB_PHYSPORTSTATE_LINKUP 5
-#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
-
-extern const int ib_ipath_state_ops[];
-
-extern unsigned int ib_ipath_lkey_table_size;
-
-extern unsigned int ib_ipath_max_cqes;
-
-extern unsigned int ib_ipath_max_cqs;
-
-extern unsigned int ib_ipath_max_qp_wrs;
-
-extern unsigned int ib_ipath_max_qps;
-
-extern unsigned int ib_ipath_max_sges;
-
-extern unsigned int ib_ipath_max_mcast_grps;
-
-extern unsigned int ib_ipath_max_mcast_qp_attached;
-
-extern unsigned int ib_ipath_max_srqs;
-
-extern unsigned int ib_ipath_max_srq_sges;
-
-extern unsigned int ib_ipath_max_srq_wrs;
-
-extern const u32 ib_ipath_rnr_table[];
-
-extern struct ib_dma_mapping_ops ipath_dma_mapping_ops;
-
-#endif				/* IPATH_VERBS_H */
diff --git a/drivers/staging/rdma/ipath/ipath_verbs_mcast.c b/drivers/staging/rdma/ipath/ipath_verbs_mcast.c
deleted file mode 100644
index 72d476fa5b8f..000000000000
--- a/drivers/staging/rdma/ipath/ipath_verbs_mcast.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/rculist.h>
-#include <linux/slab.h>
-
-#include "ipath_verbs.h"
-
-/*
- * Global table of GID to attached QPs.
- * The table is global to all ipath devices since a send from one QP/device
- * needs to be locally routed to any locally attached QPs on the same
- * or different device.
- */
-static struct rb_root mcast_tree;
-static DEFINE_SPINLOCK(mcast_lock);
-
-/**
- * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
- * @qp: the QP to link
- */
-static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp)
-{
-	struct ipath_mcast_qp *mqp;
-
-	mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
-	if (!mqp)
-		goto bail;
-
-	mqp->qp = qp;
-	atomic_inc(&qp->refcount);
-
-bail:
-	return mqp;
-}
-
-static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp)
-{
-	struct ipath_qp *qp = mqp->qp;
-
-	/* Notify ipath_destroy_qp() if it is waiting. */
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-
-	kfree(mqp);
-}
-
-/**
- * ipath_mcast_alloc - allocate the multicast GID structure
- * @mgid: the multicast GID
- *
- * A list of QPs will be attached to this structure.
- */
-static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
-{
-	struct ipath_mcast *mcast;
-
-	mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
-	if (!mcast)
-		goto bail;
-
-	mcast->mgid = *mgid;
-	INIT_LIST_HEAD(&mcast->qp_list);
-	init_waitqueue_head(&mcast->wait);
-	atomic_set(&mcast->refcount, 0);
-	mcast->n_attached = 0;
-
-bail:
-	return mcast;
-}
-
-static void ipath_mcast_free(struct ipath_mcast *mcast)
-{
-	struct ipath_mcast_qp *p, *tmp;
-
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-		ipath_mcast_qp_free(p);
-
-	kfree(mcast);
-}
-
-/**
- * ipath_mcast_find - search the global table for the given multicast GID
- * @mgid: the multicast GID to search for
- *
- * Returns NULL if not found.
- *
- * The caller is responsible for decrementing the reference count if found.
- */
-struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid)
-{
-	struct rb_node *n;
-	unsigned long flags;
-	struct ipath_mcast *mcast;
-
-	spin_lock_irqsave(&mcast_lock, flags);
-	n = mcast_tree.rb_node;
-	while (n) {
-		int ret;
-
-		mcast = rb_entry(n, struct ipath_mcast, rb_node);
-
-		ret = memcmp(mgid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else {
-			atomic_inc(&mcast->refcount);
-			spin_unlock_irqrestore(&mcast_lock, flags);
-			goto bail;
-		}
-	}
-	spin_unlock_irqrestore(&mcast_lock, flags);
-
-	mcast = NULL;
-
-bail:
-	return mcast;
-}
-
-/**
- * ipath_mcast_add - insert mcast GID into table and attach QP struct
- * @mcast: the mcast GID table
- * @mqp: the QP to attach
- *
- * Return zero if both were added.  Return EEXIST if the GID was already in
- * the table but the QP was added.  Return ESRCH if the QP was already
- * attached and neither structure was added.
- */
-static int ipath_mcast_add(struct ipath_ibdev *dev,
-			   struct ipath_mcast *mcast,
-			   struct ipath_mcast_qp *mqp)
-{
-	struct rb_node **n = &mcast_tree.rb_node;
-	struct rb_node *pn = NULL;
-	int ret;
-
-	spin_lock_irq(&mcast_lock);
-
-	while (*n) {
-		struct ipath_mcast *tmcast;
-		struct ipath_mcast_qp *p;
-
-		pn = *n;
-		tmcast = rb_entry(pn, struct ipath_mcast, rb_node);
-
-		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0) {
-			n = &pn->rb_left;
-			continue;
-		}
-		if (ret > 0) {
-			n = &pn->rb_right;
-			continue;
-		}
-
-		/* Search the QP list to see if this is already there. */
-		list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
-			if (p->qp == mqp->qp) {
-				ret = ESRCH;
-				goto bail;
-			}
-		}
-		if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) {
-			ret = ENOMEM;
-			goto bail;
-		}
-
-		tmcast->n_attached++;
-
-		list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
-		ret = EEXIST;
-		goto bail;
-	}
-
-	spin_lock(&dev->n_mcast_grps_lock);
-	if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
-		spin_unlock(&dev->n_mcast_grps_lock);
-		ret = ENOMEM;
-		goto bail;
-	}
-
-	dev->n_mcast_grps_allocated++;
-	spin_unlock(&dev->n_mcast_grps_lock);
-
-	mcast->n_attached++;
-
-	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
-
-	atomic_inc(&mcast->refcount);
-	rb_link_node(&mcast->rb_node, pn, n);
-	rb_insert_color(&mcast->rb_node, &mcast_tree);
-
-	ret = 0;
-
-bail:
-	spin_unlock_irq(&mcast_lock);
-
-	return ret;
-}
-
-int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-	struct ipath_ibdev *dev = to_idev(ibqp->device);
-	struct ipath_mcast *mcast;
-	struct ipath_mcast_qp *mqp;
-	int ret;
-
-	/*
-	 * Allocate data structures since its better to do this outside of
-	 * spin locks and it will most likely be needed.
-	 */
-	mcast = ipath_mcast_alloc(gid);
-	if (mcast == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	mqp = ipath_mcast_qp_alloc(qp);
-	if (mqp == NULL) {
-		ipath_mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-	}
-	switch (ipath_mcast_add(dev, mcast, mqp)) {
-	case ESRCH:
-		/* Neither was used: can't attach the same QP twice. */
-		ipath_mcast_qp_free(mqp);
-		ipath_mcast_free(mcast);
-		ret = -EINVAL;
-		goto bail;
-	case EEXIST:		/* The mcast wasn't used */
-		ipath_mcast_free(mcast);
-		break;
-	case ENOMEM:
-		/* Exceeded the maximum number of mcast groups. */
-		ipath_mcast_qp_free(mqp);
-		ipath_mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-	default:
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct ipath_qp *qp = to_iqp(ibqp);
-	struct ipath_ibdev *dev = to_idev(ibqp->device);
-	struct ipath_mcast *mcast = NULL;
-	struct ipath_mcast_qp *p, *tmp;
-	struct rb_node *n;
-	int last = 0;
-	int ret;
-
-	spin_lock_irq(&mcast_lock);
-
-	/* Find the GID in the mcast table. */
-	n = mcast_tree.rb_node;
-	while (1) {
-		if (n == NULL) {
-			spin_unlock_irq(&mcast_lock);
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		mcast = rb_entry(n, struct ipath_mcast, rb_node);
-		ret = memcmp(gid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else
-			break;
-	}
-
-	/* Search the QP list. */
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
-		if (p->qp != qp)
-			continue;
-		/*
-		 * We found it, so remove it, but don't poison the forward
-		 * link until we are sure there are no list walkers.
-		 */
-		list_del_rcu(&p->list);
-		mcast->n_attached--;
-
-		/* If this was the last attached QP, remove the GID too. */
-		if (list_empty(&mcast->qp_list)) {
-			rb_erase(&mcast->rb_node, &mcast_tree);
-			last = 1;
-		}
-		break;
-	}
-
-	spin_unlock_irq(&mcast_lock);
-
-	if (p) {
-		/*
-		 * Wait for any list walkers to finish before freeing the
-		 * list element.
-		 */
-		wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-		ipath_mcast_qp_free(p);
-	}
-	if (last) {
-		atomic_dec(&mcast->refcount);
-		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-		ipath_mcast_free(mcast);
-		spin_lock_irq(&dev->n_mcast_grps_lock);
-		dev->n_mcast_grps_allocated--;
-		spin_unlock_irq(&dev->n_mcast_grps_lock);
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int ipath_mcast_tree_empty(void)
-{
-	return mcast_tree.rb_node == NULL;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_wc_ppc64.c b/drivers/staging/rdma/ipath/ipath_wc_ppc64.c
deleted file mode 100644
index 1a7e20a75149..000000000000
--- a/drivers/staging/rdma/ipath/ipath_wc_ppc64.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on PowerPC only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * Nothing to do on PowerPC, so just return without error.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-	return 0;
-}
diff --git a/drivers/staging/rdma/ipath/ipath_wc_x86_64.c b/drivers/staging/rdma/ipath/ipath_wc_x86_64.c
deleted file mode 100644
index 7b6e4c843e19..000000000000
--- a/drivers/staging/rdma/ipath/ipath_wc_x86_64.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file is conditionally built on x86_64 only.  Otherwise weak symbol
- * versions of the functions exported from here are used.
- */
-
-#include <linux/pci.h>
-#include <asm/processor.h>
-
-#include "ipath_kernel.h"
-
-/**
- * ipath_enable_wc - enable write combining for MMIO writes to the device
- * @dd: infinipath device
- *
- * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
- * write combining.
- */
-int ipath_enable_wc(struct ipath_devdata *dd)
-{
-	int ret = 0;
-	u64 pioaddr, piolen;
-	unsigned bits;
-	const unsigned long addr = pci_resource_start(dd->pcidev, 0);
-	const size_t len = pci_resource_len(dd->pcidev, 0);
-
-	/*
-	 * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
-	 * chip.  Linux (possibly the hardware) requires it to be on a power
-	 * of 2 address matching the length (which has to be a power of 2).
-	 * For rev1, that means the base address, for rev2, it will be just
-	 * the PIO buffers themselves.
-	 * For chips with two sets of buffers, the calculations are
-	 * somewhat more complicated; we need to sum, and the piobufbase
-	 * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
-	 * The buffers are still packed, so a single range covers both.
-	 */
-	if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
-		unsigned long pio2kbase, pio4kbase;
-		pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
-		pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
-		if (pio2kbase < pio4kbase) { /* all, for now */
-			pioaddr = addr + pio2kbase;
-			piolen = pio4kbase - pio2kbase +
-				dd->ipath_piobcnt4k * dd->ipath_4kalign;
-		} else {
-			pioaddr = addr + pio4kbase;
-			piolen = pio2kbase - pio4kbase +
-				dd->ipath_piobcnt2k * dd->ipath_palign;
-		}
-	} else {  /* single buffer size (2K, currently) */
-		pioaddr = addr + dd->ipath_piobufbase;
-		piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
-			dd->ipath_piobcnt4k * dd->ipath_4kalign;
-	}
-
-	for (bits = 0; !(piolen & (1ULL << bits)); bits++)
-		/* do nothing */ ;
-
-	if (piolen != (1ULL << bits)) {
-		piolen >>= bits;
-		while (piolen >>= 1)
-			bits++;
-		piolen = 1ULL << (bits + 1);
-	}
-	if (pioaddr & (piolen - 1)) {
-		u64 atmp;
-		ipath_dbg("pioaddr %llx not on right boundary for size "
-			  "%llx, fixing\n",
-			  (unsigned long long) pioaddr,
-			  (unsigned long long) piolen);
-		atmp = pioaddr & ~(piolen - 1);
-		if (atmp < addr || (atmp + piolen) > (addr + len)) {
-			ipath_dev_err(dd, "No way to align address/size "
-				      "(%llx/%llx), no WC mtrr\n",
-				      (unsigned long long) atmp,
-				      (unsigned long long) piolen << 1);
-			ret = -ENODEV;
-		} else {
-			ipath_dbg("changing WC base from %llx to %llx, "
-				  "len from %llx to %llx\n",
-				  (unsigned long long) pioaddr,
-				  (unsigned long long) atmp,
-				  (unsigned long long) piolen,
-				  (unsigned long long) piolen << 1);
-			pioaddr = atmp;
-			piolen <<= 1;
-		}
-	}
-
-	if (!ret) {
-		dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen);
-		if (dd->wc_cookie < 0) {
-			ipath_dev_err(dd, "Seting mtrr failed on PIO buffers\n");
-			ret = -ENODEV;
-		} else if (dd->wc_cookie == 0)
-			ipath_cdbg(VERBOSE, "Set mtrr for chip to WC not needed\n");
-		else
-			ipath_cdbg(VERBOSE, "Set mtrr for chip to WC\n");
-	}
-
-	return ret;
-}
-
-/**
- * ipath_disable_wc - disable write combining for MMIO writes to the device
- * @dd: infinipath device
- */
-void ipath_disable_wc(struct ipath_devdata *dd)
-{
-	arch_phys_wc_del(dd->wc_cookie);
-}