aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/ulp
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h34
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c37
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c49
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c275
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c56
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c25
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c94
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h207
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c38
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c490
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c360
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c363
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h22
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c301
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h25
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c93
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h1
17 files changed, 1487 insertions, 983 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index bd94b0a6e9e5..4cd5428a2399 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -80,7 +80,7 @@ enum {
IPOIB_NUM_WC = 4,
IPOIB_MAX_PATH_REC_QUEUE = 3,
- IPOIB_MAX_MCAST_QUEUE = 3,
+ IPOIB_MAX_MCAST_QUEUE = 64,
IPOIB_FLAG_OPER_UP = 0,
IPOIB_FLAG_INITIALIZED = 1,
@@ -239,7 +239,7 @@ struct ipoib_cm_tx {
struct net_device *dev;
struct ipoib_neigh *neigh;
struct ipoib_path *path;
- struct ipoib_cm_tx_buf *tx_ring;
+ struct ipoib_tx_buf *tx_ring;
unsigned tx_head;
unsigned tx_tail;
unsigned long flags;
@@ -342,7 +342,6 @@ struct ipoib_dev_priv {
u16 pkey;
u16 pkey_index;
struct ib_pd *pd;
- struct ib_mr *mr;
struct ib_cq *recv_cq;
struct ib_cq *send_cq;
struct ib_qp *qp;
@@ -504,6 +503,33 @@ int ipoib_mcast_stop_thread(struct net_device *dev);
void ipoib_mcast_dev_down(struct net_device *dev);
void ipoib_mcast_dev_flush(struct net_device *dev);
+int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
+void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv,
+ struct ipoib_tx_buf *tx_req);
+
+static inline void ipoib_build_sge(struct ipoib_dev_priv *priv,
+ struct ipoib_tx_buf *tx_req)
+{
+ int i, off;
+ struct sk_buff *skb = tx_req->skb;
+ skb_frag_t *frags = skb_shinfo(skb)->frags;
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ u64 *mapping = tx_req->mapping;
+
+ if (skb_headlen(skb)) {
+ priv->tx_sge[0].addr = mapping[0];
+ priv->tx_sge[0].length = skb_headlen(skb);
+ off = 1;
+ } else
+ off = 0;
+
+ for (i = 0; i < nr_frags; ++i) {
+ priv->tx_sge[i + off].addr = mapping[i + off];
+ priv->tx_sge[i + off].length = skb_frag_size(&frags[i]);
+ }
+ priv->tx_wr.num_sge = nr_frags + off;
+}
+
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev);
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter);
@@ -522,6 +548,8 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
union ib_gid *mgid, int set_qkey);
+int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast);
+struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid);
int ipoib_init_qp(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index cf32a778e7d0..c78dc1638030 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -332,7 +332,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
int i;
for (i = 0; i < priv->cm.num_frags; ++i)
- sge[i].lkey = priv->mr->lkey;
+ sge[i].lkey = priv->pd->local_dma_lkey;
sge[0].length = IPOIB_CM_HEAD_SIZE;
for (i = 1; i < priv->cm.num_frags; ++i)
@@ -694,14 +694,12 @@ repost:
static inline int post_send(struct ipoib_dev_priv *priv,
struct ipoib_cm_tx *tx,
unsigned int wr_id,
- u64 addr, int len)
+ struct ipoib_tx_buf *tx_req)
{
struct ib_send_wr *bad_wr;
- priv->tx_sge[0].addr = addr;
- priv->tx_sge[0].length = len;
+ ipoib_build_sge(priv, tx_req);
- priv->tx_wr.num_sge = 1;
priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
@@ -710,8 +708,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_cm_tx_buf *tx_req;
- u64 addr;
+ struct ipoib_tx_buf *tx_req;
int rc;
if (unlikely(skb->len > tx->mtu)) {
@@ -735,24 +732,21 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
*/
tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
tx_req->skb = skb;
- addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
- if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+
+ if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
- tx_req->mapping = addr;
-
skb_orphan(skb);
skb_dst_drop(skb);
- rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
- addr, skb->len);
+ rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
if (unlikely(rc)) {
ipoib_warn(priv, "post_send failed, error %d\n", rc);
++dev->stats.tx_errors;
- ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv, tx_req);
dev_kfree_skb_any(skb);
} else {
dev->trans_start = jiffies;
@@ -777,7 +771,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_cm_tx *tx = wc->qp->qp_context;
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
- struct ipoib_cm_tx_buf *tx_req;
+ struct ipoib_tx_buf *tx_req;
unsigned long flags;
ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
@@ -791,7 +785,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
tx_req = &tx->tx_ring[wr_id];
- ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv, tx_req);
/* FIXME: is this right? Shouldn't we only increment on success? */
++dev->stats.tx_packets;
@@ -854,7 +848,7 @@ int ipoib_cm_dev_open(struct net_device *dev)
}
ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
- 0, NULL);
+ 0);
if (ret) {
printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
IPOIB_CM_IETF_ID | priv->qp->qp_num);
@@ -1036,6 +1030,9 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
struct ib_qp *tx_qp;
+ if (dev->features & NETIF_F_SG)
+ attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+
tx_qp = ib_create_qp(priv->pd, &attr);
if (PTR_ERR(tx_qp) == -EINVAL) {
ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n",
@@ -1170,7 +1167,7 @@ err_tx:
static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
struct ipoib_dev_priv *priv = netdev_priv(p->dev);
- struct ipoib_cm_tx_buf *tx_req;
+ struct ipoib_tx_buf *tx_req;
unsigned long begin;
ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
@@ -1197,8 +1194,7 @@ timeout:
while ((int) p->tx_tail - (int) p->tx_head < 0) {
tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
- ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
- DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv, tx_req);
dev_kfree_skb_any(tx_req->skb);
++p->tx_tail;
netif_tx_lock_bh(p->dev);
@@ -1455,7 +1451,6 @@ static void ipoib_cm_stale_task(struct work_struct *work)
spin_unlock_irq(&priv->lock);
}
-
static ssize_t show_mode(struct device *d, struct device_attribute *attr,
char *buf)
{
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 63b92cbb29ad..d266667ca9b8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -263,8 +263,7 @@ repost:
"for buf %d\n", wr_id);
}
-static int ipoib_dma_map_tx(struct ib_device *ca,
- struct ipoib_tx_buf *tx_req)
+int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req)
{
struct sk_buff *skb = tx_req->skb;
u64 *mapping = tx_req->mapping;
@@ -305,8 +304,8 @@ partial_error:
return -EIO;
}
-static void ipoib_dma_unmap_tx(struct ib_device *ca,
- struct ipoib_tx_buf *tx_req)
+void ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv,
+ struct ipoib_tx_buf *tx_req)
{
struct sk_buff *skb = tx_req->skb;
u64 *mapping = tx_req->mapping;
@@ -314,7 +313,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
int off;
if (skb_headlen(skb)) {
- ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+ ib_dma_unmap_single(priv->ca, mapping[0], skb_headlen(skb),
+ DMA_TO_DEVICE);
off = 1;
} else
off = 0;
@@ -322,8 +322,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- ib_dma_unmap_page(ca, mapping[i + off], skb_frag_size(frag),
- DMA_TO_DEVICE);
+ ib_dma_unmap_page(priv->ca, mapping[i + off],
+ skb_frag_size(frag), DMA_TO_DEVICE);
}
}
@@ -389,7 +389,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
tx_req = &priv->tx_ring[wr_id];
- ipoib_dma_unmap_tx(priv->ca, tx_req);
+ ipoib_dma_unmap_tx(priv, tx_req);
++dev->stats.tx_packets;
dev->stats.tx_bytes += tx_req->skb->len;
@@ -514,24 +514,10 @@ static inline int post_send(struct ipoib_dev_priv *priv,
void *head, int hlen)
{
struct ib_send_wr *bad_wr;
- int i, off;
struct sk_buff *skb = tx_req->skb;
- skb_frag_t *frags = skb_shinfo(skb)->frags;
- int nr_frags = skb_shinfo(skb)->nr_frags;
- u64 *mapping = tx_req->mapping;
- if (skb_headlen(skb)) {
- priv->tx_sge[0].addr = mapping[0];
- priv->tx_sge[0].length = skb_headlen(skb);
- off = 1;
- } else
- off = 0;
+ ipoib_build_sge(priv, tx_req);
- for (i = 0; i < nr_frags; ++i) {
- priv->tx_sge[i + off].addr = mapping[i + off];
- priv->tx_sge[i + off].length = skb_frag_size(&frags[i]);
- }
- priv->tx_wr.num_sge = nr_frags + off;
priv->tx_wr.wr_id = wr_id;
priv->tx_wr.wr.ud.remote_qpn = qpn;
priv->tx_wr.wr.ud.ah = address;
@@ -617,7 +603,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
ipoib_warn(priv, "post_send failed, error %d\n", rc);
++dev->stats.tx_errors;
--priv->tx_outstanding;
- ipoib_dma_unmap_tx(priv->ca, tx_req);
+ ipoib_dma_unmap_tx(priv, tx_req);
dev_kfree_skb_any(skb);
if (netif_queue_stopped(dev))
netif_wake_queue(dev);
@@ -868,7 +854,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
tx_req = &priv->tx_ring[priv->tx_tail &
(ipoib_sendq_size - 1)];
- ipoib_dma_unmap_tx(priv->ca, tx_req);
+ ipoib_dma_unmap_tx(priv, tx_req);
dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail;
--priv->tx_outstanding;
@@ -985,20 +971,21 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv)
}
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
- enum ipoib_flush_level level)
+ enum ipoib_flush_level level,
+ int nesting)
{
struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev;
int result;
- down_read(&priv->vlan_rwsem);
+ down_read_nested(&priv->vlan_rwsem, nesting);
/*
* Flush any child interfaces too -- they might be up even if
* the parent is down.
*/
list_for_each_entry(cpriv, &priv->child_intfs, list)
- __ipoib_ib_dev_flush(cpriv, level);
+ __ipoib_ib_dev_flush(cpriv, level, nesting + 1);
up_read(&priv->vlan_rwsem);
@@ -1076,7 +1063,7 @@ void ipoib_ib_dev_flush_light(struct work_struct *work)
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, flush_light);
- __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT);
+ __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT, 0);
}
void ipoib_ib_dev_flush_normal(struct work_struct *work)
@@ -1084,7 +1071,7 @@ void ipoib_ib_dev_flush_normal(struct work_struct *work)
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, flush_normal);
- __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL);
+ __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL, 0);
}
void ipoib_ib_dev_flush_heavy(struct work_struct *work)
@@ -1092,7 +1079,7 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, flush_heavy);
- __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY);
+ __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
}
void ipoib_ib_dev_cleanup(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index da149c278cb8..f74316e679d2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -48,6 +48,9 @@
#include <linux/jhash.h>
#include <net/arp.h>
+#include <net/addrconf.h>
+#include <linux/inetdevice.h>
+#include <rdma/ib_cache.h>
#define DRV_VERSION "1.0.0"
@@ -89,13 +92,18 @@ struct workqueue_struct *ipoib_workqueue;
struct ib_sa_client ipoib_sa_client;
static void ipoib_add_one(struct ib_device *device);
-static void ipoib_remove_one(struct ib_device *device);
+static void ipoib_remove_one(struct ib_device *device, void *client_data);
static void ipoib_neigh_reclaim(struct rcu_head *rp);
+static struct net_device *ipoib_get_net_dev_by_params(
+ struct ib_device *dev, u8 port, u16 pkey,
+ const union ib_gid *gid, const struct sockaddr *addr,
+ void *client_data);
static struct ib_client ipoib_client = {
.name = "ipoib",
.add = ipoib_add_one,
- .remove = ipoib_remove_one
+ .remove = ipoib_remove_one,
+ .get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
int ipoib_open(struct net_device *dev)
@@ -190,7 +198,7 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
struct ipoib_dev_priv *priv = netdev_priv(dev);
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
- features &= ~(NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO);
+ features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
return features;
}
@@ -222,6 +230,225 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
+/* Called with an RCU read lock taken */
+static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr,
+ struct net_device *dev)
+{
+ struct net *net = dev_net(dev);
+ struct in_device *in_dev;
+ struct sockaddr_in *addr_in = (struct sockaddr_in *)addr;
+ struct sockaddr_in6 *addr_in6 = (struct sockaddr_in6 *)addr;
+ __be32 ret_addr;
+
+ switch (addr->sa_family) {
+ case AF_INET:
+ in_dev = in_dev_get(dev);
+ if (!in_dev)
+ return false;
+
+ ret_addr = inet_confirm_addr(net, in_dev, 0,
+ addr_in->sin_addr.s_addr,
+ RT_SCOPE_HOST);
+ in_dev_put(in_dev);
+ if (ret_addr)
+ return true;
+
+ break;
+ case AF_INET6:
+ if (IS_ENABLED(CONFIG_IPV6) &&
+ ipv6_chk_addr(net, &addr_in6->sin6_addr, dev, 1))
+ return true;
+
+ break;
+ }
+ return false;
+}
+
+/**
+ * Find the master net_device on top of the given net_device.
+ * @dev: base IPoIB net_device
+ *
+ * Returns the master net_device with a reference held, or the same net_device
+ * if no master exists.
+ */
+static struct net_device *ipoib_get_master_net_dev(struct net_device *dev)
+{
+ struct net_device *master;
+
+ rcu_read_lock();
+ master = netdev_master_upper_dev_get_rcu(dev);
+ if (master)
+ dev_hold(master);
+ rcu_read_unlock();
+
+ if (master)
+ return master;
+
+ dev_hold(dev);
+ return dev;
+}
+
+/**
+ * Find a net_device matching the given address, which is an upper device of
+ * the given net_device.
+ * @addr: IP address to look for.
+ * @dev: base IPoIB net_device
+ *
+ * If found, returns the net_device with a reference held. Otherwise return
+ * NULL.
+ */
+static struct net_device *ipoib_get_net_dev_match_addr(
+ const struct sockaddr *addr, struct net_device *dev)
+{
+ struct net_device *upper,
+ *result = NULL;
+ struct list_head *iter;
+
+ rcu_read_lock();
+ if (ipoib_is_dev_match_addr_rcu(addr, dev)) {
+ dev_hold(dev);
+ result = dev;
+ goto out;
+ }
+
+ netdev_for_each_all_upper_dev_rcu(dev, upper, iter) {
+ if (ipoib_is_dev_match_addr_rcu(addr, upper)) {
+ dev_hold(upper);
+ result = upper;
+ break;
+ }
+ }
+out:
+ rcu_read_unlock();
+ return result;
+}
+
+/* returns the number of IPoIB netdevs on top a given ipoib device matching a
+ * pkey_index and address, if one exists.
+ *
+ * @found_net_dev: contains a matching net_device if the return value >= 1,
+ * with a reference held. */
+static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv,
+ const union ib_gid *gid,
+ u16 pkey_index,
+ const struct sockaddr *addr,
+ int nesting,
+ struct net_device **found_net_dev)
+{
+ struct ipoib_dev_priv *child_priv;
+ struct net_device *net_dev = NULL;
+ int matches = 0;
+
+ if (priv->pkey_index == pkey_index &&
+ (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) {
+ if (!addr) {
+ net_dev = ipoib_get_master_net_dev(priv->dev);
+ } else {
+ /* Verify the net_device matches the IP address, as
+ * IPoIB child devices currently share a GID. */
+ net_dev = ipoib_get_net_dev_match_addr(addr, priv->dev);
+ }
+ if (net_dev) {
+ if (!*found_net_dev)
+ *found_net_dev = net_dev;
+ else
+ dev_put(net_dev);
+ ++matches;
+ }
+ }
+
+ /* Check child interfaces */
+ down_read_nested(&priv->vlan_rwsem, nesting);
+ list_for_each_entry(child_priv, &priv->child_intfs, list) {
+ matches += ipoib_match_gid_pkey_addr(child_priv, gid,
+ pkey_index, addr,
+ nesting + 1,
+ found_net_dev);
+ if (matches > 1)
+ break;
+ }
+ up_read(&priv->vlan_rwsem);
+
+ return matches;
+}
+
+/* Returns the number of matching net_devs found (between 0 and 2). Also
+ * return the matching net_device in the @net_dev parameter, holding a
+ * reference to the net_device, if the number of matches >= 1 */
+static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port,
+ u16 pkey_index,
+ const union ib_gid *gid,
+ const struct sockaddr *addr,
+ struct net_device **net_dev)
+{
+ struct ipoib_dev_priv *priv;
+ int matches = 0;
+
+ *net_dev = NULL;
+
+ list_for_each_entry(priv, dev_list, list) {
+ if (priv->port != port)
+ continue;
+
+ matches += ipoib_match_gid_pkey_addr(priv, gid, pkey_index,
+ addr, 0, net_dev);
+ if (matches > 1)
+ break;
+ }
+
+ return matches;
+}
+
+static struct net_device *ipoib_get_net_dev_by_params(
+ struct ib_device *dev, u8 port, u16 pkey,
+ const union ib_gid *gid, const struct sockaddr *addr,
+ void *client_data)
+{
+ struct net_device *net_dev;
+ struct list_head *dev_list = client_data;
+ u16 pkey_index;
+ int matches;
+ int ret;
+
+ if (!rdma_protocol_ib(dev, port))
+ return NULL;
+
+ ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index);
+ if (ret)
+ return NULL;
+
+ if (!dev_list)
+ return NULL;
+
+ /* See if we can find a unique device matching the L2 parameters */
+ matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
+ gid, NULL, &net_dev);
+
+ switch (matches) {
+ case 0:
+ return NULL;
+ case 1:
+ return net_dev;
+ }
+
+ dev_put(net_dev);
+
+ /* Couldn't find a unique device with L2 parameters only. Use L3
+ * address to uniquely match the net device */
+ matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
+ gid, addr, &net_dev);
+ switch (matches) {
+ case 0:
+ return NULL;
+ default:
+ dev_warn_ratelimited(&dev->dev,
+ "duplicate IP address detected\n");
+ /* Fall through */
+ case 1:
+ return net_dev;
+ }
+}
+
int ipoib_set_mode(struct net_device *dev, const char *buf)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -232,6 +459,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
ipoib_warn(priv, "enabling connected mode "
"will cause multicast packet drops\n");
netdev_update_features(dev);
+ dev_set_mtu(dev, ipoib_cm_max_mtu(dev));
rtnl_unlock();
priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
@@ -921,6 +1149,9 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
unsigned long dt;
unsigned long flags;
int i;
+ LIST_HEAD(remove_list);
+ struct ipoib_mcast *mcast, *tmcast;
+ struct net_device *dev = priv->dev;
if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
return;
@@ -948,6 +1179,19 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
lockdep_is_held(&priv->lock))) != NULL) {
/* was the neigh idle for two GC periods */
if (time_after(neigh_obsolete, neigh->alive)) {
+ u8 *mgid = neigh->daddr + 4;
+
+ /* Is this multicast ? */
+ if (*mgid == 0xff) {
+ mcast = __ipoib_mcast_find(dev, mgid);
+
+ if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+ list_del(&mcast->list);
+ rb_erase(&mcast->rb_node, &priv->multicast_tree);
+ list_add_tail(&mcast->list, &remove_list);
+ }
+ }
+
rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
@@ -963,6 +1207,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
out_unlock:
spin_unlock_irqrestore(&priv->lock, flags);
+ list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+ ipoib_mcast_leave(dev, mcast);
}
static void ipoib_reap_neigh(struct work_struct *work)
@@ -1577,7 +1823,8 @@ static struct net_device *ipoib_add_port(const char *format,
SET_NETDEV_DEV(priv->dev, hca->dma_device);
priv->dev->dev_id = port - 1;
- if (!ib_query_port(hca, port, &attr))
+ result = ib_query_port(hca, port, &attr);
+ if (!result)
priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
else {
printk(KERN_WARNING "%s: ib_query_port %d failed\n",
@@ -1598,7 +1845,8 @@ static struct net_device *ipoib_add_port(const char *format,
goto device_init_failed;
}
- if (ipoib_set_dev_features(priv, hca))
+ result = ipoib_set_dev_features(priv, hca);
+ if (result)
goto device_init_failed;
/*
@@ -1684,7 +1932,7 @@ static void ipoib_add_one(struct ib_device *device)
struct list_head *dev_list;
struct net_device *dev;
struct ipoib_dev_priv *priv;
- int s, e, p;
+ int p;
int count = 0;
dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
@@ -1693,15 +1941,7 @@ static void ipoib_add_one(struct ib_device *device)
INIT_LIST_HEAD(dev_list);
- if (device->node_type == RDMA_NODE_IB_SWITCH) {
- s = 0;
- e = 0;
- } else {
- s = 1;
- e = device->phys_port_cnt;
- }
-
- for (p = s; p <= e; ++p) {
+ for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
if (!rdma_protocol_ib(device, p))
continue;
dev = ipoib_add_port("ib%d", device, p);
@@ -1720,12 +1960,11 @@ static void ipoib_add_one(struct ib_device *device)
ib_set_client_data(device, &ipoib_client, dev_list);
}
-static void ipoib_remove_one(struct ib_device *device)
+static void ipoib_remove_one(struct ib_device *device, void *client_data)
{
struct ipoib_dev_priv *priv, *tmp;
- struct list_head *dev_list;
+ struct list_head *dev_list = client_data;
- dev_list = ib_get_client_data(device, &ipoib_client);
if (!dev_list)
return;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 0d23e0568deb..136cbefe00f8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
return mcast;
}
-static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
+struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
@@ -393,8 +393,13 @@ static int ipoib_mcast_join_complete(int status,
goto out_locked;
}
} else {
- if (mcast->logcount++ < 20) {
- if (status == -ETIMEDOUT || status == -EAGAIN) {
+ bool silent_fail =
+ test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
+ status == -EINVAL;
+
+ if (mcast->logcount < 20) {
+ if (status == -ETIMEDOUT || status == -EAGAIN ||
+ silent_fail) {
ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
mcast->mcmember.mgid.raw, status);
@@ -403,6 +408,9 @@ static int ipoib_mcast_join_complete(int status,
test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
mcast->mcmember.mgid.raw, status);
}
+
+ if (!silent_fail)
+ mcast->logcount++;
}
if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
@@ -448,8 +456,7 @@ out_locked:
return status;
}
-static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
- int create)
+static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_sa_multicast *multicast;
@@ -471,7 +478,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
IB_SA_MCMEMBER_REC_PKEY |
IB_SA_MCMEMBER_REC_JOIN_STATE;
- if (create) {
+ if (mcast != priv->broadcast) {
+ /*
+ * RFC 4391:
+ * The MGID MUST use the same P_Key, Q_Key, SL, MTU,
+ * and HopLimit as those used in the broadcast-GID. The rest
+ * of attributes SHOULD follow the values used in the
+ * broadcast-GID as well.
+ */
comp_mask |=
IB_SA_MCMEMBER_REC_QKEY |
IB_SA_MCMEMBER_REC_MTU_SELECTOR |
@@ -492,6 +506,24 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
rec.sl = priv->broadcast->mcmember.sl;
rec.flow_label = priv->broadcast->mcmember.flow_label;
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
+
+ /*
+ * Send-only IB Multicast joins do not work at the core
+ * IB layer yet, so we can't use them here. However,
+ * we are emulating an Ethernet multicast send, which
+ * does not require a multicast subscription and will
+ * still send properly. The most appropriate thing to
+ * do is to create the group if it doesn't exist as that
+ * most closely emulates the behavior, from a user space
+ * application perspecitive, of Ethernet multicast
+ * operation. For now, we do a full join, maybe later
+ * when the core IB layers support send only joins we
+ * will use them.
+ */
+#if 0
+ if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+ rec.join_state = 4;
+#endif
}
multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
@@ -517,7 +549,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
struct ib_port_attr port_attr;
unsigned long delay_until = 0;
struct ipoib_mcast *mcast = NULL;
- int create = 1;
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return;
@@ -566,7 +597,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
mcast = priv->broadcast;
- create = 0;
if (mcast->backoff > 1 &&
time_before(jiffies, mcast->delay_until)) {
delay_until = mcast->delay_until;
@@ -590,12 +620,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
/* Found the next unjoined group */
init_completion(&mcast->done);
set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
- if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
- create = 0;
- else
- create = 1;
spin_unlock_irq(&priv->lock);
- ipoib_mcast_join(dev, mcast, create);
+ ipoib_mcast_join(dev, mcast);
spin_lock_irq(&priv->lock);
} else if (!delay_until ||
time_before(mcast->delay_until, delay_until))
@@ -618,7 +644,7 @@ out:
}
spin_unlock_irq(&priv->lock);
if (mcast)
- ipoib_mcast_join(dev, mcast, create);
+ ipoib_mcast_join(dev, mcast);
}
int ipoib_mcast_start_thread(struct net_device *dev)
@@ -651,7 +677,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
return 0;
}
-static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
+int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret = 0;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 9e6ee82a8fd7..78845b6e8b81 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -152,12 +152,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
return -ENODEV;
}
- priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(priv->mr)) {
- printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name);
- goto out_free_pd;
- }
-
/*
* the various IPoIB tasks assume they will never race against
* themselves, so always use a single thread workqueue
@@ -165,7 +159,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->wq = create_singlethread_workqueue("ipoib_wq");
if (!priv->wq) {
printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
- goto out_free_mr;
+ goto out_free_pd;
}
size = ipoib_recvq_size + 1;
@@ -177,7 +171,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
else
size += ipoib_recvq_size * ipoib_max_conn_qp;
} else
- goto out_free_wq;
+ if (ret != -ENOSYS)
+ goto out_free_wq;
cq_attr.cqe = size;
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL,
@@ -224,13 +219,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;
for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
- priv->tx_sge[i].lkey = priv->mr->lkey;
+ priv->tx_sge[i].lkey = priv->pd->local_dma_lkey;
priv->tx_wr.opcode = IB_WR_SEND;
priv->tx_wr.sg_list = priv->tx_sge;
priv->tx_wr.send_flags = IB_SEND_SIGNALED;
- priv->rx_sge[0].lkey = priv->mr->lkey;
+ priv->rx_sge[0].lkey = priv->pd->local_dma_lkey;
priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
priv->rx_wr.num_sge = 1;
@@ -253,9 +248,6 @@ out_free_wq:
destroy_workqueue(priv->wq);
priv->wq = NULL;
-out_free_mr:
- ib_dereg_mr(priv->mr);
-
out_free_pd:
ib_dealloc_pd(priv->pd);
@@ -288,12 +280,7 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
priv->wq = NULL;
}
- if (ib_dereg_mr(priv->mr))
- ipoib_warn(priv, "ib_dereg_mr failed\n");
-
- if (ib_dealloc_pd(priv->pd))
- ipoib_warn(priv, "ib_dealloc_pd failed\n");
-
+ ib_dealloc_pd(priv->pd);
}
void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 6a594aac2290..f58ff96b6cbb 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -74,34 +74,42 @@
#include "iscsi_iser.h"
+MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz");
+MODULE_VERSION(DRV_VER);
+
static struct scsi_host_template iscsi_iser_sht;
static struct iscsi_transport iscsi_iser_transport;
static struct scsi_transport_template *iscsi_iser_scsi_transport;
+static struct workqueue_struct *release_wq;
+struct iser_global ig;
+
+int iser_debug_level = 0;
+module_param_named(debug_level, iser_debug_level, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)");
static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
+MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session (default:512");
-int iser_debug_level = 0;
-bool iser_pi_enable = false;
-int iser_pi_guard = 1;
-
-MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz");
-MODULE_VERSION(DRV_VER);
+unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS;
+module_param_named(max_sectors, iser_max_sectors, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command (default:1024");
-module_param_named(debug_level, iser_debug_level, int, 0644);
-MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)");
+bool iser_always_reg = true;
+module_param_named(always_register, iser_always_reg, bool, S_IRUGO);
+MODULE_PARM_DESC(always_register,
+ "Always register memory, even for continuous memory regions (default:true)");
-module_param_named(pi_enable, iser_pi_enable, bool, 0644);
+bool iser_pi_enable = false;
+module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO);
MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
-module_param_named(pi_guard, iser_pi_guard, int, 0644);
+int iser_pi_guard;
+module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO);
MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
-static struct workqueue_struct *release_wq;
-struct iser_global ig;
-
/*
* iscsi_iser_recv() - Process a successfull recv completion
* @conn: iscsi connection
@@ -201,10 +209,12 @@ iser_initialize_task_headers(struct iscsi_task *task,
goto out;
}
+ tx_desc->wr_idx = 0;
+ tx_desc->mapped = true;
tx_desc->dma_addr = dma_addr;
tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
- tx_desc->tx_sg[0].lkey = device->mr->lkey;
+ tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey;
iser_task->iser_conn = iser_conn;
out:
@@ -360,16 +370,19 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
static void iscsi_iser_cleanup_task(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_tx_desc *tx_desc = &iser_task->desc;
- struct iser_conn *iser_conn = task->conn->dd_data;
+ struct iser_tx_desc *tx_desc = &iser_task->desc;
+ struct iser_conn *iser_conn = task->conn->dd_data;
struct iser_device *device = iser_conn->ib_conn.device;
/* DEVICE_REMOVAL event might have already released the device */
if (!device)
return;
- ib_dma_unmap_single(device->ib_device,
- tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
+ if (likely(tx_desc->mapped)) {
+ ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
+ ISER_HEADERS_LEN, DMA_TO_DEVICE);
+ tx_desc->mapped = false;
+ }
/* mgmt tasks do not need special cleanup */
if (!task->sc)
@@ -622,6 +635,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
if (ep) {
iser_conn = ep->dd_data;
max_cmds = iser_conn->max_cmds;
+ shost->sg_tablesize = iser_conn->scsi_sg_tablesize;
+ shost->max_sectors = iser_conn->scsi_max_sectors;
mutex_lock(&iser_conn->state_mutex);
if (iser_conn->state != ISER_CONN_UP) {
@@ -640,6 +655,15 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
SHOST_DIX_GUARD_CRC);
}
+ /*
+ * Limit the sg_tablesize and max_sectors based on the device
+ * max fastreg page list length.
+ */
+ shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
+ ib_conn->device->dev_attr.max_fast_reg_page_list_len);
+ shost->max_sectors = min_t(unsigned int,
+ 1024, (shost->sg_tablesize * PAGE_SIZE) >> 9);
+
if (iscsi_host_add(shost,
ib_conn->device->ib_device->dma_device)) {
mutex_unlock(&iser_conn->state_mutex);
@@ -742,15 +766,9 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */
stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
- stats->custom_length = 4;
- strcpy(stats->custom[0].desc, "qp_tx_queue_full");
- stats->custom[0].value = 0; /* TB iser_conn->qp_tx_queue_full; */
- strcpy(stats->custom[1].desc, "fmr_map_not_avail");
- stats->custom[1].value = 0; /* TB iser_conn->fmr_map_not_avail */;
- strcpy(stats->custom[2].desc, "eh_abort_cnt");
- stats->custom[2].value = conn->eh_abort_cnt;
- strcpy(stats->custom[3].desc, "fmr_unalign_cnt");
- stats->custom[3].value = conn->fmr_unalign_cnt;
+ stats->custom_length = 1;
+ strcpy(stats->custom[0].desc, "fmr_unalign_cnt");
+ stats->custom[0].value = conn->fmr_unalign_cnt;
}
static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
@@ -839,10 +857,9 @@ failure:
static int
iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
{
- struct iser_conn *iser_conn;
+ struct iser_conn *iser_conn = ep->dd_data;
int rc;
- iser_conn = ep->dd_data;
rc = wait_for_completion_interruptible_timeout(&iser_conn->up_completion,
msecs_to_jiffies(timeout_ms));
/* if conn establishment failed, return error code to iscsi */
@@ -854,7 +871,7 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
mutex_unlock(&iser_conn->state_mutex);
}
- iser_info("ib conn %p rc = %d\n", iser_conn, rc);
+ iser_info("iser conn %p rc = %d\n", iser_conn, rc);
if (rc > 0)
return 1; /* success, this is the equivalent of POLLOUT */
@@ -876,11 +893,9 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
static void
iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
{
- struct iser_conn *iser_conn;
+ struct iser_conn *iser_conn = ep->dd_data;
- iser_conn = ep->dd_data;
- iser_info("ep %p iser conn %p state %d\n",
- ep, iser_conn, iser_conn->state);
+ iser_info("ep %p iser conn %p\n", ep, iser_conn);
mutex_lock(&iser_conn->state_mutex);
iser_conn_terminate(iser_conn);
@@ -900,6 +915,7 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
mutex_unlock(&iser_conn->state_mutex);
iser_conn_release(iser_conn);
}
+
iscsi_destroy_endpoint(ep);
}
@@ -962,8 +978,8 @@ static struct scsi_host_template iscsi_iser_sht = {
.name = "iSCSI Initiator over iSER",
.queuecommand = iscsi_queuecommand,
.change_queue_depth = scsi_change_queue_depth,
- .sg_tablesize = ISCSI_ISER_SG_TABLESIZE,
- .max_sectors = 1024,
+ .sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE,
+ .max_sectors = ISER_DEF_MAX_SECTORS,
.cmd_per_lun = ISER_DEF_CMD_PER_LUN,
.eh_abort_handler = iscsi_eh_abort,
.eh_device_reset_handler= iscsi_eh_device_reset,
@@ -1074,7 +1090,7 @@ static void __exit iser_exit(void)
if (!connlist_empty) {
iser_err("Error cleanup stage completed but we still have iser "
- "connections, destroying them anyway.\n");
+ "connections, destroying them anyway\n");
list_for_each_entry_safe(iser_conn, n, &ig.connlist,
conn_list) {
iser_conn_release(iser_conn);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 262ba1f8ee50..a5edd6ede692 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -98,8 +98,13 @@
#define SHIFT_4K 12
#define SIZE_4K (1ULL << SHIFT_4K)
#define MASK_4K (~(SIZE_4K-1))
- /* support up to 512KB in one RDMA */
-#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K)
+
+/* Default support is 512KB I/O size */
+#define ISER_DEF_MAX_SECTORS 1024
+#define ISCSI_ISER_DEF_SG_TABLESIZE ((ISER_DEF_MAX_SECTORS * 512) >> SHIFT_4K)
+/* Maximum support is 8MB I/O size */
+#define ISCSI_ISER_MAX_SG_TABLESIZE ((16384 * 512) >> SHIFT_4K)
+
#define ISER_DEF_XMIT_CMDS_DEFAULT 512
#if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT
#define ISER_DEF_XMIT_CMDS_MAX ISCSI_DEF_XMIT_CMDS_MAX
@@ -239,6 +244,7 @@ struct iser_data_buf {
struct iser_device;
struct iscsi_iser_task;
struct iscsi_endpoint;
+struct iser_reg_resources;
/**
* struct iser_mem_reg - iSER memory registration info
@@ -259,6 +265,14 @@ enum iser_desc_type {
ISCSI_TX_DATAOUT
};
+/* Maximum number of work requests per task:
+ * Data memory region local invalidate + fast registration
+ * Protection memory region local invalidate + fast registration
+ * Signature memory region local invalidate + fast registration
+ * PDU send
+ */
+#define ISER_MAX_WRS 7
+
/**
* struct iser_tx_desc - iSER TX descriptor (for send wr_id)
*
@@ -270,6 +284,12 @@ enum iser_desc_type {
* sg[1] optionally points to either of immediate data
* unsolicited data-out or control
* @num_sge: number sges used on this TX task
+ * @mapped: Is the task header mapped
+ * @wr_idx: Current WR index
+ * @wrs: Array of WRs per task
+ * @data_reg: Data buffer registration details
+ * @prot_reg: Protection buffer registration details
+ * @sig_attrs: Signature attributes
*/
struct iser_tx_desc {
struct iser_hdr iser_header;
@@ -278,6 +298,12 @@ struct iser_tx_desc {
u64 dma_addr;
struct ib_sge tx_sg[2];
int num_sge;
+ bool mapped;
+ u8 wr_idx;
+ struct ib_send_wr wrs[ISER_MAX_WRS];
+ struct iser_mem_reg data_reg;
+ struct iser_mem_reg prot_reg;
+ struct ib_sig_attrs sig_attrs;
};
#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
@@ -324,6 +350,33 @@ struct iser_comp {
};
/**
+ * struct iser_device - Memory registration operations
+ * per-device registration schemes
+ *
+ * @alloc_reg_res: Allocate registration resources
+ * @free_reg_res: Free registration resources
+ * @fast_reg_mem: Register memory buffers
+ * @unreg_mem: Un-register memory buffers
+ * @reg_desc_get: Get a registration descriptor for pool
+ * @reg_desc_put: Get a registration descriptor to pool
+ */
+struct iser_reg_ops {
+ int (*alloc_reg_res)(struct ib_conn *ib_conn,
+ unsigned cmds_max,
+ unsigned int size);
+ void (*free_reg_res)(struct ib_conn *ib_conn);
+ int (*reg_mem)(struct iscsi_iser_task *iser_task,
+ struct iser_data_buf *mem,
+ struct iser_reg_resources *rsc,
+ struct iser_mem_reg *reg);
+ void (*unreg_mem)(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
+ struct iser_fr_desc * (*reg_desc_get)(struct ib_conn *ib_conn);
+ void (*reg_desc_put)(struct ib_conn *ib_conn,
+ struct iser_fr_desc *desc);
+};
+
+/**
* struct iser_device - iSER device handle
*
* @ib_device: RDMA device
@@ -336,11 +389,7 @@ struct iser_comp {
* @comps_used: Number of completion contexts used, Min between online
* cpus and device max completion vectors
* @comps: Dinamically allocated array of completion handlers
- * Memory registration pool Function pointers (FMR or Fastreg):
- * @iser_alloc_rdma_reg_res: Allocation of memory regions pool
- * @iser_free_rdma_reg_res: Free of memory regions pool
- * @iser_reg_rdma_mem: Memory registration routine
- * @iser_unreg_rdma_mem: Memory deregistration routine
+ * @reg_ops: Registration ops
*/
struct iser_device {
struct ib_device *ib_device;
@@ -352,54 +401,73 @@ struct iser_device {
int refcount;
int comps_used;
struct iser_comp *comps;
- int (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn,
- unsigned cmds_max);
- void (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn);
- int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir);
- void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir);
+ struct iser_reg_ops *reg_ops;
};
#define ISER_CHECK_GUARD 0xc0
#define ISER_CHECK_REFTAG 0x0f
#define ISER_CHECK_APPTAG 0x30
-enum iser_reg_indicator {
- ISER_DATA_KEY_VALID = 1 << 0,
- ISER_PROT_KEY_VALID = 1 << 1,
- ISER_SIG_KEY_VALID = 1 << 2,
- ISER_FASTREG_PROTECTED = 1 << 3,
+/**
+ * struct iser_reg_resources - Fast registration recources
+ *
+ * @mr: memory region
+ * @fmr_pool: pool of fmrs
+ * @frpl: fast reg page list used by frwrs
+ * @page_vec: fast reg page list used by fmr pool
+ * @mr_valid: is mr valid indicator
+ */
+struct iser_reg_resources {
+ union {
+ struct ib_mr *mr;
+ struct ib_fmr_pool *fmr_pool;
+ };
+ union {
+ struct ib_fast_reg_page_list *frpl;
+ struct iser_page_vec *page_vec;
+ };
+ u8 mr_valid:1;
};
/**
* struct iser_pi_context - Protection information context
*
- * @prot_mr: protection memory region
- * @prot_frpl: protection fastreg page list
- * @sig_mr: signature feature enabled memory region
+ * @rsc: protection buffer registration resources
+ * @sig_mr: signature enable memory region
+ * @sig_mr_valid: is sig_mr valid indicator
+ * @sig_protected: is region protected indicator
*/
struct iser_pi_context {
- struct ib_mr *prot_mr;
- struct ib_fast_reg_page_list *prot_frpl;
+ struct iser_reg_resources rsc;
struct ib_mr *sig_mr;
+ u8 sig_mr_valid:1;
+ u8 sig_protected:1;
};
/**
- * struct fast_reg_descriptor - Fast registration descriptor
+ * struct iser_fr_desc - Fast registration descriptor
*
* @list: entry in connection fastreg pool
- * @data_mr: data memory region
- * @data_frpl: data fastreg page list
+ * @rsc: data buffer registration resources
* @pi_ctx: protection information context
- * @reg_indicators: fast registration indicators
*/
-struct fast_reg_descriptor {
+struct iser_fr_desc {
struct list_head list;
- struct ib_mr *data_mr;
- struct ib_fast_reg_page_list *data_frpl;
+ struct iser_reg_resources rsc;
struct iser_pi_context *pi_ctx;
- u8 reg_indicators;
+};
+
+/**
+ * struct iser_fr_pool: connection fast registration pool
+ *
+ * @list: list of fastreg descriptors
+ * @lock: protects fmr/fastreg pool
+ * @size: size of the pool
+ */
+struct iser_fr_pool {
+ struct list_head list;
+ spinlock_t lock;
+ int size;
};
/**
@@ -415,15 +483,7 @@ struct fast_reg_descriptor {
* @pi_support: Indicate device T10-PI support
* @beacon: beacon send wr to signal all flush errors were drained
* @flush_comp: completes when all connection completions consumed
- * @lock: protects fmr/fastreg pool
- * @union.fmr:
- * @pool: FMR pool for fast registrations
- * @page_vec: page vector to hold mapped commands pages
- * used for registration
- * @union.fastreg:
- * @pool: Fast registration descriptors pool for fast
- * registrations
- * @pool_size: Size of pool
+ * @fr_pool: connection fast registration poool
*/
struct ib_conn {
struct rdma_cm_id *cma_id;
@@ -436,17 +496,7 @@ struct ib_conn {
bool pi_support;
struct ib_send_wr beacon;
struct completion flush_comp;
- spinlock_t lock;
- union {
- struct {
- struct ib_fmr_pool *pool;
- struct iser_page_vec *page_vec;
- } fmr;
- struct {
- struct list_head pool;
- int pool_size;
- } fastreg;
- };
+ struct iser_fr_pool fr_pool;
};
/**
@@ -477,6 +527,8 @@ struct ib_conn {
* @rx_desc_head: head of rx_descs cyclic buffer
* @rx_descs: rx buffers array (cyclic buffer)
* @num_rx_descs: number of rx descriptors
+ * @scsi_sg_tablesize: scsi host sg_tablesize
+ * @scsi_max_sectors: scsi host max sectors
*/
struct iser_conn {
struct ib_conn ib_conn;
@@ -501,6 +553,8 @@ struct iser_conn {
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
u32 num_rx_descs;
+ unsigned short scsi_sg_tablesize;
+ unsigned int scsi_max_sectors;
};
/**
@@ -556,6 +610,10 @@ extern struct iser_global ig;
extern int iser_debug_level;
extern bool iser_pi_enable;
extern int iser_pi_guard;
+extern unsigned int iser_max_sectors;
+extern bool iser_always_reg;
+
+int iser_assign_reg_ops(struct iser_device *device);
int iser_send_control(struct iscsi_conn *conn,
struct iscsi_task *task);
@@ -597,10 +655,10 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
enum iser_data_dir cmd_dir);
-int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
- enum iser_data_dir cmd_dir);
-int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task,
- enum iser_data_dir cmd_dir);
+int iser_reg_rdma_mem(struct iscsi_iser_task *task,
+ enum iser_data_dir dir);
+void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
+ enum iser_data_dir dir);
int iser_connect(struct iser_conn *iser_conn,
struct sockaddr *src_addr,
@@ -630,15 +688,40 @@ int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc);
int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
struct iscsi_session *session);
-int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max);
+int iser_alloc_fmr_pool(struct ib_conn *ib_conn,
+ unsigned cmds_max,
+ unsigned int size);
void iser_free_fmr_pool(struct ib_conn *ib_conn);
-int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
+int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
+ unsigned cmds_max,
+ unsigned int size);
void iser_free_fastreg_pool(struct ib_conn *ib_conn);
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector);
-struct fast_reg_descriptor *
-iser_reg_desc_get(struct ib_conn *ib_conn);
+struct iser_fr_desc *
+iser_reg_desc_get_fr(struct ib_conn *ib_conn);
void
-iser_reg_desc_put(struct ib_conn *ib_conn,
- struct fast_reg_descriptor *desc);
+iser_reg_desc_put_fr(struct ib_conn *ib_conn,
+ struct iser_fr_desc *desc);
+struct iser_fr_desc *
+iser_reg_desc_get_fmr(struct ib_conn *ib_conn);
+void
+iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
+ struct iser_fr_desc *desc);
+
+static inline struct ib_send_wr *
+iser_tx_next_wr(struct iser_tx_desc *tx_desc)
+{
+ struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx];
+ struct ib_send_wr *last_wr;
+
+ if (tx_desc->wr_idx) {
+ last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1];
+ last_wr->next = cur_wr;
+ }
+ tx_desc->wr_idx++;
+
+ return cur_wr;
+}
+
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 3e2118e8ed87..d511879d8cdf 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -49,7 +49,6 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_mem_reg *mem_reg;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -73,7 +72,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
return err;
}
- err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
+ err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
if (err) {
iser_err("Failed to set up Data-IN RDMA\n");
return err;
@@ -103,7 +102,6 @@ iser_prepare_write_cmd(struct iscsi_task *task,
unsigned int edtl)
{
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_mem_reg *mem_reg;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -128,7 +126,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
return err;
}
- err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
+ err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
if (err != 0) {
iser_err("Failed to register write cmd RDMA mem\n");
return err;
@@ -170,13 +168,7 @@ static void iser_create_send_desc(struct iser_conn *iser_conn,
memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
tx_desc->iser_header.flags = ISER_VER;
-
tx_desc->num_sge = 1;
-
- if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
- tx_desc->tx_sg[0].lkey = device->mr->lkey;
- iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc);
- }
}
static void iser_free_login_buf(struct iser_conn *iser_conn)
@@ -266,7 +258,8 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;
- if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max))
+ if (device->reg_ops->alloc_reg_res(ib_conn, session->scsi_cmds_max,
+ iser_conn->scsi_sg_tablesize))
goto create_rdma_reg_res_failed;
if (iser_alloc_login_buf(iser_conn))
@@ -291,7 +284,7 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
rx_sg = &rx_desc->rx_sg;
rx_sg->addr = rx_desc->dma_addr;
rx_sg->length = ISER_RX_PAYLOAD_SIZE;
- rx_sg->lkey = device->mr->lkey;
+ rx_sg->lkey = device->pd->local_dma_lkey;
}
iser_conn->rx_desc_head = 0;
@@ -307,7 +300,7 @@ rx_desc_dma_map_failed:
rx_desc_alloc_fail:
iser_free_login_buf(iser_conn);
alloc_login_buf_fail:
- device->iser_free_rdma_reg_res(ib_conn);
+ device->reg_ops->free_reg_res(ib_conn);
create_rdma_reg_res_failed:
iser_err("failed allocating rx descriptors / data buffers\n");
return -ENOMEM;
@@ -320,8 +313,8 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn)
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
- if (device->iser_free_rdma_reg_res)
- device->iser_free_rdma_reg_res(ib_conn);
+ if (device->reg_ops->free_reg_res)
+ device->reg_ops->free_reg_res(ib_conn);
rx_desc = iser_conn->rx_descs;
for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
@@ -454,7 +447,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
unsigned long buf_offset;
unsigned long data_seg_len;
uint32_t itt;
- int err = 0;
+ int err;
struct ib_sge *tx_dsg;
itt = (__force uint32_t)hdr->itt;
@@ -475,7 +468,9 @@ int iser_send_data_out(struct iscsi_conn *conn,
memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
/* build the tx desc */
- iser_initialize_task_headers(task, tx_desc);
+ err = iser_initialize_task_headers(task, tx_desc);
+ if (err)
+ goto send_data_out_error;
mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
tx_dsg = &tx_desc->tx_sg[1];
@@ -502,7 +497,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
send_data_out_error:
kmem_cache_free(ig.desc_cache, tx_desc);
- iser_err("conn %p failed err %d\n",conn, err);
+ iser_err("conn %p failed err %d\n", conn, err);
return err;
}
@@ -543,7 +538,7 @@ int iser_send_control(struct iscsi_conn *conn,
tx_dsg->addr = iser_conn->login_req_dma;
tx_dsg->length = task->data_count;
- tx_dsg->lkey = device->mr->lkey;
+ tx_dsg->lkey = device->pd->local_dma_lkey;
mdesc->num_sge = 2;
}
@@ -666,7 +661,6 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
- struct iser_device *device = iser_task->iser_conn->ib_conn.device;
int is_rdma_data_aligned = 1;
int is_rdma_prot_aligned = 1;
int prot_count = scsi_prot_sg_count(iser_task->sc);
@@ -703,7 +697,7 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
}
if (iser_task->dir[ISER_DIR_IN]) {
- device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
+ iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
if (is_rdma_data_aligned)
iser_dma_unmap_task_data(iser_task,
&iser_task->data[ISER_DIR_IN],
@@ -715,7 +709,7 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
}
if (iser_task->dir[ISER_DIR_OUT]) {
- device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
+ iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
if (is_rdma_data_aligned)
iser_dma_unmap_task_data(iser_task,
&iser_task->data[ISER_DIR_OUT],
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index f0cdc961eb11..4c46d67d37a1 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -38,6 +38,55 @@
#include <linux/scatterlist.h>
#include "iscsi_iser.h"
+static
+int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
+ struct iser_data_buf *mem,
+ struct iser_reg_resources *rsc,
+ struct iser_mem_reg *mem_reg);
+static
+int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
+ struct iser_data_buf *mem,
+ struct iser_reg_resources *rsc,
+ struct iser_mem_reg *mem_reg);
+
+static struct iser_reg_ops fastreg_ops = {
+ .alloc_reg_res = iser_alloc_fastreg_pool,
+ .free_reg_res = iser_free_fastreg_pool,
+ .reg_mem = iser_fast_reg_mr,
+ .unreg_mem = iser_unreg_mem_fastreg,
+ .reg_desc_get = iser_reg_desc_get_fr,
+ .reg_desc_put = iser_reg_desc_put_fr,
+};
+
+static struct iser_reg_ops fmr_ops = {
+ .alloc_reg_res = iser_alloc_fmr_pool,
+ .free_reg_res = iser_free_fmr_pool,
+ .reg_mem = iser_fast_reg_fmr,
+ .unreg_mem = iser_unreg_mem_fmr,
+ .reg_desc_get = iser_reg_desc_get_fmr,
+ .reg_desc_put = iser_reg_desc_put_fmr,
+};
+
+int iser_assign_reg_ops(struct iser_device *device)
+{
+ struct ib_device_attr *dev_attr = &device->dev_attr;
+
+ /* Assign function handles - based on FMR support */
+ if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
+ device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+ iser_info("FMR supported, using FMR for registration\n");
+ device->reg_ops = &fmr_ops;
+ } else
+ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ iser_info("FastReg supported, using FastReg for registration\n");
+ device->reg_ops = &fastreg_ops;
+ } else {
+ iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
+ return -1;
+ }
+
+ return 0;
+}
static void
iser_free_bounce_sg(struct iser_data_buf *data)
@@ -146,30 +195,47 @@ iser_copy_to_bounce(struct iser_data_buf *data)
iser_copy_bounce(data, true);
}
-struct fast_reg_descriptor *
-iser_reg_desc_get(struct ib_conn *ib_conn)
+struct iser_fr_desc *
+iser_reg_desc_get_fr(struct ib_conn *ib_conn)
{
- struct fast_reg_descriptor *desc;
+ struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
+ struct iser_fr_desc *desc;
unsigned long flags;
- spin_lock_irqsave(&ib_conn->lock, flags);
- desc = list_first_entry(&ib_conn->fastreg.pool,
- struct fast_reg_descriptor, list);
+ spin_lock_irqsave(&fr_pool->lock, flags);
+ desc = list_first_entry(&fr_pool->list,
+ struct iser_fr_desc, list);
list_del(&desc->list);
- spin_unlock_irqrestore(&ib_conn->lock, flags);
+ spin_unlock_irqrestore(&fr_pool->lock, flags);
return desc;
}
void
-iser_reg_desc_put(struct ib_conn *ib_conn,
- struct fast_reg_descriptor *desc)
+iser_reg_desc_put_fr(struct ib_conn *ib_conn,
+ struct iser_fr_desc *desc)
{
+ struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
unsigned long flags;
- spin_lock_irqsave(&ib_conn->lock, flags);
- list_add(&desc->list, &ib_conn->fastreg.pool);
- spin_unlock_irqrestore(&ib_conn->lock, flags);
+ spin_lock_irqsave(&fr_pool->lock, flags);
+ list_add(&desc->list, &fr_pool->list);
+ spin_unlock_irqrestore(&fr_pool->lock, flags);
+}
+
+struct iser_fr_desc *
+iser_reg_desc_get_fmr(struct ib_conn *ib_conn)
+{
+ struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
+
+ return list_first_entry(&fr_pool->list,
+ struct iser_fr_desc, list);
+}
+
+void
+iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
+ struct iser_fr_desc *desc)
+{
}
/**
@@ -297,7 +363,8 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
* consecutive SG elements are actually fragments of the same physcial page.
*/
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
- struct ib_device *ibdev)
+ struct ib_device *ibdev,
+ unsigned sg_tablesize)
{
struct scatterlist *sg, *sgl, *next_sg = NULL;
u64 start_addr, end_addr;
@@ -309,6 +376,14 @@ static int iser_data_buf_aligned_len(struct iser_data_buf *data,
sgl = data->sg;
start_addr = ib_sg_dma_address(ibdev, sgl);
+ if (unlikely(sgl[0].offset &&
+ data->data_len >= sg_tablesize * PAGE_SIZE)) {
+ iser_dbg("can't register length %lx with offset %x "
+ "fall to bounce buffer\n", data->data_len,
+ sgl[0].offset);
+ return 0;
+ }
+
for_each_sg(sgl, sg, data->dma_nents, i) {
if (start_check && !IS_4K_ALIGNED(start_addr))
break;
@@ -330,8 +405,11 @@ static int iser_data_buf_aligned_len(struct iser_data_buf *data,
break;
}
ret_len = (next_sg) ? i : i+1;
- iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
- ret_len, data->dma_nents, data);
+
+ if (unlikely(ret_len != data->dma_nents))
+ iser_warn("rdma alignment violation (%d/%d aligned)\n",
+ ret_len, data->dma_nents);
+
return ret_len;
}
@@ -393,7 +471,7 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
{
struct scatterlist *sg = mem->sg;
- reg->sge.lkey = device->mr->lkey;
+ reg->sge.lkey = device->pd->local_dma_lkey;
reg->rkey = device->mr->rkey;
reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
@@ -407,15 +485,12 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
- enum iser_data_dir cmd_dir,
- int aligned_len)
+ enum iser_data_dir cmd_dir)
{
struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
struct iser_device *device = iser_task->iser_conn->ib_conn.device;
iscsi_conn->fmr_unalign_cnt++;
- iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
- aligned_len, mem->size);
if (iser_debug_level > 0)
iser_data_buf_dump(mem, device->ib_device);
@@ -439,13 +514,15 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
* returns: 0 on success, errno code on failure
*/
static
-int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
+int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
- struct iser_page_vec *page_vec,
- struct iser_mem_reg *mem_reg)
+ struct iser_reg_resources *rsc,
+ struct iser_mem_reg *reg)
{
struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
+ struct iser_page_vec *page_vec = rsc->page_vec;
+ struct ib_fmr_pool *fmr_pool = rsc->fmr_pool;
struct ib_pool_fmr *fmr;
int ret, plen;
@@ -461,7 +538,7 @@ int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
return -EINVAL;
}
- fmr = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
+ fmr = ib_fmr_pool_map_phys(fmr_pool,
page_vec->pages,
page_vec->length,
page_vec->pages[0]);
@@ -471,11 +548,15 @@ int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
return ret;
}
- mem_reg->sge.lkey = fmr->fmr->lkey;
- mem_reg->rkey = fmr->fmr->rkey;
- mem_reg->sge.addr = page_vec->pages[0] + page_vec->offset;
- mem_reg->sge.length = page_vec->data_size;
- mem_reg->mem_h = fmr;
+ reg->sge.lkey = fmr->fmr->lkey;
+ reg->rkey = fmr->fmr->rkey;
+ reg->sge.addr = page_vec->pages[0] + page_vec->offset;
+ reg->sge.length = page_vec->data_size;
+ reg->mem_h = fmr;
+
+ iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
+ " length=0x%x\n", reg->sge.lkey, reg->rkey,
+ reg->sge.addr, reg->sge.length);
return 0;
}
@@ -505,71 +586,17 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
+ struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
if (!reg->mem_h)
return;
- iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
- reg->mem_h);
+ device->reg_ops->reg_desc_put(&iser_task->iser_conn->ib_conn,
+ reg->mem_h);
reg->mem_h = NULL;
}
-/**
- * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
- * using FMR (if possible) obtaining rkey and va
- *
- * returns 0 on success, errno code on failure
- */
-int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
-{
- struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
- struct iser_device *device = ib_conn->device;
- struct ib_device *ibdev = device->ib_device;
- struct iser_data_buf *mem = &iser_task->data[cmd_dir];
- struct iser_mem_reg *mem_reg;
- int aligned_len;
- int err;
- int i;
-
- mem_reg = &iser_task->rdma_reg[cmd_dir];
-
- aligned_len = iser_data_buf_aligned_len(mem, ibdev);
- if (aligned_len != mem->dma_nents) {
- err = fall_to_bounce_buf(iser_task, mem,
- cmd_dir, aligned_len);
- if (err) {
- iser_err("failed to allocate bounce buffer\n");
- return err;
- }
- }
-
- /* if there a single dma entry, FMR is not needed */
- if (mem->dma_nents == 1) {
- return iser_reg_dma(device, mem, mem_reg);
- } else { /* use FMR for multiple dma entries */
- err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec,
- mem_reg);
- if (err && err != -EAGAIN) {
- iser_data_buf_dump(mem, ibdev);
- iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
- mem->dma_nents,
- ntoh24(iser_task->desc.iscsi_header.dlength));
- iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
- ib_conn->fmr.page_vec->data_size,
- ib_conn->fmr.page_vec->length,
- ib_conn->fmr.page_vec->offset);
- for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
- iser_err("page_vec[%d] = 0x%llx\n", i,
- (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
- }
- if (err)
- return err;
- }
- return 0;
-}
-
static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
struct ib_sig_domain *domain)
@@ -637,10 +664,11 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
u32 rkey;
- memset(inv_wr, 0, sizeof(*inv_wr));
inv_wr->opcode = IB_WR_LOCAL_INV;
inv_wr->wr_id = ISER_FASTREG_LI_WRID;
inv_wr->ex.invalidate_rkey = mr->rkey;
+ inv_wr->send_flags = 0;
+ inv_wr->num_sge = 0;
rkey = ib_inc_rkey(mr->rkey);
ib_update_fast_reg_key(mr, rkey);
@@ -648,61 +676,51 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
- struct fast_reg_descriptor *desc,
+ struct iser_pi_context *pi_ctx,
struct iser_mem_reg *data_reg,
struct iser_mem_reg *prot_reg,
struct iser_mem_reg *sig_reg)
{
- struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
- struct iser_pi_context *pi_ctx = desc->pi_ctx;
- struct ib_send_wr sig_wr, inv_wr;
- struct ib_send_wr *bad_wr, *wr = NULL;
- struct ib_sig_attrs sig_attrs;
+ struct iser_tx_desc *tx_desc = &iser_task->desc;
+ struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
+ struct ib_send_wr *wr;
int ret;
- memset(&sig_attrs, 0, sizeof(sig_attrs));
- ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
+ memset(sig_attrs, 0, sizeof(*sig_attrs));
+ ret = iser_set_sig_attrs(iser_task->sc, sig_attrs);
if (ret)
goto err;
- iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
+ iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
- if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
- iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
- wr = &inv_wr;
+ if (!pi_ctx->sig_mr_valid) {
+ wr = iser_tx_next_wr(tx_desc);
+ iser_inv_rkey(wr, pi_ctx->sig_mr);
}
- memset(&sig_wr, 0, sizeof(sig_wr));
- sig_wr.opcode = IB_WR_REG_SIG_MR;
- sig_wr.wr_id = ISER_FASTREG_LI_WRID;
- sig_wr.sg_list = &data_reg->sge;
- sig_wr.num_sge = 1;
- sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
- sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
+ wr = iser_tx_next_wr(tx_desc);
+ wr->opcode = IB_WR_REG_SIG_MR;
+ wr->wr_id = ISER_FASTREG_LI_WRID;
+ wr->sg_list = &data_reg->sge;
+ wr->num_sge = 1;
+ wr->send_flags = 0;
+ wr->wr.sig_handover.sig_attrs = sig_attrs;
+ wr->wr.sig_handover.sig_mr = pi_ctx->sig_mr;
if (scsi_prot_sg_count(iser_task->sc))
- sig_wr.wr.sig_handover.prot = &prot_reg->sge;
- sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE;
-
- if (!wr)
- wr = &sig_wr;
+ wr->wr.sig_handover.prot = &prot_reg->sge;
else
- wr->next = &sig_wr;
-
- ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
- if (ret) {
- iser_err("reg_sig_mr failed, ret:%d\n", ret);
- goto err;
- }
- desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
+ wr->wr.sig_handover.prot = NULL;
+ wr->wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE;
+ pi_ctx->sig_mr_valid = 0;
sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
sig_reg->rkey = pi_ctx->sig_mr->rkey;
sig_reg->sge.addr = 0;
sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
- iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
+ iser_dbg("sig reg: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
sig_reg->sge.length);
err:
@@ -711,29 +729,16 @@ err:
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
- struct fast_reg_descriptor *desc,
- enum iser_reg_indicator ind,
+ struct iser_reg_resources *rsc,
struct iser_mem_reg *reg)
{
struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
- struct ib_mr *mr;
- struct ib_fast_reg_page_list *frpl;
- struct ib_send_wr fastreg_wr, inv_wr;
- struct ib_send_wr *bad_wr, *wr = NULL;
- int ret, offset, size, plen;
-
- /* if there a single dma entry, dma mr suffices */
- if (mem->dma_nents == 1)
- return iser_reg_dma(device, mem, reg);
-
- if (ind == ISER_DATA_KEY_VALID) {
- mr = desc->data_mr;
- frpl = desc->data_frpl;
- } else {
- mr = desc->pi_ctx->prot_mr;
- frpl = desc->pi_ctx->prot_frpl;
- }
+ struct ib_mr *mr = rsc->mr;
+ struct ib_fast_reg_page_list *frpl = rsc->frpl;
+ struct iser_tx_desc *tx_desc = &iser_task->desc;
+ struct ib_send_wr *wr;
+ int offset, size, plen;
plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
&offset, &size);
@@ -742,118 +747,157 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
return -EINVAL;
}
- if (!(desc->reg_indicators & ind)) {
- iser_inv_rkey(&inv_wr, mr);
- wr = &inv_wr;
+ if (!rsc->mr_valid) {
+ wr = iser_tx_next_wr(tx_desc);
+ iser_inv_rkey(wr, mr);
}
- /* Prepare FASTREG WR */
- memset(&fastreg_wr, 0, sizeof(fastreg_wr));
- fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
- fastreg_wr.opcode = IB_WR_FAST_REG_MR;
- fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
- fastreg_wr.wr.fast_reg.page_list = frpl;
- fastreg_wr.wr.fast_reg.page_list_len = plen;
- fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
- fastreg_wr.wr.fast_reg.length = size;
- fastreg_wr.wr.fast_reg.rkey = mr->rkey;
- fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ);
-
- if (!wr)
- wr = &fastreg_wr;
- else
- wr->next = &fastreg_wr;
-
- ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
- if (ret) {
- iser_err("fast registration failed, ret:%d\n", ret);
- return ret;
- }
- desc->reg_indicators &= ~ind;
+ wr = iser_tx_next_wr(tx_desc);
+ wr->opcode = IB_WR_FAST_REG_MR;
+ wr->wr_id = ISER_FASTREG_LI_WRID;
+ wr->send_flags = 0;
+ wr->wr.fast_reg.iova_start = frpl->page_list[0] + offset;
+ wr->wr.fast_reg.page_list = frpl;
+ wr->wr.fast_reg.page_list_len = plen;
+ wr->wr.fast_reg.page_shift = SHIFT_4K;
+ wr->wr.fast_reg.length = size;
+ wr->wr.fast_reg.rkey = mr->rkey;
+ wr->wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+ rsc->mr_valid = 0;
reg->sge.lkey = mr->lkey;
reg->rkey = mr->rkey;
reg->sge.addr = frpl->page_list[0] + offset;
reg->sge.length = size;
- return ret;
+ iser_dbg("fast reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
+ " length=0x%x\n", reg->sge.lkey, reg->rkey,
+ reg->sge.addr, reg->sge.length);
+
+ return 0;
}
-/**
- * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
- * using Fast Registration WR (if possible) obtaining rkey and va
- *
- * returns 0 on success, errno code on failure
- */
-int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
+static int
+iser_handle_unaligned_buf(struct iscsi_iser_task *task,
+ struct iser_data_buf *mem,
+ enum iser_data_dir dir)
{
- struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
- struct iser_device *device = ib_conn->device;
- struct ib_device *ibdev = device->ib_device;
- struct iser_data_buf *mem = &iser_task->data[cmd_dir];
- struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
- struct fast_reg_descriptor *desc = NULL;
+ struct iser_conn *iser_conn = task->iser_conn;
+ struct iser_device *device = iser_conn->ib_conn.device;
int err, aligned_len;
- aligned_len = iser_data_buf_aligned_len(mem, ibdev);
+ aligned_len = iser_data_buf_aligned_len(mem, device->ib_device,
+ iser_conn->scsi_sg_tablesize);
if (aligned_len != mem->dma_nents) {
- err = fall_to_bounce_buf(iser_task, mem,
- cmd_dir, aligned_len);
- if (err) {
- iser_err("failed to allocate bounce buffer\n");
+ err = fall_to_bounce_buf(task, mem, dir);
+ if (err)
return err;
- }
}
- if (mem->dma_nents != 1 ||
- scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
- desc = iser_reg_desc_get(ib_conn);
- mem_reg->mem_h = desc;
+ return 0;
+}
+
+static int
+iser_reg_prot_sg(struct iscsi_iser_task *task,
+ struct iser_data_buf *mem,
+ struct iser_fr_desc *desc,
+ bool use_dma_key,
+ struct iser_mem_reg *reg)
+{
+ struct iser_device *device = task->iser_conn->ib_conn.device;
+
+ if (use_dma_key)
+ return iser_reg_dma(device, mem, reg);
+
+ return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg);
+}
+
+static int
+iser_reg_data_sg(struct iscsi_iser_task *task,
+ struct iser_data_buf *mem,
+ struct iser_fr_desc *desc,
+ bool use_dma_key,
+ struct iser_mem_reg *reg)
+{
+ struct iser_device *device = task->iser_conn->ib_conn.device;
+
+ if (use_dma_key)
+ return iser_reg_dma(device, mem, reg);
+
+ return device->reg_ops->reg_mem(task, mem, &desc->rsc, reg);
+}
+
+int iser_reg_rdma_mem(struct iscsi_iser_task *task,
+ enum iser_data_dir dir)
+{
+ struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
+ struct iser_data_buf *mem = &task->data[dir];
+ struct iser_mem_reg *reg = &task->rdma_reg[dir];
+ struct iser_mem_reg *data_reg;
+ struct iser_fr_desc *desc = NULL;
+ bool use_dma_key;
+ int err;
+
+ err = iser_handle_unaligned_buf(task, mem, dir);
+ if (unlikely(err))
+ return err;
+
+ use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
+ scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
+
+ if (!use_dma_key) {
+ desc = device->reg_ops->reg_desc_get(ib_conn);
+ reg->mem_h = desc;
}
- err = iser_fast_reg_mr(iser_task, mem, desc,
- ISER_DATA_KEY_VALID, mem_reg);
- if (err)
+ if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL)
+ data_reg = reg;
+ else
+ data_reg = &task->desc.data_reg;
+
+ err = iser_reg_data_sg(task, mem, desc, use_dma_key, data_reg);
+ if (unlikely(err))
goto err_reg;
- if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
- struct iser_mem_reg prot_reg;
-
- memset(&prot_reg, 0, sizeof(prot_reg));
- if (scsi_prot_sg_count(iser_task->sc)) {
- mem = &iser_task->prot[cmd_dir];
- aligned_len = iser_data_buf_aligned_len(mem, ibdev);
- if (aligned_len != mem->dma_nents) {
- err = fall_to_bounce_buf(iser_task, mem,
- cmd_dir, aligned_len);
- if (err) {
- iser_err("failed to allocate bounce buffer\n");
- return err;
- }
- }
+ if (scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) {
+ struct iser_mem_reg *prot_reg = &task->desc.prot_reg;
- err = iser_fast_reg_mr(iser_task, mem, desc,
- ISER_PROT_KEY_VALID, &prot_reg);
- if (err)
+ if (scsi_prot_sg_count(task->sc)) {
+ mem = &task->prot[dir];
+ err = iser_handle_unaligned_buf(task, mem, dir);
+ if (unlikely(err))
goto err_reg;
- }
- err = iser_reg_sig_mr(iser_task, desc, mem_reg,
- &prot_reg, mem_reg);
- if (err) {
- iser_err("Failed to register signature mr\n");
- return err;
+ err = iser_reg_prot_sg(task, mem, desc,
+ use_dma_key, prot_reg);
+ if (unlikely(err))
+ goto err_reg;
}
- desc->reg_indicators |= ISER_FASTREG_PROTECTED;
+
+ err = iser_reg_sig_mr(task, desc->pi_ctx, data_reg,
+ prot_reg, reg);
+ if (unlikely(err))
+ goto err_reg;
+
+ desc->pi_ctx->sig_protected = 1;
}
return 0;
+
err_reg:
if (desc)
- iser_reg_desc_put(ib_conn, desc);
+ device->reg_ops->reg_desc_put(ib_conn, desc);
return err;
}
+
+void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
+ enum iser_data_dir dir)
+{
+ struct iser_device *device = task->iser_conn->ib_conn.device;
+
+ device->reg_ops->unreg_mem(task, dir);
+}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 5c9f565ea0e8..85132d867bc8 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -87,25 +87,9 @@ static int iser_create_device_ib_res(struct iser_device *device)
return ret;
}
- /* Assign function handles - based on FMR support */
- if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
- device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
- iser_info("FMR supported, using FMR for registration\n");
- device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
- device->iser_free_rdma_reg_res = iser_free_fmr_pool;
- device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
- device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
- } else
- if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
- iser_info("FastReg supported, using FastReg for registration\n");
- device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool;
- device->iser_free_rdma_reg_res = iser_free_fastreg_pool;
- device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg;
- device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg;
- } else {
- iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
- return -1;
- }
+ ret = iser_assign_reg_ops(device);
+ if (ret)
+ return ret;
device->comps_used = min_t(int, num_online_cpus(),
device->ib_device->num_comp_vectors);
@@ -149,11 +133,15 @@ static int iser_create_device_ib_res(struct iser_device *device)
(unsigned long)comp);
}
- device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ);
- if (IS_ERR(device->mr))
- goto dma_mr_err;
+ if (!iser_always_reg) {
+ int access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ;
+
+ device->mr = ib_get_dma_mr(device->pd, access);
+ if (IS_ERR(device->mr))
+ goto dma_mr_err;
+ }
INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
iser_event_handler);
@@ -163,7 +151,8 @@ static int iser_create_device_ib_res(struct iser_device *device)
return 0;
handler_err:
- ib_dereg_mr(device->mr);
+ if (device->mr)
+ ib_dereg_mr(device->mr);
dma_mr_err:
for (i = 0; i < device->comps_used; i++)
tasklet_kill(&device->comps[i].tasklet);
@@ -189,7 +178,6 @@ comps_err:
static void iser_free_device_ib_res(struct iser_device *device)
{
int i;
- BUG_ON(device->mr == NULL);
for (i = 0; i < device->comps_used; i++) {
struct iser_comp *comp = &device->comps[i];
@@ -200,8 +188,9 @@ static void iser_free_device_ib_res(struct iser_device *device)
}
(void)ib_unregister_event_handler(&device->event_handler);
- (void)ib_dereg_mr(device->mr);
- (void)ib_dealloc_pd(device->pd);
+ if (device->mr)
+ (void)ib_dereg_mr(device->mr);
+ ib_dealloc_pd(device->pd);
kfree(device->comps);
device->comps = NULL;
@@ -211,28 +200,40 @@ static void iser_free_device_ib_res(struct iser_device *device)
}
/**
- * iser_create_fmr_pool - Creates FMR pool and page_vector
+ * iser_alloc_fmr_pool - Creates FMR pool and page_vector
*
* returns 0 on success, or errno code on failure
*/
-int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max)
+int iser_alloc_fmr_pool(struct ib_conn *ib_conn,
+ unsigned cmds_max,
+ unsigned int size)
{
struct iser_device *device = ib_conn->device;
+ struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
+ struct iser_page_vec *page_vec;
+ struct iser_fr_desc *desc;
+ struct ib_fmr_pool *fmr_pool;
struct ib_fmr_pool_param params;
- int ret = -ENOMEM;
+ int ret;
- ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) +
- (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
- GFP_KERNEL);
- if (!ib_conn->fmr.page_vec)
- return ret;
+ INIT_LIST_HEAD(&fr_pool->list);
+ spin_lock_init(&fr_pool->lock);
- ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1);
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+
+ page_vec = kmalloc(sizeof(*page_vec) + (sizeof(u64) * size),
+ GFP_KERNEL);
+ if (!page_vec) {
+ ret = -ENOMEM;
+ goto err_frpl;
+ }
+
+ page_vec->pages = (u64 *)(page_vec + 1);
params.page_shift = SHIFT_4K;
- /* when the first/last SG element are not start/end *
- * page aligned, the map whould be of N+1 pages */
- params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
+ params.max_pages_per_fmr = size;
/* make the pool size twice the max number of SCSI commands *
* the ML is expected to queue, watermark for unmap at 50% */
params.pool_size = cmds_max * 2;
@@ -243,23 +244,25 @@ int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max)
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ);
- ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params);
- if (!IS_ERR(ib_conn->fmr.pool))
- return 0;
-
- /* no FMR => no need for page_vec */
- kfree(ib_conn->fmr.page_vec);
- ib_conn->fmr.page_vec = NULL;
-
- ret = PTR_ERR(ib_conn->fmr.pool);
- ib_conn->fmr.pool = NULL;
- if (ret != -ENOSYS) {
+ fmr_pool = ib_create_fmr_pool(device->pd, &params);
+ if (IS_ERR(fmr_pool)) {
+ ret = PTR_ERR(fmr_pool);
iser_err("FMR allocation failed, err %d\n", ret);
- return ret;
- } else {
- iser_warn("FMRs are not supported, using unaligned mode\n");
- return 0;
+ goto err_fmr;
}
+
+ desc->rsc.page_vec = page_vec;
+ desc->rsc.fmr_pool = fmr_pool;
+ list_add(&desc->list, &fr_pool->list);
+
+ return 0;
+
+err_fmr:
+ kfree(page_vec);
+err_frpl:
+ kfree(desc);
+
+ return ret;
}
/**
@@ -267,26 +270,68 @@ int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max)
*/
void iser_free_fmr_pool(struct ib_conn *ib_conn)
{
+ struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
+ struct iser_fr_desc *desc;
+
+ desc = list_first_entry(&fr_pool->list,
+ struct iser_fr_desc, list);
+ list_del(&desc->list);
+
iser_info("freeing conn %p fmr pool %p\n",
- ib_conn, ib_conn->fmr.pool);
+ ib_conn, desc->rsc.fmr_pool);
+
+ ib_destroy_fmr_pool(desc->rsc.fmr_pool);
+ kfree(desc->rsc.page_vec);
+ kfree(desc);
+}
+
+static int
+iser_alloc_reg_res(struct ib_device *ib_device,
+ struct ib_pd *pd,
+ struct iser_reg_resources *res,
+ unsigned int size)
+{
+ int ret;
+
+ res->frpl = ib_alloc_fast_reg_page_list(ib_device, size);
+ if (IS_ERR(res->frpl)) {
+ ret = PTR_ERR(res->frpl);
+ iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
+ ret);
+ return PTR_ERR(res->frpl);
+ }
+
+ res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size);
+ if (IS_ERR(res->mr)) {
+ ret = PTR_ERR(res->mr);
+ iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
+ goto fast_reg_mr_failure;
+ }
+ res->mr_valid = 1;
- if (ib_conn->fmr.pool != NULL)
- ib_destroy_fmr_pool(ib_conn->fmr.pool);
+ return 0;
- ib_conn->fmr.pool = NULL;
+fast_reg_mr_failure:
+ ib_free_fast_reg_page_list(res->frpl);
- kfree(ib_conn->fmr.page_vec);
- ib_conn->fmr.page_vec = NULL;
+ return ret;
+}
+
+static void
+iser_free_reg_res(struct iser_reg_resources *rsc)
+{
+ ib_dereg_mr(rsc->mr);
+ ib_free_fast_reg_page_list(rsc->frpl);
}
static int
-iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd,
- struct fast_reg_descriptor *desc)
+iser_alloc_pi_ctx(struct ib_device *ib_device,
+ struct ib_pd *pd,
+ struct iser_fr_desc *desc,
+ unsigned int size)
{
struct iser_pi_context *pi_ctx = NULL;
- struct ib_mr_init_attr mr_init_attr = {.max_reg_descriptors = 2,
- .flags = IB_MR_SIGNATURE_EN};
- int ret = 0;
+ int ret;
desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
if (!desc->pi_ctx)
@@ -294,36 +339,25 @@ iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd,
pi_ctx = desc->pi_ctx;
- pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
- ISCSI_ISER_SG_TABLESIZE);
- if (IS_ERR(pi_ctx->prot_frpl)) {
- ret = PTR_ERR(pi_ctx->prot_frpl);
- goto prot_frpl_failure;
- }
-
- pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
- ISCSI_ISER_SG_TABLESIZE + 1);
- if (IS_ERR(pi_ctx->prot_mr)) {
- ret = PTR_ERR(pi_ctx->prot_mr);
- goto prot_mr_failure;
+ ret = iser_alloc_reg_res(ib_device, pd, &pi_ctx->rsc, size);
+ if (ret) {
+ iser_err("failed to allocate reg_resources\n");
+ goto alloc_reg_res_err;
}
- desc->reg_indicators |= ISER_PROT_KEY_VALID;
- pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
+ pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2);
if (IS_ERR(pi_ctx->sig_mr)) {
ret = PTR_ERR(pi_ctx->sig_mr);
goto sig_mr_failure;
}
- desc->reg_indicators |= ISER_SIG_KEY_VALID;
- desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
+ pi_ctx->sig_mr_valid = 1;
+ desc->pi_ctx->sig_protected = 0;
return 0;
sig_mr_failure:
- ib_dereg_mr(desc->pi_ctx->prot_mr);
-prot_mr_failure:
- ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
-prot_frpl_failure:
+ iser_free_reg_res(&pi_ctx->rsc);
+alloc_reg_res_err:
kfree(desc->pi_ctx);
return ret;
@@ -332,82 +366,71 @@ prot_frpl_failure:
static void
iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
{
- ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
- ib_dereg_mr(pi_ctx->prot_mr);
- ib_destroy_mr(pi_ctx->sig_mr);
+ iser_free_reg_res(&pi_ctx->rsc);
+ ib_dereg_mr(pi_ctx->sig_mr);
kfree(pi_ctx);
}
-static int
-iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
- bool pi_enable, struct fast_reg_descriptor *desc)
+static struct iser_fr_desc *
+iser_create_fastreg_desc(struct ib_device *ib_device,
+ struct ib_pd *pd,
+ bool pi_enable,
+ unsigned int size)
{
+ struct iser_fr_desc *desc;
int ret;
- desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
- ISCSI_ISER_SG_TABLESIZE + 1);
- if (IS_ERR(desc->data_frpl)) {
- ret = PTR_ERR(desc->data_frpl);
- iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
- ret);
- return PTR_ERR(desc->data_frpl);
- }
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ return ERR_PTR(-ENOMEM);
- desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);
- if (IS_ERR(desc->data_mr)) {
- ret = PTR_ERR(desc->data_mr);
- iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
- goto fast_reg_mr_failure;
- }
- desc->reg_indicators |= ISER_DATA_KEY_VALID;
+ ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc, size);
+ if (ret)
+ goto reg_res_alloc_failure;
if (pi_enable) {
- ret = iser_alloc_pi_ctx(ib_device, pd, desc);
+ ret = iser_alloc_pi_ctx(ib_device, pd, desc, size);
if (ret)
goto pi_ctx_alloc_failure;
}
- return 0;
+ return desc;
+
pi_ctx_alloc_failure:
- ib_dereg_mr(desc->data_mr);
-fast_reg_mr_failure:
- ib_free_fast_reg_page_list(desc->data_frpl);
+ iser_free_reg_res(&desc->rsc);
+reg_res_alloc_failure:
+ kfree(desc);
- return ret;
+ return ERR_PTR(ret);
}
/**
- * iser_create_fastreg_pool - Creates pool of fast_reg descriptors
+ * iser_alloc_fastreg_pool - Creates pool of fast_reg descriptors
* for fast registration work requests.
* returns 0 on success, or errno code on failure
*/
-int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
+int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
+ unsigned cmds_max,
+ unsigned int size)
{
struct iser_device *device = ib_conn->device;
- struct fast_reg_descriptor *desc;
+ struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
+ struct iser_fr_desc *desc;
int i, ret;
- INIT_LIST_HEAD(&ib_conn->fastreg.pool);
- ib_conn->fastreg.pool_size = 0;
+ INIT_LIST_HEAD(&fr_pool->list);
+ spin_lock_init(&fr_pool->lock);
+ fr_pool->size = 0;
for (i = 0; i < cmds_max; i++) {
- desc = kzalloc(sizeof(*desc), GFP_KERNEL);
- if (!desc) {
- iser_err("Failed to allocate a new fast_reg descriptor\n");
- ret = -ENOMEM;
- goto err;
- }
-
- ret = iser_create_fastreg_desc(device->ib_device, device->pd,
- ib_conn->pi_support, desc);
- if (ret) {
- iser_err("Failed to create fastreg descriptor err=%d\n",
- ret);
- kfree(desc);
+ desc = iser_create_fastreg_desc(device->ib_device, device->pd,
+ ib_conn->pi_support, size);
+ if (IS_ERR(desc)) {
+ ret = PTR_ERR(desc);
goto err;
}
- list_add_tail(&desc->list, &ib_conn->fastreg.pool);
- ib_conn->fastreg.pool_size++;
+ list_add_tail(&desc->list, &fr_pool->list);
+ fr_pool->size++;
}
return 0;
@@ -422,27 +445,27 @@ err:
*/
void iser_free_fastreg_pool(struct ib_conn *ib_conn)
{
- struct fast_reg_descriptor *desc, *tmp;
+ struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
+ struct iser_fr_desc *desc, *tmp;
int i = 0;
- if (list_empty(&ib_conn->fastreg.pool))
+ if (list_empty(&fr_pool->list))
return;
iser_info("freeing conn %p fr pool\n", ib_conn);
- list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
+ list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) {
list_del(&desc->list);
- ib_free_fast_reg_page_list(desc->data_frpl);
- ib_dereg_mr(desc->data_mr);
+ iser_free_reg_res(&desc->rsc);
if (desc->pi_ctx)
iser_free_pi_ctx(desc->pi_ctx);
kfree(desc);
++i;
}
- if (i < ib_conn->fastreg.pool_size)
+ if (i < fr_pool->size)
iser_warn("pool still has %d regions registered\n",
- ib_conn->fastreg.pool_size - i);
+ fr_pool->size - i);
}
/**
@@ -738,6 +761,31 @@ static void iser_connect_error(struct rdma_cm_id *cma_id)
iser_conn->state = ISER_CONN_TERMINATING;
}
+static void
+iser_calc_scsi_params(struct iser_conn *iser_conn,
+ unsigned int max_sectors)
+{
+ struct iser_device *device = iser_conn->ib_conn.device;
+ unsigned short sg_tablesize, sup_sg_tablesize;
+
+ sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
+ sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
+ device->dev_attr.max_fast_reg_page_list_len);
+
+ if (sg_tablesize > sup_sg_tablesize) {
+ sg_tablesize = sup_sg_tablesize;
+ iser_conn->scsi_max_sectors = sg_tablesize * SIZE_4K / 512;
+ } else {
+ iser_conn->scsi_max_sectors = max_sectors;
+ }
+
+ iser_conn->scsi_sg_tablesize = sg_tablesize;
+
+ iser_dbg("iser_conn %p, sg_tablesize %u, max_sectors %u\n",
+ iser_conn, iser_conn->scsi_sg_tablesize,
+ iser_conn->scsi_max_sectors);
+}
+
/**
* Called with state mutex held
**/
@@ -776,6 +824,8 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
}
}
+ iser_calc_scsi_params(iser_conn, iser_max_sectors);
+
ret = rdma_resolve_route(cma_id, 1000);
if (ret) {
iser_err("resolve route failed: %d\n", ret);
@@ -938,7 +988,6 @@ void iser_conn_init(struct iser_conn *iser_conn)
init_completion(&iser_conn->ib_completion);
init_completion(&iser_conn->up_completion);
INIT_LIST_HEAD(&iser_conn->conn_list);
- spin_lock_init(&iser_conn->ib_conn.lock);
mutex_init(&iser_conn->state_mutex);
}
@@ -1017,7 +1066,7 @@ int iser_post_recvl(struct iser_conn *iser_conn)
sge.addr = iser_conn->login_resp_dma;
sge.length = ISER_RX_LOGIN_SIZE;
- sge.lkey = ib_conn->device->mr->lkey;
+ sge.lkey = ib_conn->device->pd->local_dma_lkey;
rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
rx_wr.sg_list = &sge;
@@ -1072,23 +1121,24 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
bool signal)
{
- int ib_ret;
- struct ib_send_wr send_wr, *send_wr_failed;
+ struct ib_send_wr *bad_wr, *wr = iser_tx_next_wr(tx_desc);
+ int ib_ret;
ib_dma_sync_single_for_device(ib_conn->device->ib_device,
tx_desc->dma_addr, ISER_HEADERS_LEN,
DMA_TO_DEVICE);
- send_wr.next = NULL;
- send_wr.wr_id = (uintptr_t)tx_desc;
- send_wr.sg_list = tx_desc->tx_sg;
- send_wr.num_sge = tx_desc->num_sge;
- send_wr.opcode = IB_WR_SEND;
- send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;
+ wr->next = NULL;
+ wr->wr_id = (uintptr_t)tx_desc;
+ wr->sg_list = tx_desc->tx_sg;
+ wr->num_sge = tx_desc->num_sge;
+ wr->opcode = IB_WR_SEND;
+ wr->send_flags = signal ? IB_SEND_SIGNALED : 0;
- ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
+ ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0], &bad_wr);
if (ib_ret)
- iser_err("ib_post_send failed, ret:%d\n", ib_ret);
+ iser_err("ib_post_send failed, ret:%d opcode:%d\n",
+ ib_ret, bad_wr->opcode);
return ib_ret;
}
@@ -1240,13 +1290,13 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector)
{
struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
- struct fast_reg_descriptor *desc = reg->mem_h;
+ struct iser_fr_desc *desc = reg->mem_h;
unsigned long sector_size = iser_task->sc->device->sector_size;
struct ib_mr_status mr_status;
int ret;
- if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) {
- desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
+ if (desc && desc->pi_ctx->sig_protected) {
+ desc->pi_ctx->sig_protected = 0;
ret = ib_check_mr_status(desc->pi_ctx->sig_mr,
IB_MR_CHECK_SIG_STATUS, &mr_status);
if (ret) {
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 771700963127..aa59037d7504 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -235,11 +235,9 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
rx_sg = &rx_desc->rx_sg;
rx_sg->addr = rx_desc->dma_addr;
rx_sg->length = ISER_RX_PAYLOAD_SIZE;
- rx_sg->lkey = device->mr->lkey;
+ rx_sg->lkey = device->pd->local_dma_lkey;
}
- isert_conn->rx_desc_head = 0;
-
return 0;
dma_map_fail:
@@ -385,22 +383,12 @@ isert_create_device_ib_res(struct isert_device *device)
goto out_cq;
}
- device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(device->mr)) {
- ret = PTR_ERR(device->mr);
- isert_err("failed to create dma mr, device %p, ret=%d\n",
- device, ret);
- goto out_mr;
- }
-
/* Check signature cap */
device->pi_capable = dev_attr->device_cap_flags &
IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
return 0;
-out_mr:
- ib_dealloc_pd(device->pd);
out_cq:
isert_free_comps(device);
return ret;
@@ -411,7 +399,6 @@ isert_free_device_ib_res(struct isert_device *device)
{
isert_info("device %p\n", device);
- ib_dereg_mr(device->mr);
ib_dealloc_pd(device->pd);
isert_free_comps(device);
}
@@ -491,7 +478,7 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
if (fr_desc->pi_ctx) {
ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl);
ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
- ib_destroy_mr(fr_desc->pi_ctx->sig_mr);
+ ib_dereg_mr(fr_desc->pi_ctx->sig_mr);
kfree(fr_desc->pi_ctx);
}
kfree(fr_desc);
@@ -508,7 +495,6 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
struct ib_device *device,
struct ib_pd *pd)
{
- struct ib_mr_init_attr mr_init_attr;
struct pi_context *pi_ctx;
int ret;
@@ -527,7 +513,8 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
goto err_pi_ctx;
}
- pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
+ pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+ ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(pi_ctx->prot_mr)) {
isert_err("Failed to allocate prot frmr err=%ld\n",
PTR_ERR(pi_ctx->prot_mr));
@@ -536,10 +523,7 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
}
desc->ind |= ISERT_PROT_KEY_VALID;
- memset(&mr_init_attr, 0, sizeof(mr_init_attr));
- mr_init_attr.max_reg_descriptors = 2;
- mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
- pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
+ pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2);
if (IS_ERR(pi_ctx->sig_mr)) {
isert_err("Failed to allocate signature enabled mr err=%ld\n",
PTR_ERR(pi_ctx->sig_mr));
@@ -577,7 +561,8 @@ isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
return PTR_ERR(fr_desc->data_frpl);
}
- fr_desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
+ fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+ ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(fr_desc->data_mr)) {
isert_err("Failed to allocate data frmr err=%ld\n",
PTR_ERR(fr_desc->data_mr));
@@ -647,7 +632,7 @@ static void
isert_init_conn(struct isert_conn *isert_conn)
{
isert_conn->state = ISER_CONN_INIT;
- INIT_LIST_HEAD(&isert_conn->accept_node);
+ INIT_LIST_HEAD(&isert_conn->node);
init_completion(&isert_conn->login_comp);
init_completion(&isert_conn->login_req_comp);
init_completion(&isert_conn->wait);
@@ -780,12 +765,10 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
if (ret)
goto out_conn_dev;
- mutex_lock(&isert_np->np_accept_mutex);
- list_add_tail(&isert_conn->accept_node, &isert_np->np_accept_list);
- mutex_unlock(&isert_np->np_accept_mutex);
+ mutex_lock(&isert_np->mutex);
+ list_add_tail(&isert_conn->node, &isert_np->accepted);
+ mutex_unlock(&isert_np->mutex);
- isert_info("np %p: Allow accept_np to continue\n", np);
- up(&isert_np->np_sem);
return 0;
out_conn_dev:
@@ -833,18 +816,21 @@ static void
isert_connected_handler(struct rdma_cm_id *cma_id)
{
struct isert_conn *isert_conn = cma_id->qp->qp_context;
+ struct isert_np *isert_np = cma_id->context;
isert_info("conn %p\n", isert_conn);
- if (!kref_get_unless_zero(&isert_conn->kref)) {
- isert_warn("conn %p connect_release is running\n", isert_conn);
- return;
- }
-
mutex_lock(&isert_conn->mutex);
- if (isert_conn->state != ISER_CONN_FULL_FEATURE)
- isert_conn->state = ISER_CONN_UP;
+ isert_conn->state = ISER_CONN_UP;
+ kref_get(&isert_conn->kref);
mutex_unlock(&isert_conn->mutex);
+
+ mutex_lock(&isert_np->mutex);
+ list_move_tail(&isert_conn->node, &isert_np->pending);
+ mutex_unlock(&isert_np->mutex);
+
+ isert_info("np %p: Allow accept_np to continue\n", isert_np);
+ up(&isert_np->sem);
}
static void
@@ -910,14 +896,14 @@ isert_np_cma_handler(struct isert_np *isert_np,
switch (event) {
case RDMA_CM_EVENT_DEVICE_REMOVAL:
- isert_np->np_cm_id = NULL;
+ isert_np->cm_id = NULL;
break;
case RDMA_CM_EVENT_ADDR_CHANGE:
- isert_np->np_cm_id = isert_setup_id(isert_np);
- if (IS_ERR(isert_np->np_cm_id)) {
+ isert_np->cm_id = isert_setup_id(isert_np);
+ if (IS_ERR(isert_np->cm_id)) {
isert_err("isert np %p setup id failed: %ld\n",
- isert_np, PTR_ERR(isert_np->np_cm_id));
- isert_np->np_cm_id = NULL;
+ isert_np, PTR_ERR(isert_np->cm_id));
+ isert_np->cm_id = NULL;
}
break;
default:
@@ -936,7 +922,7 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
struct isert_conn *isert_conn;
bool terminating = false;
- if (isert_np->np_cm_id == cma_id)
+ if (isert_np->cm_id == cma_id)
return isert_np_cma_handler(cma_id->context, event);
isert_conn = cma_id->qp->qp_context;
@@ -952,13 +938,13 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
if (terminating)
goto out;
- mutex_lock(&isert_np->np_accept_mutex);
- if (!list_empty(&isert_conn->accept_node)) {
- list_del_init(&isert_conn->accept_node);
+ mutex_lock(&isert_np->mutex);
+ if (!list_empty(&isert_conn->node)) {
+ list_del_init(&isert_conn->node);
isert_put_conn(isert_conn);
queue_work(isert_release_wq, &isert_conn->release_work);
}
- mutex_unlock(&isert_np->np_accept_mutex);
+ mutex_unlock(&isert_np->mutex);
out:
return 0;
@@ -969,6 +955,7 @@ isert_connect_error(struct rdma_cm_id *cma_id)
{
struct isert_conn *isert_conn = cma_id->qp->qp_context;
+ list_del_init(&isert_conn->node);
isert_conn->cm_id = NULL;
isert_put_conn(isert_conn);
@@ -1013,35 +1000,51 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
}
static int
-isert_post_recv(struct isert_conn *isert_conn, u32 count)
+isert_post_recvm(struct isert_conn *isert_conn, u32 count)
{
struct ib_recv_wr *rx_wr, *rx_wr_failed;
int i, ret;
- unsigned int rx_head = isert_conn->rx_desc_head;
struct iser_rx_desc *rx_desc;
for (rx_wr = isert_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
- rx_desc = &isert_conn->rx_descs[rx_head];
- rx_wr->wr_id = (uintptr_t)rx_desc;
- rx_wr->sg_list = &rx_desc->rx_sg;
- rx_wr->num_sge = 1;
- rx_wr->next = rx_wr + 1;
- rx_head = (rx_head + 1) & (ISERT_QP_MAX_RECV_DTOS - 1);
+ rx_desc = &isert_conn->rx_descs[i];
+ rx_wr->wr_id = (uintptr_t)rx_desc;
+ rx_wr->sg_list = &rx_desc->rx_sg;
+ rx_wr->num_sge = 1;
+ rx_wr->next = rx_wr + 1;
}
-
rx_wr--;
rx_wr->next = NULL; /* mark end of work requests list */
isert_conn->post_recv_buf_count += count;
ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr,
- &rx_wr_failed);
+ &rx_wr_failed);
if (ret) {
isert_err("ib_post_recv() failed with ret: %d\n", ret);
isert_conn->post_recv_buf_count -= count;
- } else {
- isert_dbg("Posted %d RX buffers\n", count);
- isert_conn->rx_desc_head = rx_head;
}
+
+ return ret;
+}
+
+static int
+isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
+{
+ struct ib_recv_wr *rx_wr_failed, rx_wr;
+ int ret;
+
+ rx_wr.wr_id = (uintptr_t)rx_desc;
+ rx_wr.sg_list = &rx_desc->rx_sg;
+ rx_wr.num_sge = 1;
+ rx_wr.next = NULL;
+
+ isert_conn->post_recv_buf_count++;
+ ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_failed);
+ if (ret) {
+ isert_err("ib_post_recv() failed with ret: %d\n", ret);
+ isert_conn->post_recv_buf_count--;
+ }
+
return ret;
}
@@ -1086,8 +1089,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
tx_desc->num_sge = 1;
tx_desc->isert_cmd = isert_cmd;
- if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
- tx_desc->tx_sg[0].lkey = device->mr->lkey;
+ if (tx_desc->tx_sg[0].lkey != device->pd->local_dma_lkey) {
+ tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey;
isert_dbg("tx_desc %p lkey mismatch, fixing\n", tx_desc);
}
}
@@ -1110,7 +1113,7 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn,
tx_desc->dma_addr = dma_addr;
tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
- tx_desc->tx_sg[0].lkey = device->mr->lkey;
+ tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey;
isert_dbg("Setup tx_sg[0].addr: 0x%llx length: %u lkey: 0x%x\n",
tx_desc->tx_sg[0].addr, tx_desc->tx_sg[0].length,
@@ -1143,7 +1146,7 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn)
memset(&sge, 0, sizeof(struct ib_sge));
sge.addr = isert_conn->login_req_dma;
sge.length = ISER_RX_LOGIN_SIZE;
- sge.lkey = isert_conn->device->mr->lkey;
+ sge.lkey = isert_conn->device->pd->local_dma_lkey;
isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n",
sge.addr, sge.length, sge.lkey);
@@ -1193,7 +1196,7 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
tx_dsg->addr = isert_conn->login_rsp_dma;
tx_dsg->length = length;
- tx_dsg->lkey = isert_conn->device->mr->lkey;
+ tx_dsg->lkey = isert_conn->device->pd->local_dma_lkey;
tx_desc->num_sge = 2;
}
if (!login->login_failed) {
@@ -1212,7 +1215,8 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
if (ret)
return ret;
- ret = isert_post_recv(isert_conn, ISERT_MIN_POSTED_RX);
+ ret = isert_post_recvm(isert_conn,
+ ISERT_QP_MAX_RECV_DTOS);
if (ret)
return ret;
@@ -1285,7 +1289,7 @@ isert_rx_login_req(struct isert_conn *isert_conn)
}
static struct iscsi_cmd
-*isert_allocate_cmd(struct iscsi_conn *conn)
+*isert_allocate_cmd(struct iscsi_conn *conn, struct iser_rx_desc *rx_desc)
{
struct isert_conn *isert_conn = conn->context;
struct isert_cmd *isert_cmd;
@@ -1299,6 +1303,7 @@ static struct iscsi_cmd
isert_cmd = iscsit_priv_cmd(cmd);
isert_cmd->conn = isert_conn;
isert_cmd->iscsi_cmd = cmd;
+ isert_cmd->rx_desc = rx_desc;
return cmd;
}
@@ -1310,9 +1315,9 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn,
{
struct iscsi_conn *conn = isert_conn->conn;
struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)buf;
- struct scatterlist *sg;
int imm_data, imm_data_len, unsol_data, sg_nents, rc;
bool dump_payload = false;
+ unsigned int data_len;
rc = iscsit_setup_scsi_cmd(conn, cmd, buf);
if (rc < 0)
@@ -1321,7 +1326,10 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn,
imm_data = cmd->immediate_data;
imm_data_len = cmd->first_burst_len;
unsol_data = cmd->unsolicited_data;
+ data_len = cmd->se_cmd.data_length;
+ if (imm_data && imm_data_len == data_len)
+ cmd->se_cmd.se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC;
rc = iscsit_process_scsi_cmd(conn, cmd, hdr);
if (rc < 0) {
return 0;
@@ -1333,13 +1341,20 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn,
if (!imm_data)
return 0;
- sg = &cmd->se_cmd.t_data_sg[0];
- sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE));
-
- isert_dbg("Copying Immediate SG: %p sg_nents: %u from %p imm_data_len: %d\n",
- sg, sg_nents, &rx_desc->data[0], imm_data_len);
-
- sg_copy_from_buffer(sg, sg_nents, &rx_desc->data[0], imm_data_len);
+ if (imm_data_len != data_len) {
+ sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE));
+ sg_copy_from_buffer(cmd->se_cmd.t_data_sg, sg_nents,
+ &rx_desc->data[0], imm_data_len);
+ isert_dbg("Copy Immediate sg_nents: %u imm_data_len: %d\n",
+ sg_nents, imm_data_len);
+ } else {
+ sg_init_table(&isert_cmd->sg, 1);
+ cmd->se_cmd.t_data_sg = &isert_cmd->sg;
+ cmd->se_cmd.t_data_nents = 1;
+ sg_set_buf(&isert_cmd->sg, &rx_desc->data[0], imm_data_len);
+ isert_dbg("Transfer Immediate imm_data_len: %d\n",
+ imm_data_len);
+ }
cmd->write_data_done += imm_data_len;
@@ -1414,6 +1429,15 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn,
if (rc < 0)
return rc;
+ /*
+ * multiple data-outs on the same command can arrive -
+ * so post the buffer before hand
+ */
+ rc = isert_post_recv(isert_conn, rx_desc);
+ if (rc) {
+ isert_err("ib_post_recv failed with %d\n", rc);
+ return rc;
+ }
return 0;
}
@@ -1486,7 +1510,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
switch (opcode) {
case ISCSI_OP_SCSI_CMD:
- cmd = isert_allocate_cmd(conn);
+ cmd = isert_allocate_cmd(conn, rx_desc);
if (!cmd)
break;
@@ -1500,7 +1524,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
rx_desc, (unsigned char *)hdr);
break;
case ISCSI_OP_NOOP_OUT:
- cmd = isert_allocate_cmd(conn);
+ cmd = isert_allocate_cmd(conn, rx_desc);
if (!cmd)
break;
@@ -1513,7 +1537,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
(unsigned char *)hdr);
break;
case ISCSI_OP_SCSI_TMFUNC:
- cmd = isert_allocate_cmd(conn);
+ cmd = isert_allocate_cmd(conn, rx_desc);
if (!cmd)
break;
@@ -1521,22 +1545,20 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
(unsigned char *)hdr);
break;
case ISCSI_OP_LOGOUT:
- cmd = isert_allocate_cmd(conn);
+ cmd = isert_allocate_cmd(conn, rx_desc);
if (!cmd)
break;
ret = iscsit_handle_logout_cmd(conn, cmd, (unsigned char *)hdr);
break;
case ISCSI_OP_TEXT:
- if (be32_to_cpu(hdr->ttt) != 0xFFFFFFFF) {
+ if (be32_to_cpu(hdr->ttt) != 0xFFFFFFFF)
cmd = iscsit_find_cmd_from_itt(conn, hdr->itt);
- if (!cmd)
- break;
- } else {
- cmd = isert_allocate_cmd(conn);
- if (!cmd)
- break;
- }
+ else
+ cmd = isert_allocate_cmd(conn, rx_desc);
+
+ if (!cmd)
+ break;
isert_cmd = iscsit_priv_cmd(cmd);
ret = isert_handle_text_cmd(isert_conn, isert_cmd, cmd,
@@ -1596,7 +1618,7 @@ isert_rcv_completion(struct iser_rx_desc *desc,
struct ib_device *ib_dev = isert_conn->cm_id->device;
struct iscsi_hdr *hdr;
u64 rx_dma;
- int rx_buflen, outstanding;
+ int rx_buflen;
if ((char *)desc == isert_conn->login_req_buf) {
rx_dma = isert_conn->login_req_dma;
@@ -1636,22 +1658,6 @@ isert_rcv_completion(struct iser_rx_desc *desc,
DMA_FROM_DEVICE);
isert_conn->post_recv_buf_count--;
- isert_dbg("Decremented post_recv_buf_count: %d\n",
- isert_conn->post_recv_buf_count);
-
- if ((char *)desc == isert_conn->login_req_buf)
- return;
-
- outstanding = isert_conn->post_recv_buf_count;
- if (outstanding + ISERT_MIN_POSTED_RX <= ISERT_QP_MAX_RECV_DTOS) {
- int err, count = min(ISERT_QP_MAX_RECV_DTOS - outstanding,
- ISERT_MIN_POSTED_RX);
- err = isert_post_recv(isert_conn, count);
- if (err) {
- isert_err("isert_post_recv() count: %d failed, %d\n",
- count, err);
- }
- }
}
static int
@@ -2163,6 +2169,12 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
struct ib_send_wr *wr_failed;
int ret;
+ ret = isert_post_recv(isert_conn, isert_cmd->rx_desc);
+ if (ret) {
+ isert_err("ib_post_recv failed with %d\n", ret);
+ return ret;
+ }
+
ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr,
&wr_failed);
if (ret) {
@@ -2210,7 +2222,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
isert_cmd->pdu_buf_len = pdu_len;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
tx_dsg->length = pdu_len;
- tx_dsg->lkey = device->mr->lkey;
+ tx_dsg->lkey = device->pd->local_dma_lkey;
isert_cmd->tx_desc.num_sge = 2;
}
@@ -2338,7 +2350,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
isert_cmd->pdu_buf_len = ISCSI_HDR_LEN;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
tx_dsg->length = ISCSI_HDR_LEN;
- tx_dsg->lkey = device->mr->lkey;
+ tx_dsg->lkey = device->pd->local_dma_lkey;
isert_cmd->tx_desc.num_sge = 2;
isert_init_send_wr(isert_conn, isert_cmd, send_wr);
@@ -2379,7 +2391,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
isert_cmd->pdu_buf_len = txt_rsp_len;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
tx_dsg->length = txt_rsp_len;
- tx_dsg->lkey = device->mr->lkey;
+ tx_dsg->lkey = device->pd->local_dma_lkey;
isert_cmd->tx_desc.num_sge = 2;
}
isert_init_send_wr(isert_conn, isert_cmd, send_wr);
@@ -2420,7 +2432,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off;
ib_sge->length = min_t(u32, data_left,
ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
- ib_sge->lkey = device->mr->lkey;
+ ib_sge->lkey = device->pd->local_dma_lkey;
isert_dbg("RDMA ib_sge: addr: 0x%llx length: %u lkey: %x\n",
ib_sge->addr, ib_sge->length, ib_sge->lkey);
@@ -2594,7 +2606,7 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
u32 page_off;
if (mem->dma_nents == 1) {
- sge->lkey = device->mr->lkey;
+ sge->lkey = device->pd->local_dma_lkey;
sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]);
sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]);
isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n",
@@ -2957,6 +2969,12 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
&isert_cmd->tx_desc.send_wr);
isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr;
wr->send_wr_num += 1;
+
+ rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
+ if (rc) {
+ isert_err("ib_post_recv failed with %d\n", rc);
+ return rc;
+ }
}
rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
@@ -3006,9 +3024,16 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
static int
isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
{
- int ret;
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+ int ret = 0;
switch (state) {
+ case ISTATE_REMOVE:
+ spin_lock_bh(&conn->cmd_lock);
+ list_del_init(&cmd->i_conn_node);
+ spin_unlock_bh(&conn->cmd_lock);
+ isert_put_cmd(isert_cmd, true);
+ break;
case ISTATE_SEND_NOPIN_WANT_RESPONSE:
ret = isert_put_nopin(cmd, conn, false);
break;
@@ -3102,7 +3127,7 @@ out:
static int
isert_setup_np(struct iscsi_np *np,
- struct __kernel_sockaddr_storage *ksockaddr)
+ struct sockaddr_storage *ksockaddr)
{
struct isert_np *isert_np;
struct rdma_cm_id *isert_lid;
@@ -3113,10 +3138,10 @@ isert_setup_np(struct iscsi_np *np,
isert_err("Unable to allocate struct isert_np\n");
return -ENOMEM;
}
- sema_init(&isert_np->np_sem, 0);
- mutex_init(&isert_np->np_accept_mutex);
- INIT_LIST_HEAD(&isert_np->np_accept_list);
- init_completion(&isert_np->np_login_comp);
+ sema_init(&isert_np->sem, 0);
+ mutex_init(&isert_np->mutex);
+ INIT_LIST_HEAD(&isert_np->accepted);
+ INIT_LIST_HEAD(&isert_np->pending);
isert_np->np = np;
/*
@@ -3124,7 +3149,7 @@ isert_setup_np(struct iscsi_np *np,
* in iscsi_target_configfs.c code..
*/
memcpy(&np->np_sockaddr, ksockaddr,
- sizeof(struct __kernel_sockaddr_storage));
+ sizeof(struct sockaddr_storage));
isert_lid = isert_setup_id(isert_np);
if (IS_ERR(isert_lid)) {
@@ -3132,7 +3157,7 @@ isert_setup_np(struct iscsi_np *np,
goto out;
}
- isert_np->np_cm_id = isert_lid;
+ isert_np->cm_id = isert_lid;
np->np_context = isert_np;
return 0;
@@ -3206,32 +3231,11 @@ isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn,
{
struct rdma_cm_id *cm_id = isert_conn->cm_id;
struct rdma_route *cm_route = &cm_id->route;
- struct sockaddr_in *sock_in;
- struct sockaddr_in6 *sock_in6;
conn->login_family = np->np_sockaddr.ss_family;
- if (np->np_sockaddr.ss_family == AF_INET6) {
- sock_in6 = (struct sockaddr_in6 *)&cm_route->addr.dst_addr;
- snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c",
- &sock_in6->sin6_addr.in6_u);
- conn->login_port = ntohs(sock_in6->sin6_port);
-
- sock_in6 = (struct sockaddr_in6 *)&cm_route->addr.src_addr;
- snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI6c",
- &sock_in6->sin6_addr.in6_u);
- conn->local_port = ntohs(sock_in6->sin6_port);
- } else {
- sock_in = (struct sockaddr_in *)&cm_route->addr.dst_addr;
- sprintf(conn->login_ip, "%pI4",
- &sock_in->sin_addr.s_addr);
- conn->login_port = ntohs(sock_in->sin_port);
-
- sock_in = (struct sockaddr_in *)&cm_route->addr.src_addr;
- sprintf(conn->local_ip, "%pI4",
- &sock_in->sin_addr.s_addr);
- conn->local_port = ntohs(sock_in->sin_port);
- }
+ conn->login_sockaddr = cm_route->addr.dst_addr;
+ conn->local_sockaddr = cm_route->addr.src_addr;
}
static int
@@ -3242,7 +3246,7 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
int ret;
accept_wait:
- ret = down_interruptible(&isert_np->np_sem);
+ ret = down_interruptible(&isert_np->sem);
if (ret)
return -ENODEV;
@@ -3259,15 +3263,15 @@ accept_wait:
}
spin_unlock_bh(&np->np_thread_lock);
- mutex_lock(&isert_np->np_accept_mutex);
- if (list_empty(&isert_np->np_accept_list)) {
- mutex_unlock(&isert_np->np_accept_mutex);
+ mutex_lock(&isert_np->mutex);
+ if (list_empty(&isert_np->pending)) {
+ mutex_unlock(&isert_np->mutex);
goto accept_wait;
}
- isert_conn = list_first_entry(&isert_np->np_accept_list,
- struct isert_conn, accept_node);
- list_del_init(&isert_conn->accept_node);
- mutex_unlock(&isert_np->np_accept_mutex);
+ isert_conn = list_first_entry(&isert_np->pending,
+ struct isert_conn, node);
+ list_del_init(&isert_conn->node);
+ mutex_unlock(&isert_np->mutex);
conn->context = isert_conn;
isert_conn->conn = conn;
@@ -3285,28 +3289,39 @@ isert_free_np(struct iscsi_np *np)
struct isert_np *isert_np = np->np_context;
struct isert_conn *isert_conn, *n;
- if (isert_np->np_cm_id)
- rdma_destroy_id(isert_np->np_cm_id);
+ if (isert_np->cm_id)
+ rdma_destroy_id(isert_np->cm_id);
/*
* FIXME: At this point we don't have a good way to insure
* that at this point we don't have hanging connections that
* completed RDMA establishment but didn't start iscsi login
* process. So work-around this by cleaning up what ever piled
- * up in np_accept_list.
+ * up in accepted and pending lists.
*/
- mutex_lock(&isert_np->np_accept_mutex);
- if (!list_empty(&isert_np->np_accept_list)) {
- isert_info("Still have isert connections, cleaning up...\n");
+ mutex_lock(&isert_np->mutex);
+ if (!list_empty(&isert_np->pending)) {
+ isert_info("Still have isert pending connections\n");
list_for_each_entry_safe(isert_conn, n,
- &isert_np->np_accept_list,
- accept_node) {
+ &isert_np->pending,
+ node) {
isert_info("cleaning isert_conn %p state (%d)\n",
isert_conn, isert_conn->state);
isert_connect_release(isert_conn);
}
}
- mutex_unlock(&isert_np->np_accept_mutex);
+
+ if (!list_empty(&isert_np->accepted)) {
+ isert_info("Still have isert accepted connections\n");
+ list_for_each_entry_safe(isert_conn, n,
+ &isert_np->accepted,
+ node) {
+ isert_info("cleaning isert_conn %p state (%d)\n",
+ isert_conn, isert_conn->state);
+ isert_connect_release(isert_conn);
+ }
+ }
+ mutex_unlock(&isert_np->mutex);
np->np_context = NULL;
kfree(isert_np);
@@ -3373,6 +3388,41 @@ isert_wait4flush(struct isert_conn *isert_conn)
wait_for_completion(&isert_conn->wait_comp_err);
}
+/**
+ * isert_put_unsol_pending_cmds() - Drop commands waiting for
+ * unsolicitate dataout
+ * @conn: iscsi connection
+ *
+ * We might still have commands that are waiting for unsolicited
+ * dataouts messages. We must put the extra reference on those
+ * before blocking on the target_wait_for_session_cmds
+ */
+static void
+isert_put_unsol_pending_cmds(struct iscsi_conn *conn)
+{
+ struct iscsi_cmd *cmd, *tmp;
+ static LIST_HEAD(drop_cmd_list);
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_for_each_entry_safe(cmd, tmp, &conn->conn_cmd_list, i_conn_node) {
+ if ((cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA) &&
+ (cmd->write_data_done < conn->sess->sess_ops->FirstBurstLength) &&
+ (cmd->write_data_done < cmd->se_cmd.data_length))
+ list_move_tail(&cmd->i_conn_node, &drop_cmd_list);
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+
+ list_for_each_entry_safe(cmd, tmp, &drop_cmd_list, i_conn_node) {
+ list_del_init(&cmd->i_conn_node);
+ if (cmd->i_state != ISTATE_REMOVE) {
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+
+ isert_info("conn %p dropping cmd %p\n", conn, cmd);
+ isert_put_cmd(isert_cmd, true);
+ }
+ }
+}
+
static void isert_wait_conn(struct iscsi_conn *conn)
{
struct isert_conn *isert_conn = conn->context;
@@ -3391,8 +3441,9 @@ static void isert_wait_conn(struct iscsi_conn *conn)
isert_conn_terminate(isert_conn);
mutex_unlock(&isert_conn->mutex);
- isert_wait4cmds(conn);
isert_wait4flush(isert_conn);
+ isert_put_unsol_pending_cmds(conn);
+ isert_wait4cmds(conn);
isert_wait4logout(isert_conn);
queue_work(isert_release_wq, &isert_conn->release_work);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 9ec23a786c02..c5b99bcecbcf 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -113,7 +113,6 @@ enum {
};
struct isert_rdma_wr {
- struct list_head wr_list;
struct isert_cmd *isert_cmd;
enum iser_ib_op_code iser_ib_op;
struct ib_sge *ib_sge;
@@ -134,14 +133,13 @@ struct isert_cmd {
uint64_t write_va;
u64 pdu_buf_dma;
u32 pdu_buf_len;
- u32 read_va_off;
- u32 write_va_off;
- u32 rdma_wr_num;
struct isert_conn *conn;
struct iscsi_cmd *iscsi_cmd;
struct iser_tx_desc tx_desc;
+ struct iser_rx_desc *rx_desc;
struct isert_rdma_wr rdma_wr;
struct work_struct comp_work;
+ struct scatterlist sg;
};
struct isert_device;
@@ -159,11 +157,10 @@ struct isert_conn {
u64 login_req_dma;
int login_req_len;
u64 login_rsp_dma;
- unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
- struct ib_recv_wr rx_wr[ISERT_MIN_POSTED_RX];
+ struct ib_recv_wr rx_wr[ISERT_QP_MAX_RECV_DTOS];
struct iscsi_conn *conn;
- struct list_head accept_node;
+ struct list_head node;
struct completion login_comp;
struct completion login_req_comp;
struct iser_tx_desc login_tx_desc;
@@ -209,7 +206,6 @@ struct isert_device {
int refcount;
struct ib_device *ib_device;
struct ib_pd *pd;
- struct ib_mr *mr;
struct isert_comp *comps;
int comps_used;
struct list_head dev_node;
@@ -223,9 +219,9 @@ struct isert_device {
struct isert_np {
struct iscsi_np *np;
- struct semaphore np_sem;
- struct rdma_cm_id *np_cm_id;
- struct mutex np_accept_mutex;
- struct list_head np_accept_list;
- struct completion np_login_comp;
+ struct semaphore sem;
+ struct rdma_cm_id *cm_id;
+ struct mutex mutex;
+ struct list_head accepted;
+ struct list_head pending;
};
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 267dc4f75502..b481490ad257 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -55,8 +55,8 @@
#define DRV_NAME "ib_srp"
#define PFX DRV_NAME ": "
-#define DRV_VERSION "1.0"
-#define DRV_RELDATE "July 1, 2013"
+#define DRV_VERSION "2.0"
+#define DRV_RELDATE "July 26, 2015"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
@@ -68,8 +68,8 @@ static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
-static bool prefer_fr;
-static bool register_always;
+static bool prefer_fr = true;
+static bool register_always = true;
static int topspin_workarounds = 1;
module_param(srp_sg_tablesize, uint, 0444);
@@ -131,7 +131,7 @@ MODULE_PARM_DESC(ch_count,
"Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
static void srp_add_one(struct ib_device *device);
-static void srp_remove_one(struct ib_device *device);
+static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
@@ -161,13 +161,10 @@ static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
int tmo, res;
- if (strncmp(val, "off", 3) != 0) {
- res = kstrtoint(val, 0, &tmo);
- if (res)
- goto out;
- } else {
- tmo = -1;
- }
+ res = srp_parse_tmo(&tmo, val);
+ if (res)
+ goto out;
+
if (kp->arg == &srp_reconnect_delay)
res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
srp_dev_loss_tmo);
@@ -381,7 +378,8 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
INIT_LIST_HEAD(&pool->free_list);
for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
- mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
+ mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+ max_page_list_len);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto destroy_pool;
@@ -548,7 +546,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
if (ret)
goto err_qp;
- if (dev->use_fast_reg && dev->has_fr) {
+ if (dev->use_fast_reg) {
fr_pool = srp_alloc_fr_pool(target);
if (IS_ERR(fr_pool)) {
ret = PTR_ERR(fr_pool);
@@ -556,10 +554,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
"FR pool allocation failed (%d)\n", ret);
goto err_qp;
}
- if (ch->fr_pool)
- srp_destroy_fr_pool(ch->fr_pool);
- ch->fr_pool = fr_pool;
- } else if (!dev->use_fast_reg && dev->has_fmr) {
+ } else if (dev->use_fmr) {
fmr_pool = srp_alloc_fmr_pool(target);
if (IS_ERR(fmr_pool)) {
ret = PTR_ERR(fmr_pool);
@@ -567,9 +562,6 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
"FMR pool allocation failed (%d)\n", ret);
goto err_qp;
}
- if (ch->fmr_pool)
- ib_destroy_fmr_pool(ch->fmr_pool);
- ch->fmr_pool = fmr_pool;
}
if (ch->qp)
@@ -583,6 +575,16 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
ch->recv_cq = recv_cq;
ch->send_cq = send_cq;
+ if (dev->use_fast_reg) {
+ if (ch->fr_pool)
+ srp_destroy_fr_pool(ch->fr_pool);
+ ch->fr_pool = fr_pool;
+ } else if (dev->use_fmr) {
+ if (ch->fmr_pool)
+ ib_destroy_fmr_pool(ch->fmr_pool);
+ ch->fmr_pool = fmr_pool;
+ }
+
kfree(init_attr);
return 0;
@@ -625,7 +627,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
if (dev->use_fast_reg) {
if (ch->fr_pool)
srp_destroy_fr_pool(ch->fr_pool);
- } else {
+ } else if (dev->use_fmr) {
if (ch->fmr_pool)
ib_destroy_fmr_pool(ch->fmr_pool);
}
@@ -1087,7 +1089,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
if (req->nmdesc)
srp_fr_pool_put(ch->fr_pool, req->fr_list,
req->nmdesc);
- } else {
+ } else if (dev->use_fmr) {
struct ib_pool_fmr **pfmr;
for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
@@ -1262,6 +1264,8 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
{
struct srp_direct_buf *desc = state->desc;
+ WARN_ON_ONCE(!dma_len);
+
desc->va = cpu_to_be64(dma_addr);
desc->key = cpu_to_be32(rkey);
desc->len = cpu_to_be32(dma_len);
@@ -1274,18 +1278,24 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
static int srp_map_finish_fmr(struct srp_map_state *state,
struct srp_rdma_ch *ch)
{
+ struct srp_target_port *target = ch->target;
+ struct srp_device *dev = target->srp_host->srp_dev;
struct ib_pool_fmr *fmr;
u64 io_addr = 0;
+ if (state->fmr.next >= state->fmr.end)
+ return -ENOMEM;
+
fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
state->npages, io_addr);
if (IS_ERR(fmr))
return PTR_ERR(fmr);
- *state->next_fmr++ = fmr;
+ *state->fmr.next++ = fmr;
state->nmdesc++;
- srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey);
+ srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
+ state->dma_len, fmr->fmr->rkey);
return 0;
}
@@ -1300,6 +1310,9 @@ static int srp_map_finish_fr(struct srp_map_state *state,
struct srp_fr_desc *desc;
u32 rkey;
+ if (state->fr.next >= state->fr.end)
+ return -ENOMEM;
+
desc = srp_fr_pool_get(ch->fr_pool);
if (!desc)
return -ENOMEM;
@@ -1323,7 +1336,7 @@ static int srp_map_finish_fr(struct srp_map_state *state,
IB_ACCESS_REMOTE_WRITE);
wr.wr.fast_reg.rkey = desc->mr->lkey;
- *state->next_fr++ = desc;
+ *state->fr.next++ = desc;
state->nmdesc++;
srp_map_desc(state, state->base_dma_addr, state->dma_len,
@@ -1336,17 +1349,19 @@ static int srp_finish_mapping(struct srp_map_state *state,
struct srp_rdma_ch *ch)
{
struct srp_target_port *target = ch->target;
+ struct srp_device *dev = target->srp_host->srp_dev;
int ret = 0;
+ WARN_ON_ONCE(!dev->use_fast_reg && !dev->use_fmr);
+
if (state->npages == 0)
return 0;
- if (state->npages == 1 && !register_always)
+ if (state->npages == 1 && target->global_mr)
srp_map_desc(state, state->base_dma_addr, state->dma_len,
- target->rkey);
+ target->global_mr->rkey);
else
- ret = target->srp_host->srp_dev->use_fast_reg ?
- srp_map_finish_fr(state, ch) :
+ ret = dev->use_fast_reg ? srp_map_finish_fr(state, ch) :
srp_map_finish_fmr(state, ch);
if (ret == 0) {
@@ -1357,66 +1372,19 @@ static int srp_finish_mapping(struct srp_map_state *state,
return ret;
}
-static void srp_map_update_start(struct srp_map_state *state,
- struct scatterlist *sg, int sg_index,
- dma_addr_t dma_addr)
-{
- state->unmapped_sg = sg;
- state->unmapped_index = sg_index;
- state->unmapped_addr = dma_addr;
-}
-
static int srp_map_sg_entry(struct srp_map_state *state,
struct srp_rdma_ch *ch,
- struct scatterlist *sg, int sg_index,
- bool use_mr)
+ struct scatterlist *sg, int sg_index)
{
struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_device *ibdev = dev->dev;
dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
- unsigned int len;
+ unsigned int len = 0;
int ret;
- if (!dma_len)
- return 0;
-
- if (!use_mr) {
- /*
- * Once we're in direct map mode for a request, we don't
- * go back to FMR or FR mode, so no need to update anything
- * other than the descriptor.
- */
- srp_map_desc(state, dma_addr, dma_len, target->rkey);
- return 0;
- }
-
- /*
- * Since not all RDMA HW drivers support non-zero page offsets for
- * FMR, if we start at an offset into a page, don't merge into the
- * current FMR mapping. Finish it out, and use the kernel's MR for
- * this sg entry.
- */
- if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
- dma_len > dev->mr_max_size) {
- ret = srp_finish_mapping(state, ch);
- if (ret)
- return ret;
-
- srp_map_desc(state, dma_addr, dma_len, target->rkey);
- srp_map_update_start(state, NULL, 0, 0);
- return 0;
- }
-
- /*
- * If this is the first sg that will be mapped via FMR or via FR, save
- * our position. We need to know the first unmapped entry, its index,
- * and the first unmapped address within that entry to be able to
- * restart mapping after an error.
- */
- if (!state->unmapped_sg)
- srp_map_update_start(state, sg, sg_index, dma_addr);
+ WARN_ON_ONCE(!dma_len);
while (dma_len) {
unsigned offset = dma_addr & ~dev->mr_page_mask;
@@ -1424,8 +1392,6 @@ static int srp_map_sg_entry(struct srp_map_state *state,
ret = srp_finish_mapping(state, ch);
if (ret)
return ret;
-
- srp_map_update_start(state, sg, sg_index, dma_addr);
}
len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
@@ -1444,11 +1410,8 @@ static int srp_map_sg_entry(struct srp_map_state *state,
* boundries.
*/
ret = 0;
- if (len != dev->mr_page_size) {
+ if (len != dev->mr_page_size)
ret = srp_finish_mapping(state, ch);
- if (!ret)
- srp_map_update_start(state, NULL, 0, 0);
- }
return ret;
}
@@ -1458,50 +1421,80 @@ static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch,
{
struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
- struct ib_device *ibdev = dev->dev;
struct scatterlist *sg;
- int i;
- bool use_mr;
+ int i, ret;
state->desc = req->indirect_desc;
state->pages = req->map_page;
if (dev->use_fast_reg) {
- state->next_fr = req->fr_list;
- use_mr = !!ch->fr_pool;
- } else {
- state->next_fmr = req->fmr_list;
- use_mr = !!ch->fmr_pool;
+ state->fr.next = req->fr_list;
+ state->fr.end = req->fr_list + target->cmd_sg_cnt;
+ } else if (dev->use_fmr) {
+ state->fmr.next = req->fmr_list;
+ state->fmr.end = req->fmr_list + target->cmd_sg_cnt;
}
- for_each_sg(scat, sg, count, i) {
- if (srp_map_sg_entry(state, ch, sg, i, use_mr)) {
- /*
- * Memory registration failed, so backtrack to the
- * first unmapped entry and continue on without using
- * memory registration.
- */
- dma_addr_t dma_addr;
- unsigned int dma_len;
-
-backtrack:
- sg = state->unmapped_sg;
- i = state->unmapped_index;
-
- dma_addr = ib_sg_dma_address(ibdev, sg);
- dma_len = ib_sg_dma_len(ibdev, sg);
- dma_len -= (state->unmapped_addr - dma_addr);
- dma_addr = state->unmapped_addr;
- use_mr = false;
- srp_map_desc(state, dma_addr, dma_len, target->rkey);
+ if (dev->use_fast_reg || dev->use_fmr) {
+ for_each_sg(scat, sg, count, i) {
+ ret = srp_map_sg_entry(state, ch, sg, i);
+ if (ret)
+ goto out;
+ }
+ ret = srp_finish_mapping(state, ch);
+ if (ret)
+ goto out;
+ } else {
+ for_each_sg(scat, sg, count, i) {
+ srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
+ ib_sg_dma_len(dev->dev, sg),
+ target->global_mr->rkey);
}
}
- if (use_mr && srp_finish_mapping(state, ch))
- goto backtrack;
-
req->nmdesc = state->nmdesc;
+ ret = 0;
- return 0;
+out:
+ return ret;
+}
+
+/*
+ * Register the indirect data buffer descriptor with the HCA.
+ *
+ * Note: since the indirect data buffer descriptor has been allocated with
+ * kmalloc() it is guaranteed that this buffer is a physically contiguous
+ * memory buffer.
+ */
+static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
+ void **next_mr, void **end_mr, u32 idb_len,
+ __be32 *idb_rkey)
+{
+ struct srp_target_port *target = ch->target;
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct srp_map_state state;
+ struct srp_direct_buf idb_desc;
+ u64 idb_pages[1];
+ int ret;
+
+ memset(&state, 0, sizeof(state));
+ memset(&idb_desc, 0, sizeof(idb_desc));
+ state.gen.next = next_mr;
+ state.gen.end = end_mr;
+ state.desc = &idb_desc;
+ state.pages = idb_pages;
+ state.pages[0] = (req->indirect_dma_addr &
+ dev->mr_page_mask);
+ state.npages = 1;
+ state.base_dma_addr = req->indirect_dma_addr;
+ state.dma_len = idb_len;
+ ret = srp_finish_mapping(&state, ch);
+ if (ret < 0)
+ goto out;
+
+ *idb_rkey = idb_desc.key;
+
+out:
+ return ret;
}
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
@@ -1510,12 +1503,13 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
struct srp_target_port *target = ch->target;
struct scatterlist *scat;
struct srp_cmd *cmd = req->cmd->buf;
- int len, nents, count;
+ int len, nents, count, ret;
struct srp_device *dev;
struct ib_device *ibdev;
struct srp_map_state state;
struct srp_indirect_buf *indirect_hdr;
- u32 table_len;
+ u32 idb_len, table_len;
+ __be32 idb_rkey;
u8 fmt;
if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
@@ -1542,7 +1536,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
fmt = SRP_DATA_DESC_DIRECT;
len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
- if (count == 1 && !register_always) {
+ if (count == 1 && target->global_mr) {
/*
* The midlayer only generated a single gather/scatter
* entry, or DMA mapping coalesced everything to a
@@ -1552,7 +1546,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
struct srp_direct_buf *buf = (void *) cmd->add_data;
buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
- buf->key = cpu_to_be32(target->rkey);
+ buf->key = cpu_to_be32(target->global_mr->rkey);
buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
req->nmdesc = 0;
@@ -1597,6 +1591,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
count = min(state.ndesc, target->cmd_sg_cnt);
table_len = state.ndesc * sizeof (struct srp_direct_buf);
+ idb_len = sizeof(struct srp_indirect_buf) + table_len;
fmt = SRP_DATA_DESC_INDIRECT;
len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
@@ -1605,8 +1600,18 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
memcpy(indirect_hdr->desc_list, req->indirect_desc,
count * sizeof (struct srp_direct_buf));
+ if (!target->global_mr) {
+ ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
+ idb_len, &idb_rkey);
+ if (ret < 0)
+ return ret;
+ req->nmdesc++;
+ } else {
+ idb_rkey = target->global_mr->rkey;
+ }
+
indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
- indirect_hdr->table_desc.key = cpu_to_be32(target->rkey);
+ indirect_hdr->table_desc.key = idb_rkey;
indirect_hdr->table_desc.len = cpu_to_be32(table_len);
indirect_hdr->len = cpu_to_be32(state.total_len);
@@ -2174,7 +2179,7 @@ static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
}
static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
- struct srp_login_rsp *lrsp,
+ const struct srp_login_rsp *lrsp,
struct srp_rdma_ch *ch)
{
struct srp_target_port *target = ch->target;
@@ -2760,6 +2765,13 @@ static int srp_sdev_count(struct Scsi_Host *host)
return c;
}
+/*
+ * Return values:
+ * < 0 upon failure. Caller is responsible for SRP target port cleanup.
+ * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
+ * removal has been scheduled.
+ * 0 and target->state != SRP_TARGET_REMOVED upon success.
+ */
static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
{
struct srp_rport_identifiers ids;
@@ -3149,8 +3161,8 @@ static ssize_t srp_create_target(struct device *dev,
target->io_class = SRP_REV16A_IB_IO_CLASS;
target->scsi_host = target_host;
target->srp_host = host;
- target->lkey = host->srp_dev->mr->lkey;
- target->rkey = host->srp_dev->mr->rkey;
+ target->lkey = host->srp_dev->pd->local_dma_lkey;
+ target->global_mr = host->srp_dev->global_mr;
target->cmd_sg_cnt = cmd_sg_entries;
target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
target->allow_ext_sg = allow_ext_sg;
@@ -3265,7 +3277,7 @@ static ssize_t srp_create_target(struct device *dev,
srp_free_ch_ib(target, ch);
srp_free_req_data(target, ch);
target->ch_count = ch - target->ch;
- break;
+ goto connected;
}
}
@@ -3275,6 +3287,7 @@ static ssize_t srp_create_target(struct device *dev,
node_idx++;
}
+connected:
target->scsi_host->nr_hw_queues = target->ch_count;
ret = srp_add_target(host, target);
@@ -3297,6 +3310,8 @@ out:
mutex_unlock(&host->add_target_mutex);
scsi_host_put(target->scsi_host);
+ if (ret < 0)
+ scsi_host_put(target->scsi_host);
return ret;
@@ -3379,7 +3394,7 @@ static void srp_add_one(struct ib_device *device)
struct srp_device *srp_dev;
struct ib_device_attr *dev_attr;
struct srp_host *host;
- int mr_page_shift, s, e, p;
+ int mr_page_shift, p;
u64 max_pages_per_mr;
dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
@@ -3404,6 +3419,7 @@ static void srp_add_one(struct ib_device *device)
srp_dev->use_fast_reg = (srp_dev->has_fr &&
(!srp_dev->has_fmr || prefer_fr));
+ srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
/*
* Use the smallest page size supported by the HCA, down to a
@@ -3436,22 +3452,18 @@ static void srp_add_one(struct ib_device *device)
if (IS_ERR(srp_dev->pd))
goto free_dev;
- srp_dev->mr = ib_get_dma_mr(srp_dev->pd,
- IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE);
- if (IS_ERR(srp_dev->mr))
- goto err_pd;
-
- if (device->node_type == RDMA_NODE_IB_SWITCH) {
- s = 0;
- e = 0;
+ if (!register_always || (!srp_dev->has_fmr && !srp_dev->has_fr)) {
+ srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE);
+ if (IS_ERR(srp_dev->global_mr))
+ goto err_pd;
} else {
- s = 1;
- e = device->phys_port_cnt;
+ srp_dev->global_mr = NULL;
}
- for (p = s; p <= e; ++p) {
+ for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
host = srp_add_port(srp_dev, p);
if (host)
list_add_tail(&host->list, &srp_dev->dev_list);
@@ -3471,13 +3483,13 @@ free_attr:
kfree(dev_attr);
}
-static void srp_remove_one(struct ib_device *device)
+static void srp_remove_one(struct ib_device *device, void *client_data)
{
struct srp_device *srp_dev;
struct srp_host *host, *tmp_host;
struct srp_target_port *target;
- srp_dev = ib_get_client_data(device, &srp_client);
+ srp_dev = client_data;
if (!srp_dev)
return;
@@ -3506,7 +3518,8 @@ static void srp_remove_one(struct ib_device *device)
kfree(host);
}
- ib_dereg_mr(srp_dev->mr);
+ if (srp_dev->global_mr)
+ ib_dereg_mr(srp_dev->global_mr);
ib_dealloc_pd(srp_dev->pd);
kfree(srp_dev);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 17ee3f80ba55..3608f2e4819c 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -95,13 +95,14 @@ struct srp_device {
struct list_head dev_list;
struct ib_device *dev;
struct ib_pd *pd;
- struct ib_mr *mr;
+ struct ib_mr *global_mr;
u64 mr_page_mask;
int mr_page_size;
int mr_max_size;
int max_pages_per_mr;
bool has_fmr;
bool has_fr;
+ bool use_fmr;
bool use_fast_reg;
};
@@ -182,10 +183,10 @@ struct srp_target_port {
spinlock_t lock;
/* read only in the hot path */
+ struct ib_mr *global_mr;
struct srp_rdma_ch *ch;
u32 ch_count;
u32 lkey;
- u32 rkey;
enum srp_target_state state;
unsigned int max_iu_len;
unsigned int cmd_sg_cnt;
@@ -276,14 +277,21 @@ struct srp_fr_pool {
* @npages: Number of page addresses in the pages[] array.
* @nmdesc: Number of FMR or FR memory descriptors used for mapping.
* @ndesc: Number of SRP buffer descriptors that have been filled in.
- * @unmapped_sg: First element of the sg-list that is mapped via FMR or FR.
- * @unmapped_index: Index of the first element mapped via FMR or FR.
- * @unmapped_addr: DMA address of the first element mapped via FMR or FR.
*/
struct srp_map_state {
union {
- struct ib_pool_fmr **next_fmr;
- struct srp_fr_desc **next_fr;
+ struct {
+ struct ib_pool_fmr **next;
+ struct ib_pool_fmr **end;
+ } fmr;
+ struct {
+ struct srp_fr_desc **next;
+ struct srp_fr_desc **end;
+ } fr;
+ struct {
+ void **next;
+ void **end;
+ } gen;
};
struct srp_direct_buf *desc;
u64 *pages;
@@ -293,9 +301,6 @@ struct srp_map_state {
unsigned int npages;
unsigned int nmdesc;
unsigned int ndesc;
- struct scatterlist *unmapped_sg;
- int unmapped_index;
- dma_addr_t unmapped_addr;
};
#endif /* IB_SRP_H */
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 82897ca17f32..f6fe0414139b 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -302,7 +302,7 @@ static void srpt_get_iou(struct ib_dm_mad *mad)
int i;
ioui = (struct ib_dm_iou_info *)mad->data;
- ioui->change_id = __constant_cpu_to_be16(1);
+ ioui->change_id = cpu_to_be16(1);
ioui->max_controllers = 16;
/* set present for slot 1 and empty for the rest */
@@ -330,13 +330,13 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
if (!slot || slot > 16) {
mad->mad_hdr.status
- = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
+ = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
return;
}
if (slot > 2) {
mad->mad_hdr.status
- = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC);
+ = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
return;
}
@@ -348,10 +348,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
iocp->subsys_device_id = 0x0;
- iocp->io_class = __constant_cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
- iocp->io_subclass = __constant_cpu_to_be16(SRP_IO_SUBCLASS);
- iocp->protocol = __constant_cpu_to_be16(SRP_PROTOCOL);
- iocp->protocol_version = __constant_cpu_to_be16(SRP_PROTOCOL_VERSION);
+ iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
+ iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
+ iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
+ iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
iocp->send_queue_depth = cpu_to_be16(sdev->srq_size);
iocp->rdma_read_depth = 4;
iocp->send_size = cpu_to_be32(srp_max_req_size);
@@ -379,13 +379,13 @@ static void srpt_get_svc_entries(u64 ioc_guid,
if (!slot || slot > 16) {
mad->mad_hdr.status
- = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
+ = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
return;
}
if (slot > 2 || lo > hi || hi > 1) {
mad->mad_hdr.status
- = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC);
+ = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
return;
}
@@ -436,7 +436,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
break;
default:
rsp_mad->mad_hdr.status =
- __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
+ cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
break;
}
}
@@ -493,11 +493,11 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
break;
case IB_MGMT_METHOD_SET:
dm_mad->mad_hdr.status =
- __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
+ cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
break;
default:
dm_mad->mad_hdr.status =
- __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
+ cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
break;
}
@@ -783,7 +783,7 @@ static int srpt_post_recv(struct srpt_device *sdev,
list.addr = ioctx->ioctx.dma;
list.length = srp_max_req_size;
- list.lkey = sdev->mr->lkey;
+ list.lkey = sdev->pd->local_dma_lkey;
wr.next = NULL;
wr.sg_list = &list;
@@ -818,7 +818,7 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
list.addr = ioctx->ioctx.dma;
list.length = len;
- list.lkey = sdev->mr->lkey;
+ list.lkey = sdev->pd->local_dma_lkey;
wr.next = NULL;
wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
@@ -1206,7 +1206,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
while (rsize > 0 && tsize > 0) {
sge->addr = dma_addr;
- sge->lkey = ch->sport->sdev->mr->lkey;
+ sge->lkey = ch->sport->sdev->pd->local_dma_lkey;
if (rsize >= dma_len) {
sge->length =
@@ -1535,7 +1535,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
memset(srp_rsp, 0, sizeof *srp_rsp);
srp_rsp->opcode = SRP_RSP;
srp_rsp->req_lim_delta =
- __constant_cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0));
+ cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0));
srp_rsp->tag = tag;
srp_rsp->status = status;
@@ -1585,8 +1585,8 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
memset(srp_rsp, 0, sizeof *srp_rsp);
srp_rsp->opcode = SRP_RSP;
- srp_rsp->req_lim_delta = __constant_cpu_to_be32(1
- + atomic_xchg(&ch->req_lim_delta, 0));
+ srp_rsp->req_lim_delta =
+ cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0));
srp_rsp->tag = tag;
srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
@@ -1630,7 +1630,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
switch (len) {
case 8:
if ((*((__be64 *)lun) &
- __constant_cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0)
+ cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0)
goto out_err;
break;
case 4:
@@ -2449,8 +2449,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
}
if (it_iu_len > srp_max_req_size || it_iu_len < 64) {
- rej->reason = __constant_cpu_to_be32(
- SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
+ rej->reason = cpu_to_be32(
+ SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
ret = -EINVAL;
pr_err("rejected SRP_LOGIN_REQ because its"
" length (%d bytes) is out of range (%d .. %d)\n",
@@ -2459,8 +2459,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
}
if (!sport->enabled) {
- rej->reason = __constant_cpu_to_be32(
- SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+ rej->reason = cpu_to_be32(
+ SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
ret = -EINVAL;
pr_err("rejected SRP_LOGIN_REQ because the target port"
" has not yet been enabled\n");
@@ -2505,8 +2505,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid)
|| *(__be64 *)(req->target_port_id + 8) !=
cpu_to_be64(srpt_service_guid)) {
- rej->reason = __constant_cpu_to_be32(
- SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
+ rej->reason = cpu_to_be32(
+ SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
ret = -ENOMEM;
pr_err("rejected SRP_LOGIN_REQ because it"
" has an invalid target port identifier.\n");
@@ -2515,8 +2515,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
ch = kzalloc(sizeof *ch, GFP_KERNEL);
if (!ch) {
- rej->reason = __constant_cpu_to_be32(
- SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+ rej->reason = cpu_to_be32(
+ SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
pr_err("rejected SRP_LOGIN_REQ because no memory.\n");
ret = -ENOMEM;
goto reject;
@@ -2552,8 +2552,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
ret = srpt_create_ch_ib(ch);
if (ret) {
- rej->reason = __constant_cpu_to_be32(
- SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+ rej->reason = cpu_to_be32(
+ SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
pr_err("rejected SRP_LOGIN_REQ because creating"
" a new RDMA channel failed.\n");
goto free_ring;
@@ -2561,8 +2561,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
ret = srpt_ch_qp_rtr(ch, ch->qp);
if (ret) {
- rej->reason = __constant_cpu_to_be32(
- SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+ rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
pr_err("rejected SRP_LOGIN_REQ because enabling"
" RTR failed (error code = %d)\n", ret);
goto destroy_ib;
@@ -2580,15 +2579,15 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
if (!nacl) {
pr_info("Rejected login because no ACL has been"
" configured yet for initiator %s.\n", ch->sess_name);
- rej->reason = __constant_cpu_to_be32(
- SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
+ rej->reason = cpu_to_be32(
+ SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
goto destroy_ib;
}
ch->sess = transport_init_session(TARGET_PROT_NORMAL);
if (IS_ERR(ch->sess)) {
- rej->reason = __constant_cpu_to_be32(
- SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
+ rej->reason = cpu_to_be32(
+ SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
pr_debug("Failed to create session\n");
goto deregister_session;
}
@@ -2604,8 +2603,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
rsp->max_it_iu_len = req->req_it_iu_len;
rsp->max_ti_iu_len = req->req_it_iu_len;
ch->max_ti_iu_len = it_iu_len;
- rsp->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT
- | SRP_BUF_FORMAT_INDIRECT);
+ rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT
+ | SRP_BUF_FORMAT_INDIRECT);
rsp->req_lim_delta = cpu_to_be32(ch->rq_size);
atomic_set(&ch->req_lim, ch->rq_size);
atomic_set(&ch->req_lim_delta, 0);
@@ -2655,8 +2654,8 @@ free_ch:
reject:
rej->opcode = SRP_LOGIN_REJ;
rej->tag = req->tag;
- rej->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT
- | SRP_BUF_FORMAT_INDIRECT);
+ rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT
+ | SRP_BUF_FORMAT_INDIRECT);
ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
(void *)rej, sizeof *rej);
@@ -3212,10 +3211,6 @@ static void srpt_add_one(struct ib_device *device)
if (IS_ERR(sdev->pd))
goto free_dev;
- sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(sdev->mr))
- goto err_pd;
-
sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr);
srq_attr.event_handler = srpt_srq_event;
@@ -3227,7 +3222,7 @@ static void srpt_add_one(struct ib_device *device)
sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
if (IS_ERR(sdev->srq))
- goto err_mr;
+ goto err_pd;
pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
__func__, sdev->srq_size, sdev->dev_attr.max_srq_wr,
@@ -3251,7 +3246,7 @@ static void srpt_add_one(struct ib_device *device)
* in the system as service_id; therefore, the target_id will change
* if this HCA is gone bad and replaced by different HCA
*/
- if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL))
+ if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0))
goto err_cm;
INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
@@ -3312,8 +3307,6 @@ err_cm:
ib_destroy_cm_id(sdev->cm_id);
err_srq:
ib_destroy_srq(sdev->srq);
-err_mr:
- ib_dereg_mr(sdev->mr);
err_pd:
ib_dealloc_pd(sdev->pd);
free_dev:
@@ -3327,12 +3320,11 @@ err:
/**
* srpt_remove_one() - InfiniBand device removal callback function.
*/
-static void srpt_remove_one(struct ib_device *device)
+static void srpt_remove_one(struct ib_device *device, void *client_data)
{
- struct srpt_device *sdev;
+ struct srpt_device *sdev = client_data;
int i;
- sdev = ib_get_client_data(device, &srpt_client);
if (!sdev) {
pr_info("%s(%s): nothing to do.\n", __func__, device->name);
return;
@@ -3359,7 +3351,6 @@ static void srpt_remove_one(struct ib_device *device)
srpt_release_sdev(sdev);
ib_destroy_srq(sdev->srq);
- ib_dereg_mr(sdev->mr);
ib_dealloc_pd(sdev->pd);
srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 21f8df67522a..5faad8acd789 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -393,7 +393,6 @@ struct srpt_port {
struct srpt_device {
struct ib_device *device;
struct ib_pd *pd;
- struct ib_mr *mr;
struct ib_srq *srq;
struct ib_cm_id *cm_id;
struct ib_device_attr dev_attr;