aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/ulp
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h19
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c18
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c27
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c49
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c239
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c22
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c107
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h30
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c6
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c102
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c91
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c44
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c1102
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h75
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c8
15 files changed, 1193 insertions, 746 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index d7562beb5423..8ba80a6d3a46 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -98,9 +98,15 @@ enum {
IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
IPOIB_MCAST_FLAG_SENDONLY = 1,
- IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
+ /*
+ * For IPOIB_MCAST_FLAG_BUSY
+ * When set, in flight join and mcast->mc is unreliable
+ * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
+ * haven't started yet
+ * When clear and mcast->mc is valid pointer, join was successful
+ */
+ IPOIB_MCAST_FLAG_BUSY = 2,
IPOIB_MCAST_FLAG_ATTACHED = 3,
- IPOIB_MCAST_JOIN_STARTED = 4,
MAX_SEND_CQE = 16,
IPOIB_CM_COPYBREAK = 256,
@@ -317,6 +323,7 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
+ struct workqueue_struct *wq;
struct delayed_work mcast_task;
struct work_struct carrier_on_task;
struct work_struct flush_light;
@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
-int ipoib_ib_dev_open(struct net_device *dev, int flush);
+int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
+int ipoib_mcast_stop_thread(struct net_device *dev);
void ipoib_mcast_dev_down(struct net_device *dev);
void ipoib_mcast_dev_flush(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 933efcea0d03..56959adb6c7d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
}
spin_lock_irq(&priv->lock);
- queue_delayed_work(ipoib_workqueue,
+ queue_delayed_work(priv->wq,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
/* Add this entry to passive ids list head, but do not re-add it
* if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->lock, flags);
list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
ipoib_cm_start_rx_drain(priv);
- queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+ queue_work(priv->wq, &priv->cm.rx_reap_task);
spin_unlock_irqrestore(&priv->lock, flags);
} else
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->lock, flags);
list_move(&p->list, &priv->cm.rx_reap_list);
spin_unlock_irqrestore(&priv->lock, flags);
- queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+ queue_work(priv->wq, &priv->cm.rx_reap_task);
}
return;
}
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
- queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ queue_work(priv->wq, &priv->cm.reap_task);
}
clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
- queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ queue_work(priv->wq, &priv->cm.reap_task);
}
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
tx->dev = dev;
list_add(&tx->list, &priv->cm.start_list);
set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
- queue_work(ipoib_workqueue, &priv->cm.start_task);
+ queue_work(priv->wq, &priv->cm.start_task);
return tx;
}
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
spin_lock_irqsave(&priv->lock, flags);
list_move(&tx->list, &priv->cm.reap_list);
- queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ queue_work(priv->wq, &priv->cm.reap_task);
ipoib_dbg(priv, "Reap connection for gid %pI6\n",
tx->neigh->daddr + 4);
tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
skb_queue_tail(&priv->cm.skb_queue, skb);
if (e)
- queue_work(ipoib_workqueue, &priv->cm.skb_task);
+ queue_work(priv->wq, &priv->cm.skb_task);
}
static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
}
if (!list_empty(&priv->cm.passive_ids))
- queue_delayed_work(ipoib_workqueue,
+ queue_delayed_work(priv->wq,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
spin_unlock_irq(&priv->lock);
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 72626c348174..fe65abb5150c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -655,7 +655,7 @@ void ipoib_reap_ah(struct work_struct *work)
__ipoib_reap_ah(dev);
if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+ queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
}
@@ -664,7 +664,7 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
drain_tx_cq((struct net_device *)ctx);
}
-int ipoib_ib_dev_open(struct net_device *dev, int flush)
+int ipoib_ib_dev_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
@@ -696,7 +696,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
- queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+ queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +706,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
dev_stop:
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
napi_enable(&priv->napi);
- ipoib_ib_dev_stop(dev, flush);
+ ipoib_ib_dev_stop(dev);
return -1;
}
@@ -738,7 +738,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
return ipoib_mcast_start_thread(dev);
}
-int ipoib_ib_dev_down(struct net_device *dev, int flush)
+int ipoib_ib_dev_down(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -747,7 +747,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
netif_carrier_off(dev);
- ipoib_mcast_stop_thread(dev, flush);
+ ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev);
ipoib_flush_paths(dev);
@@ -807,7 +807,7 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable();
}
-int ipoib_ib_dev_stop(struct net_device *dev, int flush)
+int ipoib_ib_dev_stop(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
@@ -880,8 +880,7 @@ timeout:
/* Wait for all AHs to be reaped */
set_bit(IPOIB_STOP_REAPER, &priv->flags);
cancel_delayed_work(&priv->ah_reap_task);
- if (flush)
- flush_workqueue(ipoib_workqueue);
+ flush_workqueue(priv->wq);
begin = jiffies;
@@ -918,7 +917,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
(unsigned long) dev);
if (dev->flags & IFF_UP) {
- if (ipoib_ib_dev_open(dev, 1)) {
+ if (ipoib_ib_dev_open(dev)) {
ipoib_transport_dev_cleanup(dev);
return -ENODEV;
}
@@ -1040,12 +1039,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
}
if (level >= IPOIB_FLUSH_NORMAL)
- ipoib_ib_dev_down(dev, 0);
+ ipoib_ib_dev_down(dev);
if (level == IPOIB_FLUSH_HEAVY) {
if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
- ipoib_ib_dev_stop(dev, 0);
- if (ipoib_ib_dev_open(dev, 0) != 0)
+ ipoib_ib_dev_stop(dev);
+ if (ipoib_ib_dev_open(dev) != 0)
return;
if (netif_queue_stopped(dev))
netif_start_queue(dev);
@@ -1097,7 +1096,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
*/
ipoib_flush_paths(dev);
- ipoib_mcast_stop_thread(dev, 1);
+ ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev);
ipoib_transport_dev_cleanup(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 58b5aa3b6f2d..6bad17d4d588 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
- if (ipoib_ib_dev_open(dev, 1)) {
+ if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0;
goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
return 0;
err_stop:
- ipoib_ib_dev_stop(dev, 1);
+ ipoib_ib_dev_stop(dev);
err_disable:
clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
- ipoib_ib_dev_down(dev, 1);
- ipoib_ib_dev_stop(dev, 0);
+ ipoib_ib_dev_down(dev);
+ ipoib_ib_dev_stop(dev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -839,7 +839,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
return;
}
- queue_work(ipoib_workqueue, &priv->restart_task);
+ queue_work(priv->wq, &priv->restart_task);
}
static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
@@ -954,7 +954,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
__ipoib_reap_neigh(priv);
if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+ queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
}
@@ -1133,7 +1133,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
/* start garbage collection */
clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
- queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+ queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
return 0;
@@ -1262,15 +1262,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- if (ipoib_neigh_hash_init(priv) < 0)
- goto out;
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
GFP_KERNEL);
if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
ca->name, ipoib_recvq_size);
- goto out_neigh_hash_cleanup;
+ goto out;
}
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1285,16 +1283,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
if (ipoib_ib_dev_init(dev, ca, port))
goto out_tx_ring_cleanup;
+ /*
+ * Must be after ipoib_ib_dev_init so we can allocate a per
+ * device wq there and use it here
+ */
+ if (ipoib_neigh_hash_init(priv) < 0)
+ goto out_dev_uninit;
+
return 0;
+out_dev_uninit:
+ ipoib_ib_dev_cleanup(dev);
+
out_tx_ring_cleanup:
vfree(priv->tx_ring);
out_rx_ring_cleanup:
kfree(priv->rx_ring);
-out_neigh_hash_cleanup:
- ipoib_neigh_hash_uninit(dev);
out:
return -ENOMEM;
}
@@ -1317,6 +1323,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
}
unregister_netdevice_many(&head);
+ /*
+ * Must be before ipoib_ib_dev_cleanup or we delete an in use
+ * work queue
+ */
+ ipoib_neigh_hash_uninit(dev);
+
ipoib_ib_dev_cleanup(dev);
kfree(priv->rx_ring);
@@ -1324,8 +1336,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
priv->rx_ring = NULL;
priv->tx_ring = NULL;
-
- ipoib_neigh_hash_uninit(dev);
}
static const struct header_ops ipoib_header_ops = {
@@ -1636,7 +1646,7 @@ register_failed:
/* Stop GC if started before flush */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
- flush_workqueue(ipoib_workqueue);
+ flush_workqueue(priv->wq);
event_failed:
ipoib_dev_cleanup(priv->dev);
@@ -1707,7 +1717,7 @@ static void ipoib_remove_one(struct ib_device *device)
/* Stop GC */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
- flush_workqueue(ipoib_workqueue);
+ flush_workqueue(priv->wq);
unregister_netdev(priv->dev);
free_netdev(priv->dev);
@@ -1748,8 +1758,13 @@ static int __init ipoib_init_module(void)
* unregister_netdev() and linkwatch_event take the rtnl lock,
* so flush_scheduled_work() can deadlock during device
* removal.
+ *
+ * In addition, bringing one device up and another down at the
+ * same time can deadlock a single workqueue, so we have this
+ * global fallback workqueue, but we also attempt to open a
+ * per device workqueue each time we bring an interface up
*/
- ipoib_workqueue = create_singlethread_workqueue("ipoib");
+ ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
if (!ipoib_workqueue) {
ret = -ENOMEM;
goto err_fs;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index ffb83b5f7e80..bc50dd0d0e4d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -190,12 +190,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
spin_unlock_irq(&priv->lock);
priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
set_qkey = 1;
-
- if (!ipoib_cm_admin_enabled(dev)) {
- rtnl_lock();
- dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
- rtnl_unlock();
- }
}
if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -277,16 +271,27 @@ ipoib_mcast_sendonly_join_complete(int status,
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
+ /*
+ * We have to take the mutex to force mcast_sendonly_join to
+ * return from ib_sa_multicast_join and set mcast->mc to a
+ * valid value. Otherwise we were racing with ourselves in
+ * that we might fail here, but get a valid return from
+ * ib_sa_multicast_join after we had cleared mcast->mc here,
+ * resulting in mis-matched joins and leaves and a deadlock
+ */
+ mutex_lock(&mcast_mutex);
+
/* We trap for port events ourselves. */
if (status == -ENETRESET)
- return 0;
+ goto out;
if (!status)
status = ipoib_mcast_join_finish(mcast, &multicast->rec);
if (status) {
if (mcast->logcount++ < 20)
- ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
+ ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
+ "join failed for %pI6, status %d\n",
mcast->mcmember.mgid.raw, status);
/* Flush out any queued packets */
@@ -296,11 +301,15 @@ ipoib_mcast_sendonly_join_complete(int status,
dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
}
netif_tx_unlock_bh(dev);
-
- /* Clear the busy flag so we try again */
- status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
- &mcast->flags);
}
+out:
+ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ if (status)
+ mcast->mc = NULL;
+ complete(&mcast->done);
+ if (status == -ENETRESET)
+ status = 0;
+ mutex_unlock(&mcast_mutex);
return status;
}
@@ -318,12 +327,14 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
int ret = 0;
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
- ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
+ ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
+ "multicast joins\n");
return -ENODEV;
}
- if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
- ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
+ if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+ ipoib_dbg_mcast(priv, "multicast entry busy, skipping "
+ "sendonly join\n");
return -EBUSY;
}
@@ -331,6 +342,9 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
rec.port_gid = priv->local_gid;
rec.pkey = cpu_to_be16(priv->pkey);
+ mutex_lock(&mcast_mutex);
+ init_completion(&mcast->done);
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
priv->port, &rec,
IB_SA_MCMEMBER_REC_MGID |
@@ -343,12 +357,14 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
if (IS_ERR(mcast->mc)) {
ret = PTR_ERR(mcast->mc);
clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
- ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
- ret);
+ complete(&mcast->done);
+ ipoib_warn(priv, "ib_sa_join_multicast for sendonly join "
+ "failed (ret = %d)\n", ret);
} else {
- ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
- mcast->mcmember.mgid.raw);
+ ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting "
+ "sendonly join\n", mcast->mcmember.mgid.raw);
}
+ mutex_unlock(&mcast_mutex);
return ret;
}
@@ -359,18 +375,29 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
carrier_on_task);
struct ib_port_attr attr;
- /*
- * Take rtnl_lock to avoid racing with ipoib_stop() and
- * turning the carrier back on while a device is being
- * removed.
- */
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
return;
}
- rtnl_lock();
+ /*
+ * Take rtnl_lock to avoid racing with ipoib_stop() and
+ * turning the carrier back on while a device is being
+ * removed. However, ipoib_stop() will attempt to flush
+ * the workqueue while holding the rtnl lock, so loop
+ * on trylock until either we get the lock or we see
+ * FLAG_ADMIN_UP go away as that signals that we are bailing
+ * and can safely ignore the carrier on work.
+ */
+ while (!rtnl_trylock()) {
+ if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+ return;
+ else
+ msleep(20);
+ }
+ if (!ipoib_cm_admin_enabled(priv->dev))
+ dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
netif_carrier_on(priv->dev);
rtnl_unlock();
}
@@ -385,60 +412,63 @@ static int ipoib_mcast_join_complete(int status,
ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
mcast->mcmember.mgid.raw, status);
+ /*
+ * We have to take the mutex to force mcast_join to
+ * return from ib_sa_multicast_join and set mcast->mc to a
+ * valid value. Otherwise we were racing with ourselves in
+ * that we might fail here, but get a valid return from
+ * ib_sa_multicast_join after we had cleared mcast->mc here,
+ * resulting in mis-matched joins and leaves and a deadlock
+ */
+ mutex_lock(&mcast_mutex);
+
/* We trap for port events ourselves. */
- if (status == -ENETRESET) {
- status = 0;
+ if (status == -ENETRESET)
goto out;
- }
if (!status)
status = ipoib_mcast_join_finish(mcast, &multicast->rec);
if (!status) {
mcast->backoff = 1;
- mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->mcast_task, 0);
- mutex_unlock(&mcast_mutex);
+ queue_delayed_work(priv->wq, &priv->mcast_task, 0);
/*
- * Defer carrier on work to ipoib_workqueue to avoid a
+ * Defer carrier on work to priv->wq to avoid a
* deadlock on rtnl_lock here.
*/
if (mcast == priv->broadcast)
- queue_work(ipoib_workqueue, &priv->carrier_on_task);
-
- status = 0;
- goto out;
- }
-
- if (mcast->logcount++ < 20) {
- if (status == -ETIMEDOUT || status == -EAGAIN) {
- ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
- mcast->mcmember.mgid.raw, status);
- } else {
- ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
- mcast->mcmember.mgid.raw, status);
+ queue_work(priv->wq, &priv->carrier_on_task);
+ } else {
+ if (mcast->logcount++ < 20) {
+ if (status == -ETIMEDOUT || status == -EAGAIN) {
+ ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
+ mcast->mcmember.mgid.raw, status);
+ } else {
+ ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
+ mcast->mcmember.mgid.raw, status);
+ }
}
- }
-
- mcast->backoff *= 2;
- if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
- mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
- /* Clear the busy flag so we try again */
- status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-
- mutex_lock(&mcast_mutex);
+ mcast->backoff *= 2;
+ if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+ mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+ }
+out:
spin_lock_irq(&priv->lock);
- if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ if (status)
+ mcast->mc = NULL;
+ complete(&mcast->done);
+ if (status == -ENETRESET)
+ status = 0;
+ if (status && test_bit(IPOIB_MCAST_RUN, &priv->flags))
+ queue_delayed_work(priv->wq, &priv->mcast_task,
mcast->backoff * HZ);
spin_unlock_irq(&priv->lock);
mutex_unlock(&mcast_mutex);
-out:
- complete(&mcast->done);
+
return status;
}
@@ -487,10 +517,9 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
}
- set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ mutex_lock(&mcast_mutex);
init_completion(&mcast->done);
- set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
-
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
&rec, comp_mask, GFP_KERNEL,
ipoib_mcast_join_complete, mcast);
@@ -504,13 +533,11 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
- mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->mcast_task,
+ queue_delayed_work(priv->wq, &priv->mcast_task,
mcast->backoff * HZ);
- mutex_unlock(&mcast_mutex);
}
+ mutex_unlock(&mcast_mutex);
}
void ipoib_mcast_join_task(struct work_struct *work)
@@ -547,8 +574,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
ipoib_warn(priv, "failed to allocate broadcast group\n");
mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue,
- &priv->mcast_task, HZ);
+ queue_delayed_work(priv->wq, &priv->mcast_task,
+ HZ);
mutex_unlock(&mcast_mutex);
return;
}
@@ -563,7 +590,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
}
if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
- if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+ if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
+ !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
ipoib_mcast_join(dev, priv->broadcast, 0);
return;
}
@@ -571,23 +599,33 @@ void ipoib_mcast_join_task(struct work_struct *work)
while (1) {
struct ipoib_mcast *mcast = NULL;
+ /*
+ * Need the mutex so our flags are consistent, need the
+ * priv->lock so we don't race with list removals in either
+ * mcast_dev_flush or mcast_restart_task
+ */
+ mutex_lock(&mcast_mutex);
spin_lock_irq(&priv->lock);
list_for_each_entry(mcast, &priv->multicast_list, list) {
- if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
- && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
- && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+ if (IS_ERR_OR_NULL(mcast->mc) &&
+ !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
+ !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
/* Found the next unjoined group */
break;
}
}
spin_unlock_irq(&priv->lock);
+ mutex_unlock(&mcast_mutex);
if (&mcast->list == &priv->multicast_list) {
/* All done */
break;
}
- ipoib_mcast_join(dev, mcast, 1);
+ if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+ ipoib_mcast_sendonly_join(mcast);
+ else
+ ipoib_mcast_join(dev, mcast, 1);
return;
}
@@ -604,13 +642,13 @@ int ipoib_mcast_start_thread(struct net_device *dev)
mutex_lock(&mcast_mutex);
if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
- queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
+ queue_delayed_work(priv->wq, &priv->mcast_task, 0);
mutex_unlock(&mcast_mutex);
return 0;
}
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
+int ipoib_mcast_stop_thread(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -621,8 +659,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
cancel_delayed_work(&priv->mcast_task);
mutex_unlock(&mcast_mutex);
- if (flush)
- flush_workqueue(ipoib_workqueue);
+ flush_workqueue(priv->wq);
return 0;
}
@@ -633,6 +670,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
int ret = 0;
if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+ ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
+
+ if (!IS_ERR_OR_NULL(mcast->mc))
ib_sa_free_multicast(mcast->mc);
if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -685,6 +725,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
__ipoib_mcast_add(dev, mcast);
list_add_tail(&mcast->list, &priv->multicast_list);
+ if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
+ queue_delayed_work(priv->wq, &priv->mcast_task, 0);
}
if (!mcast->ah) {
@@ -698,8 +740,6 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
ipoib_dbg_mcast(priv, "no address vector, "
"but multicast join already started\n");
- else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
- ipoib_mcast_sendonly_join(mcast);
/*
* If lookup completes between here and out:, don't
@@ -759,9 +799,12 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
spin_unlock_irqrestore(&priv->lock, flags);
- /* seperate between the wait to the leave*/
+ /*
+ * make sure the in-flight joins have finished before we attempt
+ * to leave
+ */
list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
- if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+ if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -794,8 +837,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
ipoib_dbg_mcast(priv, "restarting multicast task\n");
- ipoib_mcast_stop_thread(dev, 0);
-
local_irq_save(flags);
netif_addr_lock(dev);
spin_lock(&priv->lock);
@@ -880,14 +921,38 @@ void ipoib_mcast_restart_task(struct work_struct *work)
netif_addr_unlock(dev);
local_irq_restore(flags);
- /* We have to cancel outside of the spinlock */
+ /*
+ * make sure the in-flight joins have finished before we attempt
+ * to leave
+ */
+ list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+ if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+ wait_for_completion(&mcast->done);
+
+ /*
+ * We have to cancel outside of the spinlock, but we have to
+ * take the rtnl lock or else we race with the removal of
+ * entries from the remove list in mcast_dev_flush as part
+ * of ipoib_stop(). We detect the drop of the ADMIN_UP flag
+ * to signal that we have hit this particular race, and we
+ * return since we know we don't need to do anything else
+ * anyway.
+ */
+ while (!rtnl_trylock()) {
+ if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+ return;
+ else
+ msleep(20);
+ }
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
ipoib_mcast_leave(mcast->dev, mcast);
ipoib_mcast_free(mcast);
}
-
- if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
- ipoib_mcast_start_thread(dev);
+ /*
+ * Restart our join task if needed
+ */
+ ipoib_mcast_start_thread(dev);
+ rtnl_unlock();
}
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index c56d5d44c53b..b72a753eb41d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -145,10 +145,20 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
int ret, size;
int i;
+ /*
+ * the various IPoIB tasks assume they will never race against
+ * themselves, so always use a single thread workqueue
+ */
+ priv->wq = create_singlethread_workqueue("ipoib_wq");
+ if (!priv->wq) {
+ printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
+ return -ENODEV;
+ }
+
priv->pd = ib_alloc_pd(priv->ca);
if (IS_ERR(priv->pd)) {
printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
- return -ENODEV;
+ goto out_free_wq;
}
priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
@@ -242,6 +252,10 @@ out_free_mr:
out_free_pd:
ib_dealloc_pd(priv->pd);
+
+out_free_wq:
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
return -ENODEV;
}
@@ -270,6 +284,12 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
if (ib_dealloc_pd(priv->pd))
ipoib_warn(priv, "ib_dealloc_pd failed\n");
+
+ if (priv->wq) {
+ flush_workqueue(priv->wq);
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+ }
}
void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index f42ab14105ac..6a594aac2290 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -97,7 +97,7 @@ module_param_named(pi_enable, iser_pi_enable, bool, 0644);
MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
module_param_named(pi_guard, iser_pi_guard, int, 0644);
-MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:IP_CSUM)");
+MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
static struct workqueue_struct *release_wq;
struct iser_global ig;
@@ -164,18 +164,42 @@ iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
return 0;
}
-int iser_initialize_task_headers(struct iscsi_task *task,
- struct iser_tx_desc *tx_desc)
+/**
+ * iser_initialize_task_headers() - Initialize task headers
+ * @task: iscsi task
+ * @tx_desc: iser tx descriptor
+ *
+ * Notes:
+ * This routine may race with iser teardown flow for scsi
+ * error handling TMFs. So for TMF we should acquire the
+ * state mutex to avoid dereferencing the IB device which
+ * may have already been terminated.
+ */
+int
+iser_initialize_task_headers(struct iscsi_task *task,
+ struct iser_tx_desc *tx_desc)
{
- struct iser_conn *iser_conn = task->conn->dd_data;
+ struct iser_conn *iser_conn = task->conn->dd_data;
struct iser_device *device = iser_conn->ib_conn.device;
struct iscsi_iser_task *iser_task = task->dd_data;
u64 dma_addr;
+ const bool mgmt_task = !task->sc && !in_interrupt();
+ int ret = 0;
+
+ if (unlikely(mgmt_task))
+ mutex_lock(&iser_conn->state_mutex);
+
+ if (unlikely(iser_conn->state != ISER_CONN_UP)) {
+ ret = -ENODEV;
+ goto out;
+ }
dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
ISER_HEADERS_LEN, DMA_TO_DEVICE);
- if (ib_dma_mapping_error(device->ib_device, dma_addr))
- return -ENOMEM;
+ if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
+ ret = -ENOMEM;
+ goto out;
+ }
tx_desc->dma_addr = dma_addr;
tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
@@ -183,7 +207,11 @@ int iser_initialize_task_headers(struct iscsi_task *task,
tx_desc->tx_sg[0].lkey = device->mr->lkey;
iser_task->iser_conn = iser_conn;
- return 0;
+out:
+ if (unlikely(mgmt_task))
+ mutex_unlock(&iser_conn->state_mutex);
+
+ return ret;
}
/**
@@ -199,9 +227,14 @@ static int
iscsi_iser_task_init(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
+ int ret;
- if (iser_initialize_task_headers(task, &iser_task->desc))
- return -ENOMEM;
+ ret = iser_initialize_task_headers(task, &iser_task->desc);
+ if (ret) {
+ iser_err("Failed to init task %p, err = %d\n",
+ iser_task, ret);
+ return ret;
+ }
/* mgmt task */
if (!task->sc)
@@ -508,8 +541,8 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
*/
if (iser_conn) {
mutex_lock(&iser_conn->state_mutex);
- iscsi_conn_stop(cls_conn, flag);
iser_conn_terminate(iser_conn);
+ iscsi_conn_stop(cls_conn, flag);
/* unbind */
iser_conn->iscsi_conn = NULL;
@@ -541,12 +574,13 @@ iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
static inline unsigned int
iser_dif_prot_caps(int prot_caps)
{
- return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION |
- SHOST_DIX_TYPE1_PROTECTION : 0) |
- ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION |
- SHOST_DIX_TYPE2_PROTECTION : 0) |
- ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION |
- SHOST_DIX_TYPE3_PROTECTION : 0);
+ return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ?
+ SHOST_DIF_TYPE1_PROTECTION | SHOST_DIX_TYPE0_PROTECTION |
+ SHOST_DIX_TYPE1_PROTECTION : 0) |
+ ((prot_caps & IB_PROT_T10DIF_TYPE_2) ?
+ SHOST_DIF_TYPE2_PROTECTION | SHOST_DIX_TYPE2_PROTECTION : 0) |
+ ((prot_caps & IB_PROT_T10DIF_TYPE_3) ?
+ SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE3_PROTECTION : 0);
}
/**
@@ -569,6 +603,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
struct Scsi_Host *shost;
struct iser_conn *iser_conn = NULL;
struct ib_conn *ib_conn;
+ u16 max_cmds;
shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
if (!shost)
@@ -586,26 +621,41 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
*/
if (ep) {
iser_conn = ep->dd_data;
+ max_cmds = iser_conn->max_cmds;
+
+ mutex_lock(&iser_conn->state_mutex);
+ if (iser_conn->state != ISER_CONN_UP) {
+ iser_err("iser conn %p already started teardown\n",
+ iser_conn);
+ mutex_unlock(&iser_conn->state_mutex);
+ goto free_host;
+ }
+
ib_conn = &iser_conn->ib_conn;
if (ib_conn->pi_support) {
u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
- if (iser_pi_guard)
- scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP);
- else
- scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC);
+ scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
+ SHOST_DIX_GUARD_CRC);
}
- }
- if (iscsi_host_add(shost, ep ?
- ib_conn->device->ib_device->dma_device : NULL))
- goto free_host;
+ if (iscsi_host_add(shost,
+ ib_conn->device->ib_device->dma_device)) {
+ mutex_unlock(&iser_conn->state_mutex);
+ goto free_host;
+ }
+ mutex_unlock(&iser_conn->state_mutex);
+ } else {
+ max_cmds = ISER_DEF_XMIT_CMDS_MAX;
+ if (iscsi_host_add(shost, NULL))
+ goto free_host;
+ }
- if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
+ if (cmds_max > max_cmds) {
iser_info("cmds_max changed from %u to %u\n",
- cmds_max, ISER_DEF_XMIT_CMDS_MAX);
- cmds_max = ISER_DEF_XMIT_CMDS_MAX;
+ cmds_max, max_cmds);
+ cmds_max = max_cmds;
}
cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
@@ -911,7 +961,7 @@ static struct scsi_host_template iscsi_iser_sht = {
.module = THIS_MODULE,
.name = "iSCSI Initiator over iSER",
.queuecommand = iscsi_queuecommand,
- .change_queue_depth = iscsi_change_queue_depth,
+ .change_queue_depth = scsi_change_queue_depth,
.sg_tablesize = ISCSI_ISER_SG_TABLESIZE,
.max_sectors = 1024,
.cmd_per_lun = ISER_DEF_CMD_PER_LUN,
@@ -922,6 +972,7 @@ static struct scsi_host_template iscsi_iser_sht = {
.use_clustering = DISABLE_CLUSTERING,
.proc_name = "iscsi_iser",
.this_id = -1,
+ .track_queue_depth = 1,
};
static struct iscsi_transport iscsi_iser_transport = {
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index cd4174ca9a76..5ce26817e7e1 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -69,34 +69,31 @@
#define DRV_NAME "iser"
#define PFX DRV_NAME ": "
-#define DRV_VER "1.4.8"
+#define DRV_VER "1.5"
#define iser_dbg(fmt, arg...) \
do { \
- if (iser_debug_level > 2) \
+ if (unlikely(iser_debug_level > 2)) \
printk(KERN_DEBUG PFX "%s: " fmt,\
__func__ , ## arg); \
} while (0)
#define iser_warn(fmt, arg...) \
do { \
- if (iser_debug_level > 0) \
+ if (unlikely(iser_debug_level > 0)) \
pr_warn(PFX "%s: " fmt, \
__func__ , ## arg); \
} while (0)
#define iser_info(fmt, arg...) \
do { \
- if (iser_debug_level > 1) \
+ if (unlikely(iser_debug_level > 1)) \
pr_info(PFX "%s: " fmt, \
__func__ , ## arg); \
} while (0)
-#define iser_err(fmt, arg...) \
- do { \
- printk(KERN_ERR PFX "%s: " fmt, \
- __func__ , ## arg); \
- } while (0)
+#define iser_err(fmt, arg...) \
+ pr_err(PFX "%s: " fmt, __func__ , ## arg)
#define SHIFT_4K 12
#define SIZE_4K (1ULL << SHIFT_4K)
@@ -144,6 +141,11 @@
ISER_MAX_TX_MISC_PDUS + \
ISER_MAX_RX_MISC_PDUS)
+#define ISER_GET_MAX_XMIT_CMDS(send_wr) ((send_wr \
+ - ISER_MAX_TX_MISC_PDUS \
+ - ISER_MAX_RX_MISC_PDUS) / \
+ (1 + ISER_INFLIGHT_DATAOUTS))
+
#define ISER_WC_BATCH_COUNT 16
#define ISER_SIGNAL_CMD_COUNT 32
@@ -247,7 +249,6 @@ struct iscsi_endpoint;
* @va: MR start address (buffer va)
* @len: MR length
* @mem_h: pointer to registration context (FMR/Fastreg)
- * @is_mr: indicates weather we registered the buffer
*/
struct iser_mem_reg {
u32 lkey;
@@ -255,7 +256,6 @@ struct iser_mem_reg {
u64 va;
u64 len;
void *mem_h;
- int is_mr;
};
/**
@@ -323,8 +323,6 @@ struct iser_rx_desc {
char pad[ISER_RX_PAD_SIZE];
} __attribute__((packed));
-#define ISER_MAX_CQ 4
-
struct iser_conn;
struct ib_conn;
struct iscsi_iser_task;
@@ -375,7 +373,7 @@ struct iser_device {
struct list_head ig_list;
int refcount;
int comps_used;
- struct iser_comp comps[ISER_MAX_CQ];
+ struct iser_comp *comps;
int (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn,
unsigned cmds_max);
void (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn);
@@ -432,6 +430,7 @@ struct fast_reg_descriptor {
* @cma_id: rdma_cm connection maneger handle
* @qp: Connection Queue-pair
* @post_recv_buf_count: post receive counter
+ * @sig_count: send work request signal count
* @rx_wr: receive work request for batch posts
* @device: reference to iser device
* @comp: iser completion context
@@ -452,6 +451,7 @@ struct ib_conn {
struct rdma_cm_id *cma_id;
struct ib_qp *qp;
int post_recv_buf_count;
+ u8 sig_count;
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
struct iser_device *device;
struct iser_comp *comp;
@@ -482,6 +482,7 @@ struct ib_conn {
* to max number of post recvs
* @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1)
* @min_posted_rx: (qp_max_recv_dtos >> 2)
+ * @max_cmds: maximum cmds allowed for this connection
* @name: connection peer portal
* @release_work: deffered work for release job
* @state_mutex: protects iser onnection state
@@ -507,6 +508,7 @@ struct iser_conn {
unsigned qp_max_recv_dtos;
unsigned qp_max_recv_dtos_mask;
unsigned min_posted_rx;
+ u16 max_cmds;
char name[ISER_OBJECT_NAME_SIZE];
struct work_struct release_work;
struct mutex state_mutex;
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 5a489ea63732..3821633f1065 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -369,7 +369,7 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
return 0;
}
-static inline bool iser_signal_comp(int sig_count)
+static inline bool iser_signal_comp(u8 sig_count)
{
return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
}
@@ -388,7 +388,7 @@ int iser_send_command(struct iscsi_conn *conn,
struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
struct scsi_cmnd *sc = task->sc;
struct iser_tx_desc *tx_desc = &iser_task->desc;
- static unsigned sig_count;
+ u8 sig_count = ++iser_conn->ib_conn.sig_count;
edtl = ntohl(hdr->data_length);
@@ -435,7 +435,7 @@ int iser_send_command(struct iscsi_conn *conn,
iser_task->status = ISER_TASK_STATUS_STARTED;
err = iser_post_send(&iser_conn->ib_conn, tx_desc,
- iser_signal_comp(++sig_count));
+ iser_signal_comp(sig_count));
if (!err)
return 0;
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 6c5ce357fba6..abce9339333f 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -73,7 +73,6 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
if (cmd_dir == ISER_DIR_OUT) {
/* copy the unaligned sg the buffer which is used for RDMA */
- int i;
char *p, *from;
sgl = (struct scatterlist *)data->buf;
@@ -409,7 +408,6 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
- regd_buf->reg.is_mr = 0;
iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
"va: 0x%08lX sz: %ld]\n",
@@ -440,13 +438,13 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
return 0;
}
-static inline void
+static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
struct ib_sig_domain *domain)
{
domain->sig_type = IB_SIG_TYPE_T10_DIF;
- domain->sig.dif.pi_interval = sc->device->sector_size;
- domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff;
+ domain->sig.dif.pi_interval = scsi_prot_interval(sc);
+ domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
/*
* At the moment we hard code those, but in the future
* we will take them from sc.
@@ -454,8 +452,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
domain->sig.dif.apptag_check_mask = 0xffff;
domain->sig.dif.app_escape = true;
domain->sig.dif.ref_escape = true;
- if (scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE1 ||
- scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE2)
+ if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
domain->sig.dif.ref_remap = true;
};
@@ -473,26 +470,16 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
case SCSI_PROT_WRITE_STRIP:
sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
- /*
- * At the moment we use this modparam to tell what is
- * the memory bg_type, in the future we will take it
- * from sc.
- */
- sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
- IB_T10DIF_CRC;
+ sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
+ IB_T10DIF_CSUM : IB_T10DIF_CRC;
break;
case SCSI_PROT_READ_PASS:
case SCSI_PROT_WRITE_PASS:
iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
- /*
- * At the moment we use this modparam to tell what is
- * the memory bg_type, in the future we will take it
- * from sc.
- */
- sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
- IB_T10DIF_CRC;
+ sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
+ IB_T10DIF_CSUM : IB_T10DIF_CRC;
break;
default:
iser_err("Unsupported PI operation %d\n",
@@ -503,26 +490,28 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
return 0;
}
-static int
+static inline void
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
- switch (scsi_get_prot_type(sc)) {
- case SCSI_PROT_DIF_TYPE0:
- break;
- case SCSI_PROT_DIF_TYPE1:
- case SCSI_PROT_DIF_TYPE2:
- *mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG;
- break;
- case SCSI_PROT_DIF_TYPE3:
- *mask = ISER_CHECK_GUARD;
- break;
- default:
- iser_err("Unsupported protection type %d\n",
- scsi_get_prot_type(sc));
- return -EINVAL;
- }
+ *mask = 0;
+ if (sc->prot_flags & SCSI_PROT_REF_CHECK)
+ *mask |= ISER_CHECK_REFTAG;
+ if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
+ *mask |= ISER_CHECK_GUARD;
+}
- return 0;
+static void
+iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+{
+ u32 rkey;
+
+ memset(inv_wr, 0, sizeof(*inv_wr));
+ inv_wr->opcode = IB_WR_LOCAL_INV;
+ inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+ inv_wr->ex.invalidate_rkey = mr->rkey;
+
+ rkey = ib_inc_rkey(mr->rkey);
+ ib_update_fast_reg_key(mr, rkey);
}
static int
@@ -536,26 +525,17 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
struct ib_send_wr *bad_wr, *wr = NULL;
struct ib_sig_attrs sig_attrs;
int ret;
- u32 key;
memset(&sig_attrs, 0, sizeof(sig_attrs));
ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
if (ret)
goto err;
- ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
- if (ret)
- goto err;
+ iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
- memset(&inv_wr, 0, sizeof(inv_wr));
- inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.wr_id = ISER_FASTREG_LI_WRID;
- inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
+ iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
wr = &inv_wr;
- /* Bump the key */
- key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
}
memset(&sig_wr, 0, sizeof(sig_wr));
@@ -585,12 +565,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
sig_sge->lkey = pi_ctx->sig_mr->lkey;
sig_sge->addr = 0;
- sig_sge->length = data_sge->length + prot_sge->length;
- if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT ||
- scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) {
- sig_sge->length += (data_sge->length /
- iser_task->sc->device->sector_size) * 8;
- }
+ sig_sge->length = scsi_transfer_length(iser_task->sc);
iser_dbg("sig_sge: addr: 0x%llx length: %u lkey: 0x%x\n",
sig_sge->addr, sig_sge->length,
@@ -613,7 +588,6 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct ib_fast_reg_page_list *frpl;
struct ib_send_wr fastreg_wr, inv_wr;
struct ib_send_wr *bad_wr, *wr = NULL;
- u8 key;
int ret, offset, size, plen;
/* if there a single dma entry, dma mr suffices */
@@ -645,14 +619,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
}
if (!(desc->reg_indicators & ind)) {
- memset(&inv_wr, 0, sizeof(inv_wr));
- inv_wr.wr_id = ISER_FASTREG_LI_WRID;
- inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.ex.invalidate_rkey = mr->rkey;
+ iser_inv_rkey(&inv_wr, mr);
wr = &inv_wr;
- /* Bump the key */
- key = (u8)(mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(mr, ++key);
}
/* Prepare FASTREG WR */
@@ -770,15 +738,11 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
regd_buf->reg.va = sig_sge.addr;
regd_buf->reg.len = sig_sge.length;
- regd_buf->reg.is_mr = 1;
} else {
- if (desc) {
+ if (desc)
regd_buf->reg.rkey = desc->data_mr->rkey;
- regd_buf->reg.is_mr = 1;
- } else {
+ else
regd_buf->reg.rkey = device->mr->rkey;
- regd_buf->reg.is_mr = 0;
- }
regd_buf->reg.lkey = data_sge.lkey;
regd_buf->reg.va = data_sge.addr;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 67225bb82bb5..695a2704bd43 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -76,7 +76,7 @@ static void iser_event_handler(struct ib_event_handler *handler,
static int iser_create_device_ib_res(struct iser_device *device)
{
struct ib_device_attr *dev_attr = &device->dev_attr;
- int ret, i;
+ int ret, i, max_cqe;
ret = ib_query_device(device->ib_device, dev_attr);
if (ret) {
@@ -104,11 +104,19 @@ static int iser_create_device_ib_res(struct iser_device *device)
return -1;
}
- device->comps_used = min(ISER_MAX_CQ,
+ device->comps_used = min_t(int, num_online_cpus(),
device->ib_device->num_comp_vectors);
- iser_info("using %d CQs, device %s supports %d vectors\n",
+
+ device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
+ GFP_KERNEL);
+ if (!device->comps)
+ goto comps_err;
+
+ max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+
+ iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
device->comps_used, device->ib_device->name,
- device->ib_device->num_comp_vectors);
+ device->ib_device->num_comp_vectors, max_cqe);
device->pd = ib_alloc_pd(device->ib_device);
if (IS_ERR(device->pd))
@@ -122,7 +130,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
iser_cq_callback,
iser_cq_event_callback,
(void *)comp,
- ISER_MAX_CQ_LEN, i);
+ max_cqe, i);
if (IS_ERR(comp->cq)) {
comp->cq = NULL;
goto cq_err;
@@ -162,6 +170,8 @@ cq_err:
}
ib_dealloc_pd(device->pd);
pd_err:
+ kfree(device->comps);
+comps_err:
iser_err("failed to allocate an IB resource\n");
return -1;
}
@@ -187,6 +197,9 @@ static void iser_free_device_ib_res(struct iser_device *device)
(void)ib_dereg_mr(device->mr);
(void)ib_dealloc_pd(device->pd);
+ kfree(device->comps);
+ device->comps = NULL;
+
device->mr = NULL;
device->pd = NULL;
}
@@ -425,7 +438,10 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
*/
static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
+ struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+ ib_conn);
struct iser_device *device;
+ struct ib_device_attr *dev_attr;
struct ib_qp_init_attr init_attr;
int ret = -ENOMEM;
int index, min_index = 0;
@@ -433,6 +449,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
BUG_ON(ib_conn->device == NULL);
device = ib_conn->device;
+ dev_attr = &device->dev_attr;
memset(&init_attr, 0, sizeof init_attr);
@@ -460,8 +477,20 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
if (ib_conn->pi_support) {
init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
+ iser_conn->max_cmds =
+ ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
} else {
- init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
+ if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+ init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
+ iser_conn->max_cmds =
+ ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
+ } else {
+ init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+ iser_conn->max_cmds =
+ ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+ iser_dbg("device %s supports max_send_wr %d\n",
+ device->ib_device->name, dev_attr->max_qp_wr);
+ }
}
ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
@@ -475,7 +504,11 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
return ret;
out_err:
+ mutex_lock(&ig.connlist_mutex);
+ ib_conn->comp->active_qps--;
+ mutex_unlock(&ig.connlist_mutex);
iser_err("unable to alloc mem or create resource, err %d\n", ret);
+
return ret;
}
@@ -610,9 +643,11 @@ void iser_conn_release(struct iser_conn *iser_conn)
mutex_unlock(&ig.connlist_mutex);
mutex_lock(&iser_conn->state_mutex);
- if (iser_conn->state != ISER_CONN_DOWN)
+ if (iser_conn->state != ISER_CONN_DOWN) {
iser_warn("iser conn %p state %d, expected state down.\n",
iser_conn, iser_conn->state);
+ iser_conn->state = ISER_CONN_DOWN;
+ }
/*
* In case we never got to bind stage, we still need to
* release IB resources (which is safe to call more than once).
@@ -662,8 +697,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
/* post an indication that all flush errors were consumed */
err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
- if (err)
+ if (err) {
iser_err("conn %p failed to post beacon", ib_conn);
+ return 1;
+ }
wait_for_completion(&ib_conn->flush_comp);
}
@@ -846,20 +883,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
break;
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_ADDR_CHANGE:
- iser_disconnected_handler(cma_id);
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+ iser_cleanup_handler(cma_id, false);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
/*
* we *must* destroy the device as we cannot rely
* on iscsid to be around to initiate error handling.
- * also implicitly destroy the cma_id.
+ * also if we are not in state DOWN implicitly destroy
+ * the cma_id.
*/
iser_cleanup_handler(cma_id, true);
- iser_conn->ib_conn.cma_id = NULL;
- ret = 1;
- break;
- case RDMA_CM_EVENT_TIMEWAIT_EXIT:
- iser_cleanup_handler(cma_id, false);
+ if (iser_conn->state != ISER_CONN_DOWN) {
+ iser_conn->ib_conn.cma_id = NULL;
+ ret = 1;
+ }
break;
default:
iser_err("Unexpected RDMA CM event (%d)\n", event->event);
@@ -981,7 +1019,6 @@ int iser_reg_page_vec(struct ib_conn *ib_conn,
mem_reg->rkey = mem->fmr->rkey;
mem_reg->len = page_vec->length * SIZE_4K;
mem_reg->va = io_addr;
- mem_reg->is_mr = 1;
mem_reg->mem_h = (void *)mem;
mem_reg->va += page_vec->offset;
@@ -1008,7 +1045,7 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
int ret;
- if (!reg->is_mr)
+ if (!reg->mem_h)
return;
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
@@ -1028,11 +1065,10 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct fast_reg_descriptor *desc = reg->mem_h;
- if (!reg->is_mr)
+ if (!desc)
return;
reg->mem_h = NULL;
- reg->is_mr = 0;
spin_lock_bh(&ib_conn->lock);
list_add_tail(&desc->list, &ib_conn->fastreg.pool);
spin_unlock_bh(&ib_conn->lock);
@@ -1049,7 +1085,7 @@ int iser_post_recvl(struct iser_conn *iser_conn)
sge.length = ISER_RX_LOGIN_SIZE;
sge.lkey = ib_conn->device->mr->lkey;
- rx_wr.wr_id = (unsigned long)iser_conn->login_resp_buf;
+ rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
rx_wr.sg_list = &sge;
rx_wr.num_sge = 1;
rx_wr.next = NULL;
@@ -1073,7 +1109,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
rx_desc = &iser_conn->rx_descs[my_rx_head];
- rx_wr->wr_id = (unsigned long)rx_desc;
+ rx_wr->wr_id = (uintptr_t)rx_desc;
rx_wr->sg_list = &rx_desc->rx_sg;
rx_wr->num_sge = 1;
rx_wr->next = rx_wr + 1;
@@ -1110,7 +1146,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
DMA_TO_DEVICE);
send_wr.next = NULL;
- send_wr.wr_id = (unsigned long)tx_desc;
+ send_wr.wr_id = (uintptr_t)tx_desc;
send_wr.sg_list = tx_desc->tx_sg;
send_wr.num_sge = tx_desc->num_sge;
send_wr.opcode = IB_WR_SEND;
@@ -1160,6 +1196,7 @@ static void
iser_handle_comp_error(struct ib_conn *ib_conn,
struct ib_wc *wc)
{
+ void *wr_id = (void *)(uintptr_t)wc->wr_id;
struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
ib_conn);
@@ -1168,8 +1205,8 @@ iser_handle_comp_error(struct ib_conn *ib_conn,
iscsi_conn_failure(iser_conn->iscsi_conn,
ISCSI_ERR_CONN_FAILED);
- if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
- struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;
+ if (is_iser_tx_desc(iser_conn, wr_id)) {
+ struct iser_tx_desc *desc = wr_id;
if (desc->type == ISCSI_TX_DATAOUT)
kmem_cache_free(ig.desc_cache, desc);
@@ -1193,14 +1230,14 @@ static void iser_handle_wc(struct ib_wc *wc)
struct iser_rx_desc *rx_desc;
ib_conn = wc->qp->qp_context;
- if (wc->status == IB_WC_SUCCESS) {
+ if (likely(wc->status == IB_WC_SUCCESS)) {
if (wc->opcode == IB_WC_RECV) {
- rx_desc = (struct iser_rx_desc *)wc->wr_id;
+ rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
iser_rcv_completion(rx_desc, wc->byte_len,
ib_conn);
} else
if (wc->opcode == IB_WC_SEND) {
- tx_desc = (struct iser_tx_desc *)wc->wr_id;
+ tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
iser_snd_completion(tx_desc, ib_conn);
} else {
iser_err("Unknown wc opcode %d\n", wc->opcode);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 3effa931fce2..10641b7816f4 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -115,9 +115,12 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id,
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS;
/*
* FIXME: Use devattr.max_sge - 2 for max_send_sge as
- * work-around for RDMA_READ..
+ * work-around for RDMA_READs with ConnectX-2.
+ *
+ * Also, still make sure to have at least two SGEs for
+ * outgoing control PDU responses.
*/
- attr.cap.max_send_sge = device->dev_attr.max_sge - 2;
+ attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2);
isert_conn->max_sge = attr.cap.max_send_sge;
attr.cap.max_recv_sge = 1;
@@ -225,12 +228,16 @@ isert_create_device_ib_res(struct isert_device *device)
struct isert_cq_desc *cq_desc;
struct ib_device_attr *dev_attr;
int ret = 0, i, j;
+ int max_rx_cqe, max_tx_cqe;
dev_attr = &device->dev_attr;
ret = isert_query_device(ib_dev, dev_attr);
if (ret)
return ret;
+ max_rx_cqe = min(ISER_MAX_RX_CQ_LEN, dev_attr->max_cqe);
+ max_tx_cqe = min(ISER_MAX_TX_CQ_LEN, dev_attr->max_cqe);
+
/* asign function handlers */
if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
@@ -272,7 +279,7 @@ isert_create_device_ib_res(struct isert_device *device)
isert_cq_rx_callback,
isert_cq_event_callback,
(void *)&cq_desc[i],
- ISER_MAX_RX_CQ_LEN, i);
+ max_rx_cqe, i);
if (IS_ERR(device->dev_rx_cq[i])) {
ret = PTR_ERR(device->dev_rx_cq[i]);
device->dev_rx_cq[i] = NULL;
@@ -284,7 +291,7 @@ isert_create_device_ib_res(struct isert_device *device)
isert_cq_tx_callback,
isert_cq_event_callback,
(void *)&cq_desc[i],
- ISER_MAX_TX_CQ_LEN, i);
+ max_tx_cqe, i);
if (IS_ERR(device->dev_tx_cq[i])) {
ret = PTR_ERR(device->dev_tx_cq[i]);
device->dev_tx_cq[i] = NULL;
@@ -803,14 +810,25 @@ wake_up:
complete(&isert_conn->conn_wait);
}
-static void
+static int
isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect)
{
- struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context;
+ struct isert_conn *isert_conn;
+
+ if (!cma_id->qp) {
+ struct isert_np *isert_np = cma_id->context;
+
+ isert_np->np_cm_id = NULL;
+ return -1;
+ }
+
+ isert_conn = (struct isert_conn *)cma_id->context;
isert_conn->disconnect = disconnect;
INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work);
schedule_work(&isert_conn->conn_logout_work);
+
+ return 0;
}
static int
@@ -825,6 +843,9 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST:
ret = isert_connect_request(cma_id, event);
+ if (ret)
+ pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n",
+ event->event, ret);
break;
case RDMA_CM_EVENT_ESTABLISHED:
isert_connected_handler(cma_id);
@@ -834,7 +855,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
disconnect = true;
case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */
- isert_disconnected_handler(cma_id, disconnect);
+ ret = isert_disconnected_handler(cma_id, disconnect);
break;
case RDMA_CM_EVENT_CONNECT_ERROR:
default:
@@ -842,12 +863,6 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
break;
}
- if (ret != 0) {
- pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n",
- event->event, ret);
- dump_stack();
- }
-
return ret;
}
@@ -3190,7 +3205,8 @@ isert_free_np(struct iscsi_np *np)
{
struct isert_np *isert_np = (struct isert_np *)np->np_context;
- rdma_destroy_id(isert_np->np_cm_id);
+ if (isert_np->np_cm_id)
+ rdma_destroy_id(isert_np->np_cm_id);
np->np_context = NULL;
kfree(isert_np);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 62d2a18e1b41..db3c8c851af1 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -123,10 +123,15 @@ MODULE_PARM_DESC(dev_loss_tmo,
" if fast_io_fail_tmo has not been set. \"off\" means that"
" this functionality is disabled.");
+static unsigned ch_count;
+module_param(ch_count, uint, 0444);
+MODULE_PARM_DESC(ch_count,
+ "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
+
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device);
-static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
-static void srp_send_completion(struct ib_cq *cq, void *target_ptr);
+static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
+static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
static struct scsi_transport_template *ib_srp_transport_template;
@@ -262,7 +267,7 @@ static int srp_init_qp(struct srp_target_port *target,
ret = ib_find_pkey(target->srp_host->srp_dev->dev,
target->srp_host->port,
- be16_to_cpu(target->path.pkey),
+ be16_to_cpu(target->pkey),
&attr->pkey_index);
if (ret)
goto out;
@@ -283,18 +288,23 @@ out:
return ret;
}
-static int srp_new_cm_id(struct srp_target_port *target)
+static int srp_new_cm_id(struct srp_rdma_ch *ch)
{
+ struct srp_target_port *target = ch->target;
struct ib_cm_id *new_cm_id;
new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
- srp_cm_handler, target);
+ srp_cm_handler, ch);
if (IS_ERR(new_cm_id))
return PTR_ERR(new_cm_id);
- if (target->cm_id)
- ib_destroy_cm_id(target->cm_id);
- target->cm_id = new_cm_id;
+ if (ch->cm_id)
+ ib_destroy_cm_id(ch->cm_id);
+ ch->cm_id = new_cm_id;
+ ch->path.sgid = target->sgid;
+ ch->path.dgid = target->orig_dgid;
+ ch->path.pkey = target->pkey;
+ ch->path.service_id = target->service_id;
return 0;
}
@@ -443,8 +453,44 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
dev->max_pages_per_mr);
}
-static int srp_create_target_ib(struct srp_target_port *target)
+/**
+ * srp_destroy_qp() - destroy an RDMA queue pair
+ * @ch: SRP RDMA channel.
+ *
+ * Change a queue pair into the error state and wait until all receive
+ * completions have been processed before destroying it. This avoids that
+ * the receive completion handler can access the queue pair while it is
+ * being destroyed.
+ */
+static void srp_destroy_qp(struct srp_rdma_ch *ch)
+{
+ struct srp_target_port *target = ch->target;
+ static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
+ struct ib_recv_wr *bad_wr;
+ int ret;
+
+ /* Destroying a QP and reusing ch->done is only safe if not connected */
+ WARN_ON_ONCE(target->connected);
+
+ ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
+ WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
+ if (ret)
+ goto out;
+
+ init_completion(&ch->done);
+ ret = ib_post_recv(ch->qp, &wr, &bad_wr);
+ WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
+ if (ret == 0)
+ wait_for_completion(&ch->done);
+
+out:
+ ib_destroy_qp(ch->qp);
+}
+
+static int srp_create_ch_ib(struct srp_rdma_ch *ch)
{
+ struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_qp_init_attr *init_attr;
struct ib_cq *recv_cq, *send_cq;
@@ -458,15 +504,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
if (!init_attr)
return -ENOMEM;
- recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, target,
- target->queue_size, target->comp_vector);
+ /* + 1 for SRP_LAST_WR_ID */
+ recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
+ target->queue_size + 1, ch->comp_vector);
if (IS_ERR(recv_cq)) {
ret = PTR_ERR(recv_cq);
goto err;
}
- send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target,
- m * target->queue_size, target->comp_vector);
+ send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
+ m * target->queue_size, ch->comp_vector);
if (IS_ERR(send_cq)) {
ret = PTR_ERR(send_cq);
goto err_recv_cq;
@@ -476,7 +523,7 @@ static int srp_create_target_ib(struct srp_target_port *target)
init_attr->event_handler = srp_qp_event;
init_attr->cap.max_send_wr = m * target->queue_size;
- init_attr->cap.max_recv_wr = target->queue_size;
+ init_attr->cap.max_recv_wr = target->queue_size + 1;
init_attr->cap.max_recv_sge = 1;
init_attr->cap.max_send_sge = 1;
init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -502,9 +549,9 @@ static int srp_create_target_ib(struct srp_target_port *target)
"FR pool allocation failed (%d)\n", ret);
goto err_qp;
}
- if (target->fr_pool)
- srp_destroy_fr_pool(target->fr_pool);
- target->fr_pool = fr_pool;
+ if (ch->fr_pool)
+ srp_destroy_fr_pool(ch->fr_pool);
+ ch->fr_pool = fr_pool;
} else if (!dev->use_fast_reg && dev->has_fmr) {
fmr_pool = srp_alloc_fmr_pool(target);
if (IS_ERR(fmr_pool)) {
@@ -513,21 +560,21 @@ static int srp_create_target_ib(struct srp_target_port *target)
"FMR pool allocation failed (%d)\n", ret);
goto err_qp;
}
- if (target->fmr_pool)
- ib_destroy_fmr_pool(target->fmr_pool);
- target->fmr_pool = fmr_pool;
+ if (ch->fmr_pool)
+ ib_destroy_fmr_pool(ch->fmr_pool);
+ ch->fmr_pool = fmr_pool;
}
- if (target->qp)
- ib_destroy_qp(target->qp);
- if (target->recv_cq)
- ib_destroy_cq(target->recv_cq);
- if (target->send_cq)
- ib_destroy_cq(target->send_cq);
+ if (ch->qp)
+ srp_destroy_qp(ch);
+ if (ch->recv_cq)
+ ib_destroy_cq(ch->recv_cq);
+ if (ch->send_cq)
+ ib_destroy_cq(ch->send_cq);
- target->qp = qp;
- target->recv_cq = recv_cq;
- target->send_cq = send_cq;
+ ch->qp = qp;
+ ch->recv_cq = recv_cq;
+ ch->send_cq = send_cq;
kfree(init_attr);
return 0;
@@ -548,93 +595,117 @@ err:
/*
* Note: this function may be called without srp_alloc_iu_bufs() having been
- * invoked. Hence the target->[rt]x_ring checks.
+ * invoked. Hence the ch->[rt]x_ring checks.
*/
-static void srp_free_target_ib(struct srp_target_port *target)
+static void srp_free_ch_ib(struct srp_target_port *target,
+ struct srp_rdma_ch *ch)
{
struct srp_device *dev = target->srp_host->srp_dev;
int i;
+ if (!ch->target)
+ return;
+
+ if (ch->cm_id) {
+ ib_destroy_cm_id(ch->cm_id);
+ ch->cm_id = NULL;
+ }
+
+ /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
+ if (!ch->qp)
+ return;
+
if (dev->use_fast_reg) {
- if (target->fr_pool)
- srp_destroy_fr_pool(target->fr_pool);
+ if (ch->fr_pool)
+ srp_destroy_fr_pool(ch->fr_pool);
} else {
- if (target->fmr_pool)
- ib_destroy_fmr_pool(target->fmr_pool);
+ if (ch->fmr_pool)
+ ib_destroy_fmr_pool(ch->fmr_pool);
}
- ib_destroy_qp(target->qp);
- ib_destroy_cq(target->send_cq);
- ib_destroy_cq(target->recv_cq);
+ srp_destroy_qp(ch);
+ ib_destroy_cq(ch->send_cq);
+ ib_destroy_cq(ch->recv_cq);
+
+ /*
+ * Avoid that the SCSI error handler tries to use this channel after
+ * it has been freed. The SCSI error handler can namely continue
+ * trying to perform recovery actions after scsi_remove_host()
+ * returned.
+ */
+ ch->target = NULL;
- target->qp = NULL;
- target->send_cq = target->recv_cq = NULL;
+ ch->qp = NULL;
+ ch->send_cq = ch->recv_cq = NULL;
- if (target->rx_ring) {
+ if (ch->rx_ring) {
for (i = 0; i < target->queue_size; ++i)
- srp_free_iu(target->srp_host, target->rx_ring[i]);
- kfree(target->rx_ring);
- target->rx_ring = NULL;
+ srp_free_iu(target->srp_host, ch->rx_ring[i]);
+ kfree(ch->rx_ring);
+ ch->rx_ring = NULL;
}
- if (target->tx_ring) {
+ if (ch->tx_ring) {
for (i = 0; i < target->queue_size; ++i)
- srp_free_iu(target->srp_host, target->tx_ring[i]);
- kfree(target->tx_ring);
- target->tx_ring = NULL;
+ srp_free_iu(target->srp_host, ch->tx_ring[i]);
+ kfree(ch->tx_ring);
+ ch->tx_ring = NULL;
}
}
static void srp_path_rec_completion(int status,
struct ib_sa_path_rec *pathrec,
- void *target_ptr)
+ void *ch_ptr)
{
- struct srp_target_port *target = target_ptr;
+ struct srp_rdma_ch *ch = ch_ptr;
+ struct srp_target_port *target = ch->target;
- target->status = status;
+ ch->status = status;
if (status)
shost_printk(KERN_ERR, target->scsi_host,
PFX "Got failed path rec status %d\n", status);
else
- target->path = *pathrec;
- complete(&target->done);
+ ch->path = *pathrec;
+ complete(&ch->done);
}
-static int srp_lookup_path(struct srp_target_port *target)
+static int srp_lookup_path(struct srp_rdma_ch *ch)
{
+ struct srp_target_port *target = ch->target;
int ret;
- target->path.numb_path = 1;
-
- init_completion(&target->done);
-
- target->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
- target->srp_host->srp_dev->dev,
- target->srp_host->port,
- &target->path,
- IB_SA_PATH_REC_SERVICE_ID |
- IB_SA_PATH_REC_DGID |
- IB_SA_PATH_REC_SGID |
- IB_SA_PATH_REC_NUMB_PATH |
- IB_SA_PATH_REC_PKEY,
- SRP_PATH_REC_TIMEOUT_MS,
- GFP_KERNEL,
- srp_path_rec_completion,
- target, &target->path_query);
- if (target->path_query_id < 0)
- return target->path_query_id;
-
- ret = wait_for_completion_interruptible(&target->done);
+ ch->path.numb_path = 1;
+
+ init_completion(&ch->done);
+
+ ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
+ target->srp_host->srp_dev->dev,
+ target->srp_host->port,
+ &ch->path,
+ IB_SA_PATH_REC_SERVICE_ID |
+ IB_SA_PATH_REC_DGID |
+ IB_SA_PATH_REC_SGID |
+ IB_SA_PATH_REC_NUMB_PATH |
+ IB_SA_PATH_REC_PKEY,
+ SRP_PATH_REC_TIMEOUT_MS,
+ GFP_KERNEL,
+ srp_path_rec_completion,
+ ch, &ch->path_query);
+ if (ch->path_query_id < 0)
+ return ch->path_query_id;
+
+ ret = wait_for_completion_interruptible(&ch->done);
if (ret < 0)
return ret;
- if (target->status < 0)
+ if (ch->status < 0)
shost_printk(KERN_WARNING, target->scsi_host,
PFX "Path record query failed\n");
- return target->status;
+ return ch->status;
}
-static int srp_send_req(struct srp_target_port *target)
+static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
{
+ struct srp_target_port *target = ch->target;
struct {
struct ib_cm_req_param param;
struct srp_login_req priv;
@@ -645,11 +716,11 @@ static int srp_send_req(struct srp_target_port *target)
if (!req)
return -ENOMEM;
- req->param.primary_path = &target->path;
+ req->param.primary_path = &ch->path;
req->param.alternate_path = NULL;
req->param.service_id = target->service_id;
- req->param.qp_num = target->qp->qp_num;
- req->param.qp_type = target->qp->qp_type;
+ req->param.qp_num = ch->qp->qp_num;
+ req->param.qp_type = ch->qp->qp_type;
req->param.private_data = &req->priv;
req->param.private_data_len = sizeof req->priv;
req->param.flow_control = 1;
@@ -673,6 +744,8 @@ static int srp_send_req(struct srp_target_port *target)
req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
SRP_BUF_FORMAT_INDIRECT);
+ req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
+ SRP_MULTICHAN_SINGLE);
/*
* In the published SRP specification (draft rev. 16a), the
* port identifier format is 8 bytes of ID extension followed
@@ -684,7 +757,7 @@ static int srp_send_req(struct srp_target_port *target)
*/
if (target->io_class == SRP_REV10_IB_IO_CLASS) {
memcpy(req->priv.initiator_port_id,
- &target->path.sgid.global.interface_id, 8);
+ &target->sgid.global.interface_id, 8);
memcpy(req->priv.initiator_port_id + 8,
&target->initiator_ext, 8);
memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
@@ -693,7 +766,7 @@ static int srp_send_req(struct srp_target_port *target)
memcpy(req->priv.initiator_port_id,
&target->initiator_ext, 8);
memcpy(req->priv.initiator_port_id + 8,
- &target->path.sgid.global.interface_id, 8);
+ &target->sgid.global.interface_id, 8);
memcpy(req->priv.target_port_id, &target->id_ext, 8);
memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
}
@@ -713,7 +786,7 @@ static int srp_send_req(struct srp_target_port *target)
&target->srp_host->srp_dev->dev->node_guid, 8);
}
- status = ib_send_cm_req(target->cm_id, &req->param);
+ status = ib_send_cm_req(ch->cm_id, &req->param);
kfree(req);
@@ -754,28 +827,35 @@ static bool srp_change_conn_state(struct srp_target_port *target,
static void srp_disconnect_target(struct srp_target_port *target)
{
+ struct srp_rdma_ch *ch;
+ int i;
+
if (srp_change_conn_state(target, false)) {
/* XXX should send SRP_I_LOGOUT request */
- if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
- shost_printk(KERN_DEBUG, target->scsi_host,
- PFX "Sending CM DREQ failed\n");
+ for (i = 0; i < target->ch_count; i++) {
+ ch = &target->ch[i];
+ if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
+ shost_printk(KERN_DEBUG, target->scsi_host,
+ PFX "Sending CM DREQ failed\n");
+ }
}
}
}
-static void srp_free_req_data(struct srp_target_port *target)
+static void srp_free_req_data(struct srp_target_port *target,
+ struct srp_rdma_ch *ch)
{
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_device *ibdev = dev->dev;
struct srp_request *req;
int i;
- if (!target->req_ring)
+ if (!ch->target || !ch->req_ring)
return;
for (i = 0; i < target->req_ring_size; ++i) {
- req = &target->req_ring[i];
+ req = &ch->req_ring[i];
if (dev->use_fast_reg)
kfree(req->fr_list);
else
@@ -789,12 +869,13 @@ static void srp_free_req_data(struct srp_target_port *target)
kfree(req->indirect_desc);
}
- kfree(target->req_ring);
- target->req_ring = NULL;
+ kfree(ch->req_ring);
+ ch->req_ring = NULL;
}
-static int srp_alloc_req_data(struct srp_target_port *target)
+static int srp_alloc_req_data(struct srp_rdma_ch *ch)
{
+ struct srp_target_port *target = ch->target;
struct srp_device *srp_dev = target->srp_host->srp_dev;
struct ib_device *ibdev = srp_dev->dev;
struct srp_request *req;
@@ -802,15 +883,13 @@ static int srp_alloc_req_data(struct srp_target_port *target)
dma_addr_t dma_addr;
int i, ret = -ENOMEM;
- INIT_LIST_HEAD(&target->free_reqs);
-
- target->req_ring = kzalloc(target->req_ring_size *
- sizeof(*target->req_ring), GFP_KERNEL);
- if (!target->req_ring)
+ ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
+ GFP_KERNEL);
+ if (!ch->req_ring)
goto out;
for (i = 0; i < target->req_ring_size; ++i) {
- req = &target->req_ring[i];
+ req = &ch->req_ring[i];
mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
GFP_KERNEL);
if (!mr_list)
@@ -834,8 +913,6 @@ static int srp_alloc_req_data(struct srp_target_port *target)
goto out;
req->indirect_dma_addr = dma_addr;
- req->index = i;
- list_add_tail(&req->list, &target->free_reqs);
}
ret = 0;
@@ -860,6 +937,9 @@ static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
static void srp_remove_target(struct srp_target_port *target)
{
+ struct srp_rdma_ch *ch;
+ int i;
+
WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
srp_del_scsi_host_attr(target->scsi_host);
@@ -868,11 +948,18 @@ static void srp_remove_target(struct srp_target_port *target)
scsi_remove_host(target->scsi_host);
srp_stop_rport_timers(target->rport);
srp_disconnect_target(target);
- ib_destroy_cm_id(target->cm_id);
- srp_free_target_ib(target);
+ for (i = 0; i < target->ch_count; i++) {
+ ch = &target->ch[i];
+ srp_free_ch_ib(target, ch);
+ }
cancel_work_sync(&target->tl_err_work);
srp_rport_put(target->rport);
- srp_free_req_data(target);
+ for (i = 0; i < target->ch_count; i++) {
+ ch = &target->ch[i];
+ srp_free_req_data(target, ch);
+ }
+ kfree(target->ch);
+ target->ch = NULL;
spin_lock(&target->srp_host->target_lock);
list_del(&target->list);
@@ -898,25 +985,25 @@ static void srp_rport_delete(struct srp_rport *rport)
srp_queue_remove_work(target);
}
-static int srp_connect_target(struct srp_target_port *target)
+static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
{
- int retries = 3;
+ struct srp_target_port *target = ch->target;
int ret;
- WARN_ON_ONCE(target->connected);
+ WARN_ON_ONCE(!multich && target->connected);
target->qp_in_error = false;
- ret = srp_lookup_path(target);
+ ret = srp_lookup_path(ch);
if (ret)
return ret;
while (1) {
- init_completion(&target->done);
- ret = srp_send_req(target);
+ init_completion(&ch->done);
+ ret = srp_send_req(ch, multich);
if (ret)
return ret;
- ret = wait_for_completion_interruptible(&target->done);
+ ret = wait_for_completion_interruptible(&ch->done);
if (ret < 0)
return ret;
@@ -926,13 +1013,13 @@ static int srp_connect_target(struct srp_target_port *target)
* back, or SRP_DLID_REDIRECT if we get a lid/qp
* redirect REJ back.
*/
- switch (target->status) {
+ switch (ch->status) {
case 0:
srp_change_conn_state(target, true);
return 0;
case SRP_PORT_REDIRECT:
- ret = srp_lookup_path(target);
+ ret = srp_lookup_path(ch);
if (ret)
return ret;
break;
@@ -941,27 +1028,18 @@ static int srp_connect_target(struct srp_target_port *target)
break;
case SRP_STALE_CONN:
- /* Our current CM id was stale, and is now in timewait.
- * Try to reconnect with a new one.
- */
- if (!retries-- || srp_new_cm_id(target)) {
- shost_printk(KERN_ERR, target->scsi_host, PFX
- "giving up on stale connection\n");
- target->status = -ECONNRESET;
- return target->status;
- }
-
shost_printk(KERN_ERR, target->scsi_host, PFX
- "retrying stale connection\n");
- break;
+ "giving up on stale connection\n");
+ ch->status = -ECONNRESET;
+ return ch->status;
default:
- return target->status;
+ return ch->status;
}
}
}
-static int srp_inv_rkey(struct srp_target_port *target, u32 rkey)
+static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
{
struct ib_send_wr *bad_wr;
struct ib_send_wr wr = {
@@ -973,13 +1051,14 @@ static int srp_inv_rkey(struct srp_target_port *target, u32 rkey)
.ex.invalidate_rkey = rkey,
};
- return ib_post_send(target->qp, &wr, &bad_wr);
+ return ib_post_send(ch->qp, &wr, &bad_wr);
}
static void srp_unmap_data(struct scsi_cmnd *scmnd,
- struct srp_target_port *target,
+ struct srp_rdma_ch *ch,
struct srp_request *req)
{
+ struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_device *ibdev = dev->dev;
int i, res;
@@ -993,7 +1072,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
struct srp_fr_desc **pfr;
for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
- res = srp_inv_rkey(target, (*pfr)->mr->rkey);
+ res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
if (res < 0) {
shost_printk(KERN_ERR, target->scsi_host, PFX
"Queueing INV WR for rkey %#x failed (%d)\n",
@@ -1003,7 +1082,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
}
}
if (req->nmdesc)
- srp_fr_pool_put(target->fr_pool, req->fr_list,
+ srp_fr_pool_put(ch->fr_pool, req->fr_list,
req->nmdesc);
} else {
struct ib_pool_fmr **pfmr;
@@ -1018,7 +1097,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
/**
* srp_claim_req - Take ownership of the scmnd associated with a request.
- * @target: SRP target port.
+ * @ch: SRP RDMA channel.
* @req: SRP request.
* @sdev: If not NULL, only take ownership for this SCSI device.
* @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
@@ -1027,14 +1106,14 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
* Return value:
* Either NULL or a pointer to the SCSI command the caller became owner of.
*/
-static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
+static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
struct srp_request *req,
struct scsi_device *sdev,
struct scsi_cmnd *scmnd)
{
unsigned long flags;
- spin_lock_irqsave(&target->lock, flags);
+ spin_lock_irqsave(&ch->lock, flags);
if (req->scmnd &&
(!sdev || req->scmnd->device == sdev) &&
(!scmnd || req->scmnd == scmnd)) {
@@ -1043,40 +1122,37 @@ static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
} else {
scmnd = NULL;
}
- spin_unlock_irqrestore(&target->lock, flags);
+ spin_unlock_irqrestore(&ch->lock, flags);
return scmnd;
}
/**
* srp_free_req() - Unmap data and add request to the free request list.
- * @target: SRP target port.
+ * @ch: SRP RDMA channel.
* @req: Request to be freed.
* @scmnd: SCSI command associated with @req.
* @req_lim_delta: Amount to be added to @target->req_lim.
*/
-static void srp_free_req(struct srp_target_port *target,
- struct srp_request *req, struct scsi_cmnd *scmnd,
- s32 req_lim_delta)
+static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
+ struct scsi_cmnd *scmnd, s32 req_lim_delta)
{
unsigned long flags;
- srp_unmap_data(scmnd, target, req);
+ srp_unmap_data(scmnd, ch, req);
- spin_lock_irqsave(&target->lock, flags);
- target->req_lim += req_lim_delta;
- list_add_tail(&req->list, &target->free_reqs);
- spin_unlock_irqrestore(&target->lock, flags);
+ spin_lock_irqsave(&ch->lock, flags);
+ ch->req_lim += req_lim_delta;
+ spin_unlock_irqrestore(&ch->lock, flags);
}
-static void srp_finish_req(struct srp_target_port *target,
- struct srp_request *req, struct scsi_device *sdev,
- int result)
+static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
+ struct scsi_device *sdev, int result)
{
- struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL);
+ struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
if (scmnd) {
- srp_free_req(target, req, scmnd, 0);
+ srp_free_req(ch, req, scmnd, 0);
scmnd->result = result;
scmnd->scsi_done(scmnd);
}
@@ -1085,9 +1161,10 @@ static void srp_finish_req(struct srp_target_port *target,
static void srp_terminate_io(struct srp_rport *rport)
{
struct srp_target_port *target = rport->lld_data;
+ struct srp_rdma_ch *ch;
struct Scsi_Host *shost = target->scsi_host;
struct scsi_device *sdev;
- int i;
+ int i, j;
/*
* Invoking srp_terminate_io() while srp_queuecommand() is running
@@ -1096,9 +1173,15 @@ static void srp_terminate_io(struct srp_rport *rport)
shost_for_each_device(sdev, shost)
WARN_ON_ONCE(sdev->request_queue->request_fn_active);
- for (i = 0; i < target->req_ring_size; ++i) {
- struct srp_request *req = &target->req_ring[i];
- srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16);
+ for (i = 0; i < target->ch_count; i++) {
+ ch = &target->ch[i];
+
+ for (j = 0; j < target->req_ring_size; ++j) {
+ struct srp_request *req = &ch->req_ring[j];
+
+ srp_finish_req(ch, req, NULL,
+ DID_TRANSPORT_FAILFAST << 16);
+ }
}
}
@@ -1114,34 +1197,61 @@ static void srp_terminate_io(struct srp_rport *rport)
static int srp_rport_reconnect(struct srp_rport *rport)
{
struct srp_target_port *target = rport->lld_data;
- int i, ret;
+ struct srp_rdma_ch *ch;
+ int i, j, ret = 0;
+ bool multich = false;
srp_disconnect_target(target);
+
+ if (target->state == SRP_TARGET_SCANNING)
+ return -ENODEV;
+
/*
* Now get a new local CM ID so that we avoid confusing the target in
* case things are really fouled up. Doing so also ensures that all CM
* callbacks will have finished before a new QP is allocated.
*/
- ret = srp_new_cm_id(target);
-
- for (i = 0; i < target->req_ring_size; ++i) {
- struct srp_request *req = &target->req_ring[i];
- srp_finish_req(target, req, NULL, DID_RESET << 16);
+ for (i = 0; i < target->ch_count; i++) {
+ ch = &target->ch[i];
+ if (!ch->target)
+ break;
+ ret += srp_new_cm_id(ch);
}
+ for (i = 0; i < target->ch_count; i++) {
+ ch = &target->ch[i];
+ if (!ch->target)
+ break;
+ for (j = 0; j < target->req_ring_size; ++j) {
+ struct srp_request *req = &ch->req_ring[j];
- /*
- * Whether or not creating a new CM ID succeeded, create a new
- * QP. This guarantees that all callback functions for the old QP have
- * finished before any send requests are posted on the new QP.
- */
- ret += srp_create_target_ib(target);
-
- INIT_LIST_HEAD(&target->free_tx);
- for (i = 0; i < target->queue_size; ++i)
- list_add(&target->tx_ring[i]->list, &target->free_tx);
+ srp_finish_req(ch, req, NULL, DID_RESET << 16);
+ }
+ }
+ for (i = 0; i < target->ch_count; i++) {
+ ch = &target->ch[i];
+ if (!ch->target)
+ break;
+ /*
+ * Whether or not creating a new CM ID succeeded, create a new
+ * QP. This guarantees that all completion callback function
+ * invocations have finished before request resetting starts.
+ */
+ ret += srp_create_ch_ib(ch);
- if (ret == 0)
- ret = srp_connect_target(target);
+ INIT_LIST_HEAD(&ch->free_tx);
+ for (j = 0; j < target->queue_size; ++j)
+ list_add(&ch->tx_ring[j]->list, &ch->free_tx);
+ }
+ for (i = 0; i < target->ch_count; i++) {
+ ch = &target->ch[i];
+ if (ret || !ch->target) {
+ if (i > 1)
+ ret = 0;
+ break;
+ }
+ ret = srp_connect_ch(ch, multich);
+ multich = true;
+ }
if (ret == 0)
shost_printk(KERN_INFO, target->scsi_host,
@@ -1165,12 +1275,12 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
}
static int srp_map_finish_fmr(struct srp_map_state *state,
- struct srp_target_port *target)
+ struct srp_rdma_ch *ch)
{
struct ib_pool_fmr *fmr;
u64 io_addr = 0;
- fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages,
+ fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
state->npages, io_addr);
if (IS_ERR(fmr))
return PTR_ERR(fmr);
@@ -1184,15 +1294,16 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
}
static int srp_map_finish_fr(struct srp_map_state *state,
- struct srp_target_port *target)
+ struct srp_rdma_ch *ch)
{
+ struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_send_wr *bad_wr;
struct ib_send_wr wr;
struct srp_fr_desc *desc;
u32 rkey;
- desc = srp_fr_pool_get(target->fr_pool);
+ desc = srp_fr_pool_get(ch->fr_pool);
if (!desc)
return -ENOMEM;
@@ -1221,12 +1332,13 @@ static int srp_map_finish_fr(struct srp_map_state *state,
srp_map_desc(state, state->base_dma_addr, state->dma_len,
desc->mr->rkey);
- return ib_post_send(target->qp, &wr, &bad_wr);
+ return ib_post_send(ch->qp, &wr, &bad_wr);
}
static int srp_finish_mapping(struct srp_map_state *state,
- struct srp_target_port *target)
+ struct srp_rdma_ch *ch)
{
+ struct srp_target_port *target = ch->target;
int ret = 0;
if (state->npages == 0)
@@ -1237,8 +1349,8 @@ static int srp_finish_mapping(struct srp_map_state *state,
target->rkey);
else
ret = target->srp_host->srp_dev->use_fast_reg ?
- srp_map_finish_fr(state, target) :
- srp_map_finish_fmr(state, target);
+ srp_map_finish_fr(state, ch) :
+ srp_map_finish_fmr(state, ch);
if (ret == 0) {
state->npages = 0;
@@ -1258,10 +1370,11 @@ static void srp_map_update_start(struct srp_map_state *state,
}
static int srp_map_sg_entry(struct srp_map_state *state,
- struct srp_target_port *target,
+ struct srp_rdma_ch *ch,
struct scatterlist *sg, int sg_index,
bool use_mr)
{
+ struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_device *ibdev = dev->dev;
dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
@@ -1290,7 +1403,7 @@ static int srp_map_sg_entry(struct srp_map_state *state,
*/
if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
dma_len > dev->mr_max_size) {
- ret = srp_finish_mapping(state, target);
+ ret = srp_finish_mapping(state, ch);
if (ret)
return ret;
@@ -1311,7 +1424,7 @@ static int srp_map_sg_entry(struct srp_map_state *state,
while (dma_len) {
unsigned offset = dma_addr & ~dev->mr_page_mask;
if (state->npages == dev->max_pages_per_mr || offset != 0) {
- ret = srp_finish_mapping(state, target);
+ ret = srp_finish_mapping(state, ch);
if (ret)
return ret;
@@ -1335,17 +1448,18 @@ static int srp_map_sg_entry(struct srp_map_state *state,
*/
ret = 0;
if (len != dev->mr_page_size) {
- ret = srp_finish_mapping(state, target);
+ ret = srp_finish_mapping(state, ch);
if (!ret)
srp_map_update_start(state, NULL, 0, 0);
}
return ret;
}
-static int srp_map_sg(struct srp_map_state *state,
- struct srp_target_port *target, struct srp_request *req,
- struct scatterlist *scat, int count)
+static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch,
+ struct srp_request *req, struct scatterlist *scat,
+ int count)
{
+ struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_device *ibdev = dev->dev;
struct scatterlist *sg;
@@ -1356,14 +1470,14 @@ static int srp_map_sg(struct srp_map_state *state,
state->pages = req->map_page;
if (dev->use_fast_reg) {
state->next_fr = req->fr_list;
- use_mr = !!target->fr_pool;
+ use_mr = !!ch->fr_pool;
} else {
state->next_fmr = req->fmr_list;
- use_mr = !!target->fmr_pool;
+ use_mr = !!ch->fmr_pool;
}
for_each_sg(scat, sg, count, i) {
- if (srp_map_sg_entry(state, target, sg, i, use_mr)) {
+ if (srp_map_sg_entry(state, ch, sg, i, use_mr)) {
/*
* Memory registration failed, so backtrack to the
* first unmapped entry and continue on without using
@@ -1385,7 +1499,7 @@ backtrack:
}
}
- if (use_mr && srp_finish_mapping(state, target))
+ if (use_mr && srp_finish_mapping(state, ch))
goto backtrack;
req->nmdesc = state->nmdesc;
@@ -1393,9 +1507,10 @@ backtrack:
return 0;
}
-static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
+static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
struct srp_request *req)
{
+ struct srp_target_port *target = ch->target;
struct scatterlist *scat;
struct srp_cmd *cmd = req->cmd->buf;
int len, nents, count;
@@ -1457,7 +1572,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
target->indirect_size, DMA_TO_DEVICE);
memset(&state, 0, sizeof(state));
- srp_map_sg(&state, target, req, scat, count);
+ srp_map_sg(&state, ch, req, scat, count);
/* We've mapped the request, now pull as much of the indirect
* descriptor table as we can into the command buffer. If this
@@ -1518,20 +1633,20 @@ map_complete:
/*
* Return an IU and possible credit to the free pool
*/
-static void srp_put_tx_iu(struct srp_target_port *target, struct srp_iu *iu,
+static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
enum srp_iu_type iu_type)
{
unsigned long flags;
- spin_lock_irqsave(&target->lock, flags);
- list_add(&iu->list, &target->free_tx);
+ spin_lock_irqsave(&ch->lock, flags);
+ list_add(&iu->list, &ch->free_tx);
if (iu_type != SRP_IU_RSP)
- ++target->req_lim;
- spin_unlock_irqrestore(&target->lock, flags);
+ ++ch->req_lim;
+ spin_unlock_irqrestore(&ch->lock, flags);
}
/*
- * Must be called with target->lock held to protect req_lim and free_tx.
+ * Must be called with ch->lock held to protect req_lim and free_tx.
* If IU is not sent, it must be returned using srp_put_tx_iu().
*
* Note:
@@ -1543,35 +1658,36 @@ static void srp_put_tx_iu(struct srp_target_port *target, struct srp_iu *iu,
* - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
* one unanswered SRP request to an initiator.
*/
-static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,
+static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
enum srp_iu_type iu_type)
{
+ struct srp_target_port *target = ch->target;
s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
struct srp_iu *iu;
- srp_send_completion(target->send_cq, target);
+ srp_send_completion(ch->send_cq, ch);
- if (list_empty(&target->free_tx))
+ if (list_empty(&ch->free_tx))
return NULL;
/* Initiator responses to target requests do not consume credits */
if (iu_type != SRP_IU_RSP) {
- if (target->req_lim <= rsv) {
+ if (ch->req_lim <= rsv) {
++target->zero_req_lim;
return NULL;
}
- --target->req_lim;
+ --ch->req_lim;
}
- iu = list_first_entry(&target->free_tx, struct srp_iu, list);
+ iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
list_del(&iu->list);
return iu;
}
-static int srp_post_send(struct srp_target_port *target,
- struct srp_iu *iu, int len)
+static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
{
+ struct srp_target_port *target = ch->target;
struct ib_sge list;
struct ib_send_wr wr, *bad_wr;
@@ -1586,11 +1702,12 @@ static int srp_post_send(struct srp_target_port *target,
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
- return ib_post_send(target->qp, &wr, &bad_wr);
+ return ib_post_send(ch->qp, &wr, &bad_wr);
}
-static int srp_post_recv(struct srp_target_port *target, struct srp_iu *iu)
+static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
{
+ struct srp_target_port *target = ch->target;
struct ib_recv_wr wr, *bad_wr;
struct ib_sge list;
@@ -1603,35 +1720,39 @@ static int srp_post_recv(struct srp_target_port *target, struct srp_iu *iu)
wr.sg_list = &list;
wr.num_sge = 1;
- return ib_post_recv(target->qp, &wr, &bad_wr);
+ return ib_post_recv(ch->qp, &wr, &bad_wr);
}
-static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
+static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
{
+ struct srp_target_port *target = ch->target;
struct srp_request *req;
struct scsi_cmnd *scmnd;
unsigned long flags;
if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
- spin_lock_irqsave(&target->lock, flags);
- target->req_lim += be32_to_cpu(rsp->req_lim_delta);
- spin_unlock_irqrestore(&target->lock, flags);
+ spin_lock_irqsave(&ch->lock, flags);
+ ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
+ spin_unlock_irqrestore(&ch->lock, flags);
- target->tsk_mgmt_status = -1;
+ ch->tsk_mgmt_status = -1;
if (be32_to_cpu(rsp->resp_data_len) >= 4)
- target->tsk_mgmt_status = rsp->data[3];
- complete(&target->tsk_mgmt_done);
+ ch->tsk_mgmt_status = rsp->data[3];
+ complete(&ch->tsk_mgmt_done);
} else {
- req = &target->req_ring[rsp->tag];
- scmnd = srp_claim_req(target, req, NULL, NULL);
+ scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
+ if (scmnd) {
+ req = (void *)scmnd->host_scribble;
+ scmnd = srp_claim_req(ch, req, NULL, scmnd);
+ }
if (!scmnd) {
shost_printk(KERN_ERR, target->scsi_host,
- "Null scmnd for RSP w/tag %016llx\n",
- (unsigned long long) rsp->tag);
+ "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
+ rsp->tag, ch - target->ch, ch->qp->qp_num);
- spin_lock_irqsave(&target->lock, flags);
- target->req_lim += be32_to_cpu(rsp->req_lim_delta);
- spin_unlock_irqrestore(&target->lock, flags);
+ spin_lock_irqsave(&ch->lock, flags);
+ ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
+ spin_unlock_irqrestore(&ch->lock, flags);
return;
}
@@ -1653,7 +1774,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
- srp_free_req(target, req, scmnd,
+ srp_free_req(ch, req, scmnd,
be32_to_cpu(rsp->req_lim_delta));
scmnd->host_scribble = NULL;
@@ -1661,18 +1782,19 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
}
}
-static int srp_response_common(struct srp_target_port *target, s32 req_delta,
+static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
void *rsp, int len)
{
+ struct srp_target_port *target = ch->target;
struct ib_device *dev = target->srp_host->srp_dev->dev;
unsigned long flags;
struct srp_iu *iu;
int err;
- spin_lock_irqsave(&target->lock, flags);
- target->req_lim += req_delta;
- iu = __srp_get_tx_iu(target, SRP_IU_RSP);
- spin_unlock_irqrestore(&target->lock, flags);
+ spin_lock_irqsave(&ch->lock, flags);
+ ch->req_lim += req_delta;
+ iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
+ spin_unlock_irqrestore(&ch->lock, flags);
if (!iu) {
shost_printk(KERN_ERR, target->scsi_host, PFX
@@ -1684,17 +1806,17 @@ static int srp_response_common(struct srp_target_port *target, s32 req_delta,
memcpy(iu->buf, rsp, len);
ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
- err = srp_post_send(target, iu, len);
+ err = srp_post_send(ch, iu, len);
if (err) {
shost_printk(KERN_ERR, target->scsi_host, PFX
"unable to post response: %d\n", err);
- srp_put_tx_iu(target, iu, SRP_IU_RSP);
+ srp_put_tx_iu(ch, iu, SRP_IU_RSP);
}
return err;
}
-static void srp_process_cred_req(struct srp_target_port *target,
+static void srp_process_cred_req(struct srp_rdma_ch *ch,
struct srp_cred_req *req)
{
struct srp_cred_rsp rsp = {
@@ -1703,14 +1825,15 @@ static void srp_process_cred_req(struct srp_target_port *target,
};
s32 delta = be32_to_cpu(req->req_lim_delta);
- if (srp_response_common(target, delta, &rsp, sizeof rsp))
- shost_printk(KERN_ERR, target->scsi_host, PFX
+ if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
+ shost_printk(KERN_ERR, ch->target->scsi_host, PFX
"problems processing SRP_CRED_REQ\n");
}
-static void srp_process_aer_req(struct srp_target_port *target,
+static void srp_process_aer_req(struct srp_rdma_ch *ch,
struct srp_aer_req *req)
{
+ struct srp_target_port *target = ch->target;
struct srp_aer_rsp rsp = {
.opcode = SRP_AER_RSP,
.tag = req->tag,
@@ -1720,19 +1843,20 @@ static void srp_process_aer_req(struct srp_target_port *target,
shost_printk(KERN_ERR, target->scsi_host, PFX
"ignoring AER for LUN %llu\n", be64_to_cpu(req->lun));
- if (srp_response_common(target, delta, &rsp, sizeof rsp))
+ if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
shost_printk(KERN_ERR, target->scsi_host, PFX
"problems processing SRP_AER_REQ\n");
}
-static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
+static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
{
+ struct srp_target_port *target = ch->target;
struct ib_device *dev = target->srp_host->srp_dev->dev;
struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
int res;
u8 opcode;
- ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_ti_iu_len,
+ ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
DMA_FROM_DEVICE);
opcode = *(u8 *) iu->buf;
@@ -1746,15 +1870,15 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
switch (opcode) {
case SRP_RSP:
- srp_process_rsp(target, iu->buf);
+ srp_process_rsp(ch, iu->buf);
break;
case SRP_CRED_REQ:
- srp_process_cred_req(target, iu->buf);
+ srp_process_cred_req(ch, iu->buf);
break;
case SRP_AER_REQ:
- srp_process_aer_req(target, iu->buf);
+ srp_process_aer_req(ch, iu->buf);
break;
case SRP_T_LOGOUT:
@@ -1769,10 +1893,10 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
break;
}
- ib_dma_sync_single_for_device(dev, iu->dma, target->max_ti_iu_len,
+ ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
DMA_FROM_DEVICE);
- res = srp_post_recv(target, iu);
+ res = srp_post_recv(ch, iu);
if (res != 0)
shost_printk(KERN_ERR, target->scsi_host,
PFX "Recv failed with error code %d\n", res);
@@ -1795,8 +1919,15 @@ static void srp_tl_err_work(struct work_struct *work)
}
static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
- bool send_err, struct srp_target_port *target)
+ bool send_err, struct srp_rdma_ch *ch)
{
+ struct srp_target_port *target = ch->target;
+
+ if (wr_id == SRP_LAST_WR_ID) {
+ complete(&ch->done);
+ return;
+ }
+
if (target->connected && !target->qp_in_error) {
if (wr_id & LOCAL_INV_WR_ID_MASK) {
shost_printk(KERN_ERR, target->scsi_host, PFX
@@ -1817,33 +1948,33 @@ static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
target->qp_in_error = true;
}
-static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
+static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
{
- struct srp_target_port *target = target_ptr;
+ struct srp_rdma_ch *ch = ch_ptr;
struct ib_wc wc;
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
while (ib_poll_cq(cq, 1, &wc) > 0) {
if (likely(wc.status == IB_WC_SUCCESS)) {
- srp_handle_recv(target, &wc);
+ srp_handle_recv(ch, &wc);
} else {
- srp_handle_qp_err(wc.wr_id, wc.status, false, target);
+ srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
}
}
}
-static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
+static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
{
- struct srp_target_port *target = target_ptr;
+ struct srp_rdma_ch *ch = ch_ptr;
struct ib_wc wc;
struct srp_iu *iu;
while (ib_poll_cq(cq, 1, &wc) > 0) {
if (likely(wc.status == IB_WC_SUCCESS)) {
iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
- list_add(&iu->list, &target->free_tx);
+ list_add(&iu->list, &ch->free_tx);
} else {
- srp_handle_qp_err(wc.wr_id, wc.status, true, target);
+ srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
}
}
}
@@ -1852,11 +1983,14 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(shost);
struct srp_rport *rport = target->rport;
+ struct srp_rdma_ch *ch;
struct srp_request *req;
struct srp_iu *iu;
struct srp_cmd *cmd;
struct ib_device *dev;
unsigned long flags;
+ u32 tag;
+ u16 idx;
int len, ret;
const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
@@ -1873,15 +2007,22 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
if (unlikely(scmnd->result))
goto err;
- spin_lock_irqsave(&target->lock, flags);
- iu = __srp_get_tx_iu(target, SRP_IU_CMD);
- if (!iu)
- goto err_unlock;
+ WARN_ON_ONCE(scmnd->request->tag < 0);
+ tag = blk_mq_unique_tag(scmnd->request);
+ ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
+ idx = blk_mq_unique_tag_to_tag(tag);
+ WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
+ dev_name(&shost->shost_gendev), tag, idx,
+ target->req_ring_size);
- req = list_first_entry(&target->free_reqs, struct srp_request, list);
- list_del(&req->list);
- spin_unlock_irqrestore(&target->lock, flags);
+ spin_lock_irqsave(&ch->lock, flags);
+ iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
+ spin_unlock_irqrestore(&ch->lock, flags);
+ if (!iu)
+ goto err;
+
+ req = &ch->req_ring[idx];
dev = target->srp_host->srp_dev->dev;
ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
DMA_TO_DEVICE);
@@ -1893,13 +2034,13 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
cmd->opcode = SRP_CMD;
cmd->lun = cpu_to_be64((u64) scmnd->device->lun << 48);
- cmd->tag = req->index;
+ cmd->tag = tag;
memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
req->scmnd = scmnd;
req->cmd = iu;
- len = srp_map_data(scmnd, target, req);
+ len = srp_map_data(scmnd, ch, req);
if (len < 0) {
shost_printk(KERN_ERR, target->scsi_host,
PFX "Failed to map data (%d)\n", len);
@@ -1917,7 +2058,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
DMA_TO_DEVICE);
- if (srp_post_send(target, iu, len)) {
+ if (srp_post_send(ch, iu, len)) {
shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
goto err_unmap;
}
@@ -1931,10 +2072,10 @@ unlock_rport:
return ret;
err_unmap:
- srp_unmap_data(scmnd, target, req);
+ srp_unmap_data(scmnd, ch, req);
err_iu:
- srp_put_tx_iu(target, iu, SRP_IU_CMD);
+ srp_put_tx_iu(ch, iu, SRP_IU_CMD);
/*
* Avoid that the loops that iterate over the request ring can
@@ -1942,12 +2083,6 @@ err_iu:
*/
req->scmnd = NULL;
- spin_lock_irqsave(&target->lock, flags);
- list_add(&req->list, &target->free_reqs);
-
-err_unlock:
- spin_unlock_irqrestore(&target->lock, flags);
-
err:
if (scmnd->result) {
scmnd->scsi_done(scmnd);
@@ -1961,53 +2096,54 @@ err:
/*
* Note: the resources allocated in this function are freed in
- * srp_free_target_ib().
+ * srp_free_ch_ib().
*/
-static int srp_alloc_iu_bufs(struct srp_target_port *target)
+static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
{
+ struct srp_target_port *target = ch->target;
int i;
- target->rx_ring = kzalloc(target->queue_size * sizeof(*target->rx_ring),
- GFP_KERNEL);
- if (!target->rx_ring)
+ ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
+ GFP_KERNEL);
+ if (!ch->rx_ring)
goto err_no_ring;
- target->tx_ring = kzalloc(target->queue_size * sizeof(*target->tx_ring),
- GFP_KERNEL);
- if (!target->tx_ring)
+ ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
+ GFP_KERNEL);
+ if (!ch->tx_ring)
goto err_no_ring;
for (i = 0; i < target->queue_size; ++i) {
- target->rx_ring[i] = srp_alloc_iu(target->srp_host,
- target->max_ti_iu_len,
- GFP_KERNEL, DMA_FROM_DEVICE);
- if (!target->rx_ring[i])
+ ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
+ ch->max_ti_iu_len,
+ GFP_KERNEL, DMA_FROM_DEVICE);
+ if (!ch->rx_ring[i])
goto err;
}
for (i = 0; i < target->queue_size; ++i) {
- target->tx_ring[i] = srp_alloc_iu(target->srp_host,
- target->max_iu_len,
- GFP_KERNEL, DMA_TO_DEVICE);
- if (!target->tx_ring[i])
+ ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
+ target->max_iu_len,
+ GFP_KERNEL, DMA_TO_DEVICE);
+ if (!ch->tx_ring[i])
goto err;
- list_add(&target->tx_ring[i]->list, &target->free_tx);
+ list_add(&ch->tx_ring[i]->list, &ch->free_tx);
}
return 0;
err:
for (i = 0; i < target->queue_size; ++i) {
- srp_free_iu(target->srp_host, target->rx_ring[i]);
- srp_free_iu(target->srp_host, target->tx_ring[i]);
+ srp_free_iu(target->srp_host, ch->rx_ring[i]);
+ srp_free_iu(target->srp_host, ch->tx_ring[i]);
}
err_no_ring:
- kfree(target->tx_ring);
- target->tx_ring = NULL;
- kfree(target->rx_ring);
- target->rx_ring = NULL;
+ kfree(ch->tx_ring);
+ ch->tx_ring = NULL;
+ kfree(ch->rx_ring);
+ ch->rx_ring = NULL;
return -ENOMEM;
}
@@ -2041,23 +2177,24 @@ static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
struct srp_login_rsp *lrsp,
- struct srp_target_port *target)
+ struct srp_rdma_ch *ch)
{
+ struct srp_target_port *target = ch->target;
struct ib_qp_attr *qp_attr = NULL;
int attr_mask = 0;
int ret;
int i;
if (lrsp->opcode == SRP_LOGIN_RSP) {
- target->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
- target->req_lim = be32_to_cpu(lrsp->req_lim_delta);
+ ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
+ ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
/*
* Reserve credits for task management so we don't
* bounce requests back to the SCSI mid-layer.
*/
target->scsi_host->can_queue
- = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,
+ = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
target->scsi_host->can_queue);
target->scsi_host->cmd_per_lun
= min_t(int, target->scsi_host->can_queue,
@@ -2069,8 +2206,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
goto error;
}
- if (!target->rx_ring) {
- ret = srp_alloc_iu_bufs(target);
+ if (!ch->rx_ring) {
+ ret = srp_alloc_iu_bufs(ch);
if (ret)
goto error;
}
@@ -2085,13 +2222,14 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
if (ret)
goto error_free;
- ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
+ ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
if (ret)
goto error_free;
for (i = 0; i < target->queue_size; i++) {
- struct srp_iu *iu = target->rx_ring[i];
- ret = srp_post_recv(target, iu);
+ struct srp_iu *iu = ch->rx_ring[i];
+
+ ret = srp_post_recv(ch, iu);
if (ret)
goto error_free;
}
@@ -2103,7 +2241,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
- ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
+ ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
if (ret)
goto error_free;
@@ -2113,13 +2251,14 @@ error_free:
kfree(qp_attr);
error:
- target->status = ret;
+ ch->status = ret;
}
static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *event,
- struct srp_target_port *target)
+ struct srp_rdma_ch *ch)
{
+ struct srp_target_port *target = ch->target;
struct Scsi_Host *shost = target->scsi_host;
struct ib_class_port_info *cpi;
int opcode;
@@ -2127,12 +2266,12 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
switch (event->param.rej_rcvd.reason) {
case IB_CM_REJ_PORT_CM_REDIRECT:
cpi = event->param.rej_rcvd.ari;
- target->path.dlid = cpi->redirect_lid;
- target->path.pkey = cpi->redirect_pkey;
+ ch->path.dlid = cpi->redirect_lid;
+ ch->path.pkey = cpi->redirect_pkey;
cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
- memcpy(target->path.dgid.raw, cpi->redirect_gid, 16);
+ memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
- target->status = target->path.dlid ?
+ ch->status = ch->path.dlid ?
SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
break;
@@ -2143,26 +2282,26 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
* reject reason code 25 when they mean 24
* (port redirect).
*/
- memcpy(target->path.dgid.raw,
+ memcpy(ch->path.dgid.raw,
event->param.rej_rcvd.ari, 16);
shost_printk(KERN_DEBUG, shost,
PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
- (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix),
- (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id));
+ be64_to_cpu(ch->path.dgid.global.subnet_prefix),
+ be64_to_cpu(ch->path.dgid.global.interface_id));
- target->status = SRP_PORT_REDIRECT;
+ ch->status = SRP_PORT_REDIRECT;
} else {
shost_printk(KERN_WARNING, shost,
" REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
- target->status = -ECONNRESET;
+ ch->status = -ECONNRESET;
}
break;
case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
shost_printk(KERN_WARNING, shost,
" REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
- target->status = -ECONNRESET;
+ ch->status = -ECONNRESET;
break;
case IB_CM_REJ_CONSUMER_DEFINED:
@@ -2177,30 +2316,31 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
else
shost_printk(KERN_WARNING, shost, PFX
"SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
- target->path.sgid.raw,
- target->orig_dgid, reason);
+ target->sgid.raw,
+ target->orig_dgid.raw, reason);
} else
shost_printk(KERN_WARNING, shost,
" REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
" opcode 0x%02x\n", opcode);
- target->status = -ECONNRESET;
+ ch->status = -ECONNRESET;
break;
case IB_CM_REJ_STALE_CONN:
shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
- target->status = SRP_STALE_CONN;
+ ch->status = SRP_STALE_CONN;
break;
default:
shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
event->param.rej_rcvd.reason);
- target->status = -ECONNRESET;
+ ch->status = -ECONNRESET;
}
}
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
- struct srp_target_port *target = cm_id->context;
+ struct srp_rdma_ch *ch = cm_id->context;
+ struct srp_target_port *target = ch->target;
int comp = 0;
switch (event->event) {
@@ -2208,19 +2348,19 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
shost_printk(KERN_DEBUG, target->scsi_host,
PFX "Sending CM REQ failed\n");
comp = 1;
- target->status = -ECONNRESET;
+ ch->status = -ECONNRESET;
break;
case IB_CM_REP_RECEIVED:
comp = 1;
- srp_cm_rep_handler(cm_id, event->private_data, target);
+ srp_cm_rep_handler(cm_id, event->private_data, ch);
break;
case IB_CM_REJ_RECEIVED:
shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
comp = 1;
- srp_cm_rej_handler(cm_id, event, target);
+ srp_cm_rej_handler(cm_id, event, ch);
break;
case IB_CM_DREQ_RECEIVED:
@@ -2238,7 +2378,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
PFX "connection closed\n");
comp = 1;
- target->status = 0;
+ ch->status = 0;
break;
case IB_CM_MRA_RECEIVED:
@@ -2253,65 +2393,30 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
}
if (comp)
- complete(&target->done);
+ complete(&ch->done);
return 0;
}
/**
- * srp_change_queue_type - changing device queue tag type
- * @sdev: scsi device struct
- * @tag_type: requested tag type
- *
- * Returns queue tag type.
- */
-static int
-srp_change_queue_type(struct scsi_device *sdev, int tag_type)
-{
- if (sdev->tagged_supported) {
- scsi_set_tag_type(sdev, tag_type);
- if (tag_type)
- scsi_activate_tcq(sdev, sdev->queue_depth);
- else
- scsi_deactivate_tcq(sdev, sdev->queue_depth);
- } else
- tag_type = 0;
-
- return tag_type;
-}
-
-/**
* srp_change_queue_depth - setting device queue depth
* @sdev: scsi device struct
* @qdepth: requested queue depth
- * @reason: SCSI_QDEPTH_DEFAULT/SCSI_QDEPTH_QFULL/SCSI_QDEPTH_RAMP_UP
- * (see include/scsi/scsi_host.h for definition)
*
* Returns queue depth.
*/
static int
-srp_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
+srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
{
- struct Scsi_Host *shost = sdev->host;
- int max_depth;
- if (reason == SCSI_QDEPTH_DEFAULT || reason == SCSI_QDEPTH_RAMP_UP) {
- max_depth = shost->can_queue;
- if (!sdev->tagged_supported)
- max_depth = 1;
- if (qdepth > max_depth)
- qdepth = max_depth;
- scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
- } else if (reason == SCSI_QDEPTH_QFULL)
- scsi_track_queue_full(sdev, qdepth);
- else
- return -EOPNOTSUPP;
-
- return sdev->queue_depth;
+ if (!sdev->tagged_supported)
+ qdepth = 1;
+ return scsi_change_queue_depth(sdev, qdepth);
}
-static int srp_send_tsk_mgmt(struct srp_target_port *target,
- u64 req_tag, unsigned int lun, u8 func)
+static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag,
+ unsigned int lun, u8 func)
{
+ struct srp_target_port *target = ch->target;
struct srp_rport *rport = target->rport;
struct ib_device *dev = target->srp_host->srp_dev->dev;
struct srp_iu *iu;
@@ -2320,16 +2425,16 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
if (!target->connected || target->qp_in_error)
return -1;
- init_completion(&target->tsk_mgmt_done);
+ init_completion(&ch->tsk_mgmt_done);
/*
- * Lock the rport mutex to avoid that srp_create_target_ib() is
+ * Lock the rport mutex to avoid that srp_create_ch_ib() is
* invoked while a task management function is being sent.
*/
mutex_lock(&rport->mutex);
- spin_lock_irq(&target->lock);
- iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);
- spin_unlock_irq(&target->lock);
+ spin_lock_irq(&ch->lock);
+ iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
+ spin_unlock_irq(&ch->lock);
if (!iu) {
mutex_unlock(&rport->mutex);
@@ -2350,15 +2455,15 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
DMA_TO_DEVICE);
- if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {
- srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT);
+ if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
+ srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
mutex_unlock(&rport->mutex);
return -1;
}
mutex_unlock(&rport->mutex);
- if (!wait_for_completion_timeout(&target->tsk_mgmt_done,
+ if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
return -1;
@@ -2369,20 +2474,32 @@ static int srp_abort(struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
+ u32 tag;
+ u16 ch_idx;
+ struct srp_rdma_ch *ch;
int ret;
shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
- if (!req || !srp_claim_req(target, req, NULL, scmnd))
+ if (!req)
+ return SUCCESS;
+ tag = blk_mq_unique_tag(scmnd->request);
+ ch_idx = blk_mq_unique_tag_to_hwq(tag);
+ if (WARN_ON_ONCE(ch_idx >= target->ch_count))
return SUCCESS;
- if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
+ ch = &target->ch[ch_idx];
+ if (!srp_claim_req(ch, req, NULL, scmnd))
+ return SUCCESS;
+ shost_printk(KERN_ERR, target->scsi_host,
+ "Sending SRP abort for tag %#x\n", tag);
+ if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
SRP_TSK_ABORT_TASK) == 0)
ret = SUCCESS;
else if (target->rport->state == SRP_RPORT_LOST)
ret = FAST_IO_FAIL;
else
ret = FAILED;
- srp_free_req(target, req, scmnd, 0);
+ srp_free_req(ch, req, scmnd, 0);
scmnd->result = DID_ABORT << 16;
scmnd->scsi_done(scmnd);
@@ -2392,19 +2509,25 @@ static int srp_abort(struct scsi_cmnd *scmnd)
static int srp_reset_device(struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
+ struct srp_rdma_ch *ch;
int i;
shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
- if (srp_send_tsk_mgmt(target, SRP_TAG_NO_REQ, scmnd->device->lun,
+ ch = &target->ch[0];
+ if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
SRP_TSK_LUN_RESET))
return FAILED;
- if (target->tsk_mgmt_status)
+ if (ch->tsk_mgmt_status)
return FAILED;
- for (i = 0; i < target->req_ring_size; ++i) {
- struct srp_request *req = &target->req_ring[i];
- srp_finish_req(target, req, scmnd->device, DID_RESET << 16);
+ for (i = 0; i < target->ch_count; i++) {
+ ch = &target->ch[i];
+ for (i = 0; i < target->req_ring_size; ++i) {
+ struct srp_request *req = &ch->req_ring[i];
+
+ srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
+ }
}
return SUCCESS;
@@ -2466,7 +2589,7 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
+ return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
}
static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
@@ -2474,15 +2597,16 @@ static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%pI6\n", target->path.sgid.raw);
+ return sprintf(buf, "%pI6\n", target->sgid.raw);
}
static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
+ struct srp_rdma_ch *ch = &target->ch[0];
- return sprintf(buf, "%pI6\n", target->path.dgid.raw);
+ return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
}
static ssize_t show_orig_dgid(struct device *dev,
@@ -2490,15 +2614,21 @@ static ssize_t show_orig_dgid(struct device *dev,
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
- return sprintf(buf, "%pI6\n", target->orig_dgid);
+ return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
}
static ssize_t show_req_lim(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct srp_target_port *target = host_to_target(class_to_shost(dev));
+ struct srp_rdma_ch *ch;
+ int i, req_lim = INT_MAX;
- return sprintf(buf, "%d\n", target->req_lim);
+ for (i = 0; i < target->ch_count; i++) {
+ ch = &target->ch[i];
+ req_lim = min(req_lim, ch->req_lim);
+ }
+ return sprintf(buf, "%d\n", req_lim);
}
static ssize_t show_zero_req_lim(struct device *dev,
@@ -2525,6 +2655,14 @@ static ssize_t show_local_ib_device(struct device *dev,
return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
}
+static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct srp_target_port *target = host_to_target(class_to_shost(dev));
+
+ return sprintf(buf, "%d\n", target->ch_count);
+}
+
static ssize_t show_comp_vector(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -2568,6 +2706,7 @@ static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
+static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
@@ -2585,6 +2724,7 @@ static struct device_attribute *srp_host_attrs[] = {
&dev_attr_zero_req_lim,
&dev_attr_local_ib_port,
&dev_attr_local_ib_device,
+ &dev_attr_ch_count,
&dev_attr_comp_vector,
&dev_attr_tl_retry_count,
&dev_attr_cmd_sg_entries,
@@ -2600,7 +2740,7 @@ static struct scsi_host_template srp_template = {
.info = srp_target_info,
.queuecommand = srp_queuecommand,
.change_queue_depth = srp_change_queue_depth,
- .change_queue_type = srp_change_queue_type,
+ .change_queue_type = scsi_change_queue_type,
.eh_abort_handler = srp_abort,
.eh_device_reset_handler = srp_reset_device,
.eh_host_reset_handler = srp_reset_host,
@@ -2610,14 +2750,28 @@ static struct scsi_host_template srp_template = {
.this_id = -1,
.cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
.use_clustering = ENABLE_CLUSTERING,
- .shost_attrs = srp_host_attrs
+ .shost_attrs = srp_host_attrs,
+ .use_blk_tags = 1,
+ .track_queue_depth = 1,
};
+static int srp_sdev_count(struct Scsi_Host *host)
+{
+ struct scsi_device *sdev;
+ int c = 0;
+
+ shost_for_each_device(sdev, host)
+ c++;
+
+ return c;
+}
+
static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
{
struct srp_rport_identifiers ids;
struct srp_rport *rport;
+ target->state = SRP_TARGET_SCANNING;
sprintf(target->target_name, "SRP.T10:%016llX",
(unsigned long long) be64_to_cpu(target->id_ext));
@@ -2640,11 +2794,26 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
list_add_tail(&target->list, &host->target_list);
spin_unlock(&host->target_lock);
- target->state = SRP_TARGET_LIVE;
-
scsi_scan_target(&target->scsi_host->shost_gendev,
0, target->scsi_id, SCAN_WILD_CARD, 0);
+ if (!target->connected || target->qp_in_error) {
+ shost_printk(KERN_INFO, target->scsi_host,
+ PFX "SCSI scan failed - removing SCSI host\n");
+ srp_queue_remove_work(target);
+ goto out;
+ }
+
+ pr_debug(PFX "%s: SCSI scan succeeded - detected %d LUNs\n",
+ dev_name(&target->scsi_host->shost_gendev),
+ srp_sdev_count(target->scsi_host));
+
+ spin_lock_irq(&target->lock);
+ if (target->state == SRP_TARGET_SCANNING)
+ target->state = SRP_TARGET_LIVE;
+ spin_unlock_irq(&target->lock);
+
+out:
return 0;
}
@@ -2760,7 +2929,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
return -ENOMEM;
sep_opt = options;
- while ((p = strsep(&sep_opt, ",")) != NULL) {
+ while ((p = strsep(&sep_opt, ",\n")) != NULL) {
if (!*p)
continue;
@@ -2801,11 +2970,15 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
}
for (i = 0; i < 16; ++i) {
- strlcpy(dgid, p + i * 2, 3);
- target->path.dgid.raw[i] = simple_strtoul(dgid, NULL, 16);
+ strlcpy(dgid, p + i * 2, sizeof(dgid));
+ if (sscanf(dgid, "%hhx",
+ &target->orig_dgid.raw[i]) < 1) {
+ ret = -EINVAL;
+ kfree(p);
+ goto out;
+ }
}
kfree(p);
- memcpy(target->orig_dgid, target->path.dgid.raw, 16);
break;
case SRP_OPT_PKEY:
@@ -2813,7 +2986,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
pr_warn("bad P_Key parameter '%s'\n", p);
goto out;
}
- target->path.pkey = cpu_to_be16(token);
+ target->pkey = cpu_to_be16(token);
break;
case SRP_OPT_SERVICE_ID:
@@ -2823,7 +2996,6 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
goto out;
}
target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
- target->path.service_id = target->service_id;
kfree(p);
break;
@@ -2960,9 +3132,11 @@ static ssize_t srp_create_target(struct device *dev,
container_of(dev, struct srp_host, dev);
struct Scsi_Host *target_host;
struct srp_target_port *target;
+ struct srp_rdma_ch *ch;
struct srp_device *srp_dev = host->srp_dev;
struct ib_device *ibdev = srp_dev->dev;
- int ret;
+ int ret, node_idx, node, cpu, i;
+ bool multich = false;
target_host = scsi_host_alloc(&srp_template,
sizeof (struct srp_target_port));
@@ -2988,12 +3162,22 @@ static ssize_t srp_create_target(struct device *dev,
target->tl_retry_count = 7;
target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
+ /*
+ * Avoid that the SCSI host can be removed by srp_remove_target()
+ * before this function returns.
+ */
+ scsi_host_get(target->scsi_host);
+
mutex_lock(&host->add_target_mutex);
ret = srp_parse_options(buf, target);
if (ret)
goto err;
+ ret = scsi_init_shared_tag_map(target_host, target_host->can_queue);
+ if (ret)
+ goto err;
+
target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
if (!srp_conn_unique(target->srp_host, target)) {
@@ -3022,59 +3206,115 @@ static ssize_t srp_create_target(struct device *dev,
INIT_WORK(&target->tl_err_work, srp_tl_err_work);
INIT_WORK(&target->remove_work, srp_remove_work);
spin_lock_init(&target->lock);
- INIT_LIST_HEAD(&target->free_tx);
- ret = srp_alloc_req_data(target);
+ ret = ib_query_gid(ibdev, host->port, 0, &target->sgid);
if (ret)
- goto err_free_mem;
+ goto err;
- ret = ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
- if (ret)
- goto err_free_mem;
+ ret = -ENOMEM;
+ target->ch_count = max_t(unsigned, num_online_nodes(),
+ min(ch_count ? :
+ min(4 * num_online_nodes(),
+ ibdev->num_comp_vectors),
+ num_online_cpus()));
+ target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
+ GFP_KERNEL);
+ if (!target->ch)
+ goto err;
- ret = srp_create_target_ib(target);
- if (ret)
- goto err_free_mem;
+ node_idx = 0;
+ for_each_online_node(node) {
+ const int ch_start = (node_idx * target->ch_count /
+ num_online_nodes());
+ const int ch_end = ((node_idx + 1) * target->ch_count /
+ num_online_nodes());
+ const int cv_start = (node_idx * ibdev->num_comp_vectors /
+ num_online_nodes() + target->comp_vector)
+ % ibdev->num_comp_vectors;
+ const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
+ num_online_nodes() + target->comp_vector)
+ % ibdev->num_comp_vectors;
+ int cpu_idx = 0;
+
+ for_each_online_cpu(cpu) {
+ if (cpu_to_node(cpu) != node)
+ continue;
+ if (ch_start + cpu_idx >= ch_end)
+ continue;
+ ch = &target->ch[ch_start + cpu_idx];
+ ch->target = target;
+ ch->comp_vector = cv_start == cv_end ? cv_start :
+ cv_start + cpu_idx % (cv_end - cv_start);
+ spin_lock_init(&ch->lock);
+ INIT_LIST_HEAD(&ch->free_tx);
+ ret = srp_new_cm_id(ch);
+ if (ret)
+ goto err_disconnect;
- ret = srp_new_cm_id(target);
- if (ret)
- goto err_free_ib;
+ ret = srp_create_ch_ib(ch);
+ if (ret)
+ goto err_disconnect;
- ret = srp_connect_target(target);
- if (ret) {
- shost_printk(KERN_ERR, target->scsi_host,
- PFX "Connection failed\n");
- goto err_cm_id;
+ ret = srp_alloc_req_data(ch);
+ if (ret)
+ goto err_disconnect;
+
+ ret = srp_connect_ch(ch, multich);
+ if (ret) {
+ shost_printk(KERN_ERR, target->scsi_host,
+ PFX "Connection %d/%d failed\n",
+ ch_start + cpu_idx,
+ target->ch_count);
+ if (node_idx == 0 && cpu_idx == 0) {
+ goto err_disconnect;
+ } else {
+ srp_free_ch_ib(target, ch);
+ srp_free_req_data(target, ch);
+ target->ch_count = ch - target->ch;
+ break;
+ }
+ }
+
+ multich = true;
+ cpu_idx++;
+ }
+ node_idx++;
}
+ target->scsi_host->nr_hw_queues = target->ch_count;
+
ret = srp_add_target(host, target);
if (ret)
goto err_disconnect;
- shost_printk(KERN_DEBUG, target->scsi_host, PFX
- "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
- be64_to_cpu(target->id_ext),
- be64_to_cpu(target->ioc_guid),
- be16_to_cpu(target->path.pkey),
- be64_to_cpu(target->service_id),
- target->path.sgid.raw, target->path.dgid.raw);
+ if (target->state != SRP_TARGET_REMOVED) {
+ shost_printk(KERN_DEBUG, target->scsi_host, PFX
+ "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
+ be64_to_cpu(target->id_ext),
+ be64_to_cpu(target->ioc_guid),
+ be16_to_cpu(target->pkey),
+ be64_to_cpu(target->service_id),
+ target->sgid.raw, target->orig_dgid.raw);
+ }
ret = count;
out:
mutex_unlock(&host->add_target_mutex);
+
+ scsi_host_put(target->scsi_host);
+
return ret;
err_disconnect:
srp_disconnect_target(target);
-err_cm_id:
- ib_destroy_cm_id(target->cm_id);
-
-err_free_ib:
- srp_free_target_ib(target);
+ for (i = 0; i < target->ch_count; i++) {
+ ch = &target->ch[i];
+ srp_free_ch_ib(target, ch);
+ srp_free_req_data(target, ch);
+ }
-err_free_mem:
- srp_free_req_data(target);
+ kfree(target->ch);
err:
scsi_host_put(target_host);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index e46ecb15aa0d..a611556406ac 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -70,9 +70,12 @@ enum {
LOCAL_INV_WR_ID_MASK = 1,
FAST_REG_WR_ID_MASK = 2,
+
+ SRP_LAST_WR_ID = 0xfffffffcU,
};
enum srp_target_state {
+ SRP_TARGET_SCANNING,
SRP_TARGET_LIVE,
SRP_TARGET_REMOVED,
};
@@ -115,7 +118,6 @@ struct srp_host {
};
struct srp_request {
- struct list_head list;
struct scsi_cmnd *scmnd;
struct srp_iu *cmd;
union {
@@ -126,24 +128,62 @@ struct srp_request {
struct srp_direct_buf *indirect_desc;
dma_addr_t indirect_dma_addr;
short nmdesc;
- short index;
};
-struct srp_target_port {
+/**
+ * struct srp_rdma_ch
+ * @comp_vector: Completion vector used by this RDMA channel.
+ */
+struct srp_rdma_ch {
/* These are RW in the hot path, and commonly used together */
struct list_head free_tx;
- struct list_head free_reqs;
spinlock_t lock;
s32 req_lim;
/* These are read-only in the hot path */
- struct ib_cq *send_cq ____cacheline_aligned_in_smp;
+ struct srp_target_port *target ____cacheline_aligned_in_smp;
+ struct ib_cq *send_cq;
struct ib_cq *recv_cq;
struct ib_qp *qp;
union {
struct ib_fmr_pool *fmr_pool;
struct srp_fr_pool *fr_pool;
};
+
+ /* Everything above this point is used in the hot path of
+ * command processing. Try to keep them packed into cachelines.
+ */
+
+ struct completion done;
+ int status;
+
+ struct ib_sa_path_rec path;
+ struct ib_sa_query *path_query;
+ int path_query_id;
+
+ struct ib_cm_id *cm_id;
+ struct srp_iu **tx_ring;
+ struct srp_iu **rx_ring;
+ struct srp_request *req_ring;
+ int max_ti_iu_len;
+ int comp_vector;
+
+ struct completion tsk_mgmt_done;
+ u8 tsk_mgmt_status;
+};
+
+/**
+ * struct srp_target_port
+ * @comp_vector: Completion vector used by the first RDMA channel created for
+ * this target port.
+ */
+struct srp_target_port {
+ /* read and written in the hot path */
+ spinlock_t lock;
+
+ /* read only in the hot path */
+ struct srp_rdma_ch *ch;
+ u32 ch_count;
u32 lkey;
u32 rkey;
enum srp_target_state state;
@@ -152,10 +192,8 @@ struct srp_target_port {
unsigned int indirect_size;
bool allow_ext_sg;
- /* Everything above this point is used in the hot path of
- * command processing. Try to keep them packed into cachelines.
- */
-
+ /* other member variables */
+ union ib_gid sgid;
__be64 id_ext;
__be64 ioc_guid;
__be64 service_id;
@@ -172,34 +210,19 @@ struct srp_target_port {
int comp_vector;
int tl_retry_count;
- struct ib_sa_path_rec path;
- __be16 orig_dgid[8];
- struct ib_sa_query *path_query;
- int path_query_id;
+ union ib_gid orig_dgid;
+ __be16 pkey;
u32 rq_tmo_jiffies;
bool connected;
- struct ib_cm_id *cm_id;
-
- int max_ti_iu_len;
-
int zero_req_lim;
- struct srp_iu **tx_ring;
- struct srp_iu **rx_ring;
- struct srp_request *req_ring;
-
struct work_struct tl_err_work;
struct work_struct remove_work;
struct list_head list;
- struct completion done;
- int status;
bool qp_in_error;
-
- struct completion tsk_mgmt_done;
- u8 tsk_mgmt_status;
};
struct srp_iu {
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 7206547c13ce..dc829682701a 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -2092,6 +2092,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
if (!qp_init)
goto out;
+retry:
ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
ch->rq_size + srp_sq_size, 0);
if (IS_ERR(ch->cq)) {
@@ -2115,6 +2116,13 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
ch->qp = ib_create_qp(sdev->pd, qp_init);
if (IS_ERR(ch->qp)) {
ret = PTR_ERR(ch->qp);
+ if (ret == -ENOMEM) {
+ srp_sq_size /= 2;
+ if (srp_sq_size >= MIN_SRPT_SQ_SIZE) {
+ ib_destroy_cq(ch->cq);
+ goto retry;
+ }
+ }
printk(KERN_ERR "failed to create_qp ret= %d\n", ret);
goto err_destroy_cq;
}