aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-10-06 17:10:28 +0200
committerIngo Molnar <mingo@kernel.org>2015-10-06 17:10:28 +0200
commit82fc167c392a1700f9adbde639730ee8c8122474 (patch)
tree373ab737a040dd21e5f98425e7c82a6cc4a83568 /drivers/infiniband
parentatomic: Implement atomic_read_ctrl() (diff)
parentLinux 4.3-rc4 (diff)
downloadlinux-dev-82fc167c392a1700f9adbde639730ee8c8122474.tar.xz
linux-dev-82fc167c392a1700f9adbde639730ee8c8122474.zip
Merge tag 'v4.3-rc4' into locking/core, to pick up fixes before applying new changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/mlx5/main.c67
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h2
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c18
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c26
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c5
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h1
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c18
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c21
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c293
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h21
12 files changed, 263 insertions, 217 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 41d6911e244e..f1ccd40beae9 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -245,7 +245,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
if (MLX5_CAP_GEN(mdev, apm))
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
- props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
if (MLX5_CAP_GEN(mdev, xrc))
props->device_cap_flags |= IB_DEVICE_XRC;
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
@@ -795,53 +794,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
return 0;
}
-static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
-{
- struct mlx5_create_mkey_mbox_in *in;
- struct mlx5_mkey_seg *seg;
- struct mlx5_core_mr mr;
- int err;
-
- in = kzalloc(sizeof(*in), GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- seg = &in->seg;
- seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
- seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
- seg->start_addr = 0;
-
- err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
- NULL, NULL, NULL);
- if (err) {
- mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
- goto err_in;
- }
-
- kfree(in);
- *key = mr.key;
-
- return 0;
-
-err_in:
- kfree(in);
-
- return err;
-}
-
-static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
-{
- struct mlx5_core_mr mr;
- int err;
-
- memset(&mr, 0, sizeof(mr));
- mr.key = key;
- err = mlx5_core_destroy_mkey(dev->mdev, &mr);
- if (err)
- mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
-}
-
static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
@@ -867,13 +819,6 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
kfree(pd);
return ERR_PTR(-EFAULT);
}
- } else {
- err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
- if (err) {
- mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
- kfree(pd);
- return ERR_PTR(err);
- }
}
return &pd->ibpd;
@@ -884,9 +829,6 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
- if (!pd->uobject)
- free_pa_mkey(mdev, mpd->pa_lkey);
-
mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
kfree(mpd);
@@ -1245,18 +1187,10 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
struct ib_srq_init_attr attr;
struct mlx5_ib_dev *dev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
- u32 rsvd_lkey;
int ret = 0;
dev = container_of(devr, struct mlx5_ib_dev, devr);
- ret = mlx5_core_query_special_context(dev->mdev, &rsvd_lkey);
- if (ret) {
- pr_err("Failed to query special context %d\n", ret);
- return ret;
- }
- dev->ib_dev.local_dma_lkey = rsvd_lkey;
-
devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
if (IS_ERR(devr->p0)) {
ret = PTR_ERR(devr->p0);
@@ -1418,6 +1352,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors =
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index bb8cda79e881..22123b79d550 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -103,7 +103,6 @@ static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibuconte
struct mlx5_ib_pd {
struct ib_pd ibpd;
u32 pdn;
- u32 pa_lkey;
};
/* Use macros here so that don't have to duplicate
@@ -213,7 +212,6 @@ struct mlx5_ib_qp {
int uuarn;
int create_type;
- u32 pa_lkey;
/* Store signature errors */
bool signature_en;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index c745c6c5e10d..6f521a3418e8 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -925,8 +925,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
- else
- qp->pa_lkey = to_mpd(pd)->pa_lkey;
}
if (err)
@@ -2045,7 +2043,7 @@ static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
dseg->addr = cpu_to_be64(mfrpl->map);
dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
- dseg->lkey = cpu_to_be32(pd->pa_lkey);
+ dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
}
static __be32 send_ieth(struct ib_send_wr *wr)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ca2873698d75..4cd5428a2399 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -80,7 +80,7 @@ enum {
IPOIB_NUM_WC = 4,
IPOIB_MAX_PATH_REC_QUEUE = 3,
- IPOIB_MAX_MCAST_QUEUE = 3,
+ IPOIB_MAX_MCAST_QUEUE = 64,
IPOIB_FLAG_OPER_UP = 0,
IPOIB_FLAG_INITIALIZED = 1,
@@ -548,6 +548,8 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
union ib_gid *mgid, int set_qkey);
+int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast);
+struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid);
int ipoib_init_qp(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 36536ce5a3e2..f74316e679d2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1149,6 +1149,9 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
unsigned long dt;
unsigned long flags;
int i;
+ LIST_HEAD(remove_list);
+ struct ipoib_mcast *mcast, *tmcast;
+ struct net_device *dev = priv->dev;
if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
return;
@@ -1176,6 +1179,19 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
lockdep_is_held(&priv->lock))) != NULL) {
/* was the neigh idle for two GC periods */
if (time_after(neigh_obsolete, neigh->alive)) {
+ u8 *mgid = neigh->daddr + 4;
+
+ /* Is this multicast ? */
+ if (*mgid == 0xff) {
+ mcast = __ipoib_mcast_find(dev, mgid);
+
+ if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+ list_del(&mcast->list);
+ rb_erase(&mcast->rb_node, &priv->multicast_tree);
+ list_add_tail(&mcast->list, &remove_list);
+ }
+ }
+
rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
@@ -1191,6 +1207,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
out_unlock:
spin_unlock_irqrestore(&priv->lock, flags);
+ list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+ ipoib_mcast_leave(dev, mcast);
}
static void ipoib_reap_neigh(struct work_struct *work)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 09a1748f9d13..136cbefe00f8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
return mcast;
}
-static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
+struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
@@ -508,17 +508,19 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
/*
- * Historically Linux IPoIB has never properly supported SEND
- * ONLY join. It emulated it by not providing all the required
- * attributes, which is enough to prevent group creation and
- * detect if there are full members or not. A major problem
- * with supporting SEND ONLY is detecting when the group is
- * auto-destroyed as IPoIB will cache the MLID..
+ * Send-only IB Multicast joins do not work at the core
+ * IB layer yet, so we can't use them here. However,
+ * we are emulating an Ethernet multicast send, which
+ * does not require a multicast subscription and will
+ * still send properly. The most appropriate thing to
+ * do is to create the group if it doesn't exist as that
+ * most closely emulates the behavior, from a user space
+ * application perspecitive, of Ethernet multicast
+ * operation. For now, we do a full join, maybe later
+ * when the core IB layers support send only joins we
+ * will use them.
*/
-#if 1
- if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
- comp_mask &= ~IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
-#else
+#if 0
if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
rec.join_state = 4;
#endif
@@ -675,7 +677,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
return 0;
}
-static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
+int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret = 0;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 1ace5d83a4d7..f58ff96b6cbb 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -97,6 +97,11 @@ unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS;
module_param_named(max_sectors, iser_max_sectors, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command (default:1024");
+bool iser_always_reg = true;
+module_param_named(always_register, iser_always_reg, bool, S_IRUGO);
+MODULE_PARM_DESC(always_register,
+ "Always register memory, even for continuous memory regions (default:true)");
+
bool iser_pi_enable = false;
module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO);
MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 86f6583485ef..a5edd6ede692 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -611,6 +611,7 @@ extern int iser_debug_level;
extern bool iser_pi_enable;
extern int iser_pi_guard;
extern unsigned int iser_max_sectors;
+extern bool iser_always_reg;
int iser_assign_reg_ops(struct iser_device *device);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 2493cc748db8..4c46d67d37a1 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -803,11 +803,12 @@ static int
iser_reg_prot_sg(struct iscsi_iser_task *task,
struct iser_data_buf *mem,
struct iser_fr_desc *desc,
+ bool use_dma_key,
struct iser_mem_reg *reg)
{
struct iser_device *device = task->iser_conn->ib_conn.device;
- if (mem->dma_nents == 1)
+ if (use_dma_key)
return iser_reg_dma(device, mem, reg);
return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg);
@@ -817,11 +818,12 @@ static int
iser_reg_data_sg(struct iscsi_iser_task *task,
struct iser_data_buf *mem,
struct iser_fr_desc *desc,
+ bool use_dma_key,
struct iser_mem_reg *reg)
{
struct iser_device *device = task->iser_conn->ib_conn.device;
- if (mem->dma_nents == 1)
+ if (use_dma_key)
return iser_reg_dma(device, mem, reg);
return device->reg_ops->reg_mem(task, mem, &desc->rsc, reg);
@@ -836,14 +838,17 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
struct iser_mem_reg *reg = &task->rdma_reg[dir];
struct iser_mem_reg *data_reg;
struct iser_fr_desc *desc = NULL;
+ bool use_dma_key;
int err;
err = iser_handle_unaligned_buf(task, mem, dir);
if (unlikely(err))
return err;
- if (mem->dma_nents != 1 ||
- scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) {
+ use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
+ scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
+
+ if (!use_dma_key) {
desc = device->reg_ops->reg_desc_get(ib_conn);
reg->mem_h = desc;
}
@@ -853,7 +858,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
else
data_reg = &task->desc.data_reg;
- err = iser_reg_data_sg(task, mem, desc, data_reg);
+ err = iser_reg_data_sg(task, mem, desc, use_dma_key, data_reg);
if (unlikely(err))
goto err_reg;
@@ -866,7 +871,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
if (unlikely(err))
goto err_reg;
- err = iser_reg_prot_sg(task, mem, desc, prot_reg);
+ err = iser_reg_prot_sg(task, mem, desc,
+ use_dma_key, prot_reg);
if (unlikely(err))
goto err_reg;
}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index ae70cc1463ac..85132d867bc8 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -133,11 +133,15 @@ static int iser_create_device_ib_res(struct iser_device *device)
(unsigned long)comp);
}
- device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ);
- if (IS_ERR(device->mr))
- goto dma_mr_err;
+ if (!iser_always_reg) {
+ int access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ;
+
+ device->mr = ib_get_dma_mr(device->pd, access);
+ if (IS_ERR(device->mr))
+ goto dma_mr_err;
+ }
INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
iser_event_handler);
@@ -147,7 +151,8 @@ static int iser_create_device_ib_res(struct iser_device *device)
return 0;
handler_err:
- ib_dereg_mr(device->mr);
+ if (device->mr)
+ ib_dereg_mr(device->mr);
dma_mr_err:
for (i = 0; i < device->comps_used; i++)
tasklet_kill(&device->comps[i].tasklet);
@@ -173,7 +178,6 @@ comps_err:
static void iser_free_device_ib_res(struct iser_device *device)
{
int i;
- BUG_ON(device->mr == NULL);
for (i = 0; i < device->comps_used; i++) {
struct iser_comp *comp = &device->comps[i];
@@ -184,7 +188,8 @@ static void iser_free_device_ib_res(struct iser_device *device)
}
(void)ib_unregister_event_handler(&device->event_handler);
- (void)ib_dereg_mr(device->mr);
+ if (device->mr)
+ (void)ib_dereg_mr(device->mr);
ib_dealloc_pd(device->pd);
kfree(device->comps);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 403bd29443b8..aa59037d7504 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -238,8 +238,6 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
rx_sg->lkey = device->pd->local_dma_lkey;
}
- isert_conn->rx_desc_head = 0;
-
return 0;
dma_map_fail:
@@ -634,7 +632,7 @@ static void
isert_init_conn(struct isert_conn *isert_conn)
{
isert_conn->state = ISER_CONN_INIT;
- INIT_LIST_HEAD(&isert_conn->accept_node);
+ INIT_LIST_HEAD(&isert_conn->node);
init_completion(&isert_conn->login_comp);
init_completion(&isert_conn->login_req_comp);
init_completion(&isert_conn->wait);
@@ -762,28 +760,15 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
ret = isert_rdma_post_recvl(isert_conn);
if (ret)
goto out_conn_dev;
- /*
- * Obtain the second reference now before isert_rdma_accept() to
- * ensure that any initiator generated REJECT CM event that occurs
- * asynchronously won't drop the last reference until the error path
- * in iscsi_target_login_sess_out() does it's ->iscsit_free_conn() ->
- * isert_free_conn() -> isert_put_conn() -> kref_put().
- */
- if (!kref_get_unless_zero(&isert_conn->kref)) {
- isert_warn("conn %p connect_release is running\n", isert_conn);
- goto out_conn_dev;
- }
ret = isert_rdma_accept(isert_conn);
if (ret)
goto out_conn_dev;
- mutex_lock(&isert_np->np_accept_mutex);
- list_add_tail(&isert_conn->accept_node, &isert_np->np_accept_list);
- mutex_unlock(&isert_np->np_accept_mutex);
+ mutex_lock(&isert_np->mutex);
+ list_add_tail(&isert_conn->node, &isert_np->accepted);
+ mutex_unlock(&isert_np->mutex);
- isert_info("np %p: Allow accept_np to continue\n", np);
- up(&isert_np->np_sem);
return 0;
out_conn_dev:
@@ -831,13 +816,21 @@ static void
isert_connected_handler(struct rdma_cm_id *cma_id)
{
struct isert_conn *isert_conn = cma_id->qp->qp_context;
+ struct isert_np *isert_np = cma_id->context;
isert_info("conn %p\n", isert_conn);
mutex_lock(&isert_conn->mutex);
- if (isert_conn->state != ISER_CONN_FULL_FEATURE)
- isert_conn->state = ISER_CONN_UP;
+ isert_conn->state = ISER_CONN_UP;
+ kref_get(&isert_conn->kref);
mutex_unlock(&isert_conn->mutex);
+
+ mutex_lock(&isert_np->mutex);
+ list_move_tail(&isert_conn->node, &isert_np->pending);
+ mutex_unlock(&isert_np->mutex);
+
+ isert_info("np %p: Allow accept_np to continue\n", isert_np);
+ up(&isert_np->sem);
}
static void
@@ -903,14 +896,14 @@ isert_np_cma_handler(struct isert_np *isert_np,
switch (event) {
case RDMA_CM_EVENT_DEVICE_REMOVAL:
- isert_np->np_cm_id = NULL;
+ isert_np->cm_id = NULL;
break;
case RDMA_CM_EVENT_ADDR_CHANGE:
- isert_np->np_cm_id = isert_setup_id(isert_np);
- if (IS_ERR(isert_np->np_cm_id)) {
+ isert_np->cm_id = isert_setup_id(isert_np);
+ if (IS_ERR(isert_np->cm_id)) {
isert_err("isert np %p setup id failed: %ld\n",
- isert_np, PTR_ERR(isert_np->np_cm_id));
- isert_np->np_cm_id = NULL;
+ isert_np, PTR_ERR(isert_np->cm_id));
+ isert_np->cm_id = NULL;
}
break;
default:
@@ -929,7 +922,7 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
struct isert_conn *isert_conn;
bool terminating = false;
- if (isert_np->np_cm_id == cma_id)
+ if (isert_np->cm_id == cma_id)
return isert_np_cma_handler(cma_id->context, event);
isert_conn = cma_id->qp->qp_context;
@@ -945,13 +938,13 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
if (terminating)
goto out;
- mutex_lock(&isert_np->np_accept_mutex);
- if (!list_empty(&isert_conn->accept_node)) {
- list_del_init(&isert_conn->accept_node);
+ mutex_lock(&isert_np->mutex);
+ if (!list_empty(&isert_conn->node)) {
+ list_del_init(&isert_conn->node);
isert_put_conn(isert_conn);
queue_work(isert_release_wq, &isert_conn->release_work);
}
- mutex_unlock(&isert_np->np_accept_mutex);
+ mutex_unlock(&isert_np->mutex);
out:
return 0;
@@ -962,6 +955,7 @@ isert_connect_error(struct rdma_cm_id *cma_id)
{
struct isert_conn *isert_conn = cma_id->qp->qp_context;
+ list_del_init(&isert_conn->node);
isert_conn->cm_id = NULL;
isert_put_conn(isert_conn);
@@ -1006,35 +1000,51 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
}
static int
-isert_post_recv(struct isert_conn *isert_conn, u32 count)
+isert_post_recvm(struct isert_conn *isert_conn, u32 count)
{
struct ib_recv_wr *rx_wr, *rx_wr_failed;
int i, ret;
- unsigned int rx_head = isert_conn->rx_desc_head;
struct iser_rx_desc *rx_desc;
for (rx_wr = isert_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
- rx_desc = &isert_conn->rx_descs[rx_head];
- rx_wr->wr_id = (uintptr_t)rx_desc;
- rx_wr->sg_list = &rx_desc->rx_sg;
- rx_wr->num_sge = 1;
- rx_wr->next = rx_wr + 1;
- rx_head = (rx_head + 1) & (ISERT_QP_MAX_RECV_DTOS - 1);
+ rx_desc = &isert_conn->rx_descs[i];
+ rx_wr->wr_id = (uintptr_t)rx_desc;
+ rx_wr->sg_list = &rx_desc->rx_sg;
+ rx_wr->num_sge = 1;
+ rx_wr->next = rx_wr + 1;
}
-
rx_wr--;
rx_wr->next = NULL; /* mark end of work requests list */
isert_conn->post_recv_buf_count += count;
ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr,
- &rx_wr_failed);
+ &rx_wr_failed);
if (ret) {
isert_err("ib_post_recv() failed with ret: %d\n", ret);
isert_conn->post_recv_buf_count -= count;
- } else {
- isert_dbg("Posted %d RX buffers\n", count);
- isert_conn->rx_desc_head = rx_head;
}
+
+ return ret;
+}
+
+static int
+isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
+{
+ struct ib_recv_wr *rx_wr_failed, rx_wr;
+ int ret;
+
+ rx_wr.wr_id = (uintptr_t)rx_desc;
+ rx_wr.sg_list = &rx_desc->rx_sg;
+ rx_wr.num_sge = 1;
+ rx_wr.next = NULL;
+
+ isert_conn->post_recv_buf_count++;
+ ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_failed);
+ if (ret) {
+ isert_err("ib_post_recv() failed with ret: %d\n", ret);
+ isert_conn->post_recv_buf_count--;
+ }
+
return ret;
}
@@ -1205,7 +1215,8 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
if (ret)
return ret;
- ret = isert_post_recv(isert_conn, ISERT_MIN_POSTED_RX);
+ ret = isert_post_recvm(isert_conn,
+ ISERT_QP_MAX_RECV_DTOS);
if (ret)
return ret;
@@ -1278,7 +1289,7 @@ isert_rx_login_req(struct isert_conn *isert_conn)
}
static struct iscsi_cmd
-*isert_allocate_cmd(struct iscsi_conn *conn)
+*isert_allocate_cmd(struct iscsi_conn *conn, struct iser_rx_desc *rx_desc)
{
struct isert_conn *isert_conn = conn->context;
struct isert_cmd *isert_cmd;
@@ -1292,6 +1303,7 @@ static struct iscsi_cmd
isert_cmd = iscsit_priv_cmd(cmd);
isert_cmd->conn = isert_conn;
isert_cmd->iscsi_cmd = cmd;
+ isert_cmd->rx_desc = rx_desc;
return cmd;
}
@@ -1303,9 +1315,9 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn,
{
struct iscsi_conn *conn = isert_conn->conn;
struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)buf;
- struct scatterlist *sg;
int imm_data, imm_data_len, unsol_data, sg_nents, rc;
bool dump_payload = false;
+ unsigned int data_len;
rc = iscsit_setup_scsi_cmd(conn, cmd, buf);
if (rc < 0)
@@ -1314,7 +1326,10 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn,
imm_data = cmd->immediate_data;
imm_data_len = cmd->first_burst_len;
unsol_data = cmd->unsolicited_data;
+ data_len = cmd->se_cmd.data_length;
+ if (imm_data && imm_data_len == data_len)
+ cmd->se_cmd.se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC;
rc = iscsit_process_scsi_cmd(conn, cmd, hdr);
if (rc < 0) {
return 0;
@@ -1326,13 +1341,20 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn,
if (!imm_data)
return 0;
- sg = &cmd->se_cmd.t_data_sg[0];
- sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE));
-
- isert_dbg("Copying Immediate SG: %p sg_nents: %u from %p imm_data_len: %d\n",
- sg, sg_nents, &rx_desc->data[0], imm_data_len);
-
- sg_copy_from_buffer(sg, sg_nents, &rx_desc->data[0], imm_data_len);
+ if (imm_data_len != data_len) {
+ sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE));
+ sg_copy_from_buffer(cmd->se_cmd.t_data_sg, sg_nents,
+ &rx_desc->data[0], imm_data_len);
+ isert_dbg("Copy Immediate sg_nents: %u imm_data_len: %d\n",
+ sg_nents, imm_data_len);
+ } else {
+ sg_init_table(&isert_cmd->sg, 1);
+ cmd->se_cmd.t_data_sg = &isert_cmd->sg;
+ cmd->se_cmd.t_data_nents = 1;
+ sg_set_buf(&isert_cmd->sg, &rx_desc->data[0], imm_data_len);
+ isert_dbg("Transfer Immediate imm_data_len: %d\n",
+ imm_data_len);
+ }
cmd->write_data_done += imm_data_len;
@@ -1407,6 +1429,15 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn,
if (rc < 0)
return rc;
+ /*
+ * multiple data-outs on the same command can arrive -
+ * so post the buffer before hand
+ */
+ rc = isert_post_recv(isert_conn, rx_desc);
+ if (rc) {
+ isert_err("ib_post_recv failed with %d\n", rc);
+ return rc;
+ }
return 0;
}
@@ -1479,7 +1510,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
switch (opcode) {
case ISCSI_OP_SCSI_CMD:
- cmd = isert_allocate_cmd(conn);
+ cmd = isert_allocate_cmd(conn, rx_desc);
if (!cmd)
break;
@@ -1493,7 +1524,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
rx_desc, (unsigned char *)hdr);
break;
case ISCSI_OP_NOOP_OUT:
- cmd = isert_allocate_cmd(conn);
+ cmd = isert_allocate_cmd(conn, rx_desc);
if (!cmd)
break;
@@ -1506,7 +1537,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
(unsigned char *)hdr);
break;
case ISCSI_OP_SCSI_TMFUNC:
- cmd = isert_allocate_cmd(conn);
+ cmd = isert_allocate_cmd(conn, rx_desc);
if (!cmd)
break;
@@ -1514,22 +1545,20 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
(unsigned char *)hdr);
break;
case ISCSI_OP_LOGOUT:
- cmd = isert_allocate_cmd(conn);
+ cmd = isert_allocate_cmd(conn, rx_desc);
if (!cmd)
break;
ret = iscsit_handle_logout_cmd(conn, cmd, (unsigned char *)hdr);
break;
case ISCSI_OP_TEXT:
- if (be32_to_cpu(hdr->ttt) != 0xFFFFFFFF) {
+ if (be32_to_cpu(hdr->ttt) != 0xFFFFFFFF)
cmd = iscsit_find_cmd_from_itt(conn, hdr->itt);
- if (!cmd)
- break;
- } else {
- cmd = isert_allocate_cmd(conn);
- if (!cmd)
- break;
- }
+ else
+ cmd = isert_allocate_cmd(conn, rx_desc);
+
+ if (!cmd)
+ break;
isert_cmd = iscsit_priv_cmd(cmd);
ret = isert_handle_text_cmd(isert_conn, isert_cmd, cmd,
@@ -1589,7 +1618,7 @@ isert_rcv_completion(struct iser_rx_desc *desc,
struct ib_device *ib_dev = isert_conn->cm_id->device;
struct iscsi_hdr *hdr;
u64 rx_dma;
- int rx_buflen, outstanding;
+ int rx_buflen;
if ((char *)desc == isert_conn->login_req_buf) {
rx_dma = isert_conn->login_req_dma;
@@ -1629,22 +1658,6 @@ isert_rcv_completion(struct iser_rx_desc *desc,
DMA_FROM_DEVICE);
isert_conn->post_recv_buf_count--;
- isert_dbg("Decremented post_recv_buf_count: %d\n",
- isert_conn->post_recv_buf_count);
-
- if ((char *)desc == isert_conn->login_req_buf)
- return;
-
- outstanding = isert_conn->post_recv_buf_count;
- if (outstanding + ISERT_MIN_POSTED_RX <= ISERT_QP_MAX_RECV_DTOS) {
- int err, count = min(ISERT_QP_MAX_RECV_DTOS - outstanding,
- ISERT_MIN_POSTED_RX);
- err = isert_post_recv(isert_conn, count);
- if (err) {
- isert_err("isert_post_recv() count: %d failed, %d\n",
- count, err);
- }
- }
}
static int
@@ -2156,6 +2169,12 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
struct ib_send_wr *wr_failed;
int ret;
+ ret = isert_post_recv(isert_conn, isert_cmd->rx_desc);
+ if (ret) {
+ isert_err("ib_post_recv failed with %d\n", ret);
+ return ret;
+ }
+
ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr,
&wr_failed);
if (ret) {
@@ -2950,6 +2969,12 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
&isert_cmd->tx_desc.send_wr);
isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr;
wr->send_wr_num += 1;
+
+ rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
+ if (rc) {
+ isert_err("ib_post_recv failed with %d\n", rc);
+ return rc;
+ }
}
rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
@@ -2999,9 +3024,16 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
static int
isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
{
- int ret;
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+ int ret = 0;
switch (state) {
+ case ISTATE_REMOVE:
+ spin_lock_bh(&conn->cmd_lock);
+ list_del_init(&cmd->i_conn_node);
+ spin_unlock_bh(&conn->cmd_lock);
+ isert_put_cmd(isert_cmd, true);
+ break;
case ISTATE_SEND_NOPIN_WANT_RESPONSE:
ret = isert_put_nopin(cmd, conn, false);
break;
@@ -3106,10 +3138,10 @@ isert_setup_np(struct iscsi_np *np,
isert_err("Unable to allocate struct isert_np\n");
return -ENOMEM;
}
- sema_init(&isert_np->np_sem, 0);
- mutex_init(&isert_np->np_accept_mutex);
- INIT_LIST_HEAD(&isert_np->np_accept_list);
- init_completion(&isert_np->np_login_comp);
+ sema_init(&isert_np->sem, 0);
+ mutex_init(&isert_np->mutex);
+ INIT_LIST_HEAD(&isert_np->accepted);
+ INIT_LIST_HEAD(&isert_np->pending);
isert_np->np = np;
/*
@@ -3125,7 +3157,7 @@ isert_setup_np(struct iscsi_np *np,
goto out;
}
- isert_np->np_cm_id = isert_lid;
+ isert_np->cm_id = isert_lid;
np->np_context = isert_np;
return 0;
@@ -3214,7 +3246,7 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
int ret;
accept_wait:
- ret = down_interruptible(&isert_np->np_sem);
+ ret = down_interruptible(&isert_np->sem);
if (ret)
return -ENODEV;
@@ -3231,15 +3263,15 @@ accept_wait:
}
spin_unlock_bh(&np->np_thread_lock);
- mutex_lock(&isert_np->np_accept_mutex);
- if (list_empty(&isert_np->np_accept_list)) {
- mutex_unlock(&isert_np->np_accept_mutex);
+ mutex_lock(&isert_np->mutex);
+ if (list_empty(&isert_np->pending)) {
+ mutex_unlock(&isert_np->mutex);
goto accept_wait;
}
- isert_conn = list_first_entry(&isert_np->np_accept_list,
- struct isert_conn, accept_node);
- list_del_init(&isert_conn->accept_node);
- mutex_unlock(&isert_np->np_accept_mutex);
+ isert_conn = list_first_entry(&isert_np->pending,
+ struct isert_conn, node);
+ list_del_init(&isert_conn->node);
+ mutex_unlock(&isert_np->mutex);
conn->context = isert_conn;
isert_conn->conn = conn;
@@ -3257,28 +3289,39 @@ isert_free_np(struct iscsi_np *np)
struct isert_np *isert_np = np->np_context;
struct isert_conn *isert_conn, *n;
- if (isert_np->np_cm_id)
- rdma_destroy_id(isert_np->np_cm_id);
+ if (isert_np->cm_id)
+ rdma_destroy_id(isert_np->cm_id);
/*
* FIXME: At this point we don't have a good way to insure
* that at this point we don't have hanging connections that
* completed RDMA establishment but didn't start iscsi login
* process. So work-around this by cleaning up what ever piled
- * up in np_accept_list.
+ * up in accepted and pending lists.
*/
- mutex_lock(&isert_np->np_accept_mutex);
- if (!list_empty(&isert_np->np_accept_list)) {
- isert_info("Still have isert connections, cleaning up...\n");
+ mutex_lock(&isert_np->mutex);
+ if (!list_empty(&isert_np->pending)) {
+ isert_info("Still have isert pending connections\n");
+ list_for_each_entry_safe(isert_conn, n,
+ &isert_np->pending,
+ node) {
+ isert_info("cleaning isert_conn %p state (%d)\n",
+ isert_conn, isert_conn->state);
+ isert_connect_release(isert_conn);
+ }
+ }
+
+ if (!list_empty(&isert_np->accepted)) {
+ isert_info("Still have isert accepted connections\n");
list_for_each_entry_safe(isert_conn, n,
- &isert_np->np_accept_list,
- accept_node) {
+ &isert_np->accepted,
+ node) {
isert_info("cleaning isert_conn %p state (%d)\n",
isert_conn, isert_conn->state);
isert_connect_release(isert_conn);
}
}
- mutex_unlock(&isert_np->np_accept_mutex);
+ mutex_unlock(&isert_np->mutex);
np->np_context = NULL;
kfree(isert_np);
@@ -3345,6 +3388,41 @@ isert_wait4flush(struct isert_conn *isert_conn)
wait_for_completion(&isert_conn->wait_comp_err);
}
+/**
+ * isert_put_unsol_pending_cmds() - Drop commands waiting for
+ * unsolicitate dataout
+ * @conn: iscsi connection
+ *
+ * We might still have commands that are waiting for unsolicited
+ * dataouts messages. We must put the extra reference on those
+ * before blocking on the target_wait_for_session_cmds
+ */
+static void
+isert_put_unsol_pending_cmds(struct iscsi_conn *conn)
+{
+ struct iscsi_cmd *cmd, *tmp;
+ static LIST_HEAD(drop_cmd_list);
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_for_each_entry_safe(cmd, tmp, &conn->conn_cmd_list, i_conn_node) {
+ if ((cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA) &&
+ (cmd->write_data_done < conn->sess->sess_ops->FirstBurstLength) &&
+ (cmd->write_data_done < cmd->se_cmd.data_length))
+ list_move_tail(&cmd->i_conn_node, &drop_cmd_list);
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+
+ list_for_each_entry_safe(cmd, tmp, &drop_cmd_list, i_conn_node) {
+ list_del_init(&cmd->i_conn_node);
+ if (cmd->i_state != ISTATE_REMOVE) {
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+
+ isert_info("conn %p dropping cmd %p\n", conn, cmd);
+ isert_put_cmd(isert_cmd, true);
+ }
+ }
+}
+
static void isert_wait_conn(struct iscsi_conn *conn)
{
struct isert_conn *isert_conn = conn->context;
@@ -3363,8 +3441,9 @@ static void isert_wait_conn(struct iscsi_conn *conn)
isert_conn_terminate(isert_conn);
mutex_unlock(&isert_conn->mutex);
- isert_wait4cmds(conn);
isert_wait4flush(isert_conn);
+ isert_put_unsol_pending_cmds(conn);
+ isert_wait4cmds(conn);
isert_wait4logout(isert_conn);
queue_work(isert_release_wq, &isert_conn->release_work);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 6a04ba3c0f72..c5b99bcecbcf 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -113,7 +113,6 @@ enum {
};
struct isert_rdma_wr {
- struct list_head wr_list;
struct isert_cmd *isert_cmd;
enum iser_ib_op_code iser_ib_op;
struct ib_sge *ib_sge;
@@ -134,14 +133,13 @@ struct isert_cmd {
uint64_t write_va;
u64 pdu_buf_dma;
u32 pdu_buf_len;
- u32 read_va_off;
- u32 write_va_off;
- u32 rdma_wr_num;
struct isert_conn *conn;
struct iscsi_cmd *iscsi_cmd;
struct iser_tx_desc tx_desc;
+ struct iser_rx_desc *rx_desc;
struct isert_rdma_wr rdma_wr;
struct work_struct comp_work;
+ struct scatterlist sg;
};
struct isert_device;
@@ -159,11 +157,10 @@ struct isert_conn {
u64 login_req_dma;
int login_req_len;
u64 login_rsp_dma;
- unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
- struct ib_recv_wr rx_wr[ISERT_MIN_POSTED_RX];
+ struct ib_recv_wr rx_wr[ISERT_QP_MAX_RECV_DTOS];
struct iscsi_conn *conn;
- struct list_head accept_node;
+ struct list_head node;
struct completion login_comp;
struct completion login_req_comp;
struct iser_tx_desc login_tx_desc;
@@ -222,9 +219,9 @@ struct isert_device {
struct isert_np {
struct iscsi_np *np;
- struct semaphore np_sem;
- struct rdma_cm_id *np_cm_id;
- struct mutex np_accept_mutex;
- struct list_head np_accept_list;
- struct completion np_login_comp;
+ struct semaphore sem;
+ struct rdma_cm_id *cm_id;
+ struct mutex mutex;
+ struct list_head accepted;
+ struct list_head pending;
};