aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/vhost
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost')
-rw-r--r--drivers/vhost/net.c92
-rw-r--r--drivers/vhost/scsi.c136
-rw-r--r--drivers/vhost/vhost.c56
3 files changed, 158 insertions, 126 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 969a85960e9f..831eb4fd197d 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -276,12 +276,12 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
* of used idx. Once lower device DMA done contiguously, we will signal KVM
* guest used idx.
*/
-static int vhost_zerocopy_signal_used(struct vhost_net *net,
- struct vhost_virtqueue *vq)
+static void vhost_zerocopy_signal_used(struct vhost_net *net,
+ struct vhost_virtqueue *vq)
{
struct vhost_net_virtqueue *nvq =
container_of(vq, struct vhost_net_virtqueue, vq);
- int i;
+ int i, add;
int j = 0;
for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
@@ -289,15 +289,17 @@ static int vhost_zerocopy_signal_used(struct vhost_net *net,
vhost_net_tx_err(net);
if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
- vhost_add_used_and_signal(vq->dev, vq,
- vq->heads[i].id, 0);
++j;
} else
break;
}
- if (j)
- nvq->done_idx = i;
- return j;
+ while (j) {
+ add = min(UIO_MAXIOV - nvq->done_idx, j);
+ vhost_add_used_and_signal_n(vq->dev, vq,
+ &vq->heads[nvq->done_idx], add);
+ nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
+ j -= add;
+ }
}
static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
@@ -306,6 +308,11 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
struct vhost_virtqueue *vq = ubufs->vq;
int cnt = atomic_read(&ubufs->kref.refcount);
+ /* set len to mark this desc buffers done DMA */
+ vq->heads[ubuf->desc].len = success ?
+ VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
+ vhost_net_ubuf_put(ubufs);
+
/*
* Trigger polling thread if guest stopped submitting new buffers:
* in this case, the refcount after decrement will eventually reach 1
@@ -316,10 +323,6 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
*/
if (cnt <= 2 || !(cnt % 16))
vhost_poll_queue(&vq->poll);
- /* set len to mark this desc buffers done DMA */
- vq->heads[ubuf->desc].len = success ?
- VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
- vhost_net_ubuf_put(ubufs);
}
/* Expects to be always run from workqueue - which acts as
@@ -360,6 +363,13 @@ static void handle_tx(struct vhost_net *net)
if (zcopy)
vhost_zerocopy_signal_used(net, vq);
+ /* If more outstanding DMAs, queue the work.
+ * Handle upend_idx wrap around
+ */
+ if (unlikely((nvq->upend_idx + vq->num - VHOST_MAX_PEND)
+ % UIO_MAXIOV == nvq->done_idx))
+ break;
+
head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
ARRAY_SIZE(vq->iov),
&out, &in,
@@ -369,17 +379,6 @@ static void handle_tx(struct vhost_net *net)
break;
/* Nothing new? Wait for eventfd to tell us they refilled. */
if (head == vq->num) {
- int num_pends;
-
- /* If more outstanding DMAs, queue the work.
- * Handle upend_idx wrap around
- */
- num_pends = likely(nvq->upend_idx >= nvq->done_idx) ?
- (nvq->upend_idx - nvq->done_idx) :
- (nvq->upend_idx + UIO_MAXIOV -
- nvq->done_idx);
- if (unlikely(num_pends > VHOST_MAX_PEND))
- break;
if (unlikely(vhost_enable_notify(&net->dev, vq))) {
vhost_disable_notify(&net->dev, vq);
continue;
@@ -402,43 +401,36 @@ static void handle_tx(struct vhost_net *net)
iov_length(nvq->hdr, s), hdr_size);
break;
}
- zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN ||
- nvq->upend_idx != nvq->done_idx);
+
+ zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
+ && (nvq->upend_idx + 1) % UIO_MAXIOV !=
+ nvq->done_idx
+ && vhost_net_tx_select_zcopy(net);
/* use msg_control to pass vhost zerocopy ubuf info to skb */
if (zcopy_used) {
+ struct ubuf_info *ubuf;
+ ubuf = nvq->ubuf_info + nvq->upend_idx;
+
vq->heads[nvq->upend_idx].id = head;
- if (!vhost_net_tx_select_zcopy(net) ||
- len < VHOST_GOODCOPY_LEN) {
- /* copy don't need to wait for DMA done */
- vq->heads[nvq->upend_idx].len =
- VHOST_DMA_DONE_LEN;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- ubufs = NULL;
- } else {
- struct ubuf_info *ubuf;
- ubuf = nvq->ubuf_info + nvq->upend_idx;
-
- vq->heads[nvq->upend_idx].len =
- VHOST_DMA_IN_PROGRESS;
- ubuf->callback = vhost_zerocopy_callback;
- ubuf->ctx = nvq->ubufs;
- ubuf->desc = nvq->upend_idx;
- msg.msg_control = ubuf;
- msg.msg_controllen = sizeof(ubuf);
- ubufs = nvq->ubufs;
- kref_get(&ubufs->kref);
- }
+ vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
+ ubuf->callback = vhost_zerocopy_callback;
+ ubuf->ctx = nvq->ubufs;
+ ubuf->desc = nvq->upend_idx;
+ msg.msg_control = ubuf;
+ msg.msg_controllen = sizeof(ubuf);
+ ubufs = nvq->ubufs;
+ kref_get(&ubufs->kref);
nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
- } else
+ } else {
msg.msg_control = NULL;
+ ubufs = NULL;
+ }
/* TODO: Check specific error and bomb out unless ENOBUFS? */
err = sock->ops->sendmsg(NULL, sock, &msg, len);
if (unlikely(err < 0)) {
if (zcopy_used) {
- if (ubufs)
- vhost_net_ubuf_put(ubufs);
+ vhost_net_ubuf_put(ubufs);
nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
% UIO_MAXIOV;
}
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 0c27c7df1b09..4b79a1f2f901 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1,12 +1,12 @@
/*******************************************************************************
* Vhost kernel TCM fabric driver for virtio SCSI initiators
*
- * (C) Copyright 2010-2012 RisingTide Systems LLC.
+ * (C) Copyright 2010-2013 Datera, Inc.
* (C) Copyright 2010-2012 IBM Corp.
*
* Licensed to the Linux Foundation under the General Public License (GPL) version 2.
*
- * Authors: Nicholas A. Bellinger <nab@risingtidesystems.com>
+ * Authors: Nicholas A. Bellinger <nab@daterainc.com>
* Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -48,12 +48,16 @@
#include <linux/virtio_scsi.h>
#include <linux/llist.h>
#include <linux/bitmap.h>
+#include <linux/percpu_ida.h>
#include "vhost.h"
#define TCM_VHOST_VERSION "v0.1"
#define TCM_VHOST_NAMELEN 256
#define TCM_VHOST_MAX_CDB_SIZE 32
+#define TCM_VHOST_DEFAULT_TAGS 256
+#define TCM_VHOST_PREALLOC_SGLS 2048
+#define TCM_VHOST_PREALLOC_PAGES 2048
struct vhost_scsi_inflight {
/* Wait for the flush operation to finish */
@@ -79,6 +83,7 @@ struct tcm_vhost_cmd {
u32 tvc_lun;
/* Pointer to the SGL formatted memory from virtio-scsi */
struct scatterlist *tvc_sgl;
+ struct page **tvc_upages;
/* Pointer to response */
struct virtio_scsi_cmd_resp __user *tvc_resp;
/* Pointer to vhost_scsi for our device */
@@ -450,17 +455,16 @@ static void tcm_vhost_release_cmd(struct se_cmd *se_cmd)
{
struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd,
struct tcm_vhost_cmd, tvc_se_cmd);
+ struct se_session *se_sess = se_cmd->se_sess;
if (tv_cmd->tvc_sgl_count) {
u32 i;
for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
put_page(sg_page(&tv_cmd->tvc_sgl[i]));
-
- kfree(tv_cmd->tvc_sgl);
}
tcm_vhost_put_inflight(tv_cmd->inflight);
- kfree(tv_cmd);
+ percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
}
static int tcm_vhost_shutdown_session(struct se_session *se_sess)
@@ -704,7 +708,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
}
static struct tcm_vhost_cmd *
-vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq,
+vhost_scsi_get_tag(struct vhost_virtqueue *vq,
struct tcm_vhost_tpg *tpg,
struct virtio_scsi_cmd_req *v_req,
u32 exp_data_len,
@@ -712,18 +716,27 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq,
{
struct tcm_vhost_cmd *cmd;
struct tcm_vhost_nexus *tv_nexus;
+ struct se_session *se_sess;
+ struct scatterlist *sg;
+ struct page **pages;
+ int tag;
tv_nexus = tpg->tpg_nexus;
if (!tv_nexus) {
pr_err("Unable to locate active struct tcm_vhost_nexus\n");
return ERR_PTR(-EIO);
}
+ se_sess = tv_nexus->tvn_se_sess;
- cmd = kzalloc(sizeof(struct tcm_vhost_cmd), GFP_ATOMIC);
- if (!cmd) {
- pr_err("Unable to allocate struct tcm_vhost_cmd\n");
- return ERR_PTR(-ENOMEM);
- }
+ tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_KERNEL);
+ cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag];
+ sg = cmd->tvc_sgl;
+ pages = cmd->tvc_upages;
+ memset(cmd, 0, sizeof(struct tcm_vhost_cmd));
+
+ cmd->tvc_sgl = sg;
+ cmd->tvc_upages = pages;
+ cmd->tvc_se_cmd.map_tag = tag;
cmd->tvc_tag = v_req->tag;
cmd->tvc_task_attr = v_req->task_attr;
cmd->tvc_exp_data_len = exp_data_len;
@@ -740,7 +753,8 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq,
* Returns the number of scatterlist entries used or -errno on error.
*/
static int
-vhost_scsi_map_to_sgl(struct scatterlist *sgl,
+vhost_scsi_map_to_sgl(struct tcm_vhost_cmd *tv_cmd,
+ struct scatterlist *sgl,
unsigned int sgl_count,
struct iovec *iov,
int write)
@@ -752,13 +766,25 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl,
struct page **pages;
int ret, i;
+ if (sgl_count > TCM_VHOST_PREALLOC_SGLS) {
+ pr_err("vhost_scsi_map_to_sgl() psgl_count: %u greater than"
+ " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n",
+ sgl_count, TCM_VHOST_PREALLOC_SGLS);
+ return -ENOBUFS;
+ }
+
pages_nr = iov_num_pages(iov);
if (pages_nr > sgl_count)
return -ENOBUFS;
- pages = kmalloc(pages_nr * sizeof(struct page *), GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
+ if (pages_nr > TCM_VHOST_PREALLOC_PAGES) {
+ pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than"
+ " preallocated TCM_VHOST_PREALLOC_PAGES: %u\n",
+ pages_nr, TCM_VHOST_PREALLOC_PAGES);
+ return -ENOBUFS;
+ }
+
+ pages = tv_cmd->tvc_upages;
ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages);
/* No pages were pinned */
@@ -783,7 +809,6 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl,
}
out:
- kfree(pages);
return ret;
}
@@ -807,24 +832,20 @@ vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd,
/* TODO overflow checking */
- sg = kmalloc(sizeof(cmd->tvc_sgl[0]) * sgl_count, GFP_ATOMIC);
- if (!sg)
- return -ENOMEM;
- pr_debug("%s sg %p sgl_count %u is_err %d\n", __func__,
- sg, sgl_count, !sg);
+ sg = cmd->tvc_sgl;
+ pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count);
sg_init_table(sg, sgl_count);
- cmd->tvc_sgl = sg;
cmd->tvc_sgl_count = sgl_count;
pr_debug("Mapping %u iovecs for %u pages\n", niov, sgl_count);
for (i = 0; i < niov; i++) {
- ret = vhost_scsi_map_to_sgl(sg, sgl_count, &iov[i], write);
+ ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i],
+ write);
if (ret < 0) {
for (i = 0; i < cmd->tvc_sgl_count; i++)
put_page(sg_page(&cmd->tvc_sgl[i]));
- kfree(cmd->tvc_sgl);
- cmd->tvc_sgl = NULL;
+
cmd->tvc_sgl_count = 0;
return ret;
}
@@ -989,10 +1010,10 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
for (i = 0; i < data_num; i++)
exp_data_len += vq->iov[data_first + i].iov_len;
- cmd = vhost_scsi_allocate_cmd(vq, tpg, &v_req,
- exp_data_len, data_direction);
+ cmd = vhost_scsi_get_tag(vq, tpg, &v_req,
+ exp_data_len, data_direction);
if (IS_ERR(cmd)) {
- vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n",
+ vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
PTR_ERR(cmd));
goto err_cmd;
}
@@ -1654,11 +1675,31 @@ static void tcm_vhost_drop_nodeacl(struct se_node_acl *se_acl)
kfree(nacl);
}
+static void tcm_vhost_free_cmd_map_res(struct tcm_vhost_nexus *nexus,
+ struct se_session *se_sess)
+{
+ struct tcm_vhost_cmd *tv_cmd;
+ unsigned int i;
+
+ if (!se_sess->sess_cmd_map)
+ return;
+
+ for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) {
+ tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i];
+
+ kfree(tv_cmd->tvc_sgl);
+ kfree(tv_cmd->tvc_upages);
+ }
+}
+
static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg,
const char *name)
{
struct se_portal_group *se_tpg;
+ struct se_session *se_sess;
struct tcm_vhost_nexus *tv_nexus;
+ struct tcm_vhost_cmd *tv_cmd;
+ unsigned int i;
mutex_lock(&tpg->tv_tpg_mutex);
if (tpg->tpg_nexus) {
@@ -1675,14 +1716,37 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg,
return -ENOMEM;
}
/*
- * Initialize the struct se_session pointer
+ * Initialize the struct se_session pointer and setup tagpool
+ * for struct tcm_vhost_cmd descriptors
*/
- tv_nexus->tvn_se_sess = transport_init_session();
+ tv_nexus->tvn_se_sess = transport_init_session_tags(
+ TCM_VHOST_DEFAULT_TAGS,
+ sizeof(struct tcm_vhost_cmd));
if (IS_ERR(tv_nexus->tvn_se_sess)) {
mutex_unlock(&tpg->tv_tpg_mutex);
kfree(tv_nexus);
return -ENOMEM;
}
+ se_sess = tv_nexus->tvn_se_sess;
+ for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) {
+ tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i];
+
+ tv_cmd->tvc_sgl = kzalloc(sizeof(struct scatterlist) *
+ TCM_VHOST_PREALLOC_SGLS, GFP_KERNEL);
+ if (!tv_cmd->tvc_sgl) {
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ pr_err("Unable to allocate tv_cmd->tvc_sgl\n");
+ goto out;
+ }
+
+ tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) *
+ TCM_VHOST_PREALLOC_PAGES, GFP_KERNEL);
+ if (!tv_cmd->tvc_upages) {
+ mutex_unlock(&tpg->tv_tpg_mutex);
+ pr_err("Unable to allocate tv_cmd->tvc_upages\n");
+ goto out;
+ }
+ }
/*
* Since we are running in 'demo mode' this call with generate a
* struct se_node_acl for the tcm_vhost struct se_portal_group with
@@ -1694,9 +1758,7 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg,
mutex_unlock(&tpg->tv_tpg_mutex);
pr_debug("core_tpg_check_initiator_node_acl() failed"
" for %s\n", name);
- transport_free_session(tv_nexus->tvn_se_sess);
- kfree(tv_nexus);
- return -ENOMEM;
+ goto out;
}
/*
* Now register the TCM vhost virtual I_T Nexus as active with the
@@ -1708,6 +1770,12 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg,
mutex_unlock(&tpg->tv_tpg_mutex);
return 0;
+
+out:
+ tcm_vhost_free_cmd_map_res(tv_nexus, se_sess);
+ transport_free_session(se_sess);
+ kfree(tv_nexus);
+ return -ENOMEM;
}
static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg)
@@ -1747,6 +1815,8 @@ static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg)
pr_debug("TCM_vhost_ConfigFS: Removing I_T Nexus to emulated"
" %s Initiator Port: %s\n", tcm_vhost_dump_proto_id(tpg->tport),
tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
+
+ tcm_vhost_free_cmd_map_res(tv_nexus, se_sess);
/*
* Release the SCSI I_T Nexus to the emulated vhost Target Port
*/
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e58cf0001cee..9a9502a4aa50 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -13,7 +13,7 @@
#include <linux/eventfd.h>
#include <linux/vhost.h>
-#include <linux/socket.h> /* memcpy_fromiovec */
+#include <linux/uio.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/miscdevice.h>
@@ -1332,48 +1332,9 @@ EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
* want to notify the guest, using eventfd. */
int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
{
- struct vring_used_elem __user *used;
+ struct vring_used_elem heads = { head, len };
- /* The virtqueue contains a ring of used buffers. Get a pointer to the
- * next entry in that used ring. */
- used = &vq->used->ring[vq->last_used_idx % vq->num];
- if (__put_user(head, &used->id)) {
- vq_err(vq, "Failed to write used id");
- return -EFAULT;
- }
- if (__put_user(len, &used->len)) {
- vq_err(vq, "Failed to write used len");
- return -EFAULT;
- }
- /* Make sure buffer is written before we update index. */
- smp_wmb();
- if (__put_user(vq->last_used_idx + 1, &vq->used->idx)) {
- vq_err(vq, "Failed to increment used idx");
- return -EFAULT;
- }
- if (unlikely(vq->log_used)) {
- /* Make sure data is seen before log. */
- smp_wmb();
- /* Log used ring entry write. */
- log_write(vq->log_base,
- vq->log_addr +
- ((void __user *)used - (void __user *)vq->used),
- sizeof *used);
- /* Log used index update. */
- log_write(vq->log_base,
- vq->log_addr + offsetof(struct vring_used, idx),
- sizeof vq->used->idx);
- if (vq->log_ctx)
- eventfd_signal(vq->log_ctx, 1);
- }
- vq->last_used_idx++;
- /* If the driver never bothers to signal in a very long while,
- * used index might wrap around. If that happens, invalidate
- * signalled_used index we stored. TODO: make sure driver
- * signals at least once in 2^16 and remove this. */
- if (unlikely(vq->last_used_idx == vq->signalled_used))
- vq->signalled_used_valid = false;
- return 0;
+ return vhost_add_used_n(vq, &heads, 1);
}
EXPORT_SYMBOL_GPL(vhost_add_used);
@@ -1387,7 +1348,16 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
start = vq->last_used_idx % vq->num;
used = vq->used->ring + start;
- if (__copy_to_user(used, heads, count * sizeof *used)) {
+ if (count == 1) {
+ if (__put_user(heads[0].id, &used->id)) {
+ vq_err(vq, "Failed to write used id");
+ return -EFAULT;
+ }
+ if (__put_user(heads[0].len, &used->len)) {
+ vq_err(vq, "Failed to write used len");
+ return -EFAULT;
+ }
+ } else if (__copy_to_user(used, heads, count * sizeof *used)) {
vq_err(vq, "Failed to write used");
return -EFAULT;
}