aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/nbd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/nbd.c')
-rw-r--r--drivers/block/nbd.c757
1 files changed, 413 insertions, 344 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 9fd06eeb1a17..d8a23561b4cb 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -41,9 +41,14 @@
#include <linux/nbd.h>
+static DEFINE_IDR(nbd_index_idr);
+static DEFINE_MUTEX(nbd_index_mutex);
+
struct nbd_sock {
struct socket *sock;
struct mutex tx_lock;
+ struct request *pending;
+ int sent;
};
#define NBD_TIMEDOUT 0
@@ -89,8 +94,13 @@ static struct dentry *nbd_dbg_dir;
#define NBD_MAGIC 0x68797548
static unsigned int nbds_max = 16;
-static struct nbd_device *nbd_dev;
static int max_part;
+static struct workqueue_struct *recv_workqueue;
+static int part_shift;
+
+static int nbd_dev_dbg_init(struct nbd_device *nbd);
+static void nbd_dev_dbg_close(struct nbd_device *nbd);
+
static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
@@ -116,7 +126,8 @@ static const char *nbdcmd_to_ascii(int cmd)
static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
{
- bdev->bd_inode->i_size = 0;
+ if (bdev->bd_openers <= 1)
+ bd_set_size(bdev, 0);
set_capacity(nbd->disk, 0);
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
@@ -125,29 +136,20 @@ static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
{
- if (!nbd_is_connected(nbd))
- return;
-
- bdev->bd_inode->i_size = nbd->bytesize;
+ blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize);
+ blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize);
+ bd_set_size(bdev, nbd->bytesize);
set_capacity(nbd->disk, nbd->bytesize >> 9);
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}
-static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
+static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
loff_t blocksize, loff_t nr_blocks)
{
- int ret;
-
- ret = set_blocksize(bdev, blocksize);
- if (ret)
- return ret;
-
nbd->blksize = blocksize;
nbd->bytesize = blocksize * nr_blocks;
-
- nbd_size_update(nbd, bdev);
-
- return 0;
+ if (nbd_is_connected(nbd))
+ nbd_size_update(nbd, bdev);
}
static void nbd_end_request(struct nbd_cmd *cmd)
@@ -191,15 +193,8 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
- req->errors++;
+ req->errors = -EIO;
- /*
- * If our disconnect packet times out then we're already holding the
- * config_lock and could deadlock here, so just set an error and return,
- * we'll handle shutting everything down later.
- */
- if (req->cmd_type == REQ_TYPE_DRV_PRIV)
- return BLK_EH_HANDLED;
mutex_lock(&nbd->config_lock);
sock_shutdown(nbd);
mutex_unlock(&nbd->config_lock);
@@ -209,13 +204,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
/*
* Send or receive packet.
*/
-static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf,
- int size, int msg_flags)
+static int sock_xmit(struct nbd_device *nbd, int index, int send,
+ struct iov_iter *iter, int msg_flags, int *sent)
{
struct socket *sock = nbd->socks[index]->sock;
int result;
struct msghdr msg;
- struct kvec iov;
unsigned long pflags = current->flags;
if (unlikely(!sock)) {
@@ -225,11 +219,11 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf,
return -EINVAL;
}
+ msg.msg_iter = *iter;
+
current->flags |= PF_MEMALLOC;
do {
sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
- iov.iov_base = buf;
- iov.iov_len = size;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_control = NULL;
@@ -237,58 +231,76 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf,
msg.msg_flags = msg_flags | MSG_NOSIGNAL;
if (send)
- result = kernel_sendmsg(sock, &msg, &iov, 1, size);
+ result = sock_sendmsg(sock, &msg);
else
- result = kernel_recvmsg(sock, &msg, &iov, 1, size,
- msg.msg_flags);
+ result = sock_recvmsg(sock, &msg, msg.msg_flags);
if (result <= 0) {
if (result == 0)
result = -EPIPE; /* short read */
break;
}
- size -= result;
- buf += result;
- } while (size > 0);
+ if (sent)
+ *sent += result;
+ } while (msg_data_left(&msg));
tsk_restore_flags(current, pflags, PF_MEMALLOC);
return result;
}
-static inline int sock_send_bvec(struct nbd_device *nbd, int index,
- struct bio_vec *bvec, int flags)
-{
- int result;
- void *kaddr = kmap(bvec->bv_page);
- result = sock_xmit(nbd, index, 1, kaddr + bvec->bv_offset,
- bvec->bv_len, flags);
- kunmap(bvec->bv_page);
- return result;
-}
-
/* always call with the tx_lock held */
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
+ struct nbd_sock *nsock = nbd->socks[index];
int result;
- struct nbd_request request;
+ struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
+ struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
+ struct iov_iter from;
unsigned long size = blk_rq_bytes(req);
struct bio *bio;
u32 type;
u32 tag = blk_mq_unique_tag(req);
+ int sent = nsock->sent, skip = 0;
+
+ iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
- if (req_op(req) == REQ_OP_DISCARD)
+ switch (req_op(req)) {
+ case REQ_OP_DISCARD:
type = NBD_CMD_TRIM;
- else if (req_op(req) == REQ_OP_FLUSH)
+ break;
+ case REQ_OP_FLUSH:
type = NBD_CMD_FLUSH;
- else if (rq_data_dir(req) == WRITE)
+ break;
+ case REQ_OP_WRITE:
type = NBD_CMD_WRITE;
- else
+ break;
+ case REQ_OP_READ:
type = NBD_CMD_READ;
+ break;
+ default:
+ return -EIO;
+ }
- memset(&request, 0, sizeof(request));
- request.magic = htonl(NBD_REQUEST_MAGIC);
+ if (rq_data_dir(req) == WRITE &&
+ (nbd->flags & NBD_FLAG_READ_ONLY)) {
+ dev_err_ratelimited(disk_to_dev(nbd->disk),
+ "Write on read-only\n");
+ return -EIO;
+ }
+
+ /* We did a partial send previously, and we at least sent the whole
+ * request struct, so just go and send the rest of the pages in the
+ * request.
+ */
+ if (sent) {
+ if (sent >= sizeof(request)) {
+ skip = sent - sizeof(request);
+ goto send_pages;
+ }
+ iov_iter_advance(&from, sent);
+ }
request.type = htonl(type);
if (type != NBD_CMD_FLUSH) {
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
@@ -299,16 +311,28 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
cmd, nbdcmd_to_ascii(type),
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
- result = sock_xmit(nbd, index, 1, &request, sizeof(request),
- (type == NBD_CMD_WRITE) ? MSG_MORE : 0);
+ result = sock_xmit(nbd, index, 1, &from,
+ (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
if (result <= 0) {
+ if (result == -ERESTARTSYS) {
+ /* If we havne't sent anything we can just return BUSY,
+ * however if we have sent something we need to make
+ * sure we only allow this req to be sent until we are
+ * completely done.
+ */
+ if (sent) {
+ nsock->pending = req;
+ nsock->sent = sent;
+ }
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ }
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Send control failed (result %d)\n", result);
return -EIO;
}
-
+send_pages:
if (type != NBD_CMD_WRITE)
- return 0;
+ goto out;
bio = req->bio;
while (bio) {
@@ -322,8 +346,27 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
cmd, bvec.bv_len);
- result = sock_send_bvec(nbd, index, &bvec, flags);
+ iov_iter_bvec(&from, ITER_BVEC | WRITE,
+ &bvec, 1, bvec.bv_len);
+ if (skip) {
+ if (skip >= iov_iter_count(&from)) {
+ skip -= iov_iter_count(&from);
+ continue;
+ }
+ iov_iter_advance(&from, skip);
+ skip = 0;
+ }
+ result = sock_xmit(nbd, index, 1, &from, flags, &sent);
if (result <= 0) {
+ if (result == -ERESTARTSYS) {
+ /* We've already sent the header, we
+ * have no choice but to set pending and
+ * return BUSY.
+ */
+ nsock->pending = req;
+ nsock->sent = sent;
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ }
dev_err(disk_to_dev(nbd->disk),
"Send data failed (result %d)\n",
result);
@@ -340,20 +383,12 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
}
bio = next;
}
+out:
+ nsock->pending = NULL;
+ nsock->sent = 0;
return 0;
}
-static inline int sock_recv_bvec(struct nbd_device *nbd, int index,
- struct bio_vec *bvec)
-{
- int result;
- void *kaddr = kmap(bvec->bv_page);
- result = sock_xmit(nbd, index, 0, kaddr + bvec->bv_offset,
- bvec->bv_len, MSG_WAITALL);
- kunmap(bvec->bv_page);
- return result;
-}
-
/* NULL returned = something went wrong, inform userspace */
static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
{
@@ -363,9 +398,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
struct request *req = NULL;
u16 hwq;
u32 tag;
+ struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
+ struct iov_iter to;
reply.magic = 0;
- result = sock_xmit(nbd, index, 0, &reply, sizeof(reply), MSG_WAITALL);
+ iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
+ result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
if (result <= 0) {
if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) &&
!test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
@@ -395,7 +433,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
if (ntohl(reply.error)) {
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
ntohl(reply.error));
- req->errors++;
+ req->errors = -EIO;
return cmd;
}
@@ -405,11 +443,13 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
struct bio_vec bvec;
rq_for_each_segment(bvec, req, iter) {
- result = sock_recv_bvec(nbd, index, &bvec);
+ iov_iter_bvec(&to, ITER_BVEC | READ,
+ &bvec, 1, bvec.bv_len);
+ result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
if (result <= 0) {
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
result);
- req->errors++;
+ req->errors = -EIO;
return cmd;
}
dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
@@ -479,7 +519,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
if (!blk_mq_request_started(req))
return;
cmd = blk_mq_rq_to_pdu(req);
- req->errors++;
+ req->errors = -EIO;
nbd_end_request(cmd);
}
@@ -492,34 +532,23 @@ static void nbd_clear_que(struct nbd_device *nbd)
}
-static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
+static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
struct nbd_device *nbd = cmd->nbd;
struct nbd_sock *nsock;
+ int ret;
if (index >= nbd->num_connections) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on invalid socket\n");
- goto error_out;
+ return -EINVAL;
}
if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on closed socket\n");
- goto error_out;
- }
-
- if (req->cmd_type != REQ_TYPE_FS &&
- req->cmd_type != REQ_TYPE_DRV_PRIV)
- goto error_out;
-
- if (req->cmd_type == REQ_TYPE_FS &&
- rq_data_dir(req) == WRITE &&
- (nbd->flags & NBD_FLAG_READ_ONLY)) {
- dev_err_ratelimited(disk_to_dev(nbd->disk),
- "Write on read-only\n");
- goto error_out;
+ return -EINVAL;
}
req->errors = 0;
@@ -530,29 +559,30 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
mutex_unlock(&nsock->tx_lock);
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on closed socket\n");
- goto error_out;
+ return -EINVAL;
}
- if (nbd_send_cmd(nbd, cmd, index) != 0) {
- dev_err_ratelimited(disk_to_dev(nbd->disk),
- "Request send failed\n");
- req->errors++;
- nbd_end_request(cmd);
+ /* Handle the case that we have a pending request that was partially
+ * transmitted that _has_ to be serviced first. We need to call requeue
+ * here so that it gets put _after_ the request that is already on the
+ * dispatch list.
+ */
+ if (unlikely(nsock->pending && nsock->pending != req)) {
+ blk_mq_requeue_request(req, true);
+ ret = 0;
+ goto out;
}
-
+ ret = nbd_send_cmd(nbd, cmd, index);
+out:
mutex_unlock(&nsock->tx_lock);
-
- return;
-
-error_out:
- req->errors++;
- nbd_end_request(cmd);
+ return ret;
}
static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+ int ret;
/*
* Since we look at the bio's to send the request over the network we
@@ -565,16 +595,33 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
*/
init_completion(&cmd->send_complete);
blk_mq_start_request(bd->rq);
- nbd_handle_cmd(cmd, hctx->queue_num);
+
+ /* We can be called directly from the user space process, which means we
+ * could possibly have signals pending so our sendmsg will fail. In
+ * this case we need to return that we are busy, otherwise error out as
+ * appropriate.
+ */
+ ret = nbd_handle_cmd(cmd, hctx->queue_num);
+ if (ret < 0)
+ ret = BLK_MQ_RQ_QUEUE_ERROR;
+ if (!ret)
+ ret = BLK_MQ_RQ_QUEUE_OK;
complete(&cmd->send_complete);
- return BLK_MQ_RQ_QUEUE_OK;
+ return ret;
}
-static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock)
+static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
+ unsigned long arg)
{
+ struct socket *sock;
struct nbd_sock **socks;
struct nbd_sock *nsock;
+ int err;
+
+ sock = sockfd_lookup(arg, &err);
+ if (!sock)
+ return err;
if (!nbd->task_setup)
nbd->task_setup = current;
@@ -596,33 +643,31 @@ static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock)
mutex_init(&nsock->tx_lock);
nsock->sock = sock;
+ nsock->pending = NULL;
+ nsock->sent = 0;
socks[nbd->num_connections++] = nsock;
+ if (max_part)
+ bdev->bd_invalidated = 1;
return 0;
}
/* Reset all properties of an NBD device */
static void nbd_reset(struct nbd_device *nbd)
{
- int i;
-
- for (i = 0; i < nbd->num_connections; i++)
- kfree(nbd->socks[i]);
- kfree(nbd->socks);
- nbd->socks = NULL;
nbd->runtime_flags = 0;
nbd->blksize = 1024;
nbd->bytesize = 0;
set_capacity(nbd->disk, 0);
nbd->flags = 0;
nbd->tag_set.timeout = 0;
- nbd->num_connections = 0;
- nbd->task_setup = NULL;
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
}
static void nbd_bdev_reset(struct block_device *bdev)
{
+ if (bdev->bd_openers > 1)
+ return;
set_device_ro(bdev, false);
bdev->bd_inode->i_size = 0;
if (max_part > 0) {
@@ -645,181 +690,181 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
static void send_disconnects(struct nbd_device *nbd)
{
- struct nbd_request request = {};
+ struct nbd_request request = {
+ .magic = htonl(NBD_REQUEST_MAGIC),
+ .type = htonl(NBD_CMD_DISC),
+ };
+ struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
+ struct iov_iter from;
int i, ret;
- request.magic = htonl(NBD_REQUEST_MAGIC);
- request.type = htonl(NBD_CMD_DISC);
-
for (i = 0; i < nbd->num_connections; i++) {
- ret = sock_xmit(nbd, i, 1, &request, sizeof(request), 0);
+ iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+ ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
if (ret <= 0)
dev_err(disk_to_dev(nbd->disk),
"Send disconnect failed %d\n", ret);
}
}
-static int nbd_dev_dbg_init(struct nbd_device *nbd);
-static void nbd_dev_dbg_close(struct nbd_device *nbd);
+static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev)
+{
+ dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
+ if (!nbd->socks)
+ return -EINVAL;
-/* Must be called with config_lock held */
-static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
- unsigned int cmd, unsigned long arg)
+ mutex_unlock(&nbd->config_lock);
+ fsync_bdev(bdev);
+ mutex_lock(&nbd->config_lock);
+
+ /* Check again after getting mutex back. */
+ if (!nbd->socks)
+ return -EINVAL;
+
+ if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
+ &nbd->runtime_flags))
+ send_disconnects(nbd);
+ return 0;
+}
+
+static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
{
- switch (cmd) {
- case NBD_DISCONNECT: {
- dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
- if (!nbd->socks)
- return -EINVAL;
-
- mutex_unlock(&nbd->config_lock);
- fsync_bdev(bdev);
- mutex_lock(&nbd->config_lock);
-
- /* Check again after getting mutex back. */
- if (!nbd->socks)
- return -EINVAL;
-
- if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
- &nbd->runtime_flags))
- send_disconnects(nbd);
- return 0;
- }
+ sock_shutdown(nbd);
+ nbd_clear_que(nbd);
- case NBD_CLEAR_SOCK:
- sock_shutdown(nbd);
- nbd_clear_que(nbd);
- kill_bdev(bdev);
- nbd_bdev_reset(bdev);
- /*
- * We want to give the run thread a chance to wait for everybody
- * to clean up and then do it's own cleanup.
- */
- if (!test_bit(NBD_RUNNING, &nbd->runtime_flags)) {
- int i;
-
- for (i = 0; i < nbd->num_connections; i++)
- kfree(nbd->socks[i]);
- kfree(nbd->socks);
- nbd->socks = NULL;
- nbd->num_connections = 0;
- nbd->task_setup = NULL;
+ __invalidate_device(bdev, true);
+ nbd_bdev_reset(bdev);
+ /*
+ * We want to give the run thread a chance to wait for everybody
+ * to clean up and then do it's own cleanup.
+ */
+ if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) &&
+ nbd->num_connections) {
+ int i;
+
+ for (i = 0; i < nbd->num_connections; i++) {
+ sockfd_put(nbd->socks[i]->sock);
+ kfree(nbd->socks[i]);
}
- return 0;
+ kfree(nbd->socks);
+ nbd->socks = NULL;
+ nbd->num_connections = 0;
+ }
+ nbd->task_setup = NULL;
- case NBD_SET_SOCK: {
- int err;
- struct socket *sock = sockfd_lookup(arg, &err);
+ return 0;
+}
+
+static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev)
+{
+ struct recv_thread_args *args;
+ int num_connections = nbd->num_connections;
+ int error = 0, i;
+
+ if (nbd->task_recv)
+ return -EBUSY;
+ if (!nbd->socks)
+ return -EINVAL;
+ if (num_connections > 1 &&
+ !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
+ dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
+ error = -EINVAL;
+ goto out_err;
+ }
- if (!sock)
- return err;
+ set_bit(NBD_RUNNING, &nbd->runtime_flags);
+ blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
+ args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
+ if (!args) {
+ error = -ENOMEM;
+ goto out_err;
+ }
+ nbd->task_recv = current;
+ mutex_unlock(&nbd->config_lock);
- err = nbd_add_socket(nbd, sock);
- if (!err && max_part)
- bdev->bd_invalidated = 1;
+ nbd_parse_flags(nbd, bdev);
- return err;
+ error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
+ if (error) {
+ dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
+ goto out_recv;
}
- case NBD_SET_BLKSIZE: {
- loff_t bsize = div_s64(nbd->bytesize, arg);
+ nbd_size_update(nbd, bdev);
- return nbd_size_set(nbd, bdev, arg, bsize);
+ nbd_dev_dbg_init(nbd);
+ for (i = 0; i < num_connections; i++) {
+ sk_set_memalloc(nbd->socks[i]->sock->sk);
+ atomic_inc(&nbd->recv_threads);
+ INIT_WORK(&args[i].work, recv_work);
+ args[i].nbd = nbd;
+ args[i].index = i;
+ queue_work(recv_workqueue, &args[i].work);
}
+ wait_event_interruptible(nbd->recv_wq,
+ atomic_read(&nbd->recv_threads) == 0);
+ for (i = 0; i < num_connections; i++)
+ flush_work(&args[i].work);
+ nbd_dev_dbg_close(nbd);
+ nbd_size_clear(nbd, bdev);
+ device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+out_recv:
+ mutex_lock(&nbd->config_lock);
+ nbd->task_recv = NULL;
+out_err:
+ clear_bit(NBD_RUNNING, &nbd->runtime_flags);
+ nbd_clear_sock(nbd, bdev);
- case NBD_SET_SIZE:
- return nbd_size_set(nbd, bdev, nbd->blksize,
- div_s64(arg, nbd->blksize));
+ /* user requested, ignore socket errors */
+ if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
+ error = 0;
+ if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
+ error = -ETIMEDOUT;
- case NBD_SET_SIZE_BLOCKS:
- return nbd_size_set(nbd, bdev, nbd->blksize, arg);
+ nbd_reset(nbd);
+ return error;
+}
+/* Must be called with config_lock held */
+static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
+ unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case NBD_DISCONNECT:
+ return nbd_disconnect(nbd, bdev);
+ case NBD_CLEAR_SOCK:
+ return nbd_clear_sock(nbd, bdev);
+ case NBD_SET_SOCK:
+ return nbd_add_socket(nbd, bdev, arg);
+ case NBD_SET_BLKSIZE:
+ nbd_size_set(nbd, bdev, arg,
+ div_s64(nbd->bytesize, arg));
+ return 0;
+ case NBD_SET_SIZE:
+ nbd_size_set(nbd, bdev, nbd->blksize,
+ div_s64(arg, nbd->blksize));
+ return 0;
+ case NBD_SET_SIZE_BLOCKS:
+ nbd_size_set(nbd, bdev, nbd->blksize, arg);
+ return 0;
case NBD_SET_TIMEOUT:
- nbd->tag_set.timeout = arg * HZ;
+ if (arg) {
+ nbd->tag_set.timeout = arg * HZ;
+ blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
+ }
return 0;
case NBD_SET_FLAGS:
nbd->flags = arg;
return 0;
-
- case NBD_DO_IT: {
- struct recv_thread_args *args;
- int num_connections = nbd->num_connections;
- int error = 0, i;
-
- if (nbd->task_recv)
- return -EBUSY;
- if (!nbd->socks)
- return -EINVAL;
- if (num_connections > 1 &&
- !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
- dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
- error = -EINVAL;
- goto out_err;
- }
-
- set_bit(NBD_RUNNING, &nbd->runtime_flags);
- blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
- args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
- if (!args) {
- error = -ENOMEM;
- goto out_err;
- }
- nbd->task_recv = current;
- mutex_unlock(&nbd->config_lock);
-
- nbd_parse_flags(nbd, bdev);
-
- error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
- if (error) {
- dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
- goto out_recv;
- }
-
- nbd_size_update(nbd, bdev);
-
- nbd_dev_dbg_init(nbd);
- for (i = 0; i < num_connections; i++) {
- sk_set_memalloc(nbd->socks[i]->sock->sk);
- atomic_inc(&nbd->recv_threads);
- INIT_WORK(&args[i].work, recv_work);
- args[i].nbd = nbd;
- args[i].index = i;
- queue_work(system_long_wq, &args[i].work);
- }
- wait_event_interruptible(nbd->recv_wq,
- atomic_read(&nbd->recv_threads) == 0);
- for (i = 0; i < num_connections; i++)
- flush_work(&args[i].work);
- nbd_dev_dbg_close(nbd);
- nbd_size_clear(nbd, bdev);
- device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
-out_recv:
- mutex_lock(&nbd->config_lock);
- nbd->task_recv = NULL;
-out_err:
- sock_shutdown(nbd);
- nbd_clear_que(nbd);
- kill_bdev(bdev);
- nbd_bdev_reset(bdev);
-
- /* user requested, ignore socket errors */
- if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
- error = 0;
- if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
- error = -ETIMEDOUT;
-
- nbd_reset(nbd);
- return error;
- }
-
+ case NBD_DO_IT:
+ return nbd_start_device(nbd, bdev);
case NBD_CLEAR_QUE:
/*
* This is for compatibility only. The queue is always cleared
* by NBD_DO_IT or NBD_CLEAR_SOCK.
*/
return 0;
-
case NBD_PRINT_DEBUG:
/*
* For compatibility only, we no longer keep a list of
@@ -996,6 +1041,103 @@ static struct blk_mq_ops nbd_mq_ops = {
.timeout = nbd_xmit_timeout,
};
+static void nbd_dev_remove(struct nbd_device *nbd)
+{
+ struct gendisk *disk = nbd->disk;
+ nbd->magic = 0;
+ if (disk) {
+ del_gendisk(disk);
+ blk_cleanup_queue(disk->queue);
+ blk_mq_free_tag_set(&nbd->tag_set);
+ put_disk(disk);
+ }
+ kfree(nbd);
+}
+
+static int nbd_dev_add(int index)
+{
+ struct nbd_device *nbd;
+ struct gendisk *disk;
+ struct request_queue *q;
+ int err = -ENOMEM;
+
+ nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
+ if (!nbd)
+ goto out;
+
+ disk = alloc_disk(1 << part_shift);
+ if (!disk)
+ goto out_free_nbd;
+
+ if (index >= 0) {
+ err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
+ GFP_KERNEL);
+ if (err == -ENOSPC)
+ err = -EEXIST;
+ } else {
+ err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
+ if (err >= 0)
+ index = err;
+ }
+ if (err < 0)
+ goto out_free_disk;
+
+ nbd->disk = disk;
+ nbd->tag_set.ops = &nbd_mq_ops;
+ nbd->tag_set.nr_hw_queues = 1;
+ nbd->tag_set.queue_depth = 128;
+ nbd->tag_set.numa_node = NUMA_NO_NODE;
+ nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
+ nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
+ BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
+ nbd->tag_set.driver_data = nbd;
+
+ err = blk_mq_alloc_tag_set(&nbd->tag_set);
+ if (err)
+ goto out_free_idr;
+
+ q = blk_mq_init_queue(&nbd->tag_set);
+ if (IS_ERR(q)) {
+ err = PTR_ERR(q);
+ goto out_free_tags;
+ }
+ disk->queue = q;
+
+ /*
+ * Tell the block layer that we are not a rotational device
+ */
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
+ queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+ disk->queue->limits.discard_granularity = 512;
+ blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
+ disk->queue->limits.discard_zeroes_data = 0;
+ blk_queue_max_hw_sectors(disk->queue, 65536);
+ disk->queue->limits.max_sectors = 256;
+
+ nbd->magic = NBD_MAGIC;
+ mutex_init(&nbd->config_lock);
+ disk->major = NBD_MAJOR;
+ disk->first_minor = index << part_shift;
+ disk->fops = &nbd_fops;
+ disk->private_data = nbd;
+ sprintf(disk->disk_name, "nbd%d", index);
+ init_waitqueue_head(&nbd->recv_wq);
+ nbd_reset(nbd);
+ add_disk(disk);
+ return index;
+
+out_free_tags:
+ blk_mq_free_tag_set(&nbd->tag_set);
+out_free_idr:
+ idr_remove(&nbd_index_idr, index);
+out_free_disk:
+ put_disk(disk);
+out_free_nbd:
+ kfree(nbd);
+out:
+ return err;
+}
+
/*
* And here should be modules and kernel interface
* (Just smiley confuses emacs :-)
@@ -1003,9 +1145,7 @@ static struct blk_mq_ops nbd_mq_ops = {
static int __init nbd_init(void)
{
- int err = -ENOMEM;
int i;
- int part_shift;
BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
@@ -1034,111 +1174,40 @@ static int __init nbd_init(void)
if (nbds_max > 1UL << (MINORBITS - part_shift))
return -EINVAL;
-
- nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
- if (!nbd_dev)
+ recv_workqueue = alloc_workqueue("knbd-recv",
+ WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+ if (!recv_workqueue)
return -ENOMEM;
- for (i = 0; i < nbds_max; i++) {
- struct request_queue *q;
- struct gendisk *disk = alloc_disk(1 << part_shift);
- if (!disk)
- goto out;
- nbd_dev[i].disk = disk;
-
- nbd_dev[i].tag_set.ops = &nbd_mq_ops;
- nbd_dev[i].tag_set.nr_hw_queues = 1;
- nbd_dev[i].tag_set.queue_depth = 128;
- nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE;
- nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd);
- nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
- BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
- nbd_dev[i].tag_set.driver_data = &nbd_dev[i];
-
- err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set);
- if (err) {
- put_disk(disk);
- goto out;
- }
-
- /*
- * The new linux 2.5 block layer implementation requires
- * every gendisk to have its very own request_queue struct.
- * These structs are big so we dynamically allocate them.
- */
- q = blk_mq_init_queue(&nbd_dev[i].tag_set);
- if (IS_ERR(q)) {
- blk_mq_free_tag_set(&nbd_dev[i].tag_set);
- put_disk(disk);
- goto out;
- }
- disk->queue = q;
-
- /*
- * Tell the block layer that we are not a rotational device
- */
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
- disk->queue->limits.discard_granularity = 512;
- blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
- disk->queue->limits.discard_zeroes_data = 0;
- blk_queue_max_hw_sectors(disk->queue, 65536);
- disk->queue->limits.max_sectors = 256;
- }
-
if (register_blkdev(NBD_MAJOR, "nbd")) {
- err = -EIO;
- goto out;
+ destroy_workqueue(recv_workqueue);
+ return -EIO;
}
- printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);
-
nbd_dbg_init();
- for (i = 0; i < nbds_max; i++) {
- struct gendisk *disk = nbd_dev[i].disk;
- nbd_dev[i].magic = NBD_MAGIC;
- mutex_init(&nbd_dev[i].config_lock);
- disk->major = NBD_MAJOR;
- disk->first_minor = i << part_shift;
- disk->fops = &nbd_fops;
- disk->private_data = &nbd_dev[i];
- sprintf(disk->disk_name, "nbd%d", i);
- init_waitqueue_head(&nbd_dev[i].recv_wq);
- nbd_reset(&nbd_dev[i]);
- add_disk(disk);
- }
+ mutex_lock(&nbd_index_mutex);
+ for (i = 0; i < nbds_max; i++)
+ nbd_dev_add(i);
+ mutex_unlock(&nbd_index_mutex);
+ return 0;
+}
+static int nbd_exit_cb(int id, void *ptr, void *data)
+{
+ struct nbd_device *nbd = ptr;
+ nbd_dev_remove(nbd);
return 0;
-out:
- while (i--) {
- blk_mq_free_tag_set(&nbd_dev[i].tag_set);
- blk_cleanup_queue(nbd_dev[i].disk->queue);
- put_disk(nbd_dev[i].disk);
- }
- kfree(nbd_dev);
- return err;
}
static void __exit nbd_cleanup(void)
{
- int i;
-
nbd_dbg_close();
- for (i = 0; i < nbds_max; i++) {
- struct gendisk *disk = nbd_dev[i].disk;
- nbd_dev[i].magic = 0;
- if (disk) {
- del_gendisk(disk);
- blk_cleanup_queue(disk->queue);
- blk_mq_free_tag_set(&nbd_dev[i].tag_set);
- put_disk(disk);
- }
- }
+ idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL);
+ idr_destroy(&nbd_index_idr);
+ destroy_workqueue(recv_workqueue);
unregister_blkdev(NBD_MAJOR, "nbd");
- kfree(nbd_dev);
- printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR);
}
module_init(nbd_init);