diff options
Diffstat (limited to 'drivers/block/nbd.c')
-rw-r--r-- | drivers/block/nbd.c | 757 |
1 files changed, 413 insertions, 344 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9fd06eeb1a17..d8a23561b4cb 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -41,9 +41,14 @@ #include <linux/nbd.h> +static DEFINE_IDR(nbd_index_idr); +static DEFINE_MUTEX(nbd_index_mutex); + struct nbd_sock { struct socket *sock; struct mutex tx_lock; + struct request *pending; + int sent; }; #define NBD_TIMEDOUT 0 @@ -89,8 +94,13 @@ static struct dentry *nbd_dbg_dir; #define NBD_MAGIC 0x68797548 static unsigned int nbds_max = 16; -static struct nbd_device *nbd_dev; static int max_part; +static struct workqueue_struct *recv_workqueue; +static int part_shift; + +static int nbd_dev_dbg_init(struct nbd_device *nbd); +static void nbd_dev_dbg_close(struct nbd_device *nbd); + static inline struct device *nbd_to_dev(struct nbd_device *nbd) { @@ -116,7 +126,8 @@ static const char *nbdcmd_to_ascii(int cmd) static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) { - bdev->bd_inode->i_size = 0; + if (bdev->bd_openers <= 1) + bd_set_size(bdev, 0); set_capacity(nbd->disk, 0); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); @@ -125,29 +136,20 @@ static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev) { - if (!nbd_is_connected(nbd)) - return; - - bdev->bd_inode->i_size = nbd->bytesize; + blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize); + blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize); + bd_set_size(bdev, nbd->bytesize); set_capacity(nbd->disk, nbd->bytesize >> 9); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); } -static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, +static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, loff_t blocksize, loff_t nr_blocks) { - int ret; - - ret = set_blocksize(bdev, blocksize); - if (ret) - return ret; - nbd->blksize = blocksize; nbd->bytesize = blocksize * nr_blocks; - - nbd_size_update(nbd, bdev); - - return 0; + if (nbd_is_connected(nbd)) + nbd_size_update(nbd, bdev); } static void nbd_end_request(struct nbd_cmd *cmd) @@ -191,15 +193,8 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); - req->errors++; + req->errors = -EIO; - /* - * If our disconnect packet times out then we're already holding the - * config_lock and could deadlock here, so just set an error and return, - * we'll handle shutting everything down later. - */ - if (req->cmd_type == REQ_TYPE_DRV_PRIV) - return BLK_EH_HANDLED; mutex_lock(&nbd->config_lock); sock_shutdown(nbd); mutex_unlock(&nbd->config_lock); @@ -209,13 +204,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, /* * Send or receive packet. */ -static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, - int size, int msg_flags) +static int sock_xmit(struct nbd_device *nbd, int index, int send, + struct iov_iter *iter, int msg_flags, int *sent) { struct socket *sock = nbd->socks[index]->sock; int result; struct msghdr msg; - struct kvec iov; unsigned long pflags = current->flags; if (unlikely(!sock)) { @@ -225,11 +219,11 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, return -EINVAL; } + msg.msg_iter = *iter; + current->flags |= PF_MEMALLOC; do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; - iov.iov_base = buf; - iov.iov_len = size; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = NULL; @@ -237,58 +231,76 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (send) - result = kernel_sendmsg(sock, &msg, &iov, 1, size); + result = sock_sendmsg(sock, &msg); else - result = kernel_recvmsg(sock, &msg, &iov, 1, size, - msg.msg_flags); + result = sock_recvmsg(sock, &msg, msg.msg_flags); if (result <= 0) { if (result == 0) result = -EPIPE; /* short read */ break; } - size -= result; - buf += result; - } while (size > 0); + if (sent) + *sent += result; + } while (msg_data_left(&msg)); tsk_restore_flags(current, pflags, PF_MEMALLOC); return result; } -static inline int sock_send_bvec(struct nbd_device *nbd, int index, - struct bio_vec *bvec, int flags) -{ - int result; - void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, index, 1, kaddr + bvec->bv_offset, - bvec->bv_len, flags); - kunmap(bvec->bv_page); - return result; -} - /* always call with the tx_lock held */ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); + struct nbd_sock *nsock = nbd->socks[index]; int result; - struct nbd_request request; + struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; + struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; + struct iov_iter from; unsigned long size = blk_rq_bytes(req); struct bio *bio; u32 type; u32 tag = blk_mq_unique_tag(req); + int sent = nsock->sent, skip = 0; + + iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); - if (req_op(req) == REQ_OP_DISCARD) + switch (req_op(req)) { + case REQ_OP_DISCARD: type = NBD_CMD_TRIM; - else if (req_op(req) == REQ_OP_FLUSH) + break; + case REQ_OP_FLUSH: type = NBD_CMD_FLUSH; - else if (rq_data_dir(req) == WRITE) + break; + case REQ_OP_WRITE: type = NBD_CMD_WRITE; - else + break; + case REQ_OP_READ: type = NBD_CMD_READ; + break; + default: + return -EIO; + } - memset(&request, 0, sizeof(request)); - request.magic = htonl(NBD_REQUEST_MAGIC); + if (rq_data_dir(req) == WRITE && + (nbd->flags & NBD_FLAG_READ_ONLY)) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Write on read-only\n"); + return -EIO; + } + + /* We did a partial send previously, and we at least sent the whole + * request struct, so just go and send the rest of the pages in the + * request. + */ + if (sent) { + if (sent >= sizeof(request)) { + skip = sent - sizeof(request); + goto send_pages; + } + iov_iter_advance(&from, sent); + } request.type = htonl(type); if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); @@ -299,16 +311,28 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", cmd, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); - result = sock_xmit(nbd, index, 1, &request, sizeof(request), - (type == NBD_CMD_WRITE) ? MSG_MORE : 0); + result = sock_xmit(nbd, index, 1, &from, + (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); if (result <= 0) { + if (result == -ERESTARTSYS) { + /* If we havne't sent anything we can just return BUSY, + * however if we have sent something we need to make + * sure we only allow this req to be sent until we are + * completely done. + */ + if (sent) { + nsock->pending = req; + nsock->sent = sent; + } + return BLK_MQ_RQ_QUEUE_BUSY; + } dev_err_ratelimited(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); return -EIO; } - +send_pages: if (type != NBD_CMD_WRITE) - return 0; + goto out; bio = req->bio; while (bio) { @@ -322,8 +346,27 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", cmd, bvec.bv_len); - result = sock_send_bvec(nbd, index, &bvec, flags); + iov_iter_bvec(&from, ITER_BVEC | WRITE, + &bvec, 1, bvec.bv_len); + if (skip) { + if (skip >= iov_iter_count(&from)) { + skip -= iov_iter_count(&from); + continue; + } + iov_iter_advance(&from, skip); + skip = 0; + } + result = sock_xmit(nbd, index, 1, &from, flags, &sent); if (result <= 0) { + if (result == -ERESTARTSYS) { + /* We've already sent the header, we + * have no choice but to set pending and + * return BUSY. + */ + nsock->pending = req; + nsock->sent = sent; + return BLK_MQ_RQ_QUEUE_BUSY; + } dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", result); @@ -340,20 +383,12 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) } bio = next; } +out: + nsock->pending = NULL; + nsock->sent = 0; return 0; } -static inline int sock_recv_bvec(struct nbd_device *nbd, int index, - struct bio_vec *bvec) -{ - int result; - void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, index, 0, kaddr + bvec->bv_offset, - bvec->bv_len, MSG_WAITALL); - kunmap(bvec->bv_page); - return result; -} - /* NULL returned = something went wrong, inform userspace */ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) { @@ -363,9 +398,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct request *req = NULL; u16 hwq; u32 tag; + struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; + struct iov_iter to; reply.magic = 0; - result = sock_xmit(nbd, index, 0, &reply, sizeof(reply), MSG_WAITALL); + iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) @@ -395,7 +433,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); - req->errors++; + req->errors = -EIO; return cmd; } @@ -405,11 +443,13 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct bio_vec bvec; rq_for_each_segment(bvec, req, iter) { - result = sock_recv_bvec(nbd, index, &bvec); + iov_iter_bvec(&to, ITER_BVEC | READ, + &bvec, 1, bvec.bv_len); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); - req->errors++; + req->errors = -EIO; return cmd; } dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", @@ -479,7 +519,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved) if (!blk_mq_request_started(req)) return; cmd = blk_mq_rq_to_pdu(req); - req->errors++; + req->errors = -EIO; nbd_end_request(cmd); } @@ -492,34 +532,23 @@ static void nbd_clear_que(struct nbd_device *nbd) } -static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) +static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); struct nbd_device *nbd = cmd->nbd; struct nbd_sock *nsock; + int ret; if (index >= nbd->num_connections) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on invalid socket\n"); - goto error_out; + return -EINVAL; } if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on closed socket\n"); - goto error_out; - } - - if (req->cmd_type != REQ_TYPE_FS && - req->cmd_type != REQ_TYPE_DRV_PRIV) - goto error_out; - - if (req->cmd_type == REQ_TYPE_FS && - rq_data_dir(req) == WRITE && - (nbd->flags & NBD_FLAG_READ_ONLY)) { - dev_err_ratelimited(disk_to_dev(nbd->disk), - "Write on read-only\n"); - goto error_out; + return -EINVAL; } req->errors = 0; @@ -530,29 +559,30 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) mutex_unlock(&nsock->tx_lock); dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on closed socket\n"); - goto error_out; + return -EINVAL; } - if (nbd_send_cmd(nbd, cmd, index) != 0) { - dev_err_ratelimited(disk_to_dev(nbd->disk), - "Request send failed\n"); - req->errors++; - nbd_end_request(cmd); + /* Handle the case that we have a pending request that was partially + * transmitted that _has_ to be serviced first. We need to call requeue + * here so that it gets put _after_ the request that is already on the + * dispatch list. + */ + if (unlikely(nsock->pending && nsock->pending != req)) { + blk_mq_requeue_request(req, true); + ret = 0; + goto out; } - + ret = nbd_send_cmd(nbd, cmd, index); +out: mutex_unlock(&nsock->tx_lock); - - return; - -error_out: - req->errors++; - nbd_end_request(cmd); + return ret; } static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + int ret; /* * Since we look at the bio's to send the request over the network we @@ -565,16 +595,33 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, */ init_completion(&cmd->send_complete); blk_mq_start_request(bd->rq); - nbd_handle_cmd(cmd, hctx->queue_num); + + /* We can be called directly from the user space process, which means we + * could possibly have signals pending so our sendmsg will fail. In + * this case we need to return that we are busy, otherwise error out as + * appropriate. + */ + ret = nbd_handle_cmd(cmd, hctx->queue_num); + if (ret < 0) + ret = BLK_MQ_RQ_QUEUE_ERROR; + if (!ret) + ret = BLK_MQ_RQ_QUEUE_OK; complete(&cmd->send_complete); - return BLK_MQ_RQ_QUEUE_OK; + return ret; } -static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock) +static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, + unsigned long arg) { + struct socket *sock; struct nbd_sock **socks; struct nbd_sock *nsock; + int err; + + sock = sockfd_lookup(arg, &err); + if (!sock) + return err; if (!nbd->task_setup) nbd->task_setup = current; @@ -596,33 +643,31 @@ static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock) mutex_init(&nsock->tx_lock); nsock->sock = sock; + nsock->pending = NULL; + nsock->sent = 0; socks[nbd->num_connections++] = nsock; + if (max_part) + bdev->bd_invalidated = 1; return 0; } /* Reset all properties of an NBD device */ static void nbd_reset(struct nbd_device *nbd) { - int i; - - for (i = 0; i < nbd->num_connections; i++) - kfree(nbd->socks[i]); - kfree(nbd->socks); - nbd->socks = NULL; nbd->runtime_flags = 0; nbd->blksize = 1024; nbd->bytesize = 0; set_capacity(nbd->disk, 0); nbd->flags = 0; nbd->tag_set.timeout = 0; - nbd->num_connections = 0; - nbd->task_setup = NULL; queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); } static void nbd_bdev_reset(struct block_device *bdev) { + if (bdev->bd_openers > 1) + return; set_device_ro(bdev, false); bdev->bd_inode->i_size = 0; if (max_part > 0) { @@ -645,181 +690,181 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) static void send_disconnects(struct nbd_device *nbd) { - struct nbd_request request = {}; + struct nbd_request request = { + .magic = htonl(NBD_REQUEST_MAGIC), + .type = htonl(NBD_CMD_DISC), + }; + struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; + struct iov_iter from; int i, ret; - request.magic = htonl(NBD_REQUEST_MAGIC); - request.type = htonl(NBD_CMD_DISC); - for (i = 0; i < nbd->num_connections; i++) { - ret = sock_xmit(nbd, i, 1, &request, sizeof(request), 0); + iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); + ret = sock_xmit(nbd, i, 1, &from, 0, NULL); if (ret <= 0) dev_err(disk_to_dev(nbd->disk), "Send disconnect failed %d\n", ret); } } -static int nbd_dev_dbg_init(struct nbd_device *nbd); -static void nbd_dev_dbg_close(struct nbd_device *nbd); +static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev) +{ + dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); + if (!nbd->socks) + return -EINVAL; -/* Must be called with config_lock held */ -static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, - unsigned int cmd, unsigned long arg) + mutex_unlock(&nbd->config_lock); + fsync_bdev(bdev); + mutex_lock(&nbd->config_lock); + + /* Check again after getting mutex back. */ + if (!nbd->socks) + return -EINVAL; + + if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED, + &nbd->runtime_flags)) + send_disconnects(nbd); + return 0; +} + +static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev) { - switch (cmd) { - case NBD_DISCONNECT: { - dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); - if (!nbd->socks) - return -EINVAL; - - mutex_unlock(&nbd->config_lock); - fsync_bdev(bdev); - mutex_lock(&nbd->config_lock); - - /* Check again after getting mutex back. */ - if (!nbd->socks) - return -EINVAL; - - if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED, - &nbd->runtime_flags)) - send_disconnects(nbd); - return 0; - } + sock_shutdown(nbd); + nbd_clear_que(nbd); - case NBD_CLEAR_SOCK: - sock_shutdown(nbd); - nbd_clear_que(nbd); - kill_bdev(bdev); - nbd_bdev_reset(bdev); - /* - * We want to give the run thread a chance to wait for everybody - * to clean up and then do it's own cleanup. - */ - if (!test_bit(NBD_RUNNING, &nbd->runtime_flags)) { - int i; - - for (i = 0; i < nbd->num_connections; i++) - kfree(nbd->socks[i]); - kfree(nbd->socks); - nbd->socks = NULL; - nbd->num_connections = 0; - nbd->task_setup = NULL; + __invalidate_device(bdev, true); + nbd_bdev_reset(bdev); + /* + * We want to give the run thread a chance to wait for everybody + * to clean up and then do it's own cleanup. + */ + if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) && + nbd->num_connections) { + int i; + + for (i = 0; i < nbd->num_connections; i++) { + sockfd_put(nbd->socks[i]->sock); + kfree(nbd->socks[i]); } - return 0; + kfree(nbd->socks); + nbd->socks = NULL; + nbd->num_connections = 0; + } + nbd->task_setup = NULL; - case NBD_SET_SOCK: { - int err; - struct socket *sock = sockfd_lookup(arg, &err); + return 0; +} + +static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev) +{ + struct recv_thread_args *args; + int num_connections = nbd->num_connections; + int error = 0, i; + + if (nbd->task_recv) + return -EBUSY; + if (!nbd->socks) + return -EINVAL; + if (num_connections > 1 && + !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) { + dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); + error = -EINVAL; + goto out_err; + } - if (!sock) - return err; + set_bit(NBD_RUNNING, &nbd->runtime_flags); + blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections); + args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL); + if (!args) { + error = -ENOMEM; + goto out_err; + } + nbd->task_recv = current; + mutex_unlock(&nbd->config_lock); - err = nbd_add_socket(nbd, sock); - if (!err && max_part) - bdev->bd_invalidated = 1; + nbd_parse_flags(nbd, bdev); - return err; + error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); + if (error) { + dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); + goto out_recv; } - case NBD_SET_BLKSIZE: { - loff_t bsize = div_s64(nbd->bytesize, arg); + nbd_size_update(nbd, bdev); - return nbd_size_set(nbd, bdev, arg, bsize); + nbd_dev_dbg_init(nbd); + for (i = 0; i < num_connections; i++) { + sk_set_memalloc(nbd->socks[i]->sock->sk); + atomic_inc(&nbd->recv_threads); + INIT_WORK(&args[i].work, recv_work); + args[i].nbd = nbd; + args[i].index = i; + queue_work(recv_workqueue, &args[i].work); } + wait_event_interruptible(nbd->recv_wq, + atomic_read(&nbd->recv_threads) == 0); + for (i = 0; i < num_connections; i++) + flush_work(&args[i].work); + nbd_dev_dbg_close(nbd); + nbd_size_clear(nbd, bdev); + device_remove_file(disk_to_dev(nbd->disk), &pid_attr); +out_recv: + mutex_lock(&nbd->config_lock); + nbd->task_recv = NULL; +out_err: + clear_bit(NBD_RUNNING, &nbd->runtime_flags); + nbd_clear_sock(nbd, bdev); - case NBD_SET_SIZE: - return nbd_size_set(nbd, bdev, nbd->blksize, - div_s64(arg, nbd->blksize)); + /* user requested, ignore socket errors */ + if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) + error = 0; + if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags)) + error = -ETIMEDOUT; - case NBD_SET_SIZE_BLOCKS: - return nbd_size_set(nbd, bdev, nbd->blksize, arg); + nbd_reset(nbd); + return error; +} +/* Must be called with config_lock held */ +static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, + unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case NBD_DISCONNECT: + return nbd_disconnect(nbd, bdev); + case NBD_CLEAR_SOCK: + return nbd_clear_sock(nbd, bdev); + case NBD_SET_SOCK: + return nbd_add_socket(nbd, bdev, arg); + case NBD_SET_BLKSIZE: + nbd_size_set(nbd, bdev, arg, + div_s64(nbd->bytesize, arg)); + return 0; + case NBD_SET_SIZE: + nbd_size_set(nbd, bdev, nbd->blksize, + div_s64(arg, nbd->blksize)); + return 0; + case NBD_SET_SIZE_BLOCKS: + nbd_size_set(nbd, bdev, nbd->blksize, arg); + return 0; case NBD_SET_TIMEOUT: - nbd->tag_set.timeout = arg * HZ; + if (arg) { + nbd->tag_set.timeout = arg * HZ; + blk_queue_rq_timeout(nbd->disk->queue, arg * HZ); + } return 0; case NBD_SET_FLAGS: nbd->flags = arg; return 0; - - case NBD_DO_IT: { - struct recv_thread_args *args; - int num_connections = nbd->num_connections; - int error = 0, i; - - if (nbd->task_recv) - return -EBUSY; - if (!nbd->socks) - return -EINVAL; - if (num_connections > 1 && - !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) { - dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); - error = -EINVAL; - goto out_err; - } - - set_bit(NBD_RUNNING, &nbd->runtime_flags); - blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections); - args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL); - if (!args) { - error = -ENOMEM; - goto out_err; - } - nbd->task_recv = current; - mutex_unlock(&nbd->config_lock); - - nbd_parse_flags(nbd, bdev); - - error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); - if (error) { - dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); - goto out_recv; - } - - nbd_size_update(nbd, bdev); - - nbd_dev_dbg_init(nbd); - for (i = 0; i < num_connections; i++) { - sk_set_memalloc(nbd->socks[i]->sock->sk); - atomic_inc(&nbd->recv_threads); - INIT_WORK(&args[i].work, recv_work); - args[i].nbd = nbd; - args[i].index = i; - queue_work(system_long_wq, &args[i].work); - } - wait_event_interruptible(nbd->recv_wq, - atomic_read(&nbd->recv_threads) == 0); - for (i = 0; i < num_connections; i++) - flush_work(&args[i].work); - nbd_dev_dbg_close(nbd); - nbd_size_clear(nbd, bdev); - device_remove_file(disk_to_dev(nbd->disk), &pid_attr); -out_recv: - mutex_lock(&nbd->config_lock); - nbd->task_recv = NULL; -out_err: - sock_shutdown(nbd); - nbd_clear_que(nbd); - kill_bdev(bdev); - nbd_bdev_reset(bdev); - - /* user requested, ignore socket errors */ - if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) - error = 0; - if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags)) - error = -ETIMEDOUT; - - nbd_reset(nbd); - return error; - } - + case NBD_DO_IT: + return nbd_start_device(nbd, bdev); case NBD_CLEAR_QUE: /* * This is for compatibility only. The queue is always cleared * by NBD_DO_IT or NBD_CLEAR_SOCK. */ return 0; - case NBD_PRINT_DEBUG: /* * For compatibility only, we no longer keep a list of @@ -996,6 +1041,103 @@ static struct blk_mq_ops nbd_mq_ops = { .timeout = nbd_xmit_timeout, }; +static void nbd_dev_remove(struct nbd_device *nbd) +{ + struct gendisk *disk = nbd->disk; + nbd->magic = 0; + if (disk) { + del_gendisk(disk); + blk_cleanup_queue(disk->queue); + blk_mq_free_tag_set(&nbd->tag_set); + put_disk(disk); + } + kfree(nbd); +} + +static int nbd_dev_add(int index) +{ + struct nbd_device *nbd; + struct gendisk *disk; + struct request_queue *q; + int err = -ENOMEM; + + nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL); + if (!nbd) + goto out; + + disk = alloc_disk(1 << part_shift); + if (!disk) + goto out_free_nbd; + + if (index >= 0) { + err = idr_alloc(&nbd_index_idr, nbd, index, index + 1, + GFP_KERNEL); + if (err == -ENOSPC) + err = -EEXIST; + } else { + err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL); + if (err >= 0) + index = err; + } + if (err < 0) + goto out_free_disk; + + nbd->disk = disk; + nbd->tag_set.ops = &nbd_mq_ops; + nbd->tag_set.nr_hw_queues = 1; + nbd->tag_set.queue_depth = 128; + nbd->tag_set.numa_node = NUMA_NO_NODE; + nbd->tag_set.cmd_size = sizeof(struct nbd_cmd); + nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | + BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; + nbd->tag_set.driver_data = nbd; + + err = blk_mq_alloc_tag_set(&nbd->tag_set); + if (err) + goto out_free_idr; + + q = blk_mq_init_queue(&nbd->tag_set); + if (IS_ERR(q)) { + err = PTR_ERR(q); + goto out_free_tags; + } + disk->queue = q; + + /* + * Tell the block layer that we are not a rotational device + */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); + disk->queue->limits.discard_granularity = 512; + blk_queue_max_discard_sectors(disk->queue, UINT_MAX); + disk->queue->limits.discard_zeroes_data = 0; + blk_queue_max_hw_sectors(disk->queue, 65536); + disk->queue->limits.max_sectors = 256; + + nbd->magic = NBD_MAGIC; + mutex_init(&nbd->config_lock); + disk->major = NBD_MAJOR; + disk->first_minor = index << part_shift; + disk->fops = &nbd_fops; + disk->private_data = nbd; + sprintf(disk->disk_name, "nbd%d", index); + init_waitqueue_head(&nbd->recv_wq); + nbd_reset(nbd); + add_disk(disk); + return index; + +out_free_tags: + blk_mq_free_tag_set(&nbd->tag_set); +out_free_idr: + idr_remove(&nbd_index_idr, index); +out_free_disk: + put_disk(disk); +out_free_nbd: + kfree(nbd); +out: + return err; +} + /* * And here should be modules and kernel interface * (Just smiley confuses emacs :-) @@ -1003,9 +1145,7 @@ static struct blk_mq_ops nbd_mq_ops = { static int __init nbd_init(void) { - int err = -ENOMEM; int i; - int part_shift; BUILD_BUG_ON(sizeof(struct nbd_request) != 28); @@ -1034,111 +1174,40 @@ static int __init nbd_init(void) if (nbds_max > 1UL << (MINORBITS - part_shift)) return -EINVAL; - - nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); - if (!nbd_dev) + recv_workqueue = alloc_workqueue("knbd-recv", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (!recv_workqueue) return -ENOMEM; - for (i = 0; i < nbds_max; i++) { - struct request_queue *q; - struct gendisk *disk = alloc_disk(1 << part_shift); - if (!disk) - goto out; - nbd_dev[i].disk = disk; - - nbd_dev[i].tag_set.ops = &nbd_mq_ops; - nbd_dev[i].tag_set.nr_hw_queues = 1; - nbd_dev[i].tag_set.queue_depth = 128; - nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE; - nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd); - nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE | - BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; - nbd_dev[i].tag_set.driver_data = &nbd_dev[i]; - - err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set); - if (err) { - put_disk(disk); - goto out; - } - - /* - * The new linux 2.5 block layer implementation requires - * every gendisk to have its very own request_queue struct. - * These structs are big so we dynamically allocate them. - */ - q = blk_mq_init_queue(&nbd_dev[i].tag_set); - if (IS_ERR(q)) { - blk_mq_free_tag_set(&nbd_dev[i].tag_set); - put_disk(disk); - goto out; - } - disk->queue = q; - - /* - * Tell the block layer that we are not a rotational device - */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); - disk->queue->limits.discard_granularity = 512; - blk_queue_max_discard_sectors(disk->queue, UINT_MAX); - disk->queue->limits.discard_zeroes_data = 0; - blk_queue_max_hw_sectors(disk->queue, 65536); - disk->queue->limits.max_sectors = 256; - } - if (register_blkdev(NBD_MAJOR, "nbd")) { - err = -EIO; - goto out; + destroy_workqueue(recv_workqueue); + return -EIO; } - printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR); - nbd_dbg_init(); - for (i = 0; i < nbds_max; i++) { - struct gendisk *disk = nbd_dev[i].disk; - nbd_dev[i].magic = NBD_MAGIC; - mutex_init(&nbd_dev[i].config_lock); - disk->major = NBD_MAJOR; - disk->first_minor = i << part_shift; - disk->fops = &nbd_fops; - disk->private_data = &nbd_dev[i]; - sprintf(disk->disk_name, "nbd%d", i); - init_waitqueue_head(&nbd_dev[i].recv_wq); - nbd_reset(&nbd_dev[i]); - add_disk(disk); - } + mutex_lock(&nbd_index_mutex); + for (i = 0; i < nbds_max; i++) + nbd_dev_add(i); + mutex_unlock(&nbd_index_mutex); + return 0; +} +static int nbd_exit_cb(int id, void *ptr, void *data) +{ + struct nbd_device *nbd = ptr; + nbd_dev_remove(nbd); return 0; -out: - while (i--) { - blk_mq_free_tag_set(&nbd_dev[i].tag_set); - blk_cleanup_queue(nbd_dev[i].disk->queue); - put_disk(nbd_dev[i].disk); - } - kfree(nbd_dev); - return err; } static void __exit nbd_cleanup(void) { - int i; - nbd_dbg_close(); - for (i = 0; i < nbds_max; i++) { - struct gendisk *disk = nbd_dev[i].disk; - nbd_dev[i].magic = 0; - if (disk) { - del_gendisk(disk); - blk_cleanup_queue(disk->queue); - blk_mq_free_tag_set(&nbd_dev[i].tag_set); - put_disk(disk); - } - } + idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL); + idr_destroy(&nbd_index_idr); + destroy_workqueue(recv_workqueue); unregister_blkdev(NBD_MAJOR, "nbd"); - kfree(nbd_dev); - printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR); } module_init(nbd_init); |