aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/nbd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/nbd.c')
-rw-r--r--drivers/block/nbd.c205
1 files changed, 134 insertions, 71 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5170a630778d..b47b2a87ae8f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -97,13 +97,18 @@ struct nbd_config {
atomic_t recv_threads;
wait_queue_head_t recv_wq;
- loff_t blksize;
+ unsigned int blksize_bits;
loff_t bytesize;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *dbg_dir;
#endif
};
+static inline unsigned int nbd_blksize(struct nbd_config *config)
+{
+ return 1u << config->blksize_bits;
+}
+
struct nbd_device {
struct blk_mq_tag_set tag_set;
@@ -117,15 +122,21 @@ struct nbd_device {
struct work_struct remove_work;
struct list_head list;
- struct task_struct *task_recv;
struct task_struct *task_setup;
unsigned long flags;
+ pid_t pid; /* pid of nbd-client, if attached */
char *backend;
};
#define NBD_CMD_REQUEUED 1
+/*
+ * This flag will be set if nbd_queue_rq() succeed, and will be checked and
+ * cleared in completion. Both setting and clearing of the flag are protected
+ * by cmd->lock.
+ */
+#define NBD_CMD_INFLIGHT 2
struct nbd_cmd {
struct nbd_device *nbd;
@@ -146,7 +157,7 @@ static struct dentry *nbd_dbg_dir;
#define NBD_MAGIC 0x68797548
-#define NBD_DEF_BLKSIZE 1024
+#define NBD_DEF_BLKSIZE_BITS 10
static unsigned int nbds_max = 16;
static int max_part = 16;
@@ -212,7 +223,7 @@ static ssize_t pid_show(struct device *dev,
struct gendisk *disk = dev_to_disk(dev);
struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
- return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
+ return sprintf(buf, "%d\n", nbd->pid);
}
static const struct device_attribute pid_attr = {
@@ -305,26 +316,19 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
nsock->sent = 0;
}
-static void nbd_size_clear(struct nbd_device *nbd)
-{
- if (nbd->config->bytesize) {
- set_capacity(nbd->disk, 0);
- kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
- }
-}
-
static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
loff_t blksize)
{
if (!blksize)
- blksize = NBD_DEF_BLKSIZE;
- if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize))
+ blksize = 1u << NBD_DEF_BLKSIZE_BITS;
+
+ if (blk_validate_block_size(blksize))
return -EINVAL;
nbd->config->bytesize = bytesize;
- nbd->config->blksize = blksize;
+ nbd->config->blksize_bits = __ffs(blksize);
- if (!nbd->task_recv)
+ if (!nbd->pid)
return 0;
if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
@@ -400,6 +404,11 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
+ if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ mutex_unlock(&cmd->lock);
+ return BLK_EH_DONE;
+ }
+
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
mutex_unlock(&cmd->lock);
@@ -479,7 +488,8 @@ done:
}
/*
- * Send or receive packet.
+ * Send or receive packet. Return a positive value on success and
+ * negtive value on failue, and never return 0.
*/
static int sock_xmit(struct nbd_device *nbd, int index, int send,
struct iov_iter *iter, int msg_flags, int *sent)
@@ -605,7 +615,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
result = sock_xmit(nbd, index, 1, &from,
(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
trace_nbd_header_sent(req, handle);
- if (result <= 0) {
+ if (result < 0) {
if (was_interrupted(result)) {
/* If we havne't sent anything we can just return BUSY,
* however if we have sent something we need to make
@@ -649,7 +659,7 @@ send_pages:
skip = 0;
}
result = sock_xmit(nbd, index, 1, &from, flags, &sent);
- if (result <= 0) {
+ if (result < 0) {
if (was_interrupted(result)) {
/* We've already sent the header, we
* have no choice but to set pending and
@@ -683,38 +693,45 @@ out:
return 0;
}
-/* NULL returned = something went wrong, inform userspace */
-static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
+static int nbd_read_reply(struct nbd_device *nbd, int index,
+ struct nbd_reply *reply)
{
- struct nbd_config *config = nbd->config;
- int result;
- struct nbd_reply reply;
- struct nbd_cmd *cmd;
- struct request *req = NULL;
- u64 handle;
- u16 hwq;
- u32 tag;
- struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
+ struct kvec iov = {.iov_base = reply, .iov_len = sizeof(*reply)};
struct iov_iter to;
- int ret = 0;
+ int result;
- reply.magic = 0;
- iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
+ reply->magic = 0;
+ iov_iter_kvec(&to, READ, &iov, 1, sizeof(*reply));
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
- if (result <= 0) {
- if (!nbd_disconnected(config))
+ if (result < 0) {
+ if (!nbd_disconnected(nbd->config))
dev_err(disk_to_dev(nbd->disk),
"Receive control failed (result %d)\n", result);
- return ERR_PTR(result);
+ return result;
}
- if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
+ if (ntohl(reply->magic) != NBD_REPLY_MAGIC) {
dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
- (unsigned long)ntohl(reply.magic));
- return ERR_PTR(-EPROTO);
+ (unsigned long)ntohl(reply->magic));
+ return -EPROTO;
}
- memcpy(&handle, reply.handle, sizeof(handle));
+ return 0;
+}
+
+/* NULL returned = something went wrong, inform userspace */
+static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
+ struct nbd_reply *reply)
+{
+ int result;
+ struct nbd_cmd *cmd;
+ struct request *req = NULL;
+ u64 handle;
+ u16 hwq;
+ u32 tag;
+ int ret = 0;
+
+ memcpy(&handle, reply->handle, sizeof(handle));
tag = nbd_handle_to_tag(handle);
hwq = blk_mq_unique_tag_to_hwq(tag);
if (hwq < nbd->tag_set.nr_hw_queues)
@@ -729,6 +746,16 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
+ if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
+ tag, cmd->status, cmd->flags);
+ ret = -ENOENT;
+ goto out;
+ }
+ if (cmd->index != index) {
+ dev_err(disk_to_dev(nbd->disk), "Unexpected reply %d from different sock %d (expected %d)",
+ tag, index, cmd->index);
+ }
if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
@@ -747,9 +774,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
ret = -ENOENT;
goto out;
}
- if (ntohl(reply.error)) {
+ if (ntohl(reply->error)) {
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
- ntohl(reply.error));
+ ntohl(reply->error));
cmd->status = BLK_STS_IOERR;
goto out;
}
@@ -758,11 +785,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
if (rq_data_dir(req) != WRITE) {
struct req_iterator iter;
struct bio_vec bvec;
+ struct iov_iter to;
rq_for_each_segment(bvec, req, iter) {
iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
- if (result <= 0) {
+ if (result < 0) {
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
result);
/*
@@ -771,7 +799,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
* and let the timeout stuff handle resubmitting
* this request onto another connection.
*/
- if (nbd_disconnected(config)) {
+ if (nbd_disconnected(nbd->config)) {
cmd->status = BLK_STS_IOERR;
goto out;
}
@@ -795,24 +823,46 @@ static void recv_work(struct work_struct *work)
work);
struct nbd_device *nbd = args->nbd;
struct nbd_config *config = nbd->config;
+ struct request_queue *q = nbd->disk->queue;
+ struct nbd_sock *nsock;
struct nbd_cmd *cmd;
struct request *rq;
while (1) {
- cmd = nbd_read_stat(nbd, args->index);
- if (IS_ERR(cmd)) {
- struct nbd_sock *nsock = config->socks[args->index];
+ struct nbd_reply reply;
- mutex_lock(&nsock->tx_lock);
- nbd_mark_nsock_dead(nbd, nsock, 1);
- mutex_unlock(&nsock->tx_lock);
+ if (nbd_read_reply(nbd, args->index, &reply))
+ break;
+
+ /*
+ * Grab .q_usage_counter so request pool won't go away, then no
+ * request use-after-free is possible during nbd_handle_reply().
+ * If queue is frozen, there won't be any inflight requests, we
+ * needn't to handle the incoming garbage message.
+ */
+ if (!percpu_ref_tryget(&q->q_usage_counter)) {
+ dev_err(disk_to_dev(nbd->disk), "%s: no io inflight\n",
+ __func__);
+ break;
+ }
+
+ cmd = nbd_handle_reply(nbd, args->index, &reply);
+ if (IS_ERR(cmd)) {
+ percpu_ref_put(&q->q_usage_counter);
break;
}
rq = blk_mq_rq_from_pdu(cmd);
if (likely(!blk_should_fake_timeout(rq->q)))
blk_mq_complete_request(rq);
+ percpu_ref_put(&q->q_usage_counter);
}
+
+ nsock = config->socks[args->index];
+ mutex_lock(&nsock->tx_lock);
+ nbd_mark_nsock_dead(nbd, nsock, 1);
+ mutex_unlock(&nsock->tx_lock);
+
nbd_config_put(nbd);
atomic_dec(&config->recv_threads);
wake_up(&config->recv_wq);
@@ -828,6 +878,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
return true;
mutex_lock(&cmd->lock);
+ if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ mutex_unlock(&cmd->lock);
+ return true;
+ }
cmd->status = BLK_STS_IOERR;
mutex_unlock(&cmd->lock);
@@ -909,7 +963,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
if (!refcount_inc_not_zero(&nbd->config_refs)) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Socks array is empty\n");
- blk_mq_start_request(req);
return -EINVAL;
}
config = nbd->config;
@@ -918,7 +971,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on invalid socket\n");
nbd_config_put(nbd);
- blk_mq_start_request(req);
return -EINVAL;
}
cmd->status = BLK_STS_OK;
@@ -942,7 +994,6 @@ again:
*/
sock_shutdown(nbd);
nbd_config_put(nbd);
- blk_mq_start_request(req);
return -EIO;
}
goto again;
@@ -964,7 +1015,13 @@ again:
* returns EAGAIN can be retried on a different socket.
*/
ret = nbd_send_cmd(nbd, cmd, index);
- if (ret == -EAGAIN) {
+ /*
+ * Access to this flag is protected by cmd->lock, thus it's safe to set
+ * the flag after nbd_send_cmd() succeed to send request to server.
+ */
+ if (!ret)
+ __set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
+ else if (ret == -EAGAIN) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Request send failed, requeueing\n");
nbd_mark_nsock_dead(nbd, nsock, 1);
@@ -1201,7 +1258,7 @@ static void send_disconnects(struct nbd_device *nbd)
iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
mutex_lock(&nsock->tx_lock);
ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
- if (ret <= 0)
+ if (ret < 0)
dev_err(disk_to_dev(nbd->disk),
"Send disconnect failed %d\n", ret);
mutex_unlock(&nsock->tx_lock);
@@ -1232,11 +1289,13 @@ static void nbd_config_put(struct nbd_device *nbd)
&nbd->config_lock)) {
struct nbd_config *config = nbd->config;
nbd_dev_dbg_close(nbd);
- nbd_size_clear(nbd);
+ invalidate_disk(nbd->disk);
+ if (nbd->config->bytesize)
+ kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
&config->runtime_flags))
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
- nbd->task_recv = NULL;
+ nbd->pid = 0;
if (test_and_clear_bit(NBD_RT_HAS_BACKEND_FILE,
&config->runtime_flags)) {
device_remove_file(disk_to_dev(nbd->disk), &backend_attr);
@@ -1277,7 +1336,7 @@ static int nbd_start_device(struct nbd_device *nbd)
int num_connections = config->num_connections;
int error = 0, i;
- if (nbd->task_recv)
+ if (nbd->pid)
return -EBUSY;
if (!config->socks)
return -EINVAL;
@@ -1296,7 +1355,7 @@ static int nbd_start_device(struct nbd_device *nbd)
}
blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
- nbd->task_recv = current;
+ nbd->pid = task_pid_nr(current);
nbd_parse_flags(nbd);
@@ -1337,7 +1396,7 @@ static int nbd_start_device(struct nbd_device *nbd)
args->index = i;
queue_work(nbd->recv_workq, &args->work);
}
- return nbd_set_size(nbd, config->bytesize, config->blksize);
+ return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
}
static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
@@ -1406,11 +1465,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_BLKSIZE:
return nbd_set_size(nbd, config->bytesize, arg);
case NBD_SET_SIZE:
- return nbd_set_size(nbd, arg, config->blksize);
+ return nbd_set_size(nbd, arg, nbd_blksize(config));
case NBD_SET_SIZE_BLOCKS:
- if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
+ if (check_shl_overflow(arg, config->blksize_bits, &bytesize))
return -EINVAL;
- return nbd_set_size(nbd, bytesize, config->blksize);
+ return nbd_set_size(nbd, bytesize, nbd_blksize(config));
case NBD_SET_TIMEOUT:
nbd_set_cmd_timeout(nbd, arg);
return 0;
@@ -1476,7 +1535,7 @@ static struct nbd_config *nbd_alloc_config(void)
atomic_set(&config->recv_threads, 0);
init_waitqueue_head(&config->recv_wq);
init_waitqueue_head(&config->conn_wait);
- config->blksize = NBD_DEF_BLKSIZE;
+ config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
atomic_set(&config->live_connections, 0);
try_module_get(THIS_MODULE);
return config;
@@ -1552,8 +1611,8 @@ static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
{
struct nbd_device *nbd = s->private;
- if (nbd->task_recv)
- seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
+ if (nbd->pid)
+ seq_printf(s, "recv: %d\n", nbd->pid);
return 0;
}
@@ -1604,7 +1663,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_fops);
debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
- debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
+ debugfs_create_u32("blocksize_bits", 0444, dir, &config->blksize_bits);
debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_fops);
return 0;
@@ -1757,7 +1816,9 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
disk->fops = &nbd_fops;
disk->private_data = nbd;
sprintf(disk->disk_name, "nbd%d", index);
- add_disk(disk);
+ err = add_disk(disk);
+ if (err)
+ goto out_err_disk;
/*
* Now publish the device.
@@ -1766,6 +1827,8 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
nbd_total_devices++;
return nbd;
+out_err_disk:
+ blk_cleanup_disk(disk);
out_free_idr:
mutex_lock(&nbd_index_mutex);
idr_remove(&nbd_index_idr, index);
@@ -1826,7 +1889,7 @@ nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
{
struct nbd_config *config = nbd->config;
- u64 bsize = config->blksize;
+ u64 bsize = nbd_blksize(config);
u64 bytes = config->bytesize;
if (info->attrs[NBD_ATTR_SIZE_BYTES])
@@ -1835,7 +1898,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES])
bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
- if (bytes != config->bytesize || bsize != config->blksize)
+ if (bytes != config->bytesize || bsize != nbd_blksize(config))
return nbd_set_size(nbd, bytes, bsize);
return 0;
}
@@ -2130,7 +2193,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&nbd->config_lock);
config = nbd->config;
if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
- !nbd->task_recv) {
+ !nbd->pid) {
dev_err(nbd_to_dev(nbd),
"not configured, cannot reconfigure\n");
ret = -EINVAL;