aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/loop.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/loop.c')
-rw-r--r--drivers/block/loop.c249
1 files changed, 118 insertions, 131 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ef8334949b42..85de67334695 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -213,16 +213,18 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
*/
blk_mq_freeze_queue(lo->lo_queue);
lo->use_dio = use_dio;
- if (use_dio)
+ if (use_dio) {
+ queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags |= LO_FLAGS_DIRECT_IO;
- else
+ } else {
+ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
+ }
blk_mq_unfreeze_queue(lo->lo_queue);
}
static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
- loff_t logical_blocksize)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
sector_t x = (sector_t)size;
@@ -234,12 +236,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
- if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
- lo->lo_logical_blocksize = logical_blocksize;
- blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
- blk_queue_logical_block_size(lo->lo_queue,
- lo->lo_logical_blocksize);
- }
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
@@ -467,12 +463,21 @@ static void lo_complete_rq(struct request *rq)
blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
}
+static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
+{
+ if (!atomic_dec_and_test(&cmd->ref))
+ return;
+ kfree(cmd->bvec);
+ cmd->bvec = NULL;
+ blk_mq_complete_request(cmd->rq);
+}
+
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
{
struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
cmd->ret = ret;
- blk_mq_complete_request(cmd->rq);
+ lo_rw_aio_do_completion(cmd);
}
static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
@@ -480,22 +485,51 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
{
struct iov_iter iter;
struct bio_vec *bvec;
- struct bio *bio = cmd->rq->bio;
+ struct request *rq = cmd->rq;
+ struct bio *bio = rq->bio;
struct file *file = lo->lo_backing_file;
+ unsigned int offset;
+ int segments = 0;
int ret;
- /* nomerge for loop request queue */
- WARN_ON(cmd->rq->bio != cmd->rq->biotail);
+ if (rq->bio != rq->biotail) {
+ struct req_iterator iter;
+ struct bio_vec tmp;
+
+ __rq_for_each_bio(bio, rq)
+ segments += bio_segments(bio);
+ bvec = kmalloc(sizeof(struct bio_vec) * segments, GFP_NOIO);
+ if (!bvec)
+ return -EIO;
+ cmd->bvec = bvec;
+
+ /*
+ * The bios of the request may be started from the middle of
+ * the 'bvec' because of bio splitting, so we can't directly
+ * copy bio->bi_iov_vec to new bvec. The rq_for_each_segment
+ * API will take care of all details for us.
+ */
+ rq_for_each_segment(tmp, rq, iter) {
+ *bvec = tmp;
+ bvec++;
+ }
+ bvec = cmd->bvec;
+ offset = 0;
+ } else {
+ /*
+ * Same here, this bio may be started from the middle of the
+ * 'bvec' because of bio splitting, so offset from the bvec
+ * must be passed to iov iterator
+ */
+ offset = bio->bi_iter.bi_bvec_done;
+ bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
+ segments = bio_segments(bio);
+ }
+ atomic_set(&cmd->ref, 2);
- bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
- bio_segments(bio), blk_rq_bytes(cmd->rq));
- /*
- * This bio may be started from the middle of the 'bvec'
- * because of bio splitting, so offset from the bvec must
- * be passed to iov iterator
- */
- iter.iov_offset = bio->bi_iter.bi_bvec_done;
+ segments, blk_rq_bytes(rq));
+ iter.iov_offset = offset;
cmd->iocb.ki_pos = pos;
cmd->iocb.ki_filp = file;
@@ -507,6 +541,8 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
else
ret = call_read_iter(file, &cmd->iocb, &iter);
+ lo_rw_aio_do_completion(cmd);
+
if (ret != -EIOCBQUEUED)
cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
return 0;
@@ -553,74 +589,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
}
}
-struct switch_request {
- struct file *file;
- struct completion wait;
-};
-
static inline void loop_update_dio(struct loop_device *lo)
{
__loop_update_dio(lo, io_is_direct(lo->lo_backing_file) |
lo->use_dio);
}
-/*
- * Do the actual switch; called from the BIO completion routine
- */
-static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
-{
- struct file *file = p->file;
- struct file *old_file = lo->lo_backing_file;
- struct address_space *mapping;
-
- /* if no new file, only flush of queued bios requested */
- if (!file)
- return;
-
- mapping = file->f_mapping;
- mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
- lo->lo_backing_file = file;
- lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
- mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
- lo->old_gfp_mask = mapping_gfp_mask(mapping);
- mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
- loop_update_dio(lo);
-}
-
-/*
- * loop_switch performs the hard work of switching a backing store.
- * First it needs to flush existing IO, it does this by sending a magic
- * BIO down the pipe. The completion of this BIO does the actual switch.
- */
-static int loop_switch(struct loop_device *lo, struct file *file)
-{
- struct switch_request w;
-
- w.file = file;
-
- /* freeze queue and wait for completion of scheduled requests */
- blk_mq_freeze_queue(lo->lo_queue);
-
- /* do the switch action */
- do_loop_switch(lo, &w);
-
- /* unfreeze */
- blk_mq_unfreeze_queue(lo->lo_queue);
-
- return 0;
-}
-
-/*
- * Helper to flush the IOs in loop, but keeping loop thread running
- */
-static int loop_flush(struct loop_device *lo)
-{
- /* loop not yet configured, no running thread, nothing to flush */
- if (lo->lo_state != Lo_bound)
- return 0;
- return loop_switch(lo, NULL);
-}
-
static void loop_reread_partitions(struct loop_device *lo,
struct block_device *bdev)
{
@@ -685,9 +659,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
goto out_putf;
/* and ... switch */
- error = loop_switch(lo, file);
- if (error)
- goto out_putf;
+ blk_mq_freeze_queue(lo->lo_queue);
+ mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
+ lo->lo_backing_file = file;
+ lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
+ mapping_set_gfp_mask(file->f_mapping,
+ lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+ loop_update_dio(lo);
+ blk_mq_unfreeze_queue(lo->lo_queue);
fput(old_file);
if (lo->lo_flags & LO_FLAGS_PARTSCAN)
@@ -820,7 +799,6 @@ static void loop_config_discard(struct loop_device *lo)
struct file *file = lo->lo_backing_file;
struct inode *inode = file->f_mapping->host;
struct request_queue *q = lo->lo_queue;
- int lo_bits = 9;
/*
* We use punch hole to reclaim the free space used by the
@@ -840,11 +818,9 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = 0;
- if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
- lo_bits = blksize_bits(lo->lo_logical_blocksize);
- blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
- blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
+ blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
+ blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
@@ -877,7 +853,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
struct file *file, *f;
struct inode *inode;
struct address_space *mapping;
- unsigned lo_blocksize;
int lo_flags = 0;
int error;
loff_t size;
@@ -921,9 +896,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo_flags |= LO_FLAGS_READ_ONLY;
- lo_blocksize = S_ISBLK(inode->i_mode) ?
- inode->i_bdev->bd_block_size : PAGE_SIZE;
-
error = -EFBIG;
size = get_loop_size(lo, file);
if ((loff_t)(sector_t)size != size)
@@ -937,8 +909,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
lo->use_dio = false;
- lo->lo_blocksize = lo_blocksize;
- lo->lo_logical_blocksize = 512;
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
@@ -958,7 +928,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
/* let user-space know about the new size */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
- set_blocksize(bdev, lo_blocksize);
+ set_blocksize(bdev, S_ISBLK(inode->i_mode) ?
+ block_size(inode->i_bdev) : PAGE_SIZE);
lo->lo_state = Lo_bound;
if (part_shift)
@@ -1064,6 +1035,9 @@ static int loop_clr_fd(struct loop_device *lo)
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
memset(lo->lo_file_name, 0, LO_NAME_SIZE);
+ blk_queue_logical_block_size(lo->lo_queue, 512);
+ blk_queue_physical_block_size(lo->lo_queue, 512);
+ blk_queue_io_min(lo->lo_queue, 512);
if (bdev) {
bdput(bdev);
invalidate_bdev(bdev);
@@ -1104,7 +1078,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
- int lo_flags = lo->lo_flags;
if (lo->lo_encrypt_key_size &&
!uid_eq(lo->lo_key_owner, uid) &&
@@ -1137,26 +1110,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (err)
goto exit;
- if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
- if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
- lo->lo_logical_blocksize = 512;
- lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
- if (LO_INFO_BLOCKSIZE(info) != 512 &&
- LO_INFO_BLOCKSIZE(info) != 1024 &&
- LO_INFO_BLOCKSIZE(info) != 2048 &&
- LO_INFO_BLOCKSIZE(info) != 4096)
- return -EINVAL;
- if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
- return -EINVAL;
- }
-
if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit ||
- lo->lo_flags != lo_flags ||
- ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
- lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
- LO_INFO_BLOCKSIZE(info))) {
+ lo->lo_sizelimit != info->lo_sizelimit) {
+ if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
goto exit;
}
@@ -1348,8 +1304,7 @@ static int loop_set_capacity(struct loop_device *lo)
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
- lo->lo_logical_blocksize);
+ return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1366,6 +1321,26 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
return error;
}
+static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
+{
+ if (lo->lo_state != Lo_bound)
+ return -ENXIO;
+
+ if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
+ return -EINVAL;
+
+ blk_mq_freeze_queue(lo->lo_queue);
+
+ blk_queue_logical_block_size(lo->lo_queue, arg);
+ blk_queue_physical_block_size(lo->lo_queue, arg);
+ blk_queue_io_min(lo->lo_queue, arg);
+ loop_update_dio(lo);
+
+ blk_mq_unfreeze_queue(lo->lo_queue);
+
+ return 0;
+}
+
static int lo_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
@@ -1414,6 +1389,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
err = loop_set_dio(lo, arg);
break;
+ case LOOP_SET_BLOCK_SIZE:
+ err = -EPERM;
+ if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+ err = loop_set_block_size(lo, arg);
+ break;
default:
err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
}
@@ -1613,12 +1593,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
err = loop_clr_fd(lo);
if (!err)
return;
- } else {
+ } else if (lo->lo_state == Lo_bound) {
/*
* Otherwise keep thread (if running) and config,
* but flush possible ongoing bios in thread.
*/
- loop_flush(lo);
+ blk_mq_freeze_queue(lo->lo_queue);
+ blk_mq_unfreeze_queue(lo->lo_queue);
}
mutex_unlock(&lo->lo_ctl_mutex);
@@ -1800,9 +1781,13 @@ static int loop_add(struct loop_device **l, int i)
}
lo->lo_queue->queuedata = lo;
+ blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS);
+
/*
- * It doesn't make sense to enable merge because the I/O
- * submitted to backing file is handled page by page.
+ * By default, we do buffer IO, so it doesn't make sense to enable
+ * merge because the I/O submitted to backing file is handled page by
+ * page. For directio mode, merge does help to dispatch bigger request
+ * to underlayer disk. We will enable merge once directio is enabled.
*/
queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
@@ -1996,10 +1981,6 @@ static int __init loop_init(void)
struct loop_device *lo;
int err;
- err = misc_register(&loop_misc);
- if (err < 0)
- return err;
-
part_shift = 0;
if (max_part > 0) {
part_shift = fls(max_part);
@@ -2017,12 +1998,12 @@ static int __init loop_init(void)
if ((1UL << part_shift) > DISK_MAX_PARTS) {
err = -EINVAL;
- goto misc_out;
+ goto err_out;
}
if (max_loop > 1UL << (MINORBITS - part_shift)) {
err = -EINVAL;
- goto misc_out;
+ goto err_out;
}
/*
@@ -2041,6 +2022,11 @@ static int __init loop_init(void)
range = 1UL << MINORBITS;
}
+ err = misc_register(&loop_misc);
+ if (err < 0)
+ goto err_out;
+
+
if (register_blkdev(LOOP_MAJOR, "loop")) {
err = -EIO;
goto misc_out;
@@ -2060,6 +2046,7 @@ static int __init loop_init(void)
misc_out:
misc_deregister(&loop_misc);
+err_out:
return err;
}