aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/loop.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/loop.c')
-rw-r--r--drivers/block/loop.c500
1 files changed, 241 insertions, 259 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c3a36cfaa855..ad92192c7d61 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1,54 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * linux/drivers/block/loop.c
- *
- * Written by Theodore Ts'o, 3/29/93
- *
- * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
- * permitted under the GNU General Public License.
- *
- * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
- * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
- *
- * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
- * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
- *
- * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
- *
- * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
- *
- * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
- *
- * Loadable modules and other fixes by AK, 1998
- *
- * Make real block number available to downstream transfer functions, enables
- * CBC (and relatives) mode encryption requiring unique IVs per data block.
- * Reed H. Petty, rhp@draper.net
- *
- * Maximum number of loop devices now dynamic via max_loop module parameter.
- * Russell Kroll <rkroll@exploits.org> 19990701
- *
- * Maximum number of loop devices when compiled-in now selectable by passing
- * max_loop=<1-255> to the kernel on boot.
- * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
- *
- * Completely rewrite request handling to be make_request_fn style and
- * non blocking, pushing work to a helper thread. Lots of fixes from
- * Al Viro too.
- * Jens Axboe <axboe@suse.de>, Nov 2000
- *
- * Support up to 256 loop devices
- * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
- *
- * Support for falling back on the write file operation when the address space
- * operations write_begin is not available on the backing filesystem.
- * Anton Altaparmakov, 16 Feb 2005
- *
- * Still To Fix:
- * - Advisory locking is ignored here.
- * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
- *
+ * Copyright 1993 by Theodore Ts'o.
*/
-
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
@@ -59,7 +12,6 @@
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/wait.h>
-#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/init.h>
#include <linux/swap.h>
@@ -79,12 +31,66 @@
#include <linux/ioprio.h>
#include <linux/blk-cgroup.h>
#include <linux/sched/mm.h>
+#include <linux/statfs.h>
+#include <linux/uaccess.h>
+#include <linux/blk-mq.h>
+#include <linux/spinlock.h>
+#include <uapi/linux/loop.h>
+
+/* Possible states of device */
+enum {
+ Lo_unbound,
+ Lo_bound,
+ Lo_rundown,
+ Lo_deleting,
+};
-#include "loop.h"
+struct loop_func_table;
+
+struct loop_device {
+ int lo_number;
+ loff_t lo_offset;
+ loff_t lo_sizelimit;
+ int lo_flags;
+ char lo_file_name[LO_NAME_SIZE];
+
+ struct file * lo_backing_file;
+ struct block_device *lo_device;
+
+ gfp_t old_gfp_mask;
+
+ spinlock_t lo_lock;
+ int lo_state;
+ spinlock_t lo_work_lock;
+ struct workqueue_struct *workqueue;
+ struct work_struct rootcg_work;
+ struct list_head rootcg_cmd_list;
+ struct list_head idle_worker_list;
+ struct rb_root worker_tree;
+ struct timer_list timer;
+ bool use_dio;
+ bool sysfs_inited;
+
+ struct request_queue *lo_queue;
+ struct blk_mq_tag_set tag_set;
+ struct gendisk *lo_disk;
+ struct mutex lo_mutex;
+ bool idr_visible;
+};
-#include <linux/uaccess.h>
+struct loop_cmd {
+ struct list_head list_entry;
+ bool use_aio; /* use AIO interface to handle I/O */
+ atomic_t ref; /* only for aio */
+ long ret;
+ struct kiocb iocb;
+ struct bio_vec *bvec;
+ struct cgroup_subsys_state *blkcg_css;
+ struct cgroup_subsys_state *memcg_css;
+};
#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
+#define LOOP_DEFAULT_HW_Q_DEPTH (128)
static DEFINE_IDR(loop_index_idr);
static DEFINE_MUTEX(loop_ctl_mutex);
@@ -184,8 +190,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
*/
if (dio) {
if (queue_logical_block_size(lo->lo_queue) >= sb_bsize &&
- !(lo->lo_offset & dio_align) &&
- mapping->a_ops->direct_IO)
+ !(lo->lo_offset & dio_align) &&
+ (file->f_mode & FMODE_CAN_ODIRECT))
use_dio = true;
else
use_dio = false;
@@ -308,27 +314,22 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
* a.k.a. discard/zerorange.
*/
struct file *file = lo->lo_backing_file;
- struct request_queue *q = lo->lo_queue;
int ret;
mode |= FALLOC_FL_KEEP_SIZE;
- if (!blk_queue_discard(q)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
+ if (!bdev_max_discard_sectors(lo->lo_device))
+ return -EOPNOTSUPP;
ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq));
if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
- ret = -EIO;
- out:
+ return -EIO;
return ret;
}
static int lo_req_flush(struct loop_device *lo, struct request *rq)
{
- struct file *file = lo->lo_backing_file;
- int ret = vfs_fsync(file, 0);
+ int ret = vfs_fsync(lo->lo_backing_file, 0);
if (unlikely(ret && ret != -EINVAL))
ret = -EIO;
@@ -572,6 +573,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
if (!file)
return -EBADF;
+
+ /* suppress uevents while reconfiguring the device */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
is_loop = is_loop_device(file);
error = loop_global_lock_killable(lo, is_loop);
if (error)
@@ -626,13 +631,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
fput(old_file);
if (partscan)
loop_reread_partitions(lo);
- return 0;
+
+ error = 0;
+done:
+ /* enable and uncork uevent now that we are done */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+ return error;
out_err:
loop_global_unlock(lo, is_loop);
out_putf:
fput(file);
- return error;
+ goto done;
}
/* loop sysfs attributes */
@@ -680,33 +690,33 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
{
- return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
+ return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset);
}
static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
{
- return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
+ return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
}
static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
{
int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
- return sprintf(buf, "%s\n", autoclear ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0");
}
static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
{
int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
- return sprintf(buf, "%s\n", partscan ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", partscan ? "1" : "0");
}
static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf)
{
int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO);
- return sprintf(buf, "%s\n", dio ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", dio ? "1" : "0");
}
LOOP_ATTR_RO(backing_file);
@@ -762,7 +772,7 @@ static void loop_config_discard(struct loop_device *lo)
struct request_queue *backingq = bdev_get_queue(I_BDEV(inode));
max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
- granularity = backingq->limits.discard_granularity ?:
+ granularity = bdev_discard_granularity(I_BDEV(inode)) ?:
queue_physical_block_size(backingq);
/*
@@ -774,22 +784,24 @@ static void loop_config_discard(struct loop_device *lo)
granularity = 0;
} else {
+ struct kstatfs sbuf;
+
max_discard_sectors = UINT_MAX >> 9;
- granularity = inode->i_sb->s_blocksize;
+ if (!vfs_statfs(&file->f_path, &sbuf))
+ granularity = sbuf.f_bsize;
+ else
+ max_discard_sectors = 0;
}
if (max_discard_sectors) {
q->limits.discard_granularity = granularity;
blk_queue_max_discard_sectors(q, max_discard_sectors);
blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
} else {
q->limits.discard_granularity = 0;
blk_queue_max_discard_sectors(q, 0);
blk_queue_max_write_zeroes_sectors(q, 0);
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
}
- q->limits.discard_alignment = 0;
}
struct loop_worker {
@@ -803,8 +815,6 @@ struct loop_worker {
};
static void loop_workfn(struct work_struct *work);
-static void loop_rootcg_workfn(struct work_struct *work);
-static void loop_free_idle_workers(struct timer_list *timer);
#ifdef CONFIG_BLK_CGROUP
static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
@@ -820,7 +830,7 @@ static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
{
- struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL;
+ struct rb_node **node, *parent = NULL;
struct loop_worker *cur_worker, *worker = NULL;
struct work_struct *work;
struct list_head *cmd_list;
@@ -888,6 +898,39 @@ queue_work:
spin_unlock_irq(&lo->lo_work_lock);
}
+static void loop_set_timer(struct loop_device *lo)
+{
+ timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
+}
+
+static void loop_free_idle_workers(struct loop_device *lo, bool delete_all)
+{
+ struct loop_worker *pos, *worker;
+
+ spin_lock_irq(&lo->lo_work_lock);
+ list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
+ idle_list) {
+ if (!delete_all &&
+ time_is_after_jiffies(worker->last_ran_at +
+ LOOP_IDLE_WORKER_TIMEOUT))
+ break;
+ list_del(&worker->idle_list);
+ rb_erase(&worker->rb_node, &lo->worker_tree);
+ css_put(worker->blkcg_css);
+ kfree(worker);
+ }
+ if (!list_empty(&lo->idle_worker_list))
+ loop_set_timer(lo);
+ spin_unlock_irq(&lo->lo_work_lock);
+}
+
+static void loop_free_idle_workers_timer(struct timer_list *timer)
+{
+ struct loop_device *lo = container_of(timer, struct loop_device, timer);
+
+ return loop_free_idle_workers(lo, false);
+}
+
static void loop_update_rotational(struct loop_device *lo)
{
struct file *file = lo->lo_backing_file;
@@ -898,7 +941,7 @@ static void loop_update_rotational(struct loop_device *lo)
/* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
if (file_bdev)
- nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev));
+ nonrot = bdev_nonrot(file_bdev);
if (nonrot)
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
@@ -936,6 +979,11 @@ loop_set_status_from_info(struct loop_device *lo,
lo->lo_offset = info->lo_offset;
lo->lo_sizelimit = info->lo_sizelimit;
+
+ /* loff_t vars have been assigned __u64 */
+ if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
+ return -EOVERFLOW;
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
lo->lo_flags = info->lo_flags;
@@ -962,6 +1010,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
+ /* suppress uevents while reconfiguring the device */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
/*
* If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner.
@@ -1006,24 +1057,19 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo->lo_flags |= LO_FLAGS_READ_ONLY;
- lo->workqueue = alloc_workqueue("loop%d",
- WQ_UNBOUND | WQ_FREEZABLE,
- 0,
- lo->lo_number);
if (!lo->workqueue) {
- error = -ENOMEM;
- goto out_unlock;
+ lo->workqueue = alloc_workqueue("loop%d",
+ WQ_UNBOUND | WQ_FREEZABLE,
+ 0, lo->lo_number);
+ if (!lo->workqueue) {
+ error = -ENOMEM;
+ goto out_unlock;
+ }
}
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
- INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
- INIT_LIST_HEAD(&lo->rootcg_cmd_list);
- INIT_LIST_HEAD(&lo->idle_worker_list);
- lo->worker_tree = RB_ROOT;
- timer_setup(&lo->timer, loop_free_idle_workers,
- TIMER_DEFERRABLE);
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
lo->lo_backing_file = file;
@@ -1061,14 +1107,19 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
lo->lo_flags |= LO_FLAGS_PARTSCAN;
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
if (partscan)
- lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
+ clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
loop_global_unlock(lo, is_loop);
if (partscan)
loop_reread_partitions(lo);
if (!(mode & FMODE_EXCL))
bd_abort_claiming(bdev, loop_configure);
- return 0;
+
+ error = 0;
+done:
+ /* enable and uncork uevent now that we are done */
+ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+ return error;
out_unlock:
loop_global_unlock(lo, is_loop);
@@ -1079,61 +1130,27 @@ out_putf:
fput(file);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- return error;
+ goto done;
}
-static int __loop_clr_fd(struct loop_device *lo, bool release)
+static void __loop_clr_fd(struct loop_device *lo, bool release)
{
- struct file *filp = NULL;
+ struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
- int err = 0;
- bool partscan = false;
- int lo_number;
- struct loop_worker *pos, *worker;
-
- /*
- * Flush loop_configure() and loop_change_fd(). It is acceptable for
- * loop_validate_file() to succeed, for actual clear operation has not
- * started yet.
- */
- mutex_lock(&loop_validate_mutex);
- mutex_unlock(&loop_validate_mutex);
- /*
- * loop_validate_file() now fails because l->lo_state != Lo_bound
- * became visible.
- */
-
- mutex_lock(&lo->lo_mutex);
- if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
- err = -ENXIO;
- goto out_unlock;
- }
-
- filp = lo->lo_backing_file;
- if (filp == NULL) {
- err = -EINVAL;
- goto out_unlock;
- }
if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
blk_queue_write_cache(lo->lo_queue, false, false);
- /* freeze request queue during the transition */
- blk_mq_freeze_queue(lo->lo_queue);
-
- destroy_workqueue(lo->workqueue);
- spin_lock_irq(&lo->lo_work_lock);
- list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
- idle_list) {
- list_del(&worker->idle_list);
- rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->blkcg_css);
- kfree(worker);
- }
- spin_unlock_irq(&lo->lo_work_lock);
- del_timer_sync(&lo->timer);
+ /*
+ * Freeze the request queue when unbinding on a live file descriptor and
+ * thus an open device. When called from ->release we are guaranteed
+ * that there is no I/O in progress already.
+ */
+ if (!release)
+ blk_mq_freeze_queue(lo->lo_queue);
spin_lock_irq(&lo->lo_lock);
+ filp = lo->lo_backing_file;
lo->lo_backing_file = NULL;
spin_unlock_irq(&lo->lo_lock);
@@ -1151,14 +1168,14 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
mapping_set_gfp_mask(filp->f_mapping, gfp);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- blk_mq_unfreeze_queue(lo->lo_queue);
+ if (!release)
+ blk_mq_unfreeze_queue(lo->lo_queue);
- partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
- lo_number = lo->lo_number;
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
-out_unlock:
- mutex_unlock(&lo->lo_mutex);
- if (partscan) {
+
+ if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
+ int err;
+
/*
* open_mutex has been held already in release path, so don't
* acquire it if this function is called in such case.
@@ -1174,24 +1191,20 @@ out_unlock:
mutex_unlock(&lo->lo_disk->open_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
- __func__, lo_number, err);
+ __func__, lo->lo_number, err);
/* Device is gone, no point in returning error */
- err = 0;
}
/*
* lo->lo_state is set to Lo_unbound here after above partscan has
- * finished.
- *
- * There cannot be anybody else entering __loop_clr_fd() as
- * lo->lo_backing_file is already cleared and Lo_rundown state
- * protects us from all the other places trying to change the 'lo'
- * device.
+ * finished. There cannot be anybody else entering __loop_clr_fd() as
+ * Lo_rundown state protects us from all the other places trying to
+ * change the 'lo' device.
*/
- mutex_lock(&lo->lo_mutex);
lo->lo_flags = 0;
if (!part_shift)
- lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
+ set_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
+ mutex_lock(&lo->lo_mutex);
lo->lo_state = Lo_unbound;
mutex_unlock(&lo->lo_mutex);
@@ -1200,20 +1213,27 @@ out_unlock:
* lo_mutex triggers a circular lock dependency possibility warning as
* fput can take open_mutex which is usually taken before lo_mutex.
*/
- if (filp)
- fput(filp);
- return err;
+ fput(filp);
}
static int loop_clr_fd(struct loop_device *lo)
{
int err;
- err = mutex_lock_killable(&lo->lo_mutex);
+ /*
+ * Since lo_ioctl() is called without locks held, it is possible that
+ * loop_configure()/loop_change_fd() and loop_clr_fd() run in parallel.
+ *
+ * Therefore, use global lock when setting Lo_rundown state in order to
+ * make sure that loop_validate_file() will fail if the "struct file"
+ * which loop_configure()/loop_change_fd() found via fget() was this
+ * loop device.
+ */
+ err = loop_global_lock_killable(lo, true);
if (err)
return err;
if (lo->lo_state != Lo_bound) {
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, true);
return -ENXIO;
}
/*
@@ -1226,15 +1246,16 @@ static int loop_clr_fd(struct loop_device *lo)
* <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
* command to fail with EBUSY.
*/
- if (atomic_read(&lo->lo_refcnt) > 1) {
+ if (disk_openers(lo->lo_disk) > 1) {
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, true);
return 0;
}
lo->lo_state = Lo_rundown;
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, true);
- return __loop_clr_fd(lo, false);
+ __loop_clr_fd(lo, false);
+ return 0;
}
static int
@@ -1263,15 +1284,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
- if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
- /* If any pages were dirtied after invalidate_bdev(), try again */
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
prev_lo_flags = lo->lo_flags;
err = loop_set_status_from_info(lo, info);
@@ -1301,7 +1313,7 @@ out_unfreeze:
if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) &&
!(prev_lo_flags & LO_FLAGS_PARTSCAN)) {
- lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
+ clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
partscan = true;
}
out_unlock:
@@ -1482,21 +1494,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
invalidate_bdev(lo->lo_device);
blk_mq_freeze_queue(lo->lo_queue);
-
- /* invalidate_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
-
blk_queue_logical_block_size(lo->lo_queue, arg);
blk_queue_physical_block_size(lo->lo_queue, arg);
blk_queue_io_min(lo->lo_queue, arg);
loop_update_dio(lo);
-out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
return err;
@@ -1597,6 +1598,7 @@ struct compat_loop_info {
compat_ulong_t lo_inode; /* ioctl r/o */
compat_dev_t lo_rdevice; /* ioctl r/o */
compat_int_t lo_offset;
+ compat_int_t lo_encrypt_type; /* obsolete, ignored */
compat_int_t lo_encrypt_key_size; /* ioctl w/o */
compat_int_t lo_flags; /* ioctl r/o */
char lo_name[LO_NAME_SIZE];
@@ -1725,33 +1727,15 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
}
#endif
-static int lo_open(struct block_device *bdev, fmode_t mode)
-{
- struct loop_device *lo = bdev->bd_disk->private_data;
- int err;
-
- err = mutex_lock_killable(&lo->lo_mutex);
- if (err)
- return err;
- if (lo->lo_state == Lo_deleting)
- err = -ENXIO;
- else
- atomic_inc(&lo->lo_refcnt);
- mutex_unlock(&lo->lo_mutex);
- return err;
-}
-
static void lo_release(struct gendisk *disk, fmode_t mode)
{
struct loop_device *lo = disk->private_data;
- mutex_lock(&lo->lo_mutex);
- if (atomic_dec_return(&lo->lo_refcnt))
- goto out_unlock;
+ if (disk_openers(disk) > 0)
+ return;
- if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
- if (lo->lo_state != Lo_bound)
- goto out_unlock;
+ mutex_lock(&lo->lo_mutex);
+ if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
lo->lo_state = Lo_rundown;
mutex_unlock(&lo->lo_mutex);
/*
@@ -1760,27 +1744,30 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
*/
__loop_clr_fd(lo, true);
return;
- } else if (lo->lo_state == Lo_bound) {
- /*
- * Otherwise keep thread (if running) and config,
- * but flush possible ongoing bios in thread.
- */
- blk_mq_freeze_queue(lo->lo_queue);
- blk_mq_unfreeze_queue(lo->lo_queue);
}
-
-out_unlock:
mutex_unlock(&lo->lo_mutex);
}
+static void lo_free_disk(struct gendisk *disk)
+{
+ struct loop_device *lo = disk->private_data;
+
+ if (lo->workqueue)
+ destroy_workqueue(lo->workqueue);
+ loop_free_idle_workers(lo, true);
+ del_timer_sync(&lo->timer);
+ mutex_destroy(&lo->lo_mutex);
+ kfree(lo);
+}
+
static const struct block_device_operations lo_fops = {
.owner = THIS_MODULE,
- .open = lo_open,
.release = lo_release,
.ioctl = lo_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = lo_compat_ioctl,
#endif
+ .free_disk = lo_free_disk,
};
/*
@@ -1791,6 +1778,24 @@ module_param(max_loop, int, 0444);
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
+
+static int hw_queue_depth = LOOP_DEFAULT_HW_Q_DEPTH;
+
+static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p)
+{
+ int ret = kstrtoint(s, 10, &hw_queue_depth);
+
+ return (ret || (hw_queue_depth < 1)) ? -EINVAL : 0;
+}
+
+static const struct kernel_param_ops loop_hw_qdepth_param_ops = {
+ .set = loop_set_hw_queue_depth,
+ .get = param_get_int,
+};
+
+device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444);
+MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 128");
+
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
@@ -1821,12 +1826,14 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
cmd->blkcg_css = NULL;
cmd->memcg_css = NULL;
#ifdef CONFIG_BLK_CGROUP
- if (rq->bio && rq->bio->bi_blkg) {
- cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
+ if (rq->bio) {
+ cmd->blkcg_css = bio_blkcg_css(rq->bio);
#ifdef CONFIG_MEMCG
- cmd->memcg_css =
- cgroup_get_e_css(cmd->blkcg_css->cgroup,
- &memory_cgrp_subsys);
+ if (cmd->blkcg_css) {
+ cmd->memcg_css =
+ cgroup_get_e_css(cmd->blkcg_css->cgroup,
+ &memory_cgrp_subsys);
+ }
#endif
}
#endif
@@ -1875,11 +1882,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
}
}
-static void loop_set_timer(struct loop_device *lo)
-{
- timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
-}
-
static void loop_process_work(struct loop_worker *worker,
struct list_head *cmd_list, struct loop_device *lo)
{
@@ -1928,27 +1930,6 @@ static void loop_rootcg_workfn(struct work_struct *work)
loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
}
-static void loop_free_idle_workers(struct timer_list *timer)
-{
- struct loop_device *lo = container_of(timer, struct loop_device, timer);
- struct loop_worker *pos, *worker;
-
- spin_lock_irq(&lo->lo_work_lock);
- list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
- idle_list) {
- if (time_is_after_jiffies(worker->last_ran_at +
- LOOP_IDLE_WORKER_TIMEOUT))
- break;
- list_del(&worker->idle_list);
- rb_erase(&worker->rb_node, &lo->worker_tree);
- css_put(worker->blkcg_css);
- kfree(worker);
- }
- if (!list_empty(&lo->idle_worker_list))
- loop_set_timer(lo);
- spin_unlock_irq(&lo->lo_work_lock);
-}
-
static const struct blk_mq_ops loop_mq_ops = {
.queue_rq = loop_queue_rq,
.complete = lo_complete_rq,
@@ -1964,6 +1945,9 @@ static int loop_add(int i)
lo = kzalloc(sizeof(*lo), GFP_KERNEL);
if (!lo)
goto out;
+ lo->worker_tree = RB_ROOT;
+ INIT_LIST_HEAD(&lo->idle_worker_list);
+ timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE);
lo->lo_state = Lo_unbound;
err = mutex_lock_killable(&loop_ctl_mutex);
@@ -1985,7 +1969,7 @@ static int loop_add(int i)
lo->tag_set.ops = &loop_mq_ops;
lo->tag_set.nr_hw_queues = 1;
- lo->tag_set.queue_depth = 128;
+ lo->tag_set.queue_depth = hw_queue_depth;
lo->tag_set.numa_node = NUMA_NO_NODE;
lo->tag_set.cmd_size = sizeof(struct loop_cmd);
lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING |
@@ -2032,13 +2016,13 @@ static int loop_add(int i)
* userspace tools. Parameters like this in general should be avoided.
*/
if (!part_shift)
- disk->flags |= GENHD_FL_NO_PART_SCAN;
- disk->flags |= GENHD_FL_EXT_DEVT;
- atomic_set(&lo->lo_refcnt, 0);
+ set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
mutex_init(&lo->lo_mutex);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
spin_lock_init(&lo->lo_work_lock);
+ INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
+ INIT_LIST_HEAD(&lo->rootcg_cmd_list);
disk->major = LOOP_MAJOR;
disk->first_minor = i << part_shift;
disk->minors = 1 << part_shift;
@@ -2061,7 +2045,7 @@ static int loop_add(int i)
return i;
out_cleanup_disk:
- blk_cleanup_disk(disk);
+ put_disk(disk);
out_cleanup_tags:
blk_mq_free_tag_set(&lo->tag_set);
out_free_idr:
@@ -2078,14 +2062,13 @@ static void loop_remove(struct loop_device *lo)
{
/* Make this loop device unreachable from pathname. */
del_gendisk(lo->lo_disk);
- blk_cleanup_disk(lo->lo_disk);
blk_mq_free_tag_set(&lo->tag_set);
+
mutex_lock(&loop_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
mutex_unlock(&loop_ctl_mutex);
- /* There is no route which can find this loop device. */
- mutex_destroy(&lo->lo_mutex);
- kfree(lo);
+
+ put_disk(lo->lo_disk);
}
static void loop_probe(dev_t dev)
@@ -2124,13 +2107,12 @@ static int loop_control_remove(int idx)
ret = mutex_lock_killable(&lo->lo_mutex);
if (ret)
goto mark_visible;
- if (lo->lo_state != Lo_unbound ||
- atomic_read(&lo->lo_refcnt) > 0) {
+ if (lo->lo_state != Lo_unbound || disk_openers(lo->lo_disk) > 0) {
mutex_unlock(&lo->lo_mutex);
ret = -EBUSY;
goto mark_visible;
}
- /* Mark this loop device no longer open()-able. */
+ /* Mark this loop device as no more bound, but not quite unbound yet */
lo->lo_state = Lo_deleting;
mutex_unlock(&lo->lo_mutex);