aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2025-01-31 13:03:47 +0100
committerJens Axboe <axboe@kernel.dk>2025-01-31 07:20:08 -0700
commit1e1a9cecfab3f22ebef0a976f849c87be8d03c1c (patch)
tree34fa7958ed94c56127aa0fc55347bb409574af3a /drivers
parentblock: fix nr_hw_queue update racing with disk addition/removal (diff)
downloadwireguard-linux-1e1a9cecfab3f22ebef0a976f849c87be8d03c1c.tar.xz
wireguard-linux-1e1a9cecfab3f22ebef0a976f849c87be8d03c1c.zip
block: force noio scope in blk_mq_freeze_queue
When block drivers or the core block code perform allocations with a frozen queue, this could try to recurse into the block device to reclaim memory and deadlock. Thus all allocations done by a process that froze a queue need to be done without __GFP_IO and __GFP_FS. Instead of trying to track all of them down, force a noio scope as part of freezing the queue. Note that nvme is a bit of a mess here due to the non-owner freezes, and they will be addressed separately. Signed-off-by: Christoph Hellwig <hch@lst.de> Link: https://lore.kernel.org/r/20250131120352.1315351-2-hch@lst.de Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/aoe/aoedev.c5
-rw-r--r--drivers/block/ataflop.c5
-rw-r--r--drivers/block/loop.c20
-rw-r--r--drivers/block/nbd.c7
-rw-r--r--drivers/block/rbd.c5
-rw-r--r--drivers/block/sunvdc.c5
-rw-r--r--drivers/block/swim3.c5
-rw-r--r--drivers/block/virtio_blk.c5
-rw-r--r--drivers/mtd/mtd_blkdevs.c5
-rw-r--r--drivers/nvme/host/core.c17
-rw-r--r--drivers/nvme/host/multipath.c2
-rw-r--r--drivers/scsi/scsi_lib.c5
-rw-r--r--drivers/scsi/scsi_scan.c5
-rw-r--r--drivers/ufs/core/ufs-sysfs.c7
14 files changed, 59 insertions, 39 deletions
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 3523dd82d7a0..4db7f6ce8ade 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -226,10 +226,11 @@ aoedev_downdev(struct aoedev *d)
/* fast fail all pending I/O */
if (d->blkq) {
/* UP is cleared, freeze+quiesce to insure all are errored */
- blk_mq_freeze_queue(d->blkq);
+ unsigned int memflags = blk_mq_freeze_queue(d->blkq);
+
blk_mq_quiesce_queue(d->blkq);
blk_mq_unquiesce_queue(d->blkq);
- blk_mq_unfreeze_queue(d->blkq);
+ blk_mq_unfreeze_queue(d->blkq, memflags);
}
if (d->gd)
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 110f9aca2667..a81ade622a01 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -746,6 +746,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
unsigned char *p;
int sect, nsect;
unsigned long flags;
+ unsigned int memflags;
int ret;
if (type) {
@@ -758,7 +759,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
}
q = unit[drive].disk[type]->queue;
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
local_irq_save(flags);
@@ -817,7 +818,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
ret = FormatError ? -EIO : 0;
out:
blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
return ret;
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d1f1d6bef2e6..c05fe27a96b6 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -586,6 +586,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
{
struct file *file = fget(arg);
struct file *old_file;
+ unsigned int memflags;
int error;
bool partscan;
bool is_loop;
@@ -623,14 +624,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
/* and ... switch */
disk_force_media_change(lo->lo_disk);
- blk_mq_freeze_queue(lo->lo_queue);
+ memflags = blk_mq_freeze_queue(lo->lo_queue);
mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
lo->lo_backing_file = file;
lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
mapping_set_gfp_mask(file->f_mapping,
lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
loop_update_dio(lo);
- blk_mq_unfreeze_queue(lo->lo_queue);
+ blk_mq_unfreeze_queue(lo->lo_queue, memflags);
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
loop_global_unlock(lo, is_loop);
@@ -1255,6 +1256,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
int err;
bool partscan = false;
bool size_changed = false;
+ unsigned int memflags;
err = mutex_lock_killable(&lo->lo_mutex);
if (err)
@@ -1272,7 +1274,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
}
/* I/O needs to be drained before changing lo_offset or lo_sizelimit */
- blk_mq_freeze_queue(lo->lo_queue);
+ memflags = blk_mq_freeze_queue(lo->lo_queue);
err = loop_set_status_from_info(lo, info);
if (err)
@@ -1294,7 +1296,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
loop_update_dio(lo);
out_unfreeze:
- blk_mq_unfreeze_queue(lo->lo_queue);
+ blk_mq_unfreeze_queue(lo->lo_queue, memflags);
if (partscan)
clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
out_unlock:
@@ -1446,6 +1448,7 @@ static int loop_set_capacity(struct loop_device *lo)
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
{
bool use_dio = !!arg;
+ unsigned int memflags;
if (lo->lo_state != Lo_bound)
return -ENXIO;
@@ -1459,18 +1462,19 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
vfs_fsync(lo->lo_backing_file, 0);
}
- blk_mq_freeze_queue(lo->lo_queue);
+ memflags = blk_mq_freeze_queue(lo->lo_queue);
if (use_dio)
lo->lo_flags |= LO_FLAGS_DIRECT_IO;
else
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
- blk_mq_unfreeze_queue(lo->lo_queue);
+ blk_mq_unfreeze_queue(lo->lo_queue, memflags);
return 0;
}
static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
{
struct queue_limits lim;
+ unsigned int memflags;
int err = 0;
if (lo->lo_state != Lo_bound)
@@ -1485,10 +1489,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
lim = queue_limits_start_update(lo->lo_queue);
loop_update_limits(lo, &lim, arg);
- blk_mq_freeze_queue(lo->lo_queue);
+ memflags = blk_mq_freeze_queue(lo->lo_queue);
err = queue_limits_commit_update(lo->lo_queue, &lim);
loop_update_dio(lo);
- blk_mq_unfreeze_queue(lo->lo_queue);
+ blk_mq_unfreeze_queue(lo->lo_queue, memflags);
return err;
}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index b63a0f29a54a..7bdc7eb808ea 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1234,6 +1234,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
struct socket *sock;
struct nbd_sock **socks;
struct nbd_sock *nsock;
+ unsigned int memflags;
int err;
/* Arg will be cast to int, check it to avoid overflow */
@@ -1247,7 +1248,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
* We need to make sure we don't get any errant requests while we're
* reallocating the ->socks array.
*/
- blk_mq_freeze_queue(nbd->disk->queue);
+ memflags = blk_mq_freeze_queue(nbd->disk->queue);
if (!netlink && !nbd->task_setup &&
!test_bit(NBD_RT_BOUND, &config->runtime_flags))
@@ -1288,12 +1289,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
INIT_WORK(&nsock->work, nbd_pending_cmd_work);
socks[config->num_connections++] = nsock;
atomic_inc(&config->live_connections);
- blk_mq_unfreeze_queue(nbd->disk->queue);
+ blk_mq_unfreeze_queue(nbd->disk->queue, memflags);
return 0;
put_socket:
- blk_mq_unfreeze_queue(nbd->disk->queue);
+ blk_mq_unfreeze_queue(nbd->disk->queue, memflags);
sockfd_put(sock);
return err;
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 5b393e4a1ddf..faafd7ff43d6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -7281,9 +7281,10 @@ static ssize_t do_rbd_remove(const char *buf, size_t count)
* Prevent new IO from being queued and wait for existing
* IO to complete/fail.
*/
- blk_mq_freeze_queue(rbd_dev->disk->queue);
+ unsigned int memflags = blk_mq_freeze_queue(rbd_dev->disk->queue);
+
blk_mark_disk_dead(rbd_dev->disk);
- blk_mq_unfreeze_queue(rbd_dev->disk->queue);
+ blk_mq_unfreeze_queue(rbd_dev->disk->queue, memflags);
}
del_gendisk(rbd_dev->disk);
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 88dcae6ec575..05c4aee7f262 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -1113,6 +1113,7 @@ static void vdc_requeue_inflight(struct vdc_port *port)
static void vdc_queue_drain(struct vdc_port *port)
{
struct request_queue *q = port->disk->queue;
+ unsigned int memflags;
/*
* Mark the queue as draining, then freeze/quiesce to ensure
@@ -1121,12 +1122,12 @@ static void vdc_queue_drain(struct vdc_port *port)
port->drain = 1;
spin_unlock_irq(&port->vio.lock);
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
spin_lock_irq(&port->vio.lock);
port->drain = 0;
blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
}
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 9914153b365b..3aedcb5add61 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -840,6 +840,7 @@ static int grab_drive(struct floppy_state *fs, enum swim_state state,
static void release_drive(struct floppy_state *fs)
{
struct request_queue *q = disks[fs->index]->queue;
+ unsigned int memflags;
unsigned long flags;
swim3_dbg("%s", "-> release drive\n");
@@ -848,10 +849,10 @@ static void release_drive(struct floppy_state *fs)
fs->state = idle;
spin_unlock_irqrestore(&swim3_lock, flags);
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
}
static int fd_eject(struct floppy_state *fs)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index bbaa26b523b8..a4af39fc7ea2 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1584,11 +1584,12 @@ static int virtblk_freeze(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
struct request_queue *q = vblk->disk->queue;
+ unsigned int memflags;
/* Ensure no requests in virtqueues before deleting vqs. */
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue_nowait(q);
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
/* Ensure we don't receive any more interrupts */
virtio_reset_device(vdev);
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index ee7e1d908986..847c11542f02 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -404,6 +404,7 @@ out_list_del:
int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
{
unsigned long flags;
+ unsigned int memflags;
lockdep_assert_held(&mtd_table_mutex);
@@ -420,10 +421,10 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
spin_unlock_irqrestore(&old->queue_lock, flags);
/* freeze+quiesce queue to ensure all requests are flushed */
- blk_mq_freeze_queue(old->rq);
+ memflags = blk_mq_freeze_queue(old->rq);
blk_mq_quiesce_queue(old->rq);
blk_mq_unquiesce_queue(old->rq);
- blk_mq_unfreeze_queue(old->rq);
+ blk_mq_unfreeze_queue(old->rq, memflags);
/* If the device is currently open, tell trans driver to close it,
then put mtd device, and don't touch it again */
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 76b615d4d5b9..40046770f1bf 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2132,15 +2132,16 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
struct nvme_ns_info *info)
{
struct queue_limits lim;
+ unsigned int memflags;
int ret;
lim = queue_limits_start_update(ns->disk->queue);
nvme_set_ctrl_limits(ns->ctrl, &lim);
- blk_mq_freeze_queue(ns->disk->queue);
+ memflags = blk_mq_freeze_queue(ns->disk->queue);
ret = queue_limits_commit_update(ns->disk->queue, &lim);
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
- blk_mq_unfreeze_queue(ns->disk->queue);
+ blk_mq_unfreeze_queue(ns->disk->queue, memflags);
/* Hide the block-interface for these devices */
if (!ret)
@@ -2155,6 +2156,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
struct nvme_id_ns_nvm *nvm = NULL;
struct nvme_zone_info zi = {};
struct nvme_id_ns *id;
+ unsigned int memflags;
sector_t capacity;
unsigned lbaf;
int ret;
@@ -2186,7 +2188,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
lim = queue_limits_start_update(ns->disk->queue);
- blk_mq_freeze_queue(ns->disk->queue);
+ memflags = blk_mq_freeze_queue(ns->disk->queue);
ns->head->lba_shift = id->lbaf[lbaf].ds;
ns->head->nuse = le64_to_cpu(id->nuse);
capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
@@ -2219,7 +2221,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
ret = queue_limits_commit_update(ns->disk->queue, &lim);
if (ret) {
- blk_mq_unfreeze_queue(ns->disk->queue);
+ blk_mq_unfreeze_queue(ns->disk->queue, memflags);
goto out;
}
@@ -2235,7 +2237,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
ns->head->features |= NVME_NS_DEAC;
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
set_bit(NVME_NS_READY, &ns->flags);
- blk_mq_unfreeze_queue(ns->disk->queue);
+ blk_mq_unfreeze_queue(ns->disk->queue, memflags);
if (blk_queue_is_zoned(ns->queue)) {
ret = blk_revalidate_disk_zones(ns->disk);
@@ -2291,9 +2293,10 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
if (!ret && nvme_ns_head_multipath(ns->head)) {
struct queue_limits *ns_lim = &ns->disk->queue->limits;
struct queue_limits lim;
+ unsigned int memflags;
lim = queue_limits_start_update(ns->head->disk->queue);
- blk_mq_freeze_queue(ns->head->disk->queue);
+ memflags = blk_mq_freeze_queue(ns->head->disk->queue);
/*
* queue_limits mixes values that are the hardware limitations
* for bio splitting with what is the device configuration.
@@ -2325,7 +2328,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
nvme_mpath_revalidate_paths(ns);
- blk_mq_unfreeze_queue(ns->head->disk->queue);
+ blk_mq_unfreeze_queue(ns->head->disk->queue, memflags);
}
return ret;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a85d190942bd..2a7635565083 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -60,7 +60,7 @@ void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
lockdep_assert_held(&subsys->lock);
list_for_each_entry(h, &subsys->nsheads, entry)
if (h->disk)
- blk_mq_unfreeze_queue(h->disk->queue);
+ blk_mq_unfreeze_queue_nomemrestore(h->disk->queue);
}
void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 4411426a7894..b86e259516a7 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2723,6 +2723,7 @@ int
scsi_device_quiesce(struct scsi_device *sdev)
{
struct request_queue *q = sdev->request_queue;
+ unsigned int memflags;
int err;
/*
@@ -2737,7 +2738,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
blk_set_pm_only(q);
- blk_mq_freeze_queue(q);
+ memflags = blk_mq_freeze_queue(q);
/*
* Ensure that the effect of blk_set_pm_only() will be visible
* for percpu_ref_tryget() callers that occur after the queue
@@ -2745,7 +2746,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
* was called. See also https://lwn.net/Articles/573497/.
*/
synchronize_rcu();
- blk_mq_unfreeze_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
mutex_lock(&sdev->state_mutex);
err = scsi_device_set_state(sdev, SDEV_QUIESCE);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 042329b74c6e..312d78213954 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -220,6 +220,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
int new_shift = sbitmap_calculate_shift(depth);
bool need_alloc = !sdev->budget_map.map;
bool need_free = false;
+ unsigned int memflags;
int ret;
struct sbitmap sb_backup;
@@ -240,7 +241,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
* and here disk isn't added yet, so freezing is pretty fast
*/
if (need_free) {
- blk_mq_freeze_queue(sdev->request_queue);
+ memflags = blk_mq_freeze_queue(sdev->request_queue);
sb_backup = sdev->budget_map;
}
ret = sbitmap_init_node(&sdev->budget_map,
@@ -256,7 +257,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev,
else
sbitmap_free(&sb_backup);
ret = 0;
- blk_mq_unfreeze_queue(sdev->request_queue);
+ blk_mq_unfreeze_queue(sdev->request_queue, memflags);
}
return ret;
}
diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c
index 796e37a1d859..3438269a5440 100644
--- a/drivers/ufs/core/ufs-sysfs.c
+++ b/drivers/ufs/core/ufs-sysfs.c
@@ -1439,6 +1439,7 @@ static ssize_t max_number_of_rtt_store(struct device *dev,
struct ufs_hba *hba = dev_get_drvdata(dev);
struct ufs_dev_info *dev_info = &hba->dev_info;
struct scsi_device *sdev;
+ unsigned int memflags;
unsigned int rtt;
int ret;
@@ -1458,14 +1459,16 @@ static ssize_t max_number_of_rtt_store(struct device *dev,
ufshcd_rpm_get_sync(hba);
+ memflags = memalloc_noio_save();
shost_for_each_device(sdev, hba->host)
- blk_mq_freeze_queue(sdev->request_queue);
+ blk_mq_freeze_queue_nomemsave(sdev->request_queue);
ret = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_WRITE_ATTR,
QUERY_ATTR_IDN_MAX_NUM_OF_RTT, 0, 0, &rtt);
shost_for_each_device(sdev, hba->host)
- blk_mq_unfreeze_queue(sdev->request_queue);
+ blk_mq_unfreeze_queue_nomemrestore(sdev->request_queue);
+ memalloc_noio_restore(memflags);
ufshcd_rpm_put_sync(hba);