aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/block/genhd.c
diff options
context:
space:
mode:
authorMatteo Croce <mcroce@microsoft.com>2021-07-13 01:05:25 +0200
committerJens Axboe <axboe@kernel.dk>2021-08-02 13:37:28 -0600
commitcf179948554a2e0d2b622317bf6bf33138ac36e5 (patch)
treeb550929f672a4bb4054839099c4bc2a4e2efa0af /block/genhd.c
parentblock: remove cmdline-parser.c (diff)
downloadwireguard-linux-cf179948554a2e0d2b622317bf6bf33138ac36e5.tar.xz
wireguard-linux-cf179948554a2e0d2b622317bf6bf33138ac36e5.zip
block: add disk sequence number
Associating uevents with block devices in userspace is difficult and racy: the uevent netlink socket is lossy, and on slow and overloaded systems has a very high latency. Block devices do not have exclusive owners in userspace, any process can set one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0 can be reused again and again). A userspace process setting up a block device and watching for its events cannot thus reliably tell whether an event relates to the device it just set up or another earlier instance with the same name. Being able to set a UUID on a loop device would solve the race conditions. But it does not allow to derive orderings from uevents: if you see a uevent with a UUID that does not match the device you are waiting for, you cannot tell whether it's because the right uevent has not arrived yet, or it was already sent and you missed it. So you cannot tell whether you should wait for it or not. Associating a unique, monotonically increasing sequential number to the lifetime of each block device, which can be retrieved with an ioctl immediately upon setting it up, allows to solve the race conditions with uevents, and also allows userspace processes to know whether they should wait for the uevent they need or if it was dropped and thus they should move on. Additionally, increment the disk sequence number when the media change, i.e. on DISK_EVENT_MEDIA_CHANGE event. Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Matteo Croce <mcroce@microsoft.com> Tested-by: Luca Boccassi <bluca@debian.org> Link: https://lore.kernel.org/r/20210712230530.29323-2-mcroce@linux.microsoft.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/genhd.c')
-rw-r--r--block/genhd.c24
1 files changed, 24 insertions, 0 deletions
diff --git a/block/genhd.c b/block/genhd.c
index 38f053074159..ceb08af72c1a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -29,6 +29,23 @@
static struct kobject *block_depr;
+/*
+ * Unique, monotonically increasing sequential number associated with block
+ * devices instances (i.e. incremented each time a device is attached).
+ * Associating uevents with block devices in userspace is difficult and racy:
+ * the uevent netlink socket is lossy, and on slow and overloaded systems has
+ * a very high latency.
+ * Block devices do not have exclusive owners in userspace, any process can set
+ * one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
+ * can be reused again and again).
+ * A userspace process setting up a block device and watching for its events
+ * cannot thus reliably tell whether an event relates to the device it just set
+ * up or another earlier instance with the same name.
+ * This sequential number allows userspace processes to solve this problem, and
+ * uniquely associate an uevent to the lifetime to a device.
+ */
+static atomic64_t diskseq;
+
/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT (1 << MINORBITS)
static DEFINE_IDA(ext_devt_ida);
@@ -1252,6 +1269,8 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
disk_to_dev(disk)->class = &block_class;
disk_to_dev(disk)->type = &disk_type;
device_initialize(disk_to_dev(disk));
+ inc_diskseq(disk);
+
return disk;
out_destroy_part_tbl:
@@ -1352,3 +1371,8 @@ int bdev_read_only(struct block_device *bdev)
return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
}
EXPORT_SYMBOL(bdev_read_only);
+
+void inc_diskseq(struct gendisk *disk)
+{
+ disk->diskseq = atomic64_inc_return(&diskseq);
+}