From 1b0b283648163dae2a214ca28ed5a99f62a77319 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 5 Jun 2020 16:58:36 +0200 Subject: blktrace: break out of blktrace setup on concurrent calls We use one blktrace per request_queue, that means one per the entire disk. So we cannot run one blktrace on say /dev/vda and then /dev/vda1, or just two calls on /dev/vda. We check for concurrent setup only at the very end of the blktrace setup though. If we try to run two concurrent blktraces on the same block device the second one will fail, and the first one seems to go on. However when one tries to kill the first one one will see things like this: The kernel will show these: ``` debugfs: File 'dropped' in directory 'nvme1n1' already present! debugfs: File 'msg' in directory 'nvme1n1' already present! debugfs: File 'trace0' in directory 'nvme1n1' already present! `` And userspace just sees this error message for the second call: ``` blktrace /dev/nvme1n1 BLKTRACESETUP(2) /dev/nvme1n1 failed: 5/Input/output error ``` The first userspace process #1 will also claim that the files were taken underneath their nose as well. The files are taken away form the first process given that when the second blktrace fails, it will follow up with a BLKTRACESTOP and BLKTRACETEARDOWN. This means that even if go-happy process #1 is waiting for blktrace data, we *have* been asked to take teardown the blktrace. This can easily be reproduced with break-blktrace [0] run_0005.sh test. Just break out early if we know we're already going to fail, this will prevent trying to create the files all over again, which we know still exist. [0] https://github.com/mcgrof/break-blktrace Signed-off-by: Luis Chamberlain Signed-off-by: Jan Kara Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 5773f0ba7e76..8fd36e827f14 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -3,6 +3,9 @@ * Copyright (C) 2006 Jens Axboe * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -494,6 +497,16 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, */ strreplace(buts->name, '/', '_'); + /* + * bdev can be NULL, as with scsi-generic, this is a helpful as + * we can be. + */ + if (q->blk_trace) { + pr_warn("Concurrent blktraces are not allowed on %s\n", + buts->name); + return -EBUSY; + } + bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) return -ENOMEM; -- cgit v1.2.3-59-g8ed1b From c3dbe541ef77754729de5e82be2d6e5d267c6c8c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 5 Jun 2020 16:58:37 +0200 Subject: blktrace: Avoid sparse warnings when assigning q->blk_trace Mostly for historical reasons, q->blk_trace is assigned through xchg() and cmpxchg() atomic operations. Although this is correct, sparse complains about this because it violates rcu annotations since commit c780e86dd48e ("blktrace: Protect q->blk_trace with RCU") which started to use rcu for accessing q->blk_trace. Furthermore there's no real need for atomic operations anymore since all changes to q->blk_trace happen under q->blk_trace_mutex and since it also makes more sense to check if q->blk_trace is set with the mutex held earlier. So let's just replace xchg() with rcu_replace_pointer() and cmpxchg() with explicit check and rcu_assign_pointer(). This makes the code more efficient and sparse happy. Reported-by: kbuild test robot Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 8fd36e827f14..5ef0484513ec 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -347,7 +347,8 @@ static int __blk_trace_remove(struct request_queue *q) { struct blk_trace *bt; - bt = xchg(&q->blk_trace, NULL); + bt = rcu_replace_pointer(q->blk_trace, NULL, + lockdep_is_held(&q->blk_trace_mutex)); if (!bt) return -EINVAL; @@ -501,7 +502,8 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, * bdev can be NULL, as with scsi-generic, this is a helpful as * we can be. */ - if (q->blk_trace) { + if (rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex))) { pr_warn("Concurrent blktraces are not allowed on %s\n", buts->name); return -EBUSY; @@ -556,10 +558,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->pid = buts->pid; bt->trace_state = Blktrace_setup; - ret = -EBUSY; - if (cmpxchg(&q->blk_trace, NULL, bt)) - goto err; - + rcu_assign_pointer(q->blk_trace, bt); get_probe_ref(); ret = 0; @@ -1642,7 +1641,8 @@ static int blk_trace_remove_queue(struct request_queue *q) { struct blk_trace *bt; - bt = xchg(&q->blk_trace, NULL); + bt = rcu_replace_pointer(q->blk_trace, NULL, + lockdep_is_held(&q->blk_trace_mutex)); if (bt == NULL) return -EINVAL; @@ -1674,10 +1674,7 @@ static int blk_trace_setup_queue(struct request_queue *q, blk_trace_setup_lba(bt, bdev); - ret = -EBUSY; - if (cmpxchg(&q->blk_trace, NULL, bt)) - goto free_bt; - + rcu_assign_pointer(q->blk_trace, bt); get_probe_ref(); return 0; -- cgit v1.2.3-59-g8ed1b