aboutsummaryrefslogtreecommitdiffstats
path: root/fs/io_uring.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2020-02-04 19:54:55 -0700
committerJens Axboe <axboe@kernel.dk>2020-02-04 20:04:18 -0700
commit2faf852d1be8a4960d328492298da6448cca0279 (patch)
treeebbae989219154e27a95c4558f201202a5fe0681 /fs/io_uring.c
parentio_uring: spin for sq thread to idle on shutdown (diff)
downloadlinux-dev-2faf852d1be8a4960d328492298da6448cca0279.tar.xz
linux-dev-2faf852d1be8a4960d328492298da6448cca0279.zip
io_uring: cleanup fixed file data table references
syzbot reports a use-after-free in io_ring_file_ref_switch() when it tries to switch back to percpu mode. When we put the final reference to the table by calling percpu_ref_kill_and_confirm(), we don't want the zero reference to queue async work for flushing the potentially queued up items. We currently do a few flush_work(), but they merely paper around the issue, since the work item may not have been queued yet depending on the when the percpu-ref callback gets run. Coming into the file unregister, we know we have the ring quiesced. io_ring_file_ref_switch() can check for whether or not the ref is dying or not, and not queue anything async at that point. Once the ref has been confirmed killed, flush any potential items manually. Reported-by: syzbot+7caeaea49c2c8a591e3d@syzkaller.appspotmail.com Fixes: 05f3fb3c5397 ("io_uring: avoid ring quiesce for fixed file set unregister and update") Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to '')
-rw-r--r--fs/io_uring.c32
1 files changed, 19 insertions, 13 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 87f8655656b5..deff11e84094 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -753,6 +753,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_files_update *ip,
unsigned nr_args);
static int io_grab_files(struct io_kiocb *req);
+static void io_ring_file_ref_flush(struct fixed_file_data *data);
static struct kmem_cache *req_cachep;
@@ -5261,15 +5262,10 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
if (!data)
return -ENXIO;
- /* protect against inflight atomic switch, which drops the ref */
- percpu_ref_get(&data->refs);
- /* wait for existing switches */
- flush_work(&data->ref_work);
percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill);
- wait_for_completion(&data->done);
- percpu_ref_put(&data->refs);
- /* flush potential new switch */
flush_work(&data->ref_work);
+ wait_for_completion(&data->done);
+ io_ring_file_ref_flush(data);
percpu_ref_exit(&data->refs);
__io_sqe_files_unregister(ctx);
@@ -5507,14 +5503,11 @@ struct io_file_put {
struct completion *done;
};
-static void io_ring_file_ref_switch(struct work_struct *work)
+static void io_ring_file_ref_flush(struct fixed_file_data *data)
{
struct io_file_put *pfile, *tmp;
- struct fixed_file_data *data;
struct llist_node *node;
- data = container_of(work, struct fixed_file_data, ref_work);
-
while ((node = llist_del_all(&data->put_llist)) != NULL) {
llist_for_each_entry_safe(pfile, tmp, node, llist) {
io_ring_file_put(data->ctx, pfile->file);
@@ -5524,7 +5517,14 @@ static void io_ring_file_ref_switch(struct work_struct *work)
kfree(pfile);
}
}
+}
+
+static void io_ring_file_ref_switch(struct work_struct *work)
+{
+ struct fixed_file_data *data;
+ data = container_of(work, struct fixed_file_data, ref_work);
+ io_ring_file_ref_flush(data);
percpu_ref_get(&data->refs);
percpu_ref_switch_to_percpu(&data->refs);
}
@@ -5535,8 +5535,14 @@ static void io_file_data_ref_zero(struct percpu_ref *ref)
data = container_of(ref, struct fixed_file_data, refs);
- /* we can't safely switch from inside this context, punt to wq */
- queue_work(system_wq, &data->ref_work);
+ /*
+ * We can't safely switch from inside this context, punt to wq. If
+ * the table ref is going away, the table is being unregistered.
+ * Don't queue up the async work for that case, the caller will
+ * handle it.
+ */
+ if (!percpu_ref_is_dying(&data->refs))
+ queue_work(system_wq, &data->ref_work);
}
static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,