From 2fe93bd43264242e4568763d1b183b1cc58066ff Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 16 Jan 2019 10:27:59 +0100 Subject: fuse: extract fuse_find_writeback() helper Call this from fuse_range_is_writeback() and fuse_writepage_in_flight(). Turn a BUG_ON() into a WARN_ON() in the process. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 52 +++++++++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a59c16bd90ac..927d40dec376 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -329,6 +329,24 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) return (u64) v0 + ((u64) v1 << 32); } +static struct fuse_req *fuse_find_writeback(struct fuse_inode *fi, + pgoff_t idx_from, pgoff_t idx_to) +{ + struct fuse_req *req; + + list_for_each_entry(req, &fi->writepages, writepages_entry) { + pgoff_t curr_index; + + WARN_ON(get_fuse_inode(req->inode) != fi); + curr_index = req->misc.write.in.offset >> PAGE_SHIFT; + if (idx_from < curr_index + req->num_pages && + curr_index <= idx_to) { + return req; + } + } + return NULL; +} + /* * Check if any page in a range is under writeback * @@ -340,21 +358,10 @@ static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from, { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - struct fuse_req *req; - bool found = false; + bool found; spin_lock(&fc->lock); - list_for_each_entry(req, &fi->writepages, writepages_entry) { - pgoff_t curr_index; - - BUG_ON(req->inode != inode); - curr_index = req->misc.write.in.offset >> PAGE_SHIFT; - if (idx_from < curr_index + req->num_pages && - curr_index <= idx_to) { - found = true; - break; - } - } + found = fuse_find_writeback(fi, idx_from, idx_to); spin_unlock(&fc->lock); return found; @@ -1744,25 +1751,17 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, struct fuse_inode *fi = get_fuse_inode(new_req->inode); struct fuse_req *tmp; struct fuse_req *old_req; - bool found = false; pgoff_t curr_index; BUG_ON(new_req->num_pages != 0); spin_lock(&fc->lock); list_del(&new_req->writepages_entry); - list_for_each_entry(old_req, &fi->writepages, writepages_entry) { - BUG_ON(old_req->inode != new_req->inode); - curr_index = old_req->misc.write.in.offset >> PAGE_SHIFT; - if (curr_index <= page->index && - page->index < curr_index + old_req->num_pages) { - found = true; - break; - } - } - if (!found) { + old_req = fuse_find_writeback(fi, page->index, page->index); + if (!old_req) { list_add(&new_req->writepages_entry, &fi->writepages); - goto out_unlock; + spin_unlock(&fc->lock); + return false; } new_req->num_pages = 1; @@ -1791,10 +1790,9 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, new_req->misc.write.next = old_req->misc.write.next; old_req->misc.write.next = new_req; } -out_unlock: spin_unlock(&fc->lock); out: - return found; + return true; } static int fuse_writepages_fill(struct page *page, -- cgit v1.2.3-59-g8ed1b From 7f305ca1928d8b7db69f428b10988a3aa3e81053 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 16 Jan 2019 10:27:59 +0100 Subject: fuse: clean up fuse_writepage_in_flight() Restructure the function to better separate the locked and the unlocked parts. Use the "old_req" local variable to mean only the queued request, and not any auxiliary requests added onto its misc.write.next list. These changes are in preparation for the following patch. Also turn BUG_ON instances into WARN_ON and add a header comment explaining what the function does. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 927d40dec376..8342df29815d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1744,6 +1744,13 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) end_page_writeback(data->orig_pages[i]); } +/* + * First recheck under fi->lock if the offending offset is still under + * writeback. If yes, then iterate write requests, to see if there's one + * already added for a page at this offset. If there's none, then insert this + * new request onto the auxiliary list, otherwise reuse the existing one by + * copying the new page contents over to the old temporary page. + */ static bool fuse_writepage_in_flight(struct fuse_req *new_req, struct page *page) { @@ -1751,9 +1758,8 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, struct fuse_inode *fi = get_fuse_inode(new_req->inode); struct fuse_req *tmp; struct fuse_req *old_req; - pgoff_t curr_index; - BUG_ON(new_req->num_pages != 0); + WARN_ON(new_req->num_pages != 0); spin_lock(&fc->lock); list_del(&new_req->writepages_entry); @@ -1766,32 +1772,34 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, new_req->num_pages = 1; for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) { - BUG_ON(tmp->inode != new_req->inode); + pgoff_t curr_index; + + WARN_ON(tmp->inode != new_req->inode); curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT; - if (tmp->num_pages == 1 && - curr_index == page->index) { - old_req = tmp; + if (tmp->num_pages == 1 && curr_index == page->index && + test_bit(FR_PENDING, &tmp->flags)) { + copy_highpage(tmp->pages[0], page); + break; } } - if (old_req->num_pages == 1 && test_bit(FR_PENDING, &old_req->flags)) { - struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host); + if (!tmp) { + new_req->misc.write.next = old_req->misc.write.next; + old_req->misc.write.next = new_req; + } - copy_highpage(old_req->pages[0], page); - spin_unlock(&fc->lock); + spin_unlock(&fc->lock); + + if (tmp) { + struct backing_dev_info *bdi = inode_to_bdi(new_req->inode); dec_wb_stat(&bdi->wb, WB_WRITEBACK); dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP); wb_writeout_inc(&bdi->wb); fuse_writepage_free(fc, new_req); fuse_request_free(new_req); - goto out; - } else { - new_req->misc.write.next = old_req->misc.write.next; - old_req->misc.write.next = new_req; } - spin_unlock(&fc->lock); -out: + return true; } -- cgit v1.2.3-59-g8ed1b From 419234d5958b8ec4f5e2ba8ed2e77916f844ded1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 16 Jan 2019 10:27:59 +0100 Subject: fuse: only reuse auxiliary request in fuse_writepage_in_flight() Don't reuse the queued request, even if it only contains a single page. This is needed because previous locking changes (spliting out fiq->waitq.lock from fc->lock) broke the assumption that request will remain in FR_PENDING at least until the new page contents are copied. This fix removes a slight optimization for a rare corner case, so we really shoudln't care. Reported-by: Kirill Tkhai Fixes: fd22d62ed0c3 ("fuse: no fc->lock for iqueue parts") Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8342df29815d..b0c32a74082f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1746,9 +1746,9 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) /* * First recheck under fi->lock if the offending offset is still under - * writeback. If yes, then iterate write requests, to see if there's one - * already added for a page at this offset. If there's none, then insert this - * new request onto the auxiliary list, otherwise reuse the existing one by + * writeback. If yes, then iterate auxiliary write requests, to see if there's + * one already added for a page at this offset. If there's none, then insert + * this new request onto the auxiliary list, otherwise reuse the existing one by * copying the new page contents over to the old temporary page. */ static bool fuse_writepage_in_flight(struct fuse_req *new_req, @@ -1771,13 +1771,14 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, } new_req->num_pages = 1; - for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) { + for (tmp = old_req->misc.write.next; tmp; tmp = tmp->misc.write.next) { pgoff_t curr_index; WARN_ON(tmp->inode != new_req->inode); curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT; - if (tmp->num_pages == 1 && curr_index == page->index && - test_bit(FR_PENDING, &tmp->flags)) { + if (curr_index == page->index) { + WARN_ON(tmp->num_pages != 1); + WARN_ON(!test_bit(FR_PENDING, &tmp->flags)); copy_highpage(tmp->pages[0], page); break; } -- cgit v1.2.3-59-g8ed1b From e2653bd53a98412ff2fc6fa6a6ed3934da04a3f3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 24 Jan 2019 10:40:15 +0100 Subject: fuse: fix leaked aux requests Auxiliary requests chained on req->misc.write.next may be leaked on truncate. Free these as well if the parent request was truncated off. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b0c32a74082f..ee59599f4947 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1494,6 +1494,7 @@ static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req, __releases(fc->lock) __acquires(fc->lock) { + struct fuse_req *aux, *next; struct fuse_inode *fi = get_fuse_inode(req->inode); struct fuse_write_in *inarg = &req->misc.write.in; __u64 data_size = req->num_pages * PAGE_SIZE; @@ -1520,6 +1521,15 @@ __acquires(fc->lock) out_free: fuse_writepage_finish(fc, req); spin_unlock(&fc->lock); + + /* After fuse_writepage_finish() aux request list is private */ + for (aux = req->misc.write.next; aux; aux = next) { + next = aux->misc.write.next; + aux->misc.write.next = NULL; + fuse_writepage_free(fc, aux); + fuse_put_request(fc, aux); + } + fuse_writepage_free(fc, req); fuse_put_request(fc, req); spin_lock(&fc->lock); -- cgit v1.2.3-59-g8ed1b From c5de16cca2d7268833abfd6456d73fbba447c19b Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 26 Nov 2018 12:46:20 +0300 Subject: fuse: Replace page without copying in fuse_writepage_in_flight() It looks like we can optimize page replacement and avoid copying by simple updating the request's page. [SzM: swap with new request's tmp page to avoid use after free.] Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ee59599f4947..6b6e2574e733 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1789,7 +1789,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, if (curr_index == page->index) { WARN_ON(tmp->num_pages != 1); WARN_ON(!test_bit(FR_PENDING, &tmp->flags)); - copy_highpage(tmp->pages[0], page); + swap(tmp->pages[0], new_req->pages[0]); break; } } -- cgit v1.2.3-59-g8ed1b From 340617508d24d699c02a1e8a5ab3f53492f53386 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 6 Nov 2018 12:15:20 +0300 Subject: fuse: Remove stale comment in end_requests() Function end_requests() does not take fc->lock. Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 809c0f2f9942..3feb071a0483 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2109,11 +2109,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait) return mask; } -/* - * Abort all requests on the given list (pending or processing) - * - * This function releases and reacquires fc->lock - */ +/* Abort all requests on the given list (pending or processing) */ static void end_requests(struct fuse_conn *fc, struct list_head *head) { while (!list_empty(head)) { -- cgit v1.2.3-59-g8ed1b From 8da6e9183275c837e7d2401b3018ff81f657a19a Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 8 Nov 2018 12:05:20 +0300 Subject: fuse: Kill fasync only if interrupt is queued in queue_interrupt() We should sent signal only in case of interrupt is really queued. Not a real problem, but this makes the code clearer and intuitive. Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 3feb071a0483..11246e7965e8 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -480,9 +480,9 @@ static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) if (list_empty(&req->intr_entry)) { list_add_tail(&req->intr_entry, &fiq->interrupts); wake_up_locked(&fiq->waitq); + kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } spin_unlock(&fiq->waitq.lock); - kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) -- cgit v1.2.3-59-g8ed1b From 217316a601016d0d2913290070e6338576ae73f6 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 8 Nov 2018 12:05:25 +0300 Subject: fuse: Optimize request_end() by not taking fiq->waitq.lock We take global fiq->waitq.lock every time, when we are in this function, but interrupted requests are just small subset of all requests. This patch optimizes request_end() and makes it to take the lock when it's really needed. queue_interrupt() needs small change for that. After req is linked to interrupt list, we do smp_mb() and check for FR_FINISHED again. In case of FR_FINISHED bit has appeared, we remove req and leave the function: Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 11246e7965e8..74171d568621 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -431,10 +431,16 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) if (test_and_set_bit(FR_FINISHED, &req->flags)) goto put_request; - - spin_lock(&fiq->waitq.lock); - list_del_init(&req->intr_entry); - spin_unlock(&fiq->waitq.lock); + /* + * test_and_set_bit() implies smp_mb() between bit + * changing and below intr_entry check. Pairs with + * smp_mb() from queue_interrupt(). + */ + if (!list_empty(&req->intr_entry)) { + spin_lock(&fiq->waitq.lock); + list_del_init(&req->intr_entry); + spin_unlock(&fiq->waitq.lock); + } WARN_ON(test_bit(FR_PENDING, &req->flags)); WARN_ON(test_bit(FR_SENT, &req->flags)); if (test_bit(FR_BACKGROUND, &req->flags)) { @@ -473,12 +479,18 @@ put_request: static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { spin_lock(&fiq->waitq.lock); - if (test_bit(FR_FINISHED, &req->flags)) { - spin_unlock(&fiq->waitq.lock); - return; - } if (list_empty(&req->intr_entry)) { list_add_tail(&req->intr_entry, &fiq->interrupts); + /* + * Pairs with smp_mb() implied by test_and_set_bit() + * from request_end(). + */ + smp_mb(); + if (test_bit(FR_FINISHED, &req->flags)) { + list_del_init(&req->intr_entry); + spin_unlock(&fiq->waitq.lock); + return; + } wake_up_locked(&fiq->waitq); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } -- cgit v1.2.3-59-g8ed1b From 5e0fed717a383ce6e894334cffe7a2acc9dc17b9 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 8 Nov 2018 12:05:31 +0300 Subject: fuse: Wake up req->waitq of only if not background Currently, we wait on req->waitq in request_wait_answer() function only, and it's never used for background requests. Since wake_up() is not a light-weight macros, instead of this, it unfolds in really called function, which makes locking operations taking some cpu cycles, let's avoid its call for the case we definitely know it's completely useless. Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 74171d568621..0a69656402e5 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -468,8 +468,11 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) fc->active_background--; flush_bg_queue(fc); spin_unlock(&fc->bg_lock); + } else { + /* Wake up waiter sleeping in request_wait_answer() */ + wake_up(&req->waitq); } - wake_up(&req->waitq); + if (req->end) req->end(fc, req); put_request: -- cgit v1.2.3-59-g8ed1b From 7407a10de57f5810f3f0e778eb1db0923d24ab16 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 8 Nov 2018 12:05:36 +0300 Subject: fuse: Do some refactoring in fuse_dev_do_write() This is needed for next patch. Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 0a69656402e5..682c7914a0a0 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1915,16 +1915,17 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, struct fuse_req *req; struct fuse_out_header oh; + err = -EINVAL; if (nbytes < sizeof(struct fuse_out_header)) - return -EINVAL; + goto out; err = fuse_copy_one(cs, &oh, sizeof(oh)); if (err) - goto err_finish; + goto copy_finish; err = -EINVAL; if (oh.len != nbytes) - goto err_finish; + goto copy_finish; /* * Zero oh.unique indicates unsolicited notification message @@ -1932,41 +1933,40 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, */ if (!oh.unique) { err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs); - return err ? err : nbytes; + goto out; } err = -EINVAL; if (oh.error <= -1000 || oh.error > 0) - goto err_finish; + goto copy_finish; spin_lock(&fpq->lock); - err = -ENOENT; - if (!fpq->connected) - goto err_unlock_pq; + req = NULL; + if (fpq->connected) + req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT); - req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT); - if (!req) - goto err_unlock_pq; + err = -ENOENT; + if (!req) { + spin_unlock(&fpq->lock); + goto copy_finish; + } /* Is it an interrupt reply ID? */ if (oh.unique & FUSE_INT_REQ_BIT) { __fuse_get_request(req); spin_unlock(&fpq->lock); - err = -EINVAL; - if (nbytes != sizeof(struct fuse_out_header)) { - fuse_put_request(fc, req); - goto err_finish; - } - - if (oh.error == -ENOSYS) + err = 0; + if (nbytes != sizeof(struct fuse_out_header)) + err = -EINVAL; + else if (oh.error == -ENOSYS) fc->no_interrupt = 1; else if (oh.error == -EAGAIN) queue_interrupt(&fc->iq, req); + fuse_put_request(fc, req); - fuse_copy_finish(cs); - return nbytes; + goto copy_finish; } clear_bit(FR_SENT, &req->flags); @@ -1992,14 +1992,12 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, spin_unlock(&fpq->lock); request_end(fc, req); - +out: return err ? err : nbytes; - err_unlock_pq: - spin_unlock(&fpq->lock); - err_finish: +copy_finish: fuse_copy_finish(cs); - return err; + goto out; } static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from) -- cgit v1.2.3-59-g8ed1b From b782911b5297c4be0c2de88fc3b36a760eca6321 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 8 Nov 2018 12:05:42 +0300 Subject: fuse: Verify userspace asks to requeue interrupt that we really sent When queue_interrupt() is called from fuse_dev_do_write(), it came from userspace directly. Userspace may pass any request id, even the request's we have not interrupted (or even background's request). This patch adds sanity check to make kernel safe against that. Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 682c7914a0a0..ed9318d4e7dc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -479,9 +479,15 @@ put_request: fuse_put_request(fc, req); } -static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) +static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { spin_lock(&fiq->waitq.lock); + /* Check for we've sent request to interrupt this req */ + if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) { + spin_unlock(&fiq->waitq.lock); + return -EINVAL; + } + if (list_empty(&req->intr_entry)) { list_add_tail(&req->intr_entry, &fiq->interrupts); /* @@ -492,12 +498,13 @@ static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) if (test_bit(FR_FINISHED, &req->flags)) { list_del_init(&req->intr_entry); spin_unlock(&fiq->waitq.lock); - return; + return 0; } wake_up_locked(&fiq->waitq); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); } spin_unlock(&fiq->waitq.lock); + return 0; } static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) @@ -1962,7 +1969,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, else if (oh.error == -ENOSYS) fc->no_interrupt = 1; else if (oh.error == -EAGAIN) - queue_interrupt(&fc->iq, req); + err = queue_interrupt(&fc->iq, req); fuse_put_request(fc, req); -- cgit v1.2.3-59-g8ed1b From ebf84d0c7220c7c9b904c405e61175d2a50cfb39 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 9 Nov 2018 13:33:11 +0300 Subject: fuse: Add fuse_inode argument to fuse_prepare_release() Here is preparation for next patches, which introduce new fi->lock for protection of ff->write_entry linked into fi->write_files. This patch just passes new argument to the function. Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/cuse.c | 3 ++- fs/fuse/dir.c | 6 ++++-- fs/fuse/file.c | 10 ++++++---- fs/fuse/fuse_i.h | 2 +- 4 files changed, 13 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 8f68181256c0..d73eba592ba1 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -141,10 +141,11 @@ static int cuse_open(struct inode *inode, struct file *file) static int cuse_release(struct inode *inode, struct file *file) { + struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; - fuse_sync_release(ff, file->f_flags); + fuse_sync_release(fi, ff, file->f_flags); fuse_conn_put(fc); return 0; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e909678afa2d..46c7430b94fc 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -400,6 +400,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_create_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; + struct fuse_inode *fi; struct fuse_file *ff; /* Userspace expects S_IFREG in create mode */ @@ -451,7 +452,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, &outentry.attr, entry_attr_timeout(&outentry), 0); if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); - fuse_sync_release(ff, flags); + fuse_sync_release(NULL, ff, flags); fuse_queue_forget(fc, forget, outentry.nodeid, 1); err = -ENOMEM; goto out_err; @@ -462,7 +463,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, fuse_dir_changed(dir); err = finish_open(file, entry, generic_file_open); if (err) { - fuse_sync_release(ff, flags); + fi = get_fuse_inode(inode); + fuse_sync_release(fi, ff, flags); } else { file->private_data = ff; fuse_finish_open(inode, file); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6b6e2574e733..cecd9b7e0b7f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -224,7 +224,8 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) return err; } -static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) +static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, + int flags, int opcode) { struct fuse_conn *fc = ff->fc; struct fuse_req *req = ff->reserved_req; @@ -249,11 +250,12 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) void fuse_release_common(struct file *file, bool isdir) { + struct fuse_inode *fi = get_fuse_inode(file_inode(file)); struct fuse_file *ff = file->private_data; struct fuse_req *req = ff->reserved_req; int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; - fuse_prepare_release(ff, file->f_flags, opcode); + fuse_prepare_release(fi, ff, file->f_flags, opcode); if (ff->flock) { struct fuse_release_in *inarg = &req->misc.release.in; @@ -295,10 +297,10 @@ static int fuse_release(struct inode *inode, struct file *file) return 0; } -void fuse_sync_release(struct fuse_file *ff, int flags) +void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, int flags) { WARN_ON(refcount_read(&ff->count) > 1); - fuse_prepare_release(ff, flags, FUSE_RELEASE); + fuse_prepare_release(fi, ff, flags, FUSE_RELEASE); /* * iput(NULL) is a no-op and since the refcount is 1 and everything's * synchronous, we are fine with not doing igrab() here" diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 2f2c92e6f8cb..1b536b6c20f9 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -817,7 +817,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); void fuse_file_free(struct fuse_file *ff); void fuse_finish_open(struct inode *inode, struct file *file); -void fuse_sync_release(struct fuse_file *ff, int flags); +void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, int flags); /** * Send RELEASE or RELEASEDIR request -- cgit v1.2.3-59-g8ed1b From 4510d86fbbb36872224482bb21836d47cce8be8c Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 9 Nov 2018 13:33:17 +0300 Subject: fuse: Convert fc->attr_version into atomic64_t This patch makes fc->attr_version of atomic64_t type, so fc->lock won't be needed to read or modify it anymore. Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 19 ++----------------- fs/fuse/file.c | 8 ++++---- fs/fuse/fuse_i.h | 9 ++++++--- fs/fuse/inode.c | 4 ++-- 4 files changed, 14 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 46c7430b94fc..10106f23e9e8 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -149,21 +149,6 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, args->out.args[0].value = outarg; } -u64 fuse_get_attr_version(struct fuse_conn *fc) -{ - u64 curr_version; - - /* - * The spin lock isn't actually needed on 64bit archs, but we - * don't yet care too much about such optimizations. - */ - spin_lock(&fc->lock); - curr_version = fc->attr_version; - spin_unlock(&fc->lock); - - return curr_version; -} - /* * Check whether the dentry is still valid * @@ -674,7 +659,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fc->lock); - fi->attr_version = ++fc->attr_version; + fi->attr_version = atomic64_inc_return(&fc->attr_version); /* * If i_nlink == 0 then unlink doesn't make sense, yet this can * happen if userspace filesystem is careless. It would be @@ -828,7 +813,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fc->lock); - fi->attr_version = ++fc->attr_version; + fi->attr_version = atomic64_inc_return(&fc->attr_version); inc_nlink(inode); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index cecd9b7e0b7f..d1becba30bf3 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -187,7 +187,7 @@ void fuse_finish_open(struct inode *inode, struct file *file) struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fc->lock); - fi->attr_version = ++fc->attr_version; + fi->attr_version = atomic64_inc_return(&fc->attr_version); i_size_write(inode, 0); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); @@ -608,7 +608,7 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fc->lock); - fi->attr_version = ++fc->attr_version; + fi->attr_version = atomic64_inc_return(&fc->attr_version); spin_unlock(&fc->lock); } @@ -687,7 +687,7 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, spin_lock(&fc->lock); if (attr_ver == fi->attr_version && size < inode->i_size && !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { - fi->attr_version = ++fc->attr_version; + fi->attr_version = atomic64_inc_return(&fc->attr_version); i_size_write(inode, size); } spin_unlock(&fc->lock); @@ -1006,7 +1006,7 @@ bool fuse_write_update_size(struct inode *inode, loff_t pos) bool ret = false; spin_lock(&fc->lock); - fi->attr_version = ++fc->attr_version; + fi->attr_version = atomic64_inc_return(&fc->attr_version); if (pos > inode->i_size) { i_size_write(inode, pos); ret = true; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1b536b6c20f9..47c7b2238f91 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -730,7 +730,7 @@ struct fuse_conn { struct fuse_req *destroy_req; /** Version counter for attribute changes */ - u64 attr_version; + atomic64_t attr_version; /** Called on final put */ void (*release)(struct fuse_conn *); @@ -770,6 +770,11 @@ static inline int invalid_nodeid(u64 nodeid) return !nodeid || nodeid == FUSE_ROOT_ID; } +static inline u64 fuse_get_attr_version(struct fuse_conn *fc) +{ + return atomic64_read(&fc->attr_version); +} + /** Device operations */ extern const struct file_operations fuse_dev_operations; @@ -1000,8 +1005,6 @@ void fuse_flush_writepages(struct inode *inode); void fuse_set_nowrite(struct inode *inode); void fuse_release_nowrite(struct inode *inode); -u64 fuse_get_attr_version(struct fuse_conn *fc); - /** * File-system tells the kernel to invalidate cache for the given node id. */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index c2d4099429be..86e86fcd6267 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -163,7 +163,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - fi->attr_version = ++fc->attr_version; + fi->attr_version = atomic64_inc_return(&fc->attr_version); fi->i_time = attr_valid; WRITE_ONCE(fi->inval_mask, 0); @@ -624,7 +624,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) fc->blocked = 0; fc->initialized = 0; fc->connected = 1; - fc->attr_version = 1; + atomic64_set(&fc->attr_version, 1); get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); fc->user_ns = get_user_ns(user_ns); -- cgit v1.2.3-59-g8ed1b From f15ecfef058d94d03bdb35dcdfda041b3de9d543 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 9 Nov 2018 13:33:22 +0300 Subject: fuse: Introduce fi->lock to protect write related fields To minimize contention of fc->lock, this patch introduces a new spinlock for protection fuse_inode metadata: fuse_inode: writectr writepages write_files queued_writes attr_version inode: i_size i_nlink i_mtime i_ctime Also, it protects the fields changed in fuse_change_attributes_common() (too many to list). Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 25 ++++++++------- fs/fuse/file.c | 93 +++++++++++++++++++++++++++++--------------------------- fs/fuse/fuse_i.h | 5 ++- fs/fuse/inode.c | 9 ++++-- 4 files changed, 70 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 10106f23e9e8..eb7dfb0d9513 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -658,7 +658,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) struct inode *inode = d_inode(entry); struct fuse_inode *fi = get_fuse_inode(inode); - spin_lock(&fc->lock); + spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); /* * If i_nlink == 0 then unlink doesn't make sense, yet this can @@ -668,7 +668,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) */ if (inode->i_nlink > 0) drop_nlink(inode); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); fuse_invalidate_attr(inode); fuse_dir_changed(dir); fuse_invalidate_entry_cache(entry); @@ -812,10 +812,10 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, if (!err) { struct fuse_inode *fi = get_fuse_inode(inode); - spin_lock(&fc->lock); + spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); inc_nlink(inode); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); fuse_invalidate_attr(inode); fuse_update_ctime(inode); } else if (err == -EINTR) { @@ -1343,15 +1343,14 @@ static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr, */ void fuse_set_nowrite(struct inode *inode) { - struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); BUG_ON(!inode_is_locked(inode)); - spin_lock(&fc->lock); + spin_lock(&fi->lock); BUG_ON(fi->writectr < 0); fi->writectr += FUSE_NOWRITE; - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE); } @@ -1372,11 +1371,11 @@ static void __fuse_release_nowrite(struct inode *inode) void fuse_release_nowrite(struct inode *inode) { - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); - spin_lock(&fc->lock); + spin_lock(&fi->lock); __fuse_release_nowrite(inode); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); } static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args, @@ -1511,7 +1510,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, goto error; } - spin_lock(&fc->lock); + spin_lock(&fi->lock); /* the kernel maintains i_mtime locally */ if (trust_local_cmtime) { if (attr->ia_valid & ATTR_MTIME) @@ -1529,10 +1528,10 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, i_size_write(inode, outarg.attr.size); if (is_truncate) { - /* NOTE: this may release/reacquire fc->lock */ + /* NOTE: this may release/reacquire fi->lock */ __fuse_release_nowrite(inode); } - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); /* * Only call invalidate_inode_pages2() after removing diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d1becba30bf3..26c2523120bc 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -159,17 +159,16 @@ EXPORT_SYMBOL_GPL(fuse_do_open); static void fuse_link_write_file(struct file *file) { struct inode *inode = file_inode(file); - struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_file *ff = file->private_data; /* * file may be written through mmap, so chain it onto the * inodes's write_file list */ - spin_lock(&fc->lock); + spin_lock(&fi->lock); if (list_empty(&ff->write_entry)) list_add(&ff->write_entry, &fi->write_files); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); } void fuse_finish_open(struct inode *inode, struct file *file) @@ -186,10 +185,10 @@ void fuse_finish_open(struct inode *inode, struct file *file) if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) { struct fuse_inode *fi = get_fuse_inode(inode); - spin_lock(&fc->lock); + spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); i_size_write(inode, 0); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); fuse_invalidate_attr(inode); if (fc->writeback_cache) file_update_time(file); @@ -231,8 +230,13 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, struct fuse_req *req = ff->reserved_req; struct fuse_release_in *inarg = &req->misc.release.in; + /* Inode is NULL on error path of fuse_create_open() */ + if (likely(fi)) { + spin_lock(&fi->lock); + list_del(&ff->write_entry); + spin_unlock(&fi->lock); + } spin_lock(&fc->lock); - list_del(&ff->write_entry); if (!RB_EMPTY_NODE(&ff->polled_node)) rb_erase(&ff->polled_node, &fc->polled_files); spin_unlock(&fc->lock); @@ -358,13 +362,12 @@ static struct fuse_req *fuse_find_writeback(struct fuse_inode *fi, static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from, pgoff_t idx_to) { - struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); bool found; - spin_lock(&fc->lock); + spin_lock(&fi->lock); found = fuse_find_writeback(fi, idx_from, idx_to); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); return found; } @@ -607,9 +610,9 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - spin_lock(&fc->lock); + spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); } io->iocb->ki_complete(io->iocb, res, 0); @@ -684,13 +687,13 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - spin_lock(&fc->lock); + spin_lock(&fi->lock); if (attr_ver == fi->attr_version && size < inode->i_size && !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { fi->attr_version = atomic64_inc_return(&fc->attr_version); i_size_write(inode, size); } - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); } static void fuse_short_read(struct fuse_req *req, struct inode *inode, @@ -1005,13 +1008,13 @@ bool fuse_write_update_size(struct inode *inode, loff_t pos) struct fuse_inode *fi = get_fuse_inode(inode); bool ret = false; - spin_lock(&fc->lock); + spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); if (pos > inode->i_size) { i_size_write(inode, pos); ret = true; } - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); return ret; } @@ -1490,11 +1493,11 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) wake_up(&fi->page_waitq); } -/* Called under fc->lock, may release and reacquire it */ +/* Called under fi->lock, may release and reacquire it */ static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req, loff_t size) -__releases(fc->lock) -__acquires(fc->lock) +__releases(fi->lock) +__acquires(fi->lock) { struct fuse_req *aux, *next; struct fuse_inode *fi = get_fuse_inode(req->inode); @@ -1502,9 +1505,6 @@ __acquires(fc->lock) __u64 data_size = req->num_pages * PAGE_SIZE; bool queued; - if (!fc->connected) - goto out_free; - if (inarg->offset + data_size <= size) { inarg->size = data_size; } else if (inarg->offset < size) { @@ -1515,14 +1515,17 @@ __acquires(fc->lock) } req->in.args[1].size = inarg->size; - fi->writectr++; queued = fuse_request_queue_background(fc, req); - WARN_ON(!queued); + /* Fails on broken connection only */ + if (unlikely(!queued)) + goto out_free; + + fi->writectr++; return; out_free: fuse_writepage_finish(fc, req); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); /* After fuse_writepage_finish() aux request list is private */ for (aux = req->misc.write.next; aux; aux = next) { @@ -1534,18 +1537,18 @@ __acquires(fc->lock) fuse_writepage_free(fc, req); fuse_put_request(fc, req); - spin_lock(&fc->lock); + spin_lock(&fi->lock); } /* * If fi->writectr is positive (no truncate or fsync going on) send * all queued writepage requests. * - * Called with fc->lock + * Called with fi->lock */ void fuse_flush_writepages(struct inode *inode) -__releases(fc->lock) -__acquires(fc->lock) +__releases(fi->lock) +__acquires(fi->lock) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -1565,7 +1568,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) struct fuse_inode *fi = get_fuse_inode(inode); mapping_set_error(inode->i_mapping, req->out.h.error); - spin_lock(&fc->lock); + spin_lock(&fi->lock); while (req->misc.write.next) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_write_in *inarg = &req->misc.write.in; @@ -1602,7 +1605,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) } fi->writectr--; fuse_writepage_finish(fc, req); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); fuse_writepage_free(fc, req); } @@ -1611,13 +1614,13 @@ static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc, { struct fuse_file *ff = NULL; - spin_lock(&fc->lock); + spin_lock(&fi->lock); if (!list_empty(&fi->write_files)) { ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); fuse_file_get(ff); } - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); return ff; } @@ -1688,11 +1691,11 @@ static int fuse_writepage_locked(struct page *page) inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); - spin_lock(&fc->lock); + spin_lock(&fi->lock); list_add(&req->writepages_entry, &fi->writepages); list_add_tail(&req->list, &fi->queued_writes); fuse_flush_writepages(inode); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); end_page_writeback(page); @@ -1741,16 +1744,15 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) { struct fuse_req *req = data->req; struct inode *inode = data->inode; - struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); int num_pages = req->num_pages; int i; req->ff = fuse_file_get(data->ff); - spin_lock(&fc->lock); + spin_lock(&fi->lock); list_add_tail(&req->list, &fi->queued_writes); fuse_flush_writepages(inode); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); for (i = 0; i < num_pages; i++) end_page_writeback(data->orig_pages[i]); @@ -1773,12 +1775,12 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, WARN_ON(new_req->num_pages != 0); - spin_lock(&fc->lock); + spin_lock(&fi->lock); list_del(&new_req->writepages_entry); old_req = fuse_find_writeback(fi, page->index, page->index); if (!old_req) { list_add(&new_req->writepages_entry, &fi->writepages); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); return false; } @@ -1801,7 +1803,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, old_req->misc.write.next = new_req; } - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); if (tmp) { struct backing_dev_info *bdi = inode_to_bdi(new_req->inode); @@ -1822,6 +1824,7 @@ static int fuse_writepages_fill(struct page *page, struct fuse_fill_wb_data *data = _data; struct fuse_req *req = data->req; struct inode *inode = data->inode; + struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); struct page *tmp_page; bool is_writeback; @@ -1892,9 +1895,9 @@ static int fuse_writepages_fill(struct page *page, req->end = fuse_writepage_end; req->inode = inode; - spin_lock(&fc->lock); + spin_lock(&fi->lock); list_add(&req->writepages_entry, &fi->writepages); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); data->req = req; } @@ -1917,12 +1920,12 @@ static int fuse_writepages_fill(struct page *page, data->orig_pages[req->num_pages] = page; /* - * Protected by fc->lock against concurrent access by + * Protected by fi->lock against concurrent access by * fuse_page_is_writeback(). */ - spin_lock(&fc->lock); + spin_lock(&fi->lock); req->num_pages++; - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); out_unlock: unlock_page(page); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 47c7b2238f91..1c610b65b1bf 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -96,7 +96,7 @@ struct fuse_inode { union { /* Write related fields (regular file only) */ struct { - /* Files usable in writepage. Protected by fc->lock */ + /* Files usable in writepage. Protected by fi->lock */ struct list_head write_files; /* Writepages pending on truncate or fsync */ @@ -144,6 +144,9 @@ struct fuse_inode { /** Lock for serializing lookup and readdir for back compatibility*/ struct mutex mutex; + + /** Lock to protect write related fields */ + spinlock_t lock; }; /** FUSE inode state bits */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 86e86fcd6267..4c767175bd34 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -97,6 +97,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->orig_ino = 0; fi->state = 0; mutex_init(&fi->mutex); + spin_lock_init(&fi->lock); fi->forget = fuse_alloc_forget(); if (!fi->forget) { kmem_cache_free(fuse_inode_cachep, inode); @@ -163,6 +164,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); + lockdep_assert_held(&fi->lock); + fi->attr_version = atomic64_inc_return(&fc->attr_version); fi->i_time = attr_valid; WRITE_ONCE(fi->inval_mask, 0); @@ -209,10 +212,10 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, loff_t oldsize; struct timespec64 old_mtime; - spin_lock(&fc->lock); + spin_lock(&fi->lock); if ((attr_version != 0 && fi->attr_version > attr_version) || test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); return; } @@ -227,7 +230,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, */ if (!is_wb || !S_ISREG(inode->i_mode)) i_size_write(inode, attr->size); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); if (!is_wb && S_ISREG(inode->i_mode)) { bool inval = false; -- cgit v1.2.3-59-g8ed1b From c9d8f5f0692d5960ed50970ffe63756fb8f96cdb Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 9 Nov 2018 13:33:27 +0300 Subject: fuse: Protect fi->nlookup with fi->lock This continues previous patch and introduces the same protection for nlookup field. Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 4 ++-- fs/fuse/inode.c | 4 ++-- fs/fuse/readdir.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index eb7dfb0d9513..dd0f64f7bc06 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -207,9 +207,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) fuse_queue_forget(fc, forget, outarg.nodeid, 1); goto invalid; } - spin_lock(&fc->lock); + spin_lock(&fi->lock); fi->nlookup++; - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); } kfree(forget); if (ret == -ENOMEM) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4c767175bd34..3d1a63e95f69 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -325,9 +325,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, } fi = get_fuse_inode(inode); - spin_lock(&fc->lock); + spin_lock(&fi->lock); fi->nlookup++; - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); fuse_change_attributes(inode, attr, attr_valid, attr_version); return inode; diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index ab18b78f4755..574d03f8a573 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -213,9 +213,9 @@ retry: } fi = get_fuse_inode(inode); - spin_lock(&fc->lock); + spin_lock(&fi->lock); fi->nlookup++; - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); forget_all_cached_acls(inode); fuse_change_attributes(inode, &o->attr, -- cgit v1.2.3-59-g8ed1b From 6b675738ce900dac8f8478b1d8487df420a315f3 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 9 Nov 2018 13:33:32 +0300 Subject: fuse: Protect ff->reserved_req via corresponding fi->lock This is rather natural action after previous patches, and it just decreases load of fc->lock. Signed-off-by: Kirill Tkhai Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 10 ++++++---- fs/fuse/fuse_i.h | 5 ++++- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ed9318d4e7dc..5a7309427e0e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -251,17 +251,18 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc, struct file *file) { struct fuse_req *req = NULL; + struct fuse_inode *fi = get_fuse_inode(file_inode(file)); struct fuse_file *ff = file->private_data; do { wait_event(fc->reserved_req_waitq, ff->reserved_req); - spin_lock(&fc->lock); + spin_lock(&fi->lock); if (ff->reserved_req) { req = ff->reserved_req; ff->reserved_req = NULL; req->stolen_file = get_file(file); } - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); } while (!req); return req; @@ -273,16 +274,17 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc, static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) { struct file *file = req->stolen_file; + struct fuse_inode *fi = get_fuse_inode(file_inode(file)); struct fuse_file *ff = file->private_data; WARN_ON(req->max_pages); - spin_lock(&fc->lock); + spin_lock(&fi->lock); memset(req, 0, sizeof(*req)); fuse_request_init(req, NULL, NULL, 0); BUG_ON(ff->reserved_req); ff->reserved_req = req; wake_up_all(&fc->reserved_req_waitq); - spin_unlock(&fc->lock); + spin_unlock(&fi->lock); fput(file); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1c610b65b1bf..4fdd098e422d 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -166,7 +166,10 @@ struct fuse_file { /** Fuse connection for this file */ struct fuse_conn *fc; - /** Request reserved for flush and release */ + /* + * Request reserved for flush and release. + * Modified under relative fuse_inode::lock. + */ struct fuse_req *reserved_req; /** Kernel file handle guaranteed to be unique */ -- cgit v1.2.3-59-g8ed1b From eb98e3bdf3aa7b15b40c65063ea935f953f60c6b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 24 Jan 2019 10:40:16 +0100 Subject: fuse: clean up aborted The only caller that needs fc->aborted set is fuse_conn_abort_write(). Setting fc->aborted is now racy (fuse_abort_conn() may already be in progress or finished) but there's no reason to care. Signed-off-by: Miklos Szeredi --- fs/fuse/control.c | 4 +++- fs/fuse/cuse.c | 4 ++-- fs/fuse/dev.c | 9 ++++----- fs/fuse/fuse_i.h | 2 +- fs/fuse/inode.c | 4 ++-- 5 files changed, 12 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 989df5accaee..fe80bea4ad89 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -35,7 +35,9 @@ static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf, { struct fuse_conn *fc = fuse_ctl_file_conn_get(file); if (fc) { - fuse_abort_conn(fc, true); + if (fc->abort_err) + fc->aborted = true; + fuse_abort_conn(fc); fuse_conn_put(fc); } return count; diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index d73eba592ba1..55a26f351467 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -408,7 +408,7 @@ err_unlock: err_region: unregister_chrdev_region(devt, 1); err: - fuse_abort_conn(fc, false); + fuse_abort_conn(fc); goto out; } @@ -587,7 +587,7 @@ static ssize_t cuse_class_abort_store(struct device *dev, { struct cuse_conn *cc = dev_get_drvdata(dev); - fuse_abort_conn(&cc->fc, false); + fuse_abort_conn(&cc->fc); return count; } static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 5a7309427e0e..8a63e52785e9 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1330,7 +1330,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, goto err_unlock; if (!fiq->connected) { - err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV; + err = fc->aborted ? -ECONNABORTED : -ENODEV; goto err_unlock; } @@ -1377,7 +1377,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) { - err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV; + err = fc->aborted ? -ECONNABORTED : -ENODEV; goto out_end; } if (err) { @@ -2177,7 +2177,7 @@ static void end_polls(struct fuse_conn *fc) * is OK, the request will in that case be removed from the list before we touch * it. */ -void fuse_abort_conn(struct fuse_conn *fc, bool is_abort) +void fuse_abort_conn(struct fuse_conn *fc) { struct fuse_iqueue *fiq = &fc->iq; @@ -2193,7 +2193,6 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort) fc->connected = 0; spin_unlock(&fc->bg_lock); - fc->aborted = is_abort; fuse_set_initialized(fc); list_for_each_entry(fud, &fc->devices, entry) { struct fuse_pqueue *fpq = &fud->pq; @@ -2271,7 +2270,7 @@ int fuse_dev_release(struct inode *inode, struct file *file) /* Are we the last open device? */ if (atomic_dec_and_test(&fc->dev_count)) { WARN_ON(fc->iq.fasync != NULL); - fuse_abort_conn(fc, false); + fuse_abort_conn(fc); } fuse_dev_free(fud); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 4fdd098e422d..b1ac587671ac 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -947,7 +947,7 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req); bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req); /* Abort all requests */ -void fuse_abort_conn(struct fuse_conn *fc, bool is_abort); +void fuse_abort_conn(struct fuse_conn *fc); void fuse_wait_aborted(struct fuse_conn *fc); /** diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3d1a63e95f69..11aac2f4eda1 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -379,7 +379,7 @@ void fuse_unlock_inode(struct inode *inode, bool locked) static void fuse_umount_begin(struct super_block *sb) { - fuse_abort_conn(get_fuse_conn_super(sb), false); + fuse_abort_conn(get_fuse_conn_super(sb)); } static void fuse_send_destroy(struct fuse_conn *fc) @@ -1245,7 +1245,7 @@ static void fuse_sb_destroy(struct super_block *sb) if (fc) { fuse_send_destroy(fc); - fuse_abort_conn(fc, false); + fuse_abort_conn(fc); fuse_wait_aborted(fc); down_write(&fc->killsb); -- cgit v1.2.3-59-g8ed1b From 75126f5504524dd0f24753d8815db42d9ab23614 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 24 Jan 2019 10:40:17 +0100 Subject: fuse: use atomic64_t for khctr ...to get rid of one more fc->lock use. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 4 +--- fs/fuse/fuse_i.h | 2 +- fs/fuse/inode.c | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 26c2523120bc..b2a4fab08cb4 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -64,9 +64,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) RB_CLEAR_NODE(&ff->polled_node); init_waitqueue_head(&ff->poll_wait); - spin_lock(&fc->lock); - ff->kh = ++fc->khctr; - spin_unlock(&fc->lock); + ff->kh = atomic64_inc_return(&fc->khctr); return ff; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b1ac587671ac..033e30af519f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -544,7 +544,7 @@ struct fuse_conn { struct fuse_iqueue iq; /** The next unique kernel file handle */ - u64 khctr; + atomic64_t khctr; /** rbtree of fuse_files waiting for poll events indexed by ph */ struct rb_root polled_files; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 11aac2f4eda1..2bbb7c59d6da 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -622,7 +622,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns) atomic_set(&fc->num_waiting, 0); fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; - fc->khctr = 0; + atomic64_set(&fc->khctr, 0); fc->polled_files = RB_ROOT; fc->blocked = 0; fc->initialized = 0; -- cgit v1.2.3-59-g8ed1b From 23c94e1cdcbf5953cd380555d0781caa42311870 Mon Sep 17 00:00:00 2001 From: Martin Raiber Date: Sat, 27 Oct 2018 16:48:48 +0000 Subject: fuse: Switch to using async direct IO for FOPEN_DIRECT_IO Switch to using the async directo IO code path in fuse_direct_read_iter() and fuse_direct_write_iter(). This is especially important in connection with loop devices with direct IO enabled as loop assumes async direct io is actually async. Signed-off-by: Martin Raiber Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b2a4fab08cb4..383843c7c0e9 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1436,10 +1436,26 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io, return res; } +static ssize_t fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter); + static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to) { - struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); - return __fuse_direct_read(&io, to, &iocb->ki_pos); + ssize_t res; + + if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) { + struct file *file = iocb->ki_filp; + + if (is_bad_inode(file_inode(file))) + return -EIO; + + res = fuse_direct_IO(iocb, to); + } else { + struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); + + res = __fuse_direct_read(&io, to, &iocb->ki_pos); + } + + return res; } static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) @@ -1454,8 +1470,14 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Don't allow parallel writes to the same file */ inode_lock(inode); res = generic_write_checks(iocb, from); - if (res > 0) - res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); + if (res > 0) { + if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) { + res = fuse_direct_IO(iocb, from); + } else { + res = fuse_direct_io(&io, from, &iocb->ki_pos, + FUSE_DIO_WRITE); + } + } fuse_invalidate_attr(inode); if (res > 0) fuse_write_update_size(inode, iocb->ki_pos); -- cgit v1.2.3-59-g8ed1b From 3c3db095b68c5df901d837a01a69dcd2693f85f6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 24 Jan 2019 10:40:17 +0100 Subject: fuse: use iov_iter based generic splice helpers The default splice implementation is grossly inefficient and the iter based ones work just fine, so use those instead. I've measured an 8x speedup for splice write (with len = 128k). Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 383843c7c0e9..3e9e57c765f7 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3156,6 +3156,7 @@ static const struct file_operations fuse_file_operations = { .lock = fuse_file_lock, .flock = fuse_file_flock, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .unlocked_ioctl = fuse_file_ioctl, .compat_ioctl = fuse_file_compat_ioctl, .poll = fuse_file_poll, @@ -3174,11 +3175,12 @@ static const struct file_operations fuse_direct_io_file_operations = { .fsync = fuse_fsync, .lock = fuse_file_lock, .flock = fuse_file_flock, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .unlocked_ioctl = fuse_file_ioctl, .compat_ioctl = fuse_file_compat_ioctl, .poll = fuse_file_poll, .fallocate = fuse_file_fallocate, - /* no splice_read */ }; static const struct address_space_operations fuse_file_aops = { -- cgit v1.2.3-59-g8ed1b From d4136d60751a5f45f47f1c3a77f6e8bafa11be1f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 24 Jan 2019 10:40:17 +0100 Subject: fuse add copy_file_range to direct io fops Nothing preventing copy_file_range to work on files opened with FOPEN_DIRECT_IO. Fixes: 88bc7d5097a1 ("fuse: add support for copy_file_range()") Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3e9e57c765f7..c86266d4eac3 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3181,6 +3181,7 @@ static const struct file_operations fuse_direct_io_file_operations = { .compat_ioctl = fuse_file_compat_ioctl, .poll = fuse_file_poll, .fallocate = fuse_file_fallocate, + .copy_file_range = fuse_copy_file_range, }; static const struct address_space_operations fuse_file_aops = { -- cgit v1.2.3-59-g8ed1b From 55752a3aba1387887afa024a0732f8ae52fb0645 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 24 Jan 2019 10:40:17 +0100 Subject: fuse: multiplex cached/direct_io file operations This is cleanup, as well as allowing switching between I/O modes while the file is open in the future. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 71 ++++++++++++++++++++++++++++------------------------------ 1 file changed, 34 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c86266d4eac3..e5dfc5e4b999 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -19,8 +19,6 @@ #include #include -static const struct file_operations fuse_direct_io_file_operations; - static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, int opcode, struct fuse_open_out *outargp) { @@ -174,8 +172,6 @@ void fuse_finish_open(struct inode *inode, struct file *file) struct fuse_file *ff = file->private_data; struct fuse_conn *fc = get_fuse_conn(inode); - if (ff->open_flags & FOPEN_DIRECT_IO) - file->f_op = &fuse_direct_io_file_operations; if (!(ff->open_flags & FOPEN_KEEP_CACHE)) invalidate_inode_pages2(inode->i_mapping); if (ff->open_flags & FOPEN_NONSEEKABLE) @@ -929,7 +925,7 @@ out: return err; } -static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = iocb->ki_filp->f_mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); @@ -1183,7 +1179,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, return res > 0 ? res : err; } -static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -1486,6 +1482,26 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) return res; } +static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct fuse_file *ff = iocb->ki_filp->private_data; + + if (!(ff->open_flags & FOPEN_DIRECT_IO)) + return fuse_cache_read_iter(iocb, to); + else + return fuse_direct_read_iter(iocb, to); +} + +static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct fuse_file *ff = iocb->ki_filp->private_data; + + if (!(ff->open_flags & FOPEN_DIRECT_IO)) + return fuse_cache_write_iter(iocb, from); + else + return fuse_direct_write_iter(iocb, from); +} + static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) { int i; @@ -2129,6 +2145,18 @@ static const struct vm_operations_struct fuse_file_vm_ops = { static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct fuse_file *ff = file->private_data; + + if (ff->open_flags & FOPEN_DIRECT_IO) { + /* Can't provide the coherency needed for MAP_SHARED */ + if (vma->vm_flags & VM_MAYSHARE) + return -ENODEV; + + invalidate_inode_pages2(file->f_mapping); + + return generic_file_mmap(file, vma); + } + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) fuse_link_write_file(file); @@ -2137,17 +2165,6 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma) -{ - /* Can't provide the coherency needed for MAP_SHARED */ - if (vma->vm_flags & VM_MAYSHARE) - return -ENODEV; - - invalidate_inode_pages2(file->f_mapping); - - return generic_file_mmap(file, vma); -} - static int convert_fuse_file_lock(struct fuse_conn *fc, const struct fuse_file_lock *ffl, struct file_lock *fl) @@ -3164,26 +3181,6 @@ static const struct file_operations fuse_file_operations = { .copy_file_range = fuse_copy_file_range, }; -static const struct file_operations fuse_direct_io_file_operations = { - .llseek = fuse_file_llseek, - .read_iter = fuse_direct_read_iter, - .write_iter = fuse_direct_write_iter, - .mmap = fuse_direct_mmap, - .open = fuse_open, - .flush = fuse_flush, - .release = fuse_release, - .fsync = fuse_fsync, - .lock = fuse_file_lock, - .flock = fuse_file_flock, - .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, - .unlocked_ioctl = fuse_file_ioctl, - .compat_ioctl = fuse_file_compat_ioctl, - .poll = fuse_file_poll, - .fallocate = fuse_file_fallocate, - .copy_file_range = fuse_copy_file_range, -}; - static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, .writepage = fuse_writepage, -- cgit v1.2.3-59-g8ed1b From 2f7b6f5bed01a3fd2abcc20d2c85b7c532eb95cd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 24 Jan 2019 10:40:17 +0100 Subject: fuse: lift bad inode checks into callers Bad inode checks were done done in various places, and move them into fuse_file_{read|write}_iter(). Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e5dfc5e4b999..8ee0446a8322 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1131,9 +1131,6 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, int err = 0; ssize_t res = 0; - if (is_bad_inode(inode)) - return -EIO; - if (inode->i_size < pos + iov_iter_count(ii)) set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); @@ -1422,9 +1419,6 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io, ssize_t res; struct inode *inode = file_inode(io->iocb->ki_filp); - if (is_bad_inode(inode)) - return -EIO; - res = fuse_direct_io(io, iter, ppos, 0); fuse_invalidate_atime(inode); @@ -1439,11 +1433,6 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to) ssize_t res; if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) { - struct file *file = iocb->ki_filp; - - if (is_bad_inode(file_inode(file))) - return -EIO; - res = fuse_direct_IO(iocb, to); } else { struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); @@ -1460,9 +1449,6 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); ssize_t res; - if (is_bad_inode(inode)) - return -EIO; - /* Don't allow parallel writes to the same file */ inode_lock(inode); res = generic_write_checks(iocb, from); @@ -1484,7 +1470,11 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { - struct fuse_file *ff = iocb->ki_filp->private_data; + struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; + + if (is_bad_inode(file_inode(file))) + return -EIO; if (!(ff->open_flags & FOPEN_DIRECT_IO)) return fuse_cache_read_iter(iocb, to); @@ -1494,7 +1484,11 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { - struct fuse_file *ff = iocb->ki_filp->private_data; + struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; + + if (is_bad_inode(file_inode(file))) + return -EIO; if (!(ff->open_flags & FOPEN_DIRECT_IO)) return fuse_cache_write_iter(iocb, from); -- cgit v1.2.3-59-g8ed1b From d9a9ea94f748f47b1d75c6c5e33edcf74476c445 Mon Sep 17 00:00:00 2001 From: Chad Austin Date: Mon, 7 Jan 2019 16:53:17 -0800 Subject: fuse: support clients that don't implement 'opendir' Allow filesystems to return ENOSYS from opendir, preventing the kernel from sending opendir and releasedir messages in the future. This avoids userspace transitions when filesystems don't need to keep track of state per directory handle. A new capability flag, FUSE_NO_OPENDIR_SUPPORT, parallels FUSE_NO_OPEN_SUPPORT, indicating the new semantics for returning ENOSYS from opendir. Signed-off-by: Chad Austin Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 11 +++++++---- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 3 ++- include/uapi/linux/fuse.h | 7 ++++++- 4 files changed, 18 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8ee0446a8322..cc6ffd23b80f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -90,7 +90,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir) if (refcount_dec_and_test(&ff->count)) { struct fuse_req *req = ff->reserved_req; - if (ff->fc->no_open && !isdir) { + if (isdir ? ff->fc->no_opendir : ff->fc->no_open) { /* * Drop the release request when client does not * implement 'open' @@ -125,7 +125,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, ff->fh = 0; ff->open_flags = FOPEN_KEEP_CACHE; /* Default for no-open */ - if (!fc->no_open || isdir) { + if (isdir ? !fc->no_opendir : !fc->no_open) { struct fuse_open_out outarg; int err; @@ -134,11 +134,14 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, ff->fh = outarg.fh; ff->open_flags = outarg.open_flags; - } else if (err != -ENOSYS || isdir) { + } else if (err != -ENOSYS) { fuse_file_free(ff); return err; } else { - fc->no_open = 1; + if (isdir) + fc->no_opendir = 1; + else + fc->no_open = 1; } } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 033e30af519f..0920c0c032a0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -630,6 +630,9 @@ struct fuse_conn { /** Is open/release not implemented by fs? */ unsigned no_open:1; + /** Is opendir/releasedir not implemented by fs? */ + unsigned no_opendir:1; + /** Is fsync not implemented by fs? */ unsigned no_fsync:1; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2bbb7c59d6da..1b3f3b67d9f0 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -972,7 +972,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | - FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS; + FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | + FUSE_NO_OPENDIR_SUPPORT; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index b4967d48bfda..2ac598614a8f 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -122,6 +122,9 @@ * - add FOPEN_CACHE_DIR * - add FUSE_MAX_PAGES, add max_pages to init_out * - add FUSE_CACHE_SYMLINKS + * + * 7.29 + * - add FUSE_NO_OPENDIR_SUPPORT flag */ #ifndef _LINUX_FUSE_H @@ -157,7 +160,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 28 +#define FUSE_KERNEL_MINOR_VERSION 29 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -259,6 +262,7 @@ struct fuse_file_lock { * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages * FUSE_CACHE_SYMLINKS: cache READLINK responses + * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -284,6 +288,7 @@ struct fuse_file_lock { #define FUSE_ABORT_ERROR (1 << 21) #define FUSE_MAX_PAGES (1 << 22) #define FUSE_CACHE_SYMLINKS (1 << 23) +#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) /** * CUSE INIT request/reply flags -- cgit v1.2.3-59-g8ed1b From fabf7e0262d0bd57739d29aeac94c44b0542ff1f Mon Sep 17 00:00:00 2001 From: Chad Austin Date: Mon, 28 Jan 2019 16:34:34 -0800 Subject: fuse: cache readdir calls if filesystem opts out of opendir If a filesystem returns ENOSYS from opendir and thus opts out of opendir and releasedir requests, it almost certainly would also like readdir results cached. Default open_flags to FOPEN_KEEP_CACHE and FOPEN_CACHE_DIR in that case. With this patch, I've measured recursive directory enumeration across large FUSE mounts to be faster than native mounts. Signed-off-by: Chad Austin Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index cc6ffd23b80f..06096b60f1df 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -124,7 +124,8 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, return -ENOMEM; ff->fh = 0; - ff->open_flags = FOPEN_KEEP_CACHE; /* Default for no-open */ + /* Default for no-open */ + ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0); if (isdir ? !fc->no_opendir : !fc->no_open) { struct fuse_open_out outarg; int err; -- cgit v1.2.3-59-g8ed1b