author      2021-02-22 21:35:15 -0800
committer   2021-02-22 21:35:15 -0800
commit      cbecf716ca618fd44feda6bd9a64a8179d031fc5 (patch)
tree        186c9f69f0d11f773253c440dac85087f67288b7 /mm/filemap.c
parent      Input: st1232 - add IDLE state as ready condition (diff)
parent      Input: aiptek - convert sysfs sprintf/snprintf family to sysfs_emit (diff)
Merge branch 'next' into for-linus
Prepare input updates for 5.12 merge window.
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--    mm/filemap.c    989
1 file changed, 448 insertions(+), 541 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c index 99c49eeae71b..aa0e0fb04670 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -102,8 +102,8 @@ * ->swap_lock (try_to_unmap_one) * ->private_lock (try_to_unmap_one) * ->i_pages lock (try_to_unmap_one) - * ->pgdat->lru_lock (follow_page->mark_page_accessed) - * ->pgdat->lru_lock (check_pte_range->isolate_lru_page) + * ->lruvec->lru_lock (follow_page->mark_page_accessed) + * ->lruvec->lru_lock (check_pte_range->isolate_lru_page) * ->private_lock (page_remove_rmap->set_page_dirty) * ->i_pages lock (page_remove_rmap->set_page_dirty) * bdi.wb->list_lock (page_remove_rmap->set_page_dirty) @@ -204,9 +204,9 @@ static void unaccount_page_cache_page(struct address_space *mapping, if (PageSwapBacked(page)) { __mod_lruvec_page_state(page, NR_SHMEM, -nr); if (PageTransHuge(page)) - __dec_node_page_state(page, NR_SHMEM_THPS); + __dec_lruvec_page_state(page, NR_SHMEM_THPS); } else if (PageTransHuge(page)) { - __dec_node_page_state(page, NR_FILE_THPS); + __dec_lruvec_page_state(page, NR_FILE_THPS); filemap_nr_thps_dec(mapping); } @@ -249,7 +249,7 @@ static void page_cache_free_page(struct address_space *mapping, freepage(page); if (PageTransHuge(page) && !PageHuge(page)) { - page_ref_sub(page, HPAGE_PMD_NR); + page_ref_sub(page, thp_nr_pages(page)); VM_BUG_ON_PAGE(page_count(page) <= 0, page); } else { put_page(page); @@ -414,7 +414,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, .range_end = end, }; - if (!mapping_cap_writeback_dirty(mapping) || + if (!mapping_can_writeback(mapping) || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) return 0; @@ -827,15 +827,15 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -static int __add_to_page_cache_locked(struct page *page, - struct address_space *mapping, - pgoff_t offset, gfp_t gfp_mask, - void **shadowp) +noinline int __add_to_page_cache_locked(struct page *page, + struct address_space *mapping, + pgoff_t offset, gfp_t gfp, + void **shadowp) { XA_STATE(xas, &mapping->i_pages, offset); int huge = PageHuge(page); int error; - void *old; + bool charged = false; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapBacked(page), page); @@ -846,25 +846,47 @@ static int __add_to_page_cache_locked(struct page *page, page->index = offset; if (!huge) { - error = mem_cgroup_charge(page, current->mm, gfp_mask); + error = mem_cgroup_charge(page, current->mm, gfp); if (error) goto error; + charged = true; } + gfp &= GFP_RECLAIM_MASK; + do { + unsigned int order = xa_get_order(xas.xa, xas.xa_index); + void *entry, *old = NULL; + + if (order > thp_order(page)) + xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index), + order, gfp); xas_lock_irq(&xas); - old = xas_load(&xas); - if (old && !xa_is_value(old)) - xas_set_err(&xas, -EEXIST); + xas_for_each_conflict(&xas, entry) { + old = entry; + if (!xa_is_value(entry)) { + xas_set_err(&xas, -EEXIST); + goto unlock; + } + } + + if (old) { + if (shadowp) + *shadowp = old; + /* entry may have been split before we acquired lock */ + order = xa_get_order(xas.xa, xas.xa_index); + if (order > thp_order(page)) { + xas_split(&xas, old, order); + xas_reset(&xas); + } + } + xas_store(&xas, page); if (xas_error(&xas)) goto unlock; - if (xa_is_value(old)) { + if (old) mapping->nrexceptional--; - if (shadowp) - *shadowp = old; - } mapping->nrpages++; /* hugetlb pages do not participate in page cache accounting */ @@ -872,10 +894,12 @@ static int __add_to_page_cache_locked(struct 
page *page, __inc_lruvec_page_state(page, NR_FILE_PAGES); unlock: xas_unlock_irq(&xas); - } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK)); + } while (xas_nomem(&xas, gfp)); if (xas_error(&xas)) { error = xas_error(&xas); + if (charged) + mem_cgroup_uncharge(page); goto error; } @@ -1339,7 +1363,7 @@ static int __wait_on_page_locked_async(struct page *page, else ret = PageLocked(page); /* - * If we were succesful now, we know we're still on the + * If we were successful now, we know we're still on the * waitqueue as we're still under the lock. This means it's * safe to remove and return success, we know the callback * isn't going to trigger. @@ -1425,7 +1449,7 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem * unlock_page - unlock a locked page * @page: the page * - * Unlocks the page and wakes up sleepers in ___wait_on_page_locked(). + * Unlocks the page and wakes up sleepers in wait_on_page_locked(). * Also wakes sleepers in wait_on_page_writeback() because the wakeup * mechanism between PageLocked pages and PageWriteback pages is shared. * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. @@ -1464,11 +1488,19 @@ void end_page_writeback(struct page *page) rotate_reclaimable_page(page); } + /* + * Writeback does not hold a page reference of its own, relying + * on truncation to wait for the clearing of PG_writeback. + * But here we must make sure that the page is not freed and + * reused before the wake_up_page(). + */ + get_page(page); if (!test_clear_page_writeback(page)) BUG(); smp_mb__after_atomic(); wake_up_page(page, PG_writeback); + put_page(page); } EXPORT_SYMBOL(end_page_writeback); @@ -1555,19 +1587,20 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, else wait_on_page_locked(page); return 0; - } else { - if (flags & FAULT_FLAG_KILLABLE) { - int ret; + } + if (flags & FAULT_FLAG_KILLABLE) { + int ret; - ret = __lock_page_killable(page); - if (ret) { - mmap_read_unlock(mm); - return 0; - } - } else - __lock_page(page); - return 1; + ret = __lock_page_killable(page); + if (ret) { + mmap_read_unlock(mm); + return 0; + } + } else { + __lock_page(page); } + return 1; + } /** @@ -1645,19 +1678,19 @@ EXPORT_SYMBOL(page_cache_prev_miss); /** * find_get_entry - find and get a page cache entry * @mapping: the address_space to search - * @offset: the page cache index + * @index: The page cache index. * * Looks up the page cache slot at @mapping & @offset. If there is a - * page cache page, it is returned with an increased refcount. + * page cache page, the head page is returned with an increased refcount. * * If the slot holds a shadow entry of a previously evicted page, or a * swap entry from shmem/tmpfs, it is returned. * - * Return: the found page or shadow entry, %NULL if nothing is found. + * Return: The head page or shadow entry, %NULL if nothing is found. */ -struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) +struct page *find_get_entry(struct address_space *mapping, pgoff_t index) { - XA_STATE(xas, &mapping->i_pages, offset); + XA_STATE(xas, &mapping->i_pages, index); struct page *page; rcu_read_lock(); @@ -1685,7 +1718,6 @@ repeat: put_page(page); goto repeat; } - page = find_subpage(page, offset); out: rcu_read_unlock(); @@ -1693,40 +1725,37 @@ out: } /** - * find_lock_entry - locate, pin and lock a page cache entry - * @mapping: the address_space to search - * @offset: the page cache index + * find_lock_entry - Locate and lock a page cache entry. 
+ * @mapping: The address_space to search. + * @index: The page cache index. * - * Looks up the page cache slot at @mapping & @offset. If there is a - * page cache page, it is returned locked and with an increased - * refcount. + * Looks up the page at @mapping & @index. If there is a page in the + * cache, the head page is returned locked and with an increased refcount. * * If the slot holds a shadow entry of a previously evicted page, or a * swap entry from shmem/tmpfs, it is returned. * - * find_lock_entry() may sleep. - * - * Return: the found page or shadow entry, %NULL if nothing is found. + * Context: May sleep. + * Return: The head page or shadow entry, %NULL if nothing is found. */ -struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset) +struct page *find_lock_entry(struct address_space *mapping, pgoff_t index) { struct page *page; repeat: - page = find_get_entry(mapping, offset); + page = find_get_entry(mapping, index); if (page && !xa_is_value(page)) { lock_page(page); /* Has the page been truncated? */ - if (unlikely(page_mapping(page) != mapping)) { + if (unlikely(page->mapping != mapping)) { unlock_page(page); put_page(page); goto repeat; } - VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page); + VM_BUG_ON_PAGE(!thp_contains(page, index), page); } return page; } -EXPORT_SYMBOL(find_lock_entry); /** * pagecache_get_page - Find and get a reference to a page. @@ -1741,6 +1770,8 @@ EXPORT_SYMBOL(find_lock_entry); * * * %FGP_ACCESSED - The page will be marked accessed. * * %FGP_LOCK - The page is returned locked. + * * %FGP_HEAD - If the page is present and a THP, return the head page + * rather than the exact page specified by the index. * * %FGP_CREAT - If no page is present then a new page is allocated using * @gfp_mask and added to the page cache and the VM's LRU list. * The page is returned locked and with an increased refcount. @@ -1781,12 +1812,12 @@ repeat: } /* Has the page been truncated? */ - if (unlikely(compound_head(page)->mapping != mapping)) { + if (unlikely(page->mapping != mapping)) { unlock_page(page); put_page(page); goto repeat; } - VM_BUG_ON_PAGE(page->index != index, page); + VM_BUG_ON_PAGE(!thp_contains(page, index), page); } if (fgp_flags & FGP_ACCESSED) @@ -1796,11 +1827,13 @@ repeat: if (page_is_idle(page)) clear_page_idle(page); } + if (!(fgp_flags & FGP_HEAD)) + page = find_subpage(page, index); no_page: if (!page && (fgp_flags & FGP_CREAT)) { int err; - if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping)) + if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping)) gfp_mask |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) gfp_mask &= ~__GFP_FS; @@ -2138,6 +2171,259 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra) ra->ra_pages /= 4; } +static int lock_page_for_iocb(struct kiocb *iocb, struct page *page) +{ + if (iocb->ki_flags & IOCB_WAITQ) + return lock_page_async(page, iocb->ki_waitq); + else if (iocb->ki_flags & IOCB_NOWAIT) + return trylock_page(page) ? 0 : -EAGAIN; + else + return lock_page_killable(page); +} + +static struct page * +generic_file_buffered_read_readpage(struct kiocb *iocb, + struct file *filp, + struct address_space *mapping, + struct page *page) +{ + struct file_ra_state *ra = &filp->f_ra; + int error; + + if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) { + unlock_page(page); + put_page(page); + return ERR_PTR(-EAGAIN); + } + + /* + * A previous I/O error may have been due to temporary + * failures, eg. multipath errors. + * PG_error will be set again if readpage fails. 
+ */ + ClearPageError(page); + /* Start the actual read. The read will unlock the page. */ + error = mapping->a_ops->readpage(filp, page); + + if (unlikely(error)) { + put_page(page); + return error != AOP_TRUNCATED_PAGE ? ERR_PTR(error) : NULL; + } + + if (!PageUptodate(page)) { + error = lock_page_for_iocb(iocb, page); + if (unlikely(error)) { + put_page(page); + return ERR_PTR(error); + } + if (!PageUptodate(page)) { + if (page->mapping == NULL) { + /* + * invalidate_mapping_pages got it + */ + unlock_page(page); + put_page(page); + return NULL; + } + unlock_page(page); + shrink_readahead_size_eio(ra); + put_page(page); + return ERR_PTR(-EIO); + } + unlock_page(page); + } + + return page; +} + +static struct page * +generic_file_buffered_read_pagenotuptodate(struct kiocb *iocb, + struct file *filp, + struct iov_iter *iter, + struct page *page, + loff_t pos, loff_t count) +{ + struct address_space *mapping = filp->f_mapping; + struct inode *inode = mapping->host; + int error; + + /* + * See comment in do_read_cache_page on why + * wait_on_page_locked is used to avoid unnecessarily + * serialisations and why it's safe. + */ + if (iocb->ki_flags & IOCB_WAITQ) { + error = wait_on_page_locked_async(page, + iocb->ki_waitq); + } else { + error = wait_on_page_locked_killable(page); + } + if (unlikely(error)) { + put_page(page); + return ERR_PTR(error); + } + if (PageUptodate(page)) + return page; + + if (inode->i_blkbits == PAGE_SHIFT || + !mapping->a_ops->is_partially_uptodate) + goto page_not_up_to_date; + /* pipes can't handle partially uptodate pages */ + if (unlikely(iov_iter_is_pipe(iter))) + goto page_not_up_to_date; + if (!trylock_page(page)) + goto page_not_up_to_date; + /* Did it get truncated before we got the lock? */ + if (!page->mapping) + goto page_not_up_to_date_locked; + if (!mapping->a_ops->is_partially_uptodate(page, + pos & ~PAGE_MASK, count)) + goto page_not_up_to_date_locked; + unlock_page(page); + return page; + +page_not_up_to_date: + /* Get exclusive access to the page ... */ + error = lock_page_for_iocb(iocb, page); + if (unlikely(error)) { + put_page(page); + return ERR_PTR(error); + } + +page_not_up_to_date_locked: + /* Did it get truncated before we got the lock? */ + if (!page->mapping) { + unlock_page(page); + put_page(page); + return NULL; + } + + /* Did somebody else fill it already? */ + if (PageUptodate(page)) { + unlock_page(page); + return page; + } + + return generic_file_buffered_read_readpage(iocb, filp, mapping, page); +} + +static struct page * +generic_file_buffered_read_no_cached_page(struct kiocb *iocb, + struct iov_iter *iter) +{ + struct file *filp = iocb->ki_filp; + struct address_space *mapping = filp->f_mapping; + pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; + struct page *page; + int error; + + if (iocb->ki_flags & IOCB_NOIO) + return ERR_PTR(-EAGAIN); + + /* + * Ok, it wasn't cached, so we need to create a new + * page.. + */ + page = page_cache_alloc(mapping); + if (!page) + return ERR_PTR(-ENOMEM); + + error = add_to_page_cache_lru(page, mapping, index, + mapping_gfp_constraint(mapping, GFP_KERNEL)); + if (error) { + put_page(page); + return error != -EEXIST ? 
ERR_PTR(error) : NULL; + } + + return generic_file_buffered_read_readpage(iocb, filp, mapping, page); +} + +static int generic_file_buffered_read_get_pages(struct kiocb *iocb, + struct iov_iter *iter, + struct page **pages, + unsigned int nr) +{ + struct file *filp = iocb->ki_filp; + struct address_space *mapping = filp->f_mapping; + struct file_ra_state *ra = &filp->f_ra; + pgoff_t index = iocb->ki_pos >> PAGE_SHIFT; + pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; + int i, j, nr_got, err = 0; + + nr = min_t(unsigned long, last_index - index, nr); +find_page: + if (fatal_signal_pending(current)) + return -EINTR; + + nr_got = find_get_pages_contig(mapping, index, nr, pages); + if (nr_got) + goto got_pages; + + if (iocb->ki_flags & IOCB_NOIO) + return -EAGAIN; + + page_cache_sync_readahead(mapping, ra, filp, index, last_index - index); + + nr_got = find_get_pages_contig(mapping, index, nr, pages); + if (nr_got) + goto got_pages; + + pages[0] = generic_file_buffered_read_no_cached_page(iocb, iter); + err = PTR_ERR_OR_ZERO(pages[0]); + if (!IS_ERR_OR_NULL(pages[0])) + nr_got = 1; +got_pages: + for (i = 0; i < nr_got; i++) { + struct page *page = pages[i]; + pgoff_t pg_index = index + i; + loff_t pg_pos = max(iocb->ki_pos, + (loff_t) pg_index << PAGE_SHIFT); + loff_t pg_count = iocb->ki_pos + iter->count - pg_pos; + + if (PageReadahead(page)) { + if (iocb->ki_flags & IOCB_NOIO) { + for (j = i; j < nr_got; j++) + put_page(pages[j]); + nr_got = i; + err = -EAGAIN; + break; + } + page_cache_async_readahead(mapping, ra, filp, page, + pg_index, last_index - pg_index); + } + + if (!PageUptodate(page)) { + if ((iocb->ki_flags & IOCB_NOWAIT) || + ((iocb->ki_flags & IOCB_WAITQ) && i)) { + for (j = i; j < nr_got; j++) + put_page(pages[j]); + nr_got = i; + err = -EAGAIN; + break; + } + + page = generic_file_buffered_read_pagenotuptodate(iocb, + filp, iter, page, pg_pos, pg_count); + if (IS_ERR_OR_NULL(page)) { + for (j = i + 1; j < nr_got; j++) + put_page(pages[j]); + nr_got = i; + err = PTR_ERR_OR_ZERO(page); + break; + } + } + } + + if (likely(nr_got)) + return nr_got; + if (err) + return err; + /* + * No pages and no error means we raced and should retry: + */ + goto find_page; +} + /** * generic_file_buffered_read - generic file read routine * @iocb: the iocb to read @@ -2158,276 +2444,120 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb, struct iov_iter *iter, ssize_t written) { struct file *filp = iocb->ki_filp; + struct file_ra_state *ra = &filp->f_ra; struct address_space *mapping = filp->f_mapping; struct inode *inode = mapping->host; - struct file_ra_state *ra = &filp->f_ra; - loff_t *ppos = &iocb->ki_pos; - pgoff_t index; - pgoff_t last_index; - pgoff_t prev_index; - unsigned long offset; /* offset into pagecache page */ - unsigned int prev_offset; - int error = 0; - - if (unlikely(*ppos >= inode->i_sb->s_maxbytes)) + struct page *pages_onstack[PAGEVEC_SIZE], **pages = NULL; + unsigned int nr_pages = min_t(unsigned int, 512, + ((iocb->ki_pos + iter->count + PAGE_SIZE - 1) >> PAGE_SHIFT) - + (iocb->ki_pos >> PAGE_SHIFT)); + int i, pg_nr, error = 0; + bool writably_mapped; + loff_t isize, end_offset; + + if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes)) + return 0; + if (unlikely(!iov_iter_count(iter))) return 0; + iov_iter_truncate(iter, inode->i_sb->s_maxbytes); - index = *ppos >> PAGE_SHIFT; - prev_index = ra->prev_pos >> PAGE_SHIFT; - prev_offset = ra->prev_pos & (PAGE_SIZE-1); - last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT; - 
offset = *ppos & ~PAGE_MASK; + if (nr_pages > ARRAY_SIZE(pages_onstack)) + pages = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL); - for (;;) { - struct page *page; - pgoff_t end_index; - loff_t isize; - unsigned long nr, ret; + if (!pages) { + pages = pages_onstack; + nr_pages = min_t(unsigned int, nr_pages, ARRAY_SIZE(pages_onstack)); + } + do { cond_resched(); -find_page: - if (fatal_signal_pending(current)) { - error = -EINTR; - goto out; - } - page = find_get_page(mapping, index); - if (!page) { - if (iocb->ki_flags & IOCB_NOIO) - goto would_block; - page_cache_sync_readahead(mapping, - ra, filp, - index, last_index - index); - page = find_get_page(mapping, index); - if (unlikely(page == NULL)) - goto no_cached_page; - } - if (PageReadahead(page)) { - if (iocb->ki_flags & IOCB_NOIO) { - put_page(page); - goto out; - } - page_cache_async_readahead(mapping, - ra, filp, page, - index, last_index - index); - } - if (!PageUptodate(page)) { - /* - * See comment in do_read_cache_page on why - * wait_on_page_locked is used to avoid unnecessarily - * serialisations and why it's safe. - */ - if (iocb->ki_flags & IOCB_WAITQ) { - if (written) { - put_page(page); - goto out; - } - error = wait_on_page_locked_async(page, - iocb->ki_waitq); - } else { - if (iocb->ki_flags & IOCB_NOWAIT) { - put_page(page); - goto would_block; - } - error = wait_on_page_locked_killable(page); - } - if (unlikely(error)) - goto readpage_error; - if (PageUptodate(page)) - goto page_ok; - - if (inode->i_blkbits == PAGE_SHIFT || - !mapping->a_ops->is_partially_uptodate) - goto page_not_up_to_date; - /* pipes can't handle partially uptodate pages */ - if (unlikely(iov_iter_is_pipe(iter))) - goto page_not_up_to_date; - if (!trylock_page(page)) - goto page_not_up_to_date; - /* Did it get truncated before we got the lock? */ - if (!page->mapping) - goto page_not_up_to_date_locked; - if (!mapping->a_ops->is_partially_uptodate(page, - offset, iter->count)) - goto page_not_up_to_date_locked; - unlock_page(page); + /* + * If we've already successfully copied some data, then we + * can no longer safely return -EIOCBQUEUED. Hence mark + * an async read NOWAIT at that point. + */ + if ((iocb->ki_flags & IOCB_WAITQ) && written) + iocb->ki_flags |= IOCB_NOWAIT; + + i = 0; + pg_nr = generic_file_buffered_read_get_pages(iocb, iter, + pages, nr_pages); + if (pg_nr < 0) { + error = pg_nr; + break; } -page_ok: + /* - * i_size must be checked after we know the page is Uptodate. + * i_size must be checked after we know the pages are Uptodate. * * Checking i_size after the check allows us to calculate * the correct value for "nr", which means the zero-filled * part of the page is not copied back to userspace (unless * another truncate extends the file - this is desired though). */ - isize = i_size_read(inode); - end_index = (isize - 1) >> PAGE_SHIFT; - if (unlikely(!isize || index > end_index)) { - put_page(page); - goto out; - } + if (unlikely(iocb->ki_pos >= isize)) + goto put_pages; - /* nr is the maximum number of bytes to copy from this page */ - nr = PAGE_SIZE; - if (index == end_index) { - nr = ((isize - 1) & ~PAGE_MASK) + 1; - if (nr <= offset) { - put_page(page); - goto out; - } - } - nr = nr - offset; + end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count); - /* If users can be writing to this page using arbitrary - * virtual addresses, take care about potential aliasing - * before reading the page on the kernel side. 
- */ - if (mapping_writably_mapped(mapping)) - flush_dcache_page(page); + while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr > + (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT) + put_page(pages[--pg_nr]); /* - * When a sequential read accesses a page several times, - * only mark it as accessed the first time. + * Once we start copying data, we don't want to be touching any + * cachelines that might be contended: */ - if (prev_index != index || offset != prev_offset) - mark_page_accessed(page); - prev_index = index; + writably_mapped = mapping_writably_mapped(mapping); /* - * Ok, we have the page, and it's up-to-date, so - * now we can copy it to user space... + * When a sequential read accesses a page several times, only + * mark it as accessed the first time. */ + if (iocb->ki_pos >> PAGE_SHIFT != + ra->prev_pos >> PAGE_SHIFT) + mark_page_accessed(pages[0]); + for (i = 1; i < pg_nr; i++) + mark_page_accessed(pages[i]); + + for (i = 0; i < pg_nr; i++) { + unsigned int offset = iocb->ki_pos & ~PAGE_MASK; + unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos, + PAGE_SIZE - offset); + unsigned int copied; - ret = copy_page_to_iter(page, offset, nr, iter); - offset += ret; - index += offset >> PAGE_SHIFT; - offset &= ~PAGE_MASK; - prev_offset = offset; - - put_page(page); - written += ret; - if (!iov_iter_count(iter)) - goto out; - if (ret < nr) { - error = -EFAULT; - goto out; - } - continue; - -page_not_up_to_date: - /* Get exclusive access to the page ... */ - if (iocb->ki_flags & IOCB_WAITQ) - error = lock_page_async(page, iocb->ki_waitq); - else - error = lock_page_killable(page); - if (unlikely(error)) - goto readpage_error; - -page_not_up_to_date_locked: - /* Did it get truncated before we got the lock? */ - if (!page->mapping) { - unlock_page(page); - put_page(page); - continue; - } - - /* Did somebody else fill it already? */ - if (PageUptodate(page)) { - unlock_page(page); - goto page_ok; - } + /* + * If users can be writing to this page using arbitrary + * virtual addresses, take care about potential aliasing + * before reading the page on the kernel side. + */ + if (writably_mapped) + flush_dcache_page(pages[i]); -readpage: - if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) { - unlock_page(page); - put_page(page); - goto would_block; - } - /* - * A previous I/O error may have been due to temporary - * failures, eg. multipath errors. - * PG_error will be set again if readpage fails. - */ - ClearPageError(page); - /* Start the actual read. The read will unlock the page. 
*/ - error = mapping->a_ops->readpage(filp, page); + copied = copy_page_to_iter(pages[i], offset, bytes, iter); - if (unlikely(error)) { - if (error == AOP_TRUNCATED_PAGE) { - put_page(page); - error = 0; - goto find_page; - } - goto readpage_error; - } + written += copied; + iocb->ki_pos += copied; + ra->prev_pos = iocb->ki_pos; - if (!PageUptodate(page)) { - if (iocb->ki_flags & IOCB_WAITQ) - error = lock_page_async(page, iocb->ki_waitq); - else - error = lock_page_killable(page); - - if (unlikely(error)) - goto readpage_error; - if (!PageUptodate(page)) { - if (page->mapping == NULL) { - /* - * invalidate_mapping_pages got it - */ - unlock_page(page); - put_page(page); - goto find_page; - } - unlock_page(page); - shrink_readahead_size_eio(ra); - error = -EIO; - goto readpage_error; + if (copied < bytes) { + error = -EFAULT; + break; } - unlock_page(page); } +put_pages: + for (i = 0; i < pg_nr; i++) + put_page(pages[i]); + } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error); - goto page_ok; - -readpage_error: - /* UHHUH! A synchronous read error occurred. Report it */ - put_page(page); - goto out; - -no_cached_page: - /* - * Ok, it wasn't cached, so we need to create a new - * page.. - */ - page = page_cache_alloc(mapping); - if (!page) { - error = -ENOMEM; - goto out; - } - error = add_to_page_cache_lru(page, mapping, index, - mapping_gfp_constraint(mapping, GFP_KERNEL)); - if (error) { - put_page(page); - if (error == -EEXIST) { - error = 0; - goto find_page; - } - goto out; - } - goto readpage; - } + file_accessed(filp); -would_block: - error = -EAGAIN; -out: - ra->prev_pos = prev_index; - ra->prev_pos <<= PAGE_SHIFT; - ra->prev_pos |= prev_offset; + if (pages != pages_onstack) + kfree(pages); - *ppos = ((loff_t)index << PAGE_SHIFT) + offset; - file_accessed(filp); return written ? 
written : error; } EXPORT_SYMBOL_GPL(generic_file_buffered_read); @@ -2568,8 +2698,8 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) struct file *file = vmf->vma->vm_file; struct file_ra_state *ra = &file->f_ra; struct address_space *mapping = file->f_mapping; + DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff); struct file *fpin = NULL; - pgoff_t offset = vmf->pgoff; unsigned int mmap_miss; /* If we don't want any read-ahead, don't bother */ @@ -2580,8 +2710,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) if (vmf->vma->vm_flags & VM_SEQ_READ) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); - page_cache_sync_readahead(mapping, ra, file, offset, - ra->ra_pages); + page_cache_sync_ra(&ractl, ra, ra->ra_pages); return fpin; } @@ -2601,10 +2730,11 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) * mmap read-around */ fpin = maybe_unlock_mmap_for_io(vmf, fpin); - ra->start = max_t(long, 0, offset - ra->ra_pages / 2); + ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2); ra->size = ra->ra_pages; ra->async_size = ra->ra_pages / 4; - ra_submit(ra, mapping, file); + ractl._index = ra->start; + do_page_cache_ra(&ractl, ra->size, ra->async_size); return fpin; } @@ -2793,42 +2923,42 @@ void filemap_map_pages(struct vm_fault *vmf, pgoff_t last_pgoff = start_pgoff; unsigned long max_idx; XA_STATE(xas, &mapping->i_pages, start_pgoff); - struct page *page; + struct page *head, *page; unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); rcu_read_lock(); - xas_for_each(&xas, page, end_pgoff) { - if (xas_retry(&xas, page)) + xas_for_each(&xas, head, end_pgoff) { + if (xas_retry(&xas, head)) continue; - if (xa_is_value(page)) + if (xa_is_value(head)) goto next; /* * Check for a locked page first, as a speculative * reference may adversely influence page migration. */ - if (PageLocked(page)) + if (PageLocked(head)) goto next; - if (!page_cache_get_speculative(page)) + if (!page_cache_get_speculative(head)) goto next; /* Has the page moved or been split? */ - if (unlikely(page != xas_reload(&xas))) + if (unlikely(head != xas_reload(&xas))) goto skip; - page = find_subpage(page, xas.xa_index); + page = find_subpage(head, xas.xa_index); - if (!PageUptodate(page) || + if (!PageUptodate(head) || PageReadahead(page) || PageHWPoison(page)) goto skip; - if (!trylock_page(page)) + if (!trylock_page(head)) goto skip; - if (page->mapping != mapping || !PageUptodate(page)) + if (head->mapping != mapping || !PageUptodate(head)) goto unlock; max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); - if (page->index >= max_idx) + if (xas.xa_index >= max_idx) goto unlock; if (mmap_miss > 0) @@ -2840,12 +2970,12 @@ void filemap_map_pages(struct vm_fault *vmf, last_pgoff = xas.xa_index; if (alloc_set_pte(vmf, page)) goto unlock; - unlock_page(page); + unlock_page(head); goto next; unlock: - unlock_page(page); + unlock_page(head); skip: - put_page(page); + put_page(head); next: /* Huge page is mapped? No need to proceed. 
*/ if (pmd_trans_huge(*vmf->pmd)) @@ -2858,14 +2988,14 @@ EXPORT_SYMBOL(filemap_map_pages); vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) { + struct address_space *mapping = vmf->vma->vm_file->f_mapping; struct page *page = vmf->page; - struct inode *inode = file_inode(vmf->vma->vm_file); vm_fault_t ret = VM_FAULT_LOCKED; - sb_start_pagefault(inode->i_sb); + sb_start_pagefault(mapping->host->i_sb); file_update_time(vmf->vma->vm_file); lock_page(page); - if (page->mapping != inode->i_mapping) { + if (page->mapping != mapping) { unlock_page(page); ret = VM_FAULT_NOPAGE; goto out; @@ -2878,7 +3008,7 @@ vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) set_page_dirty(page); wait_for_stable_page(page); out: - sb_end_pagefault(inode->i_sb); + sb_end_pagefault(mapping->host->i_sb); return ret; } @@ -2984,7 +3114,7 @@ filler: goto out; /* - * Page is not up to date and may be locked due one of the following + * Page is not up to date and may be locked due to one of the following * case a: Page is being filled and the page lock is held * case b: Read/write error clearing the page uptodate status * case c: Truncation in progress (page locked) @@ -3093,228 +3223,6 @@ struct page *read_cache_page_gfp(struct address_space *mapping, } EXPORT_SYMBOL(read_cache_page_gfp); -/* - * Don't operate on ranges the page cache doesn't support, and don't exceed the - * LFS limits. If pos is under the limit it becomes a short access. If it - * exceeds the limit we return -EFBIG. - */ -static int generic_write_check_limits(struct file *file, loff_t pos, - loff_t *count) -{ - struct inode *inode = file->f_mapping->host; - loff_t max_size = inode->i_sb->s_maxbytes; - loff_t limit = rlimit(RLIMIT_FSIZE); - - if (limit != RLIM_INFINITY) { - if (pos >= limit) { - send_sig(SIGXFSZ, current, 0); - return -EFBIG; - } - *count = min(*count, limit - pos); - } - - if (!(file->f_flags & O_LARGEFILE)) - max_size = MAX_NON_LFS; - - if (unlikely(pos >= max_size)) - return -EFBIG; - - *count = min(*count, max_size - pos); - - return 0; -} - -/* - * Performs necessary checks before doing a write - * - * Can adjust writing position or amount of bytes to write. - * Returns appropriate error code that caller should return or - * zero in case that write should be allowed. - */ -inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - loff_t count; - int ret; - - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - - if (!iov_iter_count(from)) - return 0; - - /* FIXME: this is for backwards compatibility with 2.4 */ - if (iocb->ki_flags & IOCB_APPEND) - iocb->ki_pos = i_size_read(inode); - - if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) - return -EINVAL; - - count = iov_iter_count(from); - ret = generic_write_check_limits(file, iocb->ki_pos, &count); - if (ret) - return ret; - - iov_iter_truncate(from, count); - return iov_iter_count(from); -} -EXPORT_SYMBOL(generic_write_checks); - -/* - * Performs necessary checks before doing a clone. - * - * Can adjust amount of bytes to clone via @req_count argument. - * Returns appropriate error code that caller should return or - * zero in case the clone should be allowed. 
- */ -int generic_remap_checks(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t *req_count, unsigned int remap_flags) -{ - struct inode *inode_in = file_in->f_mapping->host; - struct inode *inode_out = file_out->f_mapping->host; - uint64_t count = *req_count; - uint64_t bcount; - loff_t size_in, size_out; - loff_t bs = inode_out->i_sb->s_blocksize; - int ret; - - /* The start of both ranges must be aligned to an fs block. */ - if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs)) - return -EINVAL; - - /* Ensure offsets don't wrap. */ - if (pos_in + count < pos_in || pos_out + count < pos_out) - return -EINVAL; - - size_in = i_size_read(inode_in); - size_out = i_size_read(inode_out); - - /* Dedupe requires both ranges to be within EOF. */ - if ((remap_flags & REMAP_FILE_DEDUP) && - (pos_in >= size_in || pos_in + count > size_in || - pos_out >= size_out || pos_out + count > size_out)) - return -EINVAL; - - /* Ensure the infile range is within the infile. */ - if (pos_in >= size_in) - return -EINVAL; - count = min(count, size_in - (uint64_t)pos_in); - - ret = generic_write_check_limits(file_out, pos_out, &count); - if (ret) - return ret; - - /* - * If the user wanted us to link to the infile's EOF, round up to the - * next block boundary for this check. - * - * Otherwise, make sure the count is also block-aligned, having - * already confirmed the starting offsets' block alignment. - */ - if (pos_in + count == size_in) { - bcount = ALIGN(size_in, bs) - pos_in; - } else { - if (!IS_ALIGNED(count, bs)) - count = ALIGN_DOWN(count, bs); - bcount = count; - } - - /* Don't allow overlapped cloning within the same file. */ - if (inode_in == inode_out && - pos_out + bcount > pos_in && - pos_out < pos_in + bcount) - return -EINVAL; - - /* - * We shortened the request but the caller can't deal with that, so - * bounce the request back to userspace. - */ - if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN)) - return -EINVAL; - - *req_count = count; - return 0; -} - - -/* - * Performs common checks before doing a file copy/clone - * from @file_in to @file_out. - */ -int generic_file_rw_checks(struct file *file_in, struct file *file_out) -{ - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); - - /* Don't copy dirs, pipes, sockets... */ - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; - - if (!(file_in->f_mode & FMODE_READ) || - !(file_out->f_mode & FMODE_WRITE) || - (file_out->f_flags & O_APPEND)) - return -EBADF; - - return 0; -} - -/* - * Performs necessary checks before doing a file copy - * - * Can adjust amount of bytes to copy via @req_count argument. - * Returns appropriate error code that caller should return or - * zero in case the copy should be allowed. - */ -int generic_copy_file_checks(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t *req_count, unsigned int flags) -{ - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); - uint64_t count = *req_count; - loff_t size_in; - int ret; - - ret = generic_file_rw_checks(file_in, file_out); - if (ret) - return ret; - - /* Don't touch certain kinds of inodes */ - if (IS_IMMUTABLE(inode_out)) - return -EPERM; - - if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) - return -ETXTBSY; - - /* Ensure offsets don't wrap. 
*/ - if (pos_in + count < pos_in || pos_out + count < pos_out) - return -EOVERFLOW; - - /* Shorten the copy to EOF */ - size_in = i_size_read(inode_in); - if (pos_in >= size_in) - count = 0; - else - count = min(count, size_in - (uint64_t)pos_in); - - ret = generic_write_check_limits(file_out, pos_out, &count); - if (ret) - return ret; - - /* Don't allow overlapped copying within the same file. */ - if (inode_in == inode_out && - pos_out + count > pos_in && - pos_out < pos_in + count) - return -EINVAL; - - *req_count = count; - return 0; -} - int pagecache_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -3343,10 +3251,9 @@ void dio_warn_stale_pagecache(struct file *filp) { static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); char pathname[128]; - struct inode *inode = file_inode(filp); char *path; - errseq_set(&inode->i_mapping->wb_err, -EIO); + errseq_set(&filp->f_mapping->wb_err, -EIO); if (__ratelimit(&_rs)) { path = file_path(filp, pathname, sizeof(pathname)); if (IS_ERR(path)) @@ -3373,7 +3280,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_NOWAIT) { /* If there are pages to writeback, return */ - if (filemap_range_has_page(inode->i_mapping, pos, + if (filemap_range_has_page(file->f_mapping, pos, pos + write_len - 1)) return -EAGAIN; } else { |
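A side note on the rewritten read loop above (not part of the patch itself): the sketch below is a minimal userspace model of the per-page copy arithmetic that the new generic_file_buffered_read() performs once it holds a batch of uptodate pages. The 4 KiB PAGE_SIZE, the pagecache array, buffered_copy() and main() are invented stand-ins for illustration; only the index/offset/length math mirrors the kernel loop.

/*
 * Toy userspace model of the per-page copy arithmetic in the reworked
 * generic_file_buffered_read() loop: re-derive the page index and the
 * offset within that page from a single byte position, copy at most
 * "PAGE_SIZE - offset" bytes per step, then advance the position by the
 * amount copied (as the kernel loop does with iocb->ki_pos).
 * The fake page cache and the driver in main() are stand-ins only.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))
#define NR_PAGES   4

static unsigned char pagecache[NR_PAGES][PAGE_SIZE];   /* pretend page cache */

/* Copy "count" bytes starting at byte offset "pos" into "dst". */
static size_t buffered_copy(uint64_t pos, size_t count, unsigned char *dst)
{
    uint64_t end = pos + count;
    size_t copied = 0;

    while (pos < end) {
        uint64_t index  = pos >> PAGE_SHIFT;      /* which page */
        size_t   offset = pos & ~PAGE_MASK;       /* offset inside the page */
        uint64_t left   = end - pos;
        size_t   bytes  = left < PAGE_SIZE - offset ? (size_t)left
                                                    : PAGE_SIZE - offset;

        if (index >= NR_PAGES)                    /* "EOF" in this model */
            break;

        memcpy(dst + copied, &pagecache[index][offset], bytes);
        copied += bytes;
        pos    += bytes;                          /* like iocb->ki_pos += copied */
    }
    return copied;
}

int main(void)
{
    unsigned char dst[3 * PAGE_SIZE];
    size_t n;

    for (int i = 0; i < NR_PAGES; i++)
        memset(pagecache[i], 'A' + i, PAGE_SIZE);

    /* Read 6000 bytes starting 100 bytes into the second page. */
    n = buffered_copy(PAGE_SIZE + 100, 6000, dst);
    printf("copied %zu bytes, first=%c, last=%c\n", n, dst[0], dst[n - 1]);
    return 0;
}

The design point this illustrates: the old loop carried index, offset, prev_index and prev_offset across iterations, while the new loop advances iocb->ki_pos alone and recomputes the page index and intra-page offset from it each time, which is what pos does in the model.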