aboutsummaryrefslogtreecommitdiffstats
path: root/fs/buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c874
1 files changed, 252 insertions, 622 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 46bc589b7a03..d9c6d1fbb6dd 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -52,8 +52,8 @@
#include "internal.h"
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
-static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
- enum rw_hint hint, struct writeback_control *wbc);
+static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
+ struct writeback_control *wbc);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
@@ -79,26 +79,26 @@ void unlock_buffer(struct buffer_head *bh)
EXPORT_SYMBOL(unlock_buffer);
/*
- * Returns if the page has dirty or writeback buffers. If all the buffers
- * are unlocked and clean then the PageDirty information is stale. If
- * any of the pages are locked, it is assumed they are locked for IO.
+ * Returns if the folio has dirty or writeback buffers. If all the buffers
+ * are unlocked and clean then the folio_test_dirty information is stale. If
+ * any of the buffers are locked, it is assumed they are locked for IO.
*/
-void buffer_check_dirty_writeback(struct page *page,
+void buffer_check_dirty_writeback(struct folio *folio,
bool *dirty, bool *writeback)
{
struct buffer_head *head, *bh;
*dirty = false;
*writeback = false;
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
- if (!page_has_buffers(page))
+ head = folio_buffers(folio);
+ if (!head)
return;
- if (PageWriteback(page))
+ if (folio_test_writeback(folio))
*writeback = true;
- head = page_buffers(page);
bh = head;
do {
if (buffer_locked(bh))
@@ -152,7 +152,7 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
/*
* Default synchronous end-of-IO handler.. Just mark it up-to-date and
- * unlock the buffer. This is what ll_rw_block uses too.
+ * unlock the buffer.
*/
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
@@ -282,10 +282,10 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
/*
- * If none of the buffers had errors and they are all
- * uptodate then we can set the page uptodate.
+ * If all of the buffers are uptodate then we can set the page
+ * uptodate.
*/
- if (page_uptodate && !PageError(page))
+ if (page_uptodate)
SetPageUptodate(page);
unlock_page(page);
return;
@@ -314,7 +314,7 @@ static void decrypt_bh(struct work_struct *work)
}
/*
- * I/O completion handler for block_read_full_page() - pages
+ * I/O completion handler for block_read_full_folio() - pages
* which come unlocked at the end of I/O.
*/
static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
@@ -491,8 +491,8 @@ int inode_has_buffers(struct inode *inode)
* all already-submitted IO to complete, but does not queue any new
* writes to the disk.
*
- * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
- * you dirty the buffers, and then use osync_inode_buffers to wait for
+ * To do O_SYNC writes, just queue the buffer writes with write_dirty_buffer
+ * as you dirty the buffers, and then use osync_inode_buffers to wait for
* completion. Any other dirty buffers which are not yet queued for
* write will not be flushed to disk by the osync.
*/
@@ -562,7 +562,7 @@ void write_boundary_block(struct block_device *bdev,
struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
if (bh) {
if (buffer_dirty(bh))
- ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
+ write_dirty_buffer(bh, 0);
put_bh(bh);
}
}
@@ -613,17 +613,14 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
* FIXME: may need to call ->reservepage here as well. That's rather up to the
* address_space though.
*/
-int __set_page_dirty_buffers(struct page *page)
+bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
{
- int newly_dirty;
- struct address_space *mapping = page_mapping(page);
-
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
+ struct buffer_head *head;
+ bool newly_dirty;
spin_lock(&mapping->private_lock);
- if (page_has_buffers(page)) {
- struct buffer_head *head = page_buffers(page);
+ head = folio_buffers(folio);
+ if (head) {
struct buffer_head *bh = head;
do {
@@ -635,21 +632,21 @@ int __set_page_dirty_buffers(struct page *page)
* Lock out page's memcg migration to keep PageDirty
* synchronized with per-memcg dirty page counters.
*/
- lock_page_memcg(page);
- newly_dirty = !TestSetPageDirty(page);
+ folio_memcg_lock(folio);
+ newly_dirty = !folio_test_set_dirty(folio);
spin_unlock(&mapping->private_lock);
if (newly_dirty)
- __set_page_dirty(page, mapping, 1);
+ __folio_mark_dirty(folio, mapping, 1);
- unlock_page_memcg(page);
+ folio_memcg_unlock(folio);
if (newly_dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
return newly_dirty;
}
-EXPORT_SYMBOL(__set_page_dirty_buffers);
+EXPORT_SYMBOL(block_dirty_folio);
/*
* Write out and wait upon a list of buffers.
@@ -958,7 +955,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
size);
goto done;
}
- if (!try_to_free_buffers(page))
+ if (!try_to_free_buffers(page_folio(page)))
goto failed;
}
@@ -1063,8 +1060,8 @@ __getblk_slow(struct block_device *bdev, sector_t block,
* Also. When blockdev buffers are explicitly read with bread(), they
* individually become uptodate. But their backing page remains not
* uptodate - even if all of its buffers are uptodate. A subsequent
- * block_read_full_page() against that page will discover all the uptodate
- * buffers, will set the page uptodate and will perform no I/O.
+ * block_read_full_folio() against that folio will discover all the uptodate
+ * buffers, will set the folio uptodate and will perform no I/O.
*/
/**
@@ -1177,7 +1174,7 @@ static struct buffer_head *__bread_slow(struct buffer_head *bh)
} else {
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, 0, bh);
+ submit_bh(REQ_OP_READ, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
@@ -1235,16 +1232,18 @@ static void bh_lru_install(struct buffer_head *bh)
int i;
check_irqs_on();
+ bh_lru_lock();
+
/*
* the refcount of buffer_head in bh_lru prevents dropping the
* attached page(i.e., try_to_free_buffers) so it could cause
* failing page migration.
* Skip putting upcoming bh into bh_lru until migration is done.
*/
- if (lru_cache_disabled())
+ if (lru_cache_disabled()) {
+ bh_lru_unlock();
return;
-
- bh_lru_lock();
+ }
b = this_cpu_ptr(&bh_lrus);
for (i = 0; i < BH_LRU_SIZE; i++) {
@@ -1343,23 +1342,12 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = __getblk(bdev, block, size);
if (likely(bh)) {
- ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
+ bh_readahead(bh, REQ_RAHEAD);
brelse(bh);
}
}
EXPORT_SYMBOL(__breadahead);
-void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
- gfp_t gfp)
-{
- struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
- if (likely(bh)) {
- ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
- brelse(bh);
- }
-}
-EXPORT_SYMBOL(__breadahead_gfp);
-
/**
* __bread_gfp() - reads a specified block and returns the bh
* @bdev: the block_device to read from
@@ -1465,58 +1453,53 @@ EXPORT_SYMBOL(set_bh_page);
static void discard_buffer(struct buffer_head * bh)
{
- unsigned long b_state, b_state_old;
+ unsigned long b_state;
lock_buffer(bh);
clear_buffer_dirty(bh);
bh->b_bdev = NULL;
- b_state = bh->b_state;
- for (;;) {
- b_state_old = cmpxchg(&bh->b_state, b_state,
- (b_state & ~BUFFER_FLAGS_DISCARD));
- if (b_state_old == b_state)
- break;
- b_state = b_state_old;
- }
+ b_state = READ_ONCE(bh->b_state);
+ do {
+ } while (!try_cmpxchg(&bh->b_state, &b_state,
+ b_state & ~BUFFER_FLAGS_DISCARD));
unlock_buffer(bh);
}
/**
- * block_invalidatepage - invalidate part or all of a buffer-backed page
- *
- * @page: the page which is affected
+ * block_invalidate_folio - Invalidate part or all of a buffer-backed folio.
+ * @folio: The folio which is affected.
* @offset: start of the range to invalidate
* @length: length of the range to invalidate
*
- * block_invalidatepage() is called when all or part of the page has become
+ * block_invalidate_folio() is called when all or part of the folio has been
* invalidated by a truncate operation.
*
- * block_invalidatepage() does not have to release all buffers, but it must
+ * block_invalidate_folio() does not have to release all buffers, but it must
* ensure that no dirty buffer is left outside @offset and that no I/O
* is underway against any of the blocks which are outside the truncation
* point. Because the caller is about to free (and possibly reuse) those
* blocks on-disk.
*/
-void block_invalidatepage(struct page *page, unsigned int offset,
- unsigned int length)
+void block_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
struct buffer_head *head, *bh, *next;
- unsigned int curr_off = 0;
- unsigned int stop = length + offset;
+ size_t curr_off = 0;
+ size_t stop = length + offset;
- BUG_ON(!PageLocked(page));
- if (!page_has_buffers(page))
- goto out;
+ BUG_ON(!folio_test_locked(folio));
/*
* Check for overflow
*/
- BUG_ON(stop > PAGE_SIZE || stop < length);
+ BUG_ON(stop > folio_size(folio) || stop < length);
+
+ head = folio_buffers(folio);
+ if (!head)
+ return;
- head = page_buffers(page);
bh = head;
do {
- unsigned int next_off = curr_off + bh->b_size;
+ size_t next_off = curr_off + bh->b_size;
next = bh->b_this_page;
/*
@@ -1535,21 +1518,21 @@ void block_invalidatepage(struct page *page, unsigned int offset,
} while (bh != head);
/*
- * We release buffers only if the entire page is being invalidated.
+ * We release buffers only if the entire folio is being invalidated.
* The get_block cached value has been unconditionally invalidated,
* so real IO is not possible anymore.
*/
- if (length == PAGE_SIZE)
- try_to_release_page(page, 0);
+ if (length == folio_size(folio))
+ filemap_release_folio(folio, 0);
out:
return;
}
-EXPORT_SYMBOL(block_invalidatepage);
+EXPORT_SYMBOL(block_invalidate_folio);
/*
* We attach and possibly dirty the buffers atomically wrt
- * __set_page_dirty_buffers() via private_lock. try_to_free_buffers
+ * block_dirty_folio() via private_lock. try_to_free_buffers
* is already excluded via the page lock.
*/
void create_empty_buffers(struct page *page,
@@ -1606,7 +1589,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
{
struct inode *bd_inode = bdev->bd_inode;
struct address_space *bd_mapping = bd_inode->i_mapping;
- struct pagevec pvec;
+ struct folio_batch fbatch;
pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
pgoff_t end;
int i, count;
@@ -1614,24 +1597,24 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
struct buffer_head *head;
end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
- pagevec_init(&pvec);
- while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
- count = pagevec_count(&pvec);
+ folio_batch_init(&fbatch);
+ while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) {
+ count = folio_batch_count(&fbatch);
for (i = 0; i < count; i++) {
- struct page *page = pvec.pages[i];
+ struct folio *folio = fbatch.folios[i];
- if (!page_has_buffers(page))
+ if (!folio_buffers(folio))
continue;
/*
- * We use page lock instead of bd_mapping->private_lock
+ * We use folio lock instead of bd_mapping->private_lock
* to pin buffers here since we can afford to sleep and
* it scales better than a global spinlock lock.
*/
- lock_page(page);
- /* Recheck when the page is locked which pins bhs */
- if (!page_has_buffers(page))
+ folio_lock(folio);
+ /* Recheck when the folio is locked which pins bhs */
+ head = folio_buffers(folio);
+ if (!head)
goto unlock_page;
- head = page_buffers(page);
bh = head;
do {
if (!buffer_mapped(bh) || (bh->b_blocknr < block))
@@ -1645,9 +1628,9 @@ next:
bh = bh->b_this_page;
} while (bh != head);
unlock_page:
- unlock_page(page);
+ folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
/* End of range already reached? */
if (index > end || !index)
@@ -1718,18 +1701,18 @@ int __block_write_full_page(struct inode *inode, struct page *page,
struct buffer_head *bh, *head;
unsigned int blocksize, bbits;
int nr_underway = 0;
- int write_flags = wbc_to_write_flags(wbc);
+ blk_opf_t write_flags = wbc_to_write_flags(wbc);
head = create_page_buffers(page, inode,
(1 << BH_Dirty)|(1 << BH_Uptodate));
/*
- * Be very careful. We have no exclusion from __set_page_dirty_buffers
+ * Be very careful. We have no exclusion from block_dirty_folio
* here, and the (potentially unmapped) buffers may become dirty at
* any time. If a buffer becomes dirty here after we've inspected it
* then we just miss that fact, and the page stays dirty.
*
- * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
+ * Buffers outside i_size may be dirtied by block_dirty_folio;
* handle that here by just cleaning them.
*/
@@ -1806,8 +1789,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
- inode->i_write_hint, wbc);
+ submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
nr_underway++;
}
bh = next;
@@ -1820,7 +1802,7 @@ done:
/*
* The page was marked dirty, but the buffers were
* clean. Someone wrote them back by hand with
- * ll_rw_block/submit_bh. A rare case.
+ * write_dirty_buffer/submit_bh. A rare case.
*/
end_page_writeback(page);
@@ -1861,8 +1843,7 @@ recover:
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
- submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
- inode->i_write_hint, wbc);
+ submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
nr_underway++;
}
bh = next;
@@ -1969,34 +1950,34 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
}
}
-int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
+int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
get_block_t *get_block, const struct iomap *iomap)
{
unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
unsigned block_start, block_end;
sector_t block;
int err = 0;
unsigned blocksize, bbits;
struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
BUG_ON(from > PAGE_SIZE);
BUG_ON(to > PAGE_SIZE);
BUG_ON(from > to);
- head = create_page_buffers(page, inode, 0);
+ head = create_page_buffers(&folio->page, inode, 0);
blocksize = head->b_size;
bbits = block_size_bits(blocksize);
- block = (sector_t)page->index << (PAGE_SHIFT - bbits);
+ block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
for(bh = head, block_start = 0; bh != head || !block_start;
block++, block_start=block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
- if (PageUptodate(page)) {
+ if (folio_test_uptodate(folio)) {
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
}
@@ -2016,20 +1997,20 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
if (buffer_new(bh)) {
clean_bdev_bh_alias(bh);
- if (PageUptodate(page)) {
+ if (folio_test_uptodate(folio)) {
clear_buffer_new(bh);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
continue;
}
if (block_end > to || block_start < from)
- zero_user_segments(page,
+ folio_zero_segments(folio,
to, block_end,
block_start, from);
continue;
}
}
- if (PageUptodate(page)) {
+ if (folio_test_uptodate(folio)) {
if (!buffer_uptodate(bh))
set_buffer_uptodate(bh);
continue;
@@ -2037,7 +2018,7 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
- ll_rw_block(REQ_OP_READ, 0, 1, &bh);
+ bh_read_nowait(bh, 0);
*wait_bh++=bh;
}
}
@@ -2050,14 +2031,15 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
err = -EIO;
}
if (unlikely(err))
- page_zero_new_buffers(page, from, to);
+ page_zero_new_buffers(&folio->page, from, to);
return err;
}
int __block_write_begin(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block)
{
- return __block_write_begin_int(page, pos, len, get_block, NULL);
+ return __block_write_begin_int(page_folio(page), pos, len, get_block,
+ NULL);
}
EXPORT_SYMBOL(__block_write_begin);
@@ -2091,7 +2073,7 @@ static int __block_commit_write(struct inode *inode, struct page *page,
/*
* If this is a partial write which happened to make all buffers
- * uptodate then we can optimize away a bogus readpage() for
+ * uptodate then we can optimize away a bogus read_folio() for
* the next read(). Here we 'discover' whether the page went
* uptodate as a result of this (potentially partial) write.
*/
@@ -2107,13 +2089,13 @@ static int __block_commit_write(struct inode *inode, struct page *page,
* The filesystem needs to handle block truncation upon failure.
*/
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
- unsigned flags, struct page **pagep, get_block_t *get_block)
+ struct page **pagep, get_block_t *get_block)
{
pgoff_t index = pos >> PAGE_SHIFT;
struct page *page;
int status;
- page = grab_cache_page_write_begin(mapping, index, flags);
+ page = grab_cache_page_write_begin(mapping, index);
if (!page)
return -ENOMEM;
@@ -2140,12 +2122,12 @@ int block_write_end(struct file *file, struct address_space *mapping,
if (unlikely(copied < len)) {
/*
- * The buffers that were written will now be uptodate, so we
- * don't have to worry about a readpage reading them and
- * overwriting a partial write. However if we have encountered
- * a short write and only partially written into a buffer, it
- * will not be marked uptodate, so a readpage might come in and
- * destroy our partial write.
+ * The buffers that were written will now be uptodate, so
+ * we don't have to worry about a read_folio reading them
+ * and overwriting a partial write. However if we have
+ * encountered a short write and only partially written
+ * into a buffer, it will not be marked uptodate, so a
+ * read_folio might come in and destroy our partial write.
*
* Do the simplest thing, and just treat any short write to a
* non uptodate page as a zero-length write, and force the
@@ -2205,29 +2187,27 @@ int generic_write_end(struct file *file, struct address_space *mapping,
EXPORT_SYMBOL(generic_write_end);
/*
- * block_is_partially_uptodate checks whether buffers within a page are
+ * block_is_partially_uptodate checks whether buffers within a folio are
* uptodate or not.
*
- * Returns true if all buffers which correspond to a file portion
- * we want to read are uptodate.
+ * Returns true if all buffers which correspond to the specified part
+ * of the folio are uptodate.
*/
-int block_is_partially_uptodate(struct page *page, unsigned long from,
- unsigned long count)
+bool block_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
{
unsigned block_start, block_end, blocksize;
unsigned to;
struct buffer_head *bh, *head;
- int ret = 1;
-
- if (!page_has_buffers(page))
- return 0;
+ bool ret = true;
- head = page_buffers(page);
+ head = folio_buffers(folio);
+ if (!head)
+ return false;
blocksize = head->b_size;
- to = min_t(unsigned, PAGE_SIZE - from, count);
+ to = min_t(unsigned, folio_size(folio) - from, count);
to = from + to;
- if (from < blocksize && to > PAGE_SIZE - blocksize)
- return 0;
+ if (from < blocksize && to > folio_size(folio) - blocksize)
+ return false;
bh = head;
block_start = 0;
@@ -2235,7 +2215,7 @@ int block_is_partially_uptodate(struct page *page, unsigned long from,
block_end = block_start + blocksize;
if (block_end > from && block_start < to) {
if (!buffer_uptodate(bh)) {
- ret = 0;
+ ret = false;
break;
}
if (block_end >= to)
@@ -2250,26 +2230,29 @@ int block_is_partially_uptodate(struct page *page, unsigned long from,
EXPORT_SYMBOL(block_is_partially_uptodate);
/*
- * Generic "read page" function for block devices that have the normal
+ * Generic "read_folio" function for block devices that have the normal
* get_block functionality. This is most of the block device filesystems.
- * Reads the page asynchronously --- the unlock_buffer() and
+ * Reads the folio asynchronously --- the unlock_buffer() and
* set/clear_buffer_uptodate() functions propagate buffer state into the
- * page struct once IO has completed.
+ * folio once IO has completed.
*/
-int block_read_full_page(struct page *page, get_block_t *get_block)
+int block_read_full_folio(struct folio *folio, get_block_t *get_block)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
sector_t iblock, lblock;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
unsigned int blocksize, bbits;
int nr, i;
int fully_mapped = 1;
+ bool page_error = false;
+
+ VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
- head = create_page_buffers(page, inode, 0);
+ head = create_page_buffers(&folio->page, inode, 0);
blocksize = head->b_size;
bbits = block_size_bits(blocksize);
- iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
+ iblock = (sector_t)folio->index << (PAGE_SHIFT - bbits);
lblock = (i_size_read(inode)+blocksize-1) >> bbits;
bh = head;
nr = 0;
@@ -2286,11 +2269,14 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
if (iblock < lblock) {
WARN_ON(bh->b_size != blocksize);
err = get_block(inode, iblock, bh, 0);
- if (err)
- SetPageError(page);
+ if (err) {
+ folio_set_error(folio);
+ page_error = true;
+ }
}
if (!buffer_mapped(bh)) {
- zero_user(page, i * blocksize, blocksize);
+ folio_zero_range(folio, i * blocksize,
+ blocksize);
if (!err)
set_buffer_uptodate(bh);
continue;
@@ -2306,16 +2292,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
} while (i++, iblock++, (bh = bh->b_this_page) != head);
if (fully_mapped)
- SetPageMappedToDisk(page);
+ folio_set_mappedtodisk(folio);
if (!nr) {
/*
- * All buffers are uptodate - we can set the page uptodate
+ * All buffers are uptodate - we can set the folio uptodate
* as well. But not if get_block() returned an error.
*/
- if (!PageError(page))
- SetPageUptodate(page);
- unlock_page(page);
+ if (!page_error)
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
return 0;
}
@@ -2336,11 +2322,11 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
if (buffer_uptodate(bh))
end_buffer_async_read(bh, 1);
else
- submit_bh(REQ_OP_READ, 0, bh);
+ submit_bh(REQ_OP_READ, bh);
}
return 0;
}
-EXPORT_SYMBOL(block_read_full_page);
+EXPORT_SYMBOL(block_read_full_folio);
/* utility function for filesystems that need to do work on expanding
* truncates. Uses filesystem pagecache writes to allow the filesystem to
@@ -2349,20 +2335,20 @@ EXPORT_SYMBOL(block_read_full_page);
int generic_cont_expand_simple(struct inode *inode, loff_t size)
{
struct address_space *mapping = inode->i_mapping;
+ const struct address_space_operations *aops = mapping->a_ops;
struct page *page;
- void *fsdata;
+ void *fsdata = NULL;
int err;
err = inode_newsize_ok(inode, size);
if (err)
goto out;
- err = pagecache_write_begin(NULL, mapping, size, 0,
- AOP_FLAG_CONT_EXPAND, &page, &fsdata);
+ err = aops->write_begin(NULL, mapping, size, 0, &page, &fsdata);
if (err)
goto out;
- err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
+ err = aops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
BUG_ON(err > 0);
out:
@@ -2374,9 +2360,10 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
loff_t pos, loff_t *bytes)
{
struct inode *inode = mapping->host;
+ const struct address_space_operations *aops = mapping->a_ops;
unsigned int blocksize = i_blocksize(inode);
struct page *page;
- void *fsdata;
+ void *fsdata = NULL;
pgoff_t index, curidx;
loff_t curpos;
unsigned zerofrom, offset, len;
@@ -2393,12 +2380,12 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
}
len = PAGE_SIZE - zerofrom;
- err = pagecache_write_begin(file, mapping, curpos, len, 0,
+ err = aops->write_begin(file, mapping, curpos, len,
&page, &fsdata);
if (err)
goto out;
zero_user(page, zerofrom, len);
- err = pagecache_write_end(file, mapping, curpos, len, len,
+ err = aops->write_end(file, mapping, curpos, len, len,
page, fsdata);
if (err < 0)
goto out;
@@ -2426,12 +2413,12 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
}
len = offset - zerofrom;
- err = pagecache_write_begin(file, mapping, curpos, len, 0,
+ err = aops->write_begin(file, mapping, curpos, len,
&page, &fsdata);
if (err)
goto out;
zero_user(page, zerofrom, len);
- err = pagecache_write_end(file, mapping, curpos, len, len,
+ err = aops->write_end(file, mapping, curpos, len, len,
page, fsdata);
if (err < 0)
goto out;
@@ -2447,7 +2434,7 @@ out:
* We may have to extend the file.
*/
int cont_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
+ loff_t pos, unsigned len,
struct page **pagep, void **fsdata,
get_block_t *get_block, loff_t *bytes)
{
@@ -2466,7 +2453,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
(*bytes)++;
}
- return block_write_begin(mapping, pos, len, flags, pagep, get_block);
+ return block_write_begin(mapping, pos, len, pagep, get_block);
}
EXPORT_SYMBOL(cont_write_begin);
@@ -2535,341 +2522,6 @@ out_unlock:
}
EXPORT_SYMBOL(block_page_mkwrite);
-/*
- * nobh_write_begin()'s prereads are special: the buffer_heads are freed
- * immediately, while under the page lock. So it needs a special end_io
- * handler which does not touch the bh after unlocking it.
- */
-static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
-{
- __end_buffer_read_notouch(bh, uptodate);
-}
-
-/*
- * Attach the singly-linked list of buffers created by nobh_write_begin, to
- * the page (converting it to circular linked list and taking care of page
- * dirty races).
- */
-static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
-{
- struct buffer_head *bh;
-
- BUG_ON(!PageLocked(page));
-
- spin_lock(&page->mapping->private_lock);
- bh = head;
- do {
- if (PageDirty(page))
- set_buffer_dirty(bh);
- if (!bh->b_this_page)
- bh->b_this_page = head;
- bh = bh->b_this_page;
- } while (bh != head);
- attach_page_private(page, head);
- spin_unlock(&page->mapping->private_lock);
-}
-
-/*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
- * The filesystem needs to handle block truncation upon failure.
- */
-int nobh_write_begin(struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata,
- get_block_t *get_block)
-{
- struct inode *inode = mapping->host;
- const unsigned blkbits = inode->i_blkbits;
- const unsigned blocksize = 1 << blkbits;
- struct buffer_head *head, *bh;
- struct page *page;
- pgoff_t index;
- unsigned from, to;
- unsigned block_in_page;
- unsigned block_start, block_end;
- sector_t block_in_file;
- int nr_reads = 0;
- int ret = 0;
- int is_mapped_to_disk = 1;
-
- index = pos >> PAGE_SHIFT;
- from = pos & (PAGE_SIZE - 1);
- to = from + len;
-
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page)
- return -ENOMEM;
- *pagep = page;
- *fsdata = NULL;
-
- if (page_has_buffers(page)) {
- ret = __block_write_begin(page, pos, len, get_block);
- if (unlikely(ret))
- goto out_release;
- return ret;
- }
-
- if (PageMappedToDisk(page))
- return 0;
-
- /*
- * Allocate buffers so that we can keep track of state, and potentially
- * attach them to the page if an error occurs. In the common case of
- * no error, they will just be freed again without ever being attached
- * to the page (which is all OK, because we're under the page lock).
- *
- * Be careful: the buffer linked list is a NULL terminated one, rather
- * than the circular one we're used to.
- */
- head = alloc_page_buffers(page, blocksize, false);
- if (!head) {
- ret = -ENOMEM;
- goto out_release;
- }
-
- block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
-
- /*
- * We loop across all blocks in the page, whether or not they are
- * part of the affected region. This is so we can discover if the
- * page is fully mapped-to-disk.
- */
- for (block_start = 0, block_in_page = 0, bh = head;
- block_start < PAGE_SIZE;
- block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
- int create;
-
- block_end = block_start + blocksize;
- bh->b_state = 0;
- create = 1;
- if (block_start >= to)
- create = 0;
- ret = get_block(inode, block_in_file + block_in_page,
- bh, create);
- if (ret)
- goto failed;
- if (!buffer_mapped(bh))
- is_mapped_to_disk = 0;
- if (buffer_new(bh))
- clean_bdev_bh_alias(bh);
- if (PageUptodate(page)) {
- set_buffer_uptodate(bh);
- continue;
- }
- if (buffer_new(bh) || !buffer_mapped(bh)) {
- zero_user_segments(page, block_start, from,
- to, block_end);
- continue;
- }
- if (buffer_uptodate(bh))
- continue; /* reiserfs does this */
- if (block_start < from || block_end > to) {
- lock_buffer(bh);
- bh->b_end_io = end_buffer_read_nobh;
- submit_bh(REQ_OP_READ, 0, bh);
- nr_reads++;
- }
- }
-
- if (nr_reads) {
- /*
- * The page is locked, so these buffers are protected from
- * any VM or truncate activity. Hence we don't need to care
- * for the buffer_head refcounts.
- */
- for (bh = head; bh; bh = bh->b_this_page) {
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh))
- ret = -EIO;
- }
- if (ret)
- goto failed;
- }
-
- if (is_mapped_to_disk)
- SetPageMappedToDisk(page);
-
- *fsdata = head; /* to be released by nobh_write_end */
-
- return 0;
-
-failed:
- BUG_ON(!ret);
- /*
- * Error recovery is a bit difficult. We need to zero out blocks that
- * were newly allocated, and dirty them to ensure they get written out.
- * Buffers need to be attached to the page at this point, otherwise
- * the handling of potential IO errors during writeout would be hard
- * (could try doing synchronous writeout, but what if that fails too?)
- */
- attach_nobh_buffers(page, head);
- page_zero_new_buffers(page, from, to);
-
-out_release:
- unlock_page(page);
- put_page(page);
- *pagep = NULL;
-
- return ret;
-}
-EXPORT_SYMBOL(nobh_write_begin);
-
-int nobh_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- struct inode *inode = page->mapping->host;
- struct buffer_head *head = fsdata;
- struct buffer_head *bh;
- BUG_ON(fsdata != NULL && page_has_buffers(page));
-
- if (unlikely(copied < len) && head)
- attach_nobh_buffers(page, head);
- if (page_has_buffers(page))
- return generic_write_end(file, mapping, pos, len,
- copied, page, fsdata);
-
- SetPageUptodate(page);
- set_page_dirty(page);
- if (pos+copied > inode->i_size) {
- i_size_write(inode, pos+copied);
- mark_inode_dirty(inode);
- }
-
- unlock_page(page);
- put_page(page);
-
- while (head) {
- bh = head;
- head = head->b_this_page;
- free_buffer_head(bh);
- }
-
- return copied;
-}
-EXPORT_SYMBOL(nobh_write_end);
-
-/*
- * nobh_writepage() - based on block_full_write_page() except
- * that it tries to operate without attaching bufferheads to
- * the page.
- */
-int nobh_writepage(struct page *page, get_block_t *get_block,
- struct writeback_control *wbc)
-{
- struct inode * const inode = page->mapping->host;
- loff_t i_size = i_size_read(inode);
- const pgoff_t end_index = i_size >> PAGE_SHIFT;
- unsigned offset;
- int ret;
-
- /* Is the page fully inside i_size? */
- if (page->index < end_index)
- goto out;
-
- /* Is the page fully outside i_size? (truncate in progress) */
- offset = i_size & (PAGE_SIZE-1);
- if (page->index >= end_index+1 || !offset) {
- unlock_page(page);
- return 0; /* don't care */
- }
-
- /*
- * The page straddles i_size. It must be zeroed out on each and every
- * writepage invocation because it may be mmapped. "A file is mapped
- * in multiples of the page size. For a file that is not a multiple of
- * the page size, the remaining memory is zeroed when mapped, and
- * writes to that region are not written out to the file."
- */
- zero_user_segment(page, offset, PAGE_SIZE);
-out:
- ret = mpage_writepage(page, get_block, wbc);
- if (ret == -EAGAIN)
- ret = __block_write_full_page(inode, page, get_block, wbc,
- end_buffer_async_write);
- return ret;
-}
-EXPORT_SYMBOL(nobh_writepage);
-
-int nobh_truncate_page(struct address_space *mapping,
- loff_t from, get_block_t *get_block)
-{
- pgoff_t index = from >> PAGE_SHIFT;
- unsigned offset = from & (PAGE_SIZE-1);
- unsigned blocksize;
- sector_t iblock;
- unsigned length, pos;
- struct inode *inode = mapping->host;
- struct page *page;
- struct buffer_head map_bh;
- int err;
-
- blocksize = i_blocksize(inode);
- length = offset & (blocksize - 1);
-
- /* Block boundary? Nothing to do */
- if (!length)
- return 0;
-
- length = blocksize - length;
- iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
-
- page = grab_cache_page(mapping, index);
- err = -ENOMEM;
- if (!page)
- goto out;
-
- if (page_has_buffers(page)) {
-has_buffers:
- unlock_page(page);
- put_page(page);
- return block_truncate_page(mapping, from, get_block);
- }
-
- /* Find the buffer that contains "offset" */
- pos = blocksize;
- while (offset >= pos) {
- iblock++;
- pos += blocksize;
- }
-
- map_bh.b_size = blocksize;
- map_bh.b_state = 0;
- err = get_block(inode, iblock, &map_bh, 0);
- if (err)
- goto unlock;
- /* unmapped? It's a hole - nothing to do */
- if (!buffer_mapped(&map_bh))
- goto unlock;
-
- /* Ok, it's mapped. Make sure it's up-to-date */
- if (!PageUptodate(page)) {
- err = mapping->a_ops->readpage(NULL, page);
- if (err) {
- put_page(page);
- goto out;
- }
- lock_page(page);
- if (!PageUptodate(page)) {
- err = -EIO;
- goto unlock;
- }
- if (page_has_buffers(page))
- goto has_buffers;
- }
- zero_user(page, offset, length);
- set_page_dirty(page);
- err = 0;
-
-unlock:
- unlock_page(page);
- put_page(page);
-out:
- return err;
-}
-EXPORT_SYMBOL(nobh_truncate_page);
-
int block_truncate_page(struct address_space *mapping,
loff_t from, get_block_t *get_block)
{
@@ -2926,11 +2578,9 @@ int block_truncate_page(struct address_space *mapping,
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
- err = -EIO;
- ll_rw_block(REQ_OP_READ, 0, 1, &bh);
- wait_on_buffer(bh);
+ err = bh_read(bh, 0);
/* Uhhuh. Read error. Complain and punt. */
- if (!buffer_uptodate(bh))
+ if (err < 0)
goto unlock;
}
@@ -3006,9 +2656,10 @@ static void end_bio_bh_io_sync(struct bio *bio)
bio_put(bio);
}
-static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
- enum rw_hint write_hint, struct writeback_control *wbc)
+static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
+ struct writeback_control *wbc)
{
+ const enum req_op op = opf & REQ_OP_MASK;
struct bio *bio;
BUG_ON(!buffer_locked(bh));
@@ -3023,13 +2674,16 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
clear_buffer_write_io_error(bh);
- bio = bio_alloc(GFP_NOIO, 1);
+ if (buffer_meta(bh))
+ opf |= REQ_META;
+ if (buffer_prio(bh))
+ opf |= REQ_PRIO;
+
+ bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO);
fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio_set_dev(bio, bh->b_bdev);
- bio->bi_write_hint = write_hint;
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -3037,12 +2691,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
bio->bi_end_io = end_bio_bh_io_sync;
bio->bi_private = bh;
- if (buffer_meta(bh))
- op_flags |= REQ_META;
- if (buffer_prio(bh))
- op_flags |= REQ_PRIO;
- bio_set_op_attrs(bio, op, op_flags);
-
/* Take care of bh's that straddle the end of the device */
guard_bio_eod(bio);
@@ -3052,71 +2700,15 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
}
submit_bio(bio);
- return 0;
}
-int submit_bh(int op, int op_flags, struct buffer_head *bh)
+void submit_bh(blk_opf_t opf, struct buffer_head *bh)
{
- return submit_bh_wbc(op, op_flags, bh, 0, NULL);
+ submit_bh_wbc(opf, bh, NULL);
}
EXPORT_SYMBOL(submit_bh);
-/**
- * ll_rw_block: low-level access to block devices (DEPRECATED)
- * @op: whether to %READ or %WRITE
- * @op_flags: req_flag_bits
- * @nr: number of &struct buffer_heads in the array
- * @bhs: array of pointers to &struct buffer_head
- *
- * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
- * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
- * @op_flags contains flags modifying the detailed I/O behavior, most notably
- * %REQ_RAHEAD.
- *
- * This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit), any buffer that appears to be clean when doing a write
- * request, and any buffer that appears to be up-to-date when doing read
- * request. Further it marks as clean buffers that are processed for
- * writing (the buffer cache won't assume that they are actually clean
- * until the buffer gets unlocked).
- *
- * ll_rw_block sets b_end_io to simple completion handler that marks
- * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
- * any waiters.
- *
- * All of the buffers must be for the same device, and must also be a
- * multiple of the current approved size for the device.
- */
-void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[])
-{
- int i;
-
- for (i = 0; i < nr; i++) {
- struct buffer_head *bh = bhs[i];
-
- if (!trylock_buffer(bh))
- continue;
- if (op == WRITE) {
- if (test_clear_buffer_dirty(bh)) {
- bh->b_end_io = end_buffer_write_sync;
- get_bh(bh);
- submit_bh(op, op_flags, bh);
- continue;
- }
- } else {
- if (!buffer_uptodate(bh)) {
- bh->b_end_io = end_buffer_read_sync;
- get_bh(bh);
- submit_bh(op, op_flags, bh);
- continue;
- }
- }
- unlock_buffer(bh);
- }
-}
-EXPORT_SYMBOL(ll_rw_block);
-
-void write_dirty_buffer(struct buffer_head *bh, int op_flags)
+void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
{
lock_buffer(bh);
if (!test_clear_buffer_dirty(bh)) {
@@ -3125,7 +2717,7 @@ void write_dirty_buffer(struct buffer_head *bh, int op_flags)
}
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
- submit_bh(REQ_OP_WRITE, op_flags, bh);
+ submit_bh(REQ_OP_WRITE | op_flags, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);
@@ -3134,10 +2726,8 @@ EXPORT_SYMBOL(write_dirty_buffer);
* and then start new I/O and then wait upon it. The caller must have a ref on
* the buffer_head.
*/
-int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
+int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
{
- int ret = 0;
-
WARN_ON(atomic_read(&bh->b_count) < 1);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
@@ -3152,14 +2742,14 @@ int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
- ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
+ submit_bh(REQ_OP_WRITE | op_flags, bh);
wait_on_buffer(bh);
- if (!ret && !buffer_uptodate(bh))
- ret = -EIO;
+ if (!buffer_uptodate(bh))
+ return -EIO;
} else {
unlock_buffer(bh);
}
- return ret;
+ return 0;
}
EXPORT_SYMBOL(__sync_dirty_buffer);
@@ -3170,21 +2760,21 @@ int sync_dirty_buffer(struct buffer_head *bh)
EXPORT_SYMBOL(sync_dirty_buffer);
/*
- * try_to_free_buffers() checks if all the buffers on this particular page
+ * try_to_free_buffers() checks if all the buffers on this particular folio
* are unused, and releases them if so.
*
* Exclusion against try_to_free_buffers may be obtained by either
- * locking the page or by holding its mapping's private_lock.
+ * locking the folio or by holding its mapping's private_lock.
*
- * If the page is dirty but all the buffers are clean then we need to
- * be sure to mark the page clean as well. This is because the page
+ * If the folio is dirty but all the buffers are clean then we need to
+ * be sure to mark the folio clean as well. This is because the folio
* may be against a block device, and a later reattachment of buffers
- * to a dirty page will set *all* buffers dirty. Which would corrupt
+ * to a dirty folio will set *all* buffers dirty. Which would corrupt
* filesystem data on the same device.
*
- * The same applies to regular filesystem pages: if all the buffers are
- * clean then we set the page clean and proceed. To do that, we require
- * total exclusion from __set_page_dirty_buffers(). That is obtained with
+ * The same applies to regular filesystem folios: if all the buffers are
+ * clean then we set the folio clean and proceed. To do that, we require
+ * total exclusion from block_dirty_folio(). That is obtained with
* private_lock.
*
* try_to_free_buffers() is non-blocking.
@@ -3195,10 +2785,10 @@ static inline int buffer_busy(struct buffer_head *bh)
(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}
-static int
-drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
+static bool
+drop_buffers(struct folio *folio, struct buffer_head **buffers_to_free)
{
- struct buffer_head *head = page_buffers(page);
+ struct buffer_head *head = folio_buffers(folio);
struct buffer_head *bh;
bh = head;
@@ -3216,46 +2806,46 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
bh = next;
} while (bh != head);
*buffers_to_free = head;
- detach_page_private(page);
- return 1;
+ folio_detach_private(folio);
+ return true;
failed:
- return 0;
+ return false;
}
-int try_to_free_buffers(struct page *page)
+bool try_to_free_buffers(struct folio *folio)
{
- struct address_space * const mapping = page->mapping;
+ struct address_space * const mapping = folio->mapping;
struct buffer_head *buffers_to_free = NULL;
- int ret = 0;
+ bool ret = 0;
- BUG_ON(!PageLocked(page));
- if (PageWriteback(page))
- return 0;
+ BUG_ON(!folio_test_locked(folio));
+ if (folio_test_writeback(folio))
+ return false;
if (mapping == NULL) { /* can this still happen? */
- ret = drop_buffers(page, &buffers_to_free);
+ ret = drop_buffers(folio, &buffers_to_free);
goto out;
}
spin_lock(&mapping->private_lock);
- ret = drop_buffers(page, &buffers_to_free);
+ ret = drop_buffers(folio, &buffers_to_free);
/*
* If the filesystem writes its buffers by hand (eg ext3)
- * then we can have clean buffers against a dirty page. We
- * clean the page here; otherwise the VM will never notice
+ * then we can have clean buffers against a dirty folio. We
+ * clean the folio here; otherwise the VM will never notice
* that the filesystem did any IO at all.
*
* Also, during truncate, discard_buffer will have marked all
- * the page's buffers clean. We discover that here and clean
- * the page also.
+ * the folio's buffers clean. We discover that here and clean
+ * the folio also.
*
* private_lock must be held over this entire operation in order
- * to synchronise against __set_page_dirty_buffers and prevent the
+ * to synchronise against block_dirty_folio and prevent the
* dirty bit from being lost.
*/
if (ret)
- cancel_dirty_page(page);
+ folio_cancel_dirty(folio);
spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {
@@ -3364,29 +2954,69 @@ int bh_uptodate_or_lock(struct buffer_head *bh)
EXPORT_SYMBOL(bh_uptodate_or_lock);
/**
- * bh_submit_read - Submit a locked buffer for reading
+ * __bh_read - Submit read for a locked buffer
* @bh: struct buffer_head
+ * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
+ * @wait: wait until reading finish
*
- * Returns zero on success and -EIO on error.
+ * Returns zero on success or don't wait, and -EIO on error.
*/
-int bh_submit_read(struct buffer_head *bh)
+int __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
{
- BUG_ON(!buffer_locked(bh));
+ int ret = 0;
- if (buffer_uptodate(bh)) {
- unlock_buffer(bh);
- return 0;
- }
+ BUG_ON(!buffer_locked(bh));
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, 0, bh);
- wait_on_buffer(bh);
- if (buffer_uptodate(bh))
- return 0;
- return -EIO;
+ submit_bh(REQ_OP_READ | op_flags, bh);
+ if (wait) {
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh))
+ ret = -EIO;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(__bh_read);
+
+/**
+ * __bh_read_batch - Submit read for a batch of unlocked buffers
+ * @nr: entry number of the buffer batch
+ * @bhs: a batch of struct buffer_head
+ * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
+ * @force_lock: force to get a lock on the buffer if set, otherwise drops any
+ * buffer that cannot lock.
+ *
+ * Returns zero on success or don't wait, and -EIO on error.
+ */
+void __bh_read_batch(int nr, struct buffer_head *bhs[],
+ blk_opf_t op_flags, bool force_lock)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+
+ if (buffer_uptodate(bh))
+ continue;
+
+ if (force_lock)
+ lock_buffer(bh);
+ else
+ if (!trylock_buffer(bh))
+ continue;
+
+ if (buffer_uptodate(bh)) {
+ unlock_buffer(bh);
+ continue;
+ }
+
+ bh->b_end_io = end_buffer_read_sync;
+ get_bh(bh);
+ submit_bh(REQ_OP_READ | op_flags, bh);
+ }
}
-EXPORT_SYMBOL(bh_submit_read);
+EXPORT_SYMBOL(__bh_read_batch);
void __init buffer_init(void)
{