Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  686
1 file changed, 397 insertions(+), 289 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fe443fece851..51731b76900d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -61,9 +61,8 @@ void btrfs_leak_debug_check(void)
state = list_entry(states.next, struct extent_state, leak_list);
printk(KERN_ERR "btrfs state leak: start %llu end %llu "
"state %lu in tree %p refs %d\n",
- (unsigned long long)state->start,
- (unsigned long long)state->end,
- state->state, state->tree, atomic_read(&state->refs));
+ state->start, state->end, state->state, state->tree,
+ atomic_read(&state->refs));
list_del(&state->leak_list);
kmem_cache_free(extent_state_cache, state);
}
@@ -71,8 +70,8 @@ void btrfs_leak_debug_check(void)
while (!list_empty(&buffers)) {
eb = list_entry(buffers.next, struct extent_buffer, leak_list);
printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
- "refs %d\n", (unsigned long long)eb->start,
- eb->len, atomic_read(&eb->refs));
+ "refs %d\n",
+ eb->start, eb->len, atomic_read(&eb->refs));
list_del(&eb->leak_list);
kmem_cache_free(extent_buffer_cache, eb);
}
@@ -88,11 +87,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
printk_ratelimited(KERN_DEBUG
"btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
- caller,
- (unsigned long long)btrfs_ino(inode),
- (unsigned long long)isize,
- (unsigned long long)start,
- (unsigned long long)end);
+ caller, btrfs_ino(inode), isize, start, end);
}
}
#else
@@ -150,8 +145,16 @@ int __init extent_io_init(void)
offsetof(struct btrfs_io_bio, bio));
if (!btrfs_bioset)
goto free_buffer_cache;
+
+ if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
+ goto free_bioset;
+
return 0;
+free_bioset:
+ bioset_free(btrfs_bioset);
+ btrfs_bioset = NULL;
+
free_buffer_cache:
kmem_cache_destroy(extent_buffer_cache);
extent_buffer_cache = NULL;
@@ -388,8 +391,7 @@ static int insert_state(struct extent_io_tree *tree,
if (end < start)
WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
- (unsigned long long)end,
- (unsigned long long)start);
+ end, start);
state->start = start;
state->end = end;
@@ -400,9 +402,8 @@ static int insert_state(struct extent_io_tree *tree,
struct extent_state *found;
found = rb_entry(node, struct extent_state, rb_node);
printk(KERN_ERR "btrfs found node %llu %llu on insert of "
- "%llu %llu\n", (unsigned long long)found->start,
- (unsigned long long)found->end,
- (unsigned long long)start, (unsigned long long)end);
+ "%llu %llu\n",
+ found->start, found->end, start, end);
return -EEXIST;
}
state->tree = tree;
@@ -762,15 +763,6 @@ static void cache_state(struct extent_state *state,
}
}
-static void uncache_state(struct extent_state **cached_ptr)
-{
- if (cached_ptr && (*cached_ptr)) {
- struct extent_state *state = *cached_ptr;
- *cached_ptr = NULL;
- free_extent_state(state);
- }
-}
-
/*
* set some bits on a range in the tree. This may require allocations or
* sleeping, so the gfp mask is used to indicate what is allowed.
@@ -1497,11 +1489,11 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
*end = state->end;
cur_start = state->end + 1;
node = rb_next(node);
- if (!node)
- break;
total_bytes += state->end - state->start + 1;
if (total_bytes >= max_bytes)
break;
+ if (!node)
+ break;
}
out:
spin_unlock(&tree->lock);
@@ -1628,7 +1620,7 @@ again:
*start = delalloc_start;
*end = delalloc_end;
free_extent_state(cached_state);
- return found;
+ return 0;
}
/*
@@ -1641,10 +1633,9 @@ again:
/*
* make sure to limit the number of pages we try to lock down
- * if we're looping.
*/
- if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
- delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+ if (delalloc_end + 1 - delalloc_start > max_bytes)
+ delalloc_end = delalloc_start + max_bytes - 1;
/* step two, lock all the pages after the page that has start */
ret = lock_delalloc_pages(inode, locked_page,
@@ -1655,8 +1646,7 @@ again:
*/
free_extent_state(cached_state);
if (!loops) {
- unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
- max_bytes = PAGE_CACHE_SIZE - offset;
+ max_bytes = PAGE_CACHE_SIZE;
loops = 1;
goto again;
} else {
@@ -1687,31 +1677,21 @@ out_failed:
return found;
}
-int extent_clear_unlock_delalloc(struct inode *inode,
- struct extent_io_tree *tree,
- u64 start, u64 end, struct page *locked_page,
- unsigned long op)
+int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+ struct page *locked_page,
+ unsigned long clear_bits,
+ unsigned long page_ops)
{
+ struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
int ret;
struct page *pages[16];
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
unsigned long nr_pages = end_index - index + 1;
int i;
- unsigned long clear_bits = 0;
-
- if (op & EXTENT_CLEAR_UNLOCK)
- clear_bits |= EXTENT_LOCKED;
- if (op & EXTENT_CLEAR_DIRTY)
- clear_bits |= EXTENT_DIRTY;
-
- if (op & EXTENT_CLEAR_DELALLOC)
- clear_bits |= EXTENT_DELALLOC;
clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
- if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
- EXTENT_SET_PRIVATE2)))
+ if (page_ops == 0)
return 0;
while (nr_pages > 0) {
@@ -1720,20 +1700,20 @@ int extent_clear_unlock_delalloc(struct inode *inode,
nr_pages, ARRAY_SIZE(pages)), pages);
for (i = 0; i < ret; i++) {
- if (op & EXTENT_SET_PRIVATE2)
+ if (page_ops & PAGE_SET_PRIVATE2)
SetPagePrivate2(pages[i]);
if (pages[i] == locked_page) {
page_cache_release(pages[i]);
continue;
}
- if (op & EXTENT_CLEAR_DIRTY)
+ if (page_ops & PAGE_CLEAR_DIRTY)
clear_page_dirty_for_io(pages[i]);
- if (op & EXTENT_SET_WRITEBACK)
+ if (page_ops & PAGE_SET_WRITEBACK)
set_page_writeback(pages[i]);
- if (op & EXTENT_END_WRITEBACK)
+ if (page_ops & PAGE_END_WRITEBACK)
end_page_writeback(pages[i]);
- if (op & EXTENT_CLEAR_UNLOCK_PAGE)
+ if (page_ops & PAGE_UNLOCK)
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
@@ -1810,7 +1790,7 @@ out:
* set the private field for a given byte offset in the tree. If there isn't
* an extent_state there already, this does nothing.
*/
-int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
struct rb_node *node;
struct extent_state *state;
@@ -1837,64 +1817,6 @@ out:
return ret;
}
-void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
- int count)
-{
- struct rb_node *node;
- struct extent_state *state;
-
- spin_lock(&tree->lock);
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, start);
- BUG_ON(!node);
-
- state = rb_entry(node, struct extent_state, rb_node);
- BUG_ON(state->start != start);
-
- while (count) {
- state->private = *csums++;
- count--;
- state = next_state(state);
- }
- spin_unlock(&tree->lock);
-}
-
-static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
-{
- struct bio_vec *bvec = bio->bi_io_vec + bio_index;
-
- return page_offset(bvec->bv_page) + bvec->bv_offset;
-}
-
-void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
- u32 csums[], int count)
-{
- struct rb_node *node;
- struct extent_state *state = NULL;
- u64 start;
-
- spin_lock(&tree->lock);
- do {
- start = __btrfs_get_bio_offset(bio, bio_index);
- if (state == NULL || state->start != start) {
- node = tree_search(tree, start);
- BUG_ON(!node);
-
- state = rb_entry(node, struct extent_state, rb_node);
- BUG_ON(state->start != start);
- }
- state->private = *csums++;
- count--;
- bio_index++;
-
- state = next_state(state);
- } while (count);
- spin_unlock(&tree->lock);
-}
-
int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
struct rb_node *node;
@@ -2173,7 +2095,8 @@ static int clean_io_failure(u64 start, struct page *page)
EXTENT_LOCKED);
spin_unlock(&BTRFS_I(inode)->io_tree.lock);
- if (state && state->start == failrec->start) {
+ if (state && state->start <= failrec->start &&
+ state->end >= failrec->start + failrec->len - 1) {
fs_info = BTRFS_I(inode)->root->fs_info;
num_copies = btrfs_num_copies(fs_info, failrec->logical,
failrec->len);
@@ -2201,9 +2124,9 @@ out:
* needed
*/
-static int bio_readpage_error(struct bio *failed_bio, struct page *page,
- u64 start, u64 end, int failed_mirror,
- struct extent_state *state)
+static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
+ struct page *page, u64 start, u64 end,
+ int failed_mirror)
{
struct io_failure_record *failrec = NULL;
u64 private;
@@ -2213,6 +2136,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct bio *bio;
+ struct btrfs_io_bio *btrfs_failed_bio;
+ struct btrfs_io_bio *btrfs_bio;
int num_copies;
int ret;
int read_mode;
@@ -2296,23 +2221,12 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
* all the retry and error correction code that follows. no
* matter what the error is, it is very likely to persist.
*/
- pr_debug("bio_readpage_error: cannot repair, num_copies == 1. "
- "state=%p, num_copies=%d, next_mirror %d, "
- "failed_mirror %d\n", state, num_copies,
- failrec->this_mirror, failed_mirror);
+ pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
+ num_copies, failrec->this_mirror, failed_mirror);
free_io_failure(inode, failrec, 0);
return -EIO;
}
- if (!state) {
- spin_lock(&tree->lock);
- state = find_first_extent_bit_state(tree, failrec->start,
- EXTENT_LOCKED);
- if (state && state->start != failrec->start)
- state = NULL;
- spin_unlock(&tree->lock);
- }
-
/*
* there are two premises:
* a) deliver good data to the caller
@@ -2349,9 +2263,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
read_mode = READ_SYNC;
}
- if (!state || failrec->this_mirror > num_copies) {
- pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, "
- "next_mirror %d, failed_mirror %d\n", state,
+ if (failrec->this_mirror > num_copies) {
+ pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
num_copies, failrec->this_mirror, failed_mirror);
free_io_failure(inode, failrec, 0);
return -EIO;
@@ -2362,12 +2275,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
free_io_failure(inode, failrec, 0);
return -EIO;
}
- bio->bi_private = state;
bio->bi_end_io = failed_bio->bi_end_io;
bio->bi_sector = failrec->logical >> 9;
bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
bio->bi_size = 0;
+ btrfs_failed_bio = btrfs_io_bio(failed_bio);
+ if (btrfs_failed_bio->csum) {
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+
+ btrfs_bio = btrfs_io_bio(bio);
+ btrfs_bio->csum = btrfs_bio->csum_inline;
+ phy_offset >>= inode->i_sb->s_blocksize_bits;
+ phy_offset *= csum_size;
+ memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
+ csum_size);
+ }
+
bio_add_page(bio, page, failrec->len, start - page_offset(page));
pr_debug("bio_readpage_error: submitting new read[%#x] to "
@@ -2450,6 +2375,18 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
bio_put(bio);
}
+static void
+endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
+ int uptodate)
+{
+ struct extent_state *cached = NULL;
+ u64 end = start + len - 1;
+
+ if (uptodate && tree->track_uptodate)
+ set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
+ unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
+}
+
/*
* after a readpage IO is done, we need to:
* clear the uptodate bits on error
@@ -2466,9 +2403,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
struct bio_vec *bvec = bio->bi_io_vec;
+ struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct extent_io_tree *tree;
+ u64 offset = 0;
u64 start;
u64 end;
+ u64 len;
+ u64 extent_start = 0;
+ u64 extent_len = 0;
int mirror;
int ret;
@@ -2477,9 +2419,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
do {
struct page *page = bvec->bv_page;
- struct extent_state *cached = NULL;
- struct extent_state *state;
- struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct inode *inode = page->mapping->host;
pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
@@ -2500,37 +2439,32 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
+ len = bvec->bv_len;
if (++bvec <= bvec_end)
prefetchw(&bvec->bv_page->flags);
- spin_lock(&tree->lock);
- state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
- if (state && state->start == start) {
- /*
- * take a reference on the state, unlock will drop
- * the ref
- */
- cache_state(state, &cached);
- }
- spin_unlock(&tree->lock);
-
mirror = io_bio->mirror_num;
- if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
- ret = tree->ops->readpage_end_io_hook(page, start, end,
- state, mirror);
+ if (likely(uptodate && tree->ops &&
+ tree->ops->readpage_end_io_hook)) {
+ ret = tree->ops->readpage_end_io_hook(io_bio, offset,
+ page, start, end,
+ mirror);
if (ret)
uptodate = 0;
else
clean_io_failure(start, page);
}
- if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
+ if (likely(uptodate))
+ goto readpage_ok;
+
+ if (tree->ops && tree->ops->readpage_io_failed_hook) {
ret = tree->ops->readpage_io_failed_hook(page, mirror);
if (!ret && !err &&
test_bit(BIO_UPTODATE, &bio->bi_flags))
uptodate = 1;
- } else if (!uptodate) {
+ } else {
/*
* The generic bio_readpage_error handles errors the
* following way: If possible, new read requests are
@@ -2541,24 +2475,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
* can't handle the error it will return -EIO and we
* remain responsible for that page.
*/
- ret = bio_readpage_error(bio, page, start, end, mirror, NULL);
+ ret = bio_readpage_error(bio, offset, page, start, end,
+ mirror);
if (ret == 0) {
uptodate =
test_bit(BIO_UPTODATE, &bio->bi_flags);
if (err)
uptodate = 0;
- uncache_state(&cached);
continue;
}
}
-
- if (uptodate && tree->track_uptodate) {
- set_extent_uptodate(tree, start, end, &cached,
- GFP_ATOMIC);
- }
- unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
-
- if (uptodate) {
+readpage_ok:
+ if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
unsigned offset;
@@ -2573,8 +2501,36 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
SetPageError(page);
}
unlock_page(page);
+ offset += len;
+
+ if (unlikely(!uptodate)) {
+ if (extent_len) {
+ endio_readpage_release_extent(tree,
+ extent_start,
+ extent_len, 1);
+ extent_start = 0;
+ extent_len = 0;
+ }
+ endio_readpage_release_extent(tree, start,
+ end - start + 1, 0);
+ } else if (!extent_len) {
+ extent_start = start;
+ extent_len = end + 1 - start;
+ } else if (extent_start + extent_len == start) {
+ extent_len += end + 1 - start;
+ } else {
+ endio_readpage_release_extent(tree, extent_start,
+ extent_len, uptodate);
+ extent_start = start;
+ extent_len = end + 1 - start;
+ }
} while (bvec <= bvec_end);
+ if (extent_len)
+ endio_readpage_release_extent(tree, extent_start, extent_len,
+ uptodate);
+ if (io_bio->end_io)
+ io_bio->end_io(io_bio, err);
bio_put(bio);
}
@@ -2586,6 +2542,7 @@ struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags)
{
+ struct btrfs_io_bio *btrfs_bio;
struct bio *bio;
bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
@@ -2601,6 +2558,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
bio->bi_size = 0;
bio->bi_bdev = bdev;
bio->bi_sector = first_sector;
+ btrfs_bio = btrfs_io_bio(bio);
+ btrfs_bio->csum = NULL;
+ btrfs_bio->csum_allocated = NULL;
+ btrfs_bio->end_io = NULL;
}
return bio;
}
@@ -2614,7 +2575,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
/* this also allocates from the btrfs_bioset */
struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
- return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+ struct btrfs_io_bio *btrfs_bio;
+ struct bio *bio;
+
+ bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+ if (bio) {
+ btrfs_bio = btrfs_io_bio(bio);
+ btrfs_bio->csum = NULL;
+ btrfs_bio->csum_allocated = NULL;
+ btrfs_bio->end_io = NULL;
+ }
+ return bio;
}
@@ -2738,17 +2709,45 @@ void set_page_extent_mapped(struct page *page)
}
}
+static struct extent_map *
+__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
+ u64 start, u64 len, get_extent_t *get_extent,
+ struct extent_map **em_cached)
+{
+ struct extent_map *em;
+
+ if (em_cached && *em_cached) {
+ em = *em_cached;
+ if (em->in_tree && start >= em->start &&
+ start < extent_map_end(em)) {
+ atomic_inc(&em->refs);
+ return em;
+ }
+
+ free_extent_map(em);
+ *em_cached = NULL;
+ }
+
+ em = get_extent(inode, page, pg_offset, start, len, 0);
+ if (em_cached && !IS_ERR_OR_NULL(em)) {
+ BUG_ON(*em_cached);
+ atomic_inc(&em->refs);
+ *em_cached = em;
+ }
+ return em;
+}
/*
* basic readpage implementation. Locked extent state structs are inserted
* into the tree that are removed when the IO is done (by the end_io
* handlers)
* XXX JDM: This needs looking at to ensure proper page locking
*/
-static int __extent_read_full_page(struct extent_io_tree *tree,
- struct page *page,
- get_extent_t *get_extent,
- struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw)
+static int __do_readpage(struct extent_io_tree *tree,
+ struct page *page,
+ get_extent_t *get_extent,
+ struct extent_map **em_cached,
+ struct bio **bio, int mirror_num,
+ unsigned long *bio_flags, int rw)
{
struct inode *inode = page->mapping->host;
u64 start = page_offset(page);
@@ -2762,35 +2761,26 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
- struct btrfs_ordered_extent *ordered;
int ret;
int nr = 0;
+ int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
size_t pg_offset = 0;
size_t iosize;
size_t disk_io_size;
size_t blocksize = inode->i_sb->s_blocksize;
- unsigned long this_bio_flag = 0;
+ unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
set_page_extent_mapped(page);
+ end = page_end;
if (!PageUptodate(page)) {
if (cleancache_get_page(page) == 0) {
BUG_ON(blocksize != PAGE_SIZE);
+ unlock_extent(tree, start, end);
goto out;
}
}
- end = page_end;
- while (1) {
- lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_extent(inode, start);
- if (!ordered)
- break;
- unlock_extent(tree, start, end);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
-
if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
char *userpage;
size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
@@ -2817,15 +2807,18 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
- unlock_extent_cached(tree, cur, cur + iosize - 1,
- &cached, GFP_NOFS);
+ if (!parent_locked)
+ unlock_extent_cached(tree, cur,
+ cur + iosize - 1,
+ &cached, GFP_NOFS);
break;
}
- em = get_extent(inode, page, pg_offset, cur,
- end - cur + 1, 0);
+ em = __get_extent_map(inode, page, pg_offset, cur,
+ end - cur + 1, get_extent, em_cached);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
- unlock_extent(tree, cur, end);
+ if (!parent_locked)
+ unlock_extent(tree, cur, end);
break;
}
extent_offset = cur - em->start;
@@ -2833,7 +2826,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
BUG_ON(end < cur);
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
- this_bio_flag = EXTENT_BIO_COMPRESSED;
+ this_bio_flag |= EXTENT_BIO_COMPRESSED;
extent_set_compress_type(&this_bio_flag,
em->compress_type);
}
@@ -2877,7 +2870,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
if (test_range_bit(tree, cur, cur_end,
EXTENT_UPTODATE, 1, NULL)) {
check_page_uptodate(tree, page);
- unlock_extent(tree, cur, cur + iosize - 1);
+ if (!parent_locked)
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -2887,7 +2881,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
*/
if (block_start == EXTENT_MAP_INLINE) {
SetPageError(page);
- unlock_extent(tree, cur, cur + iosize - 1);
+ if (!parent_locked)
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -2905,7 +2900,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
*bio_flags = this_bio_flag;
} else {
SetPageError(page);
- unlock_extent(tree, cur, cur + iosize - 1);
+ if (!parent_locked)
+ unlock_extent(tree, cur, cur + iosize - 1);
}
cur = cur + iosize;
pg_offset += iosize;
@@ -2919,6 +2915,104 @@ out:
return 0;
}
+static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
+ struct page *pages[], int nr_pages,
+ u64 start, u64 end,
+ get_extent_t *get_extent,
+ struct extent_map **em_cached,
+ struct bio **bio, int mirror_num,
+ unsigned long *bio_flags, int rw)
+{
+ struct inode *inode;
+ struct btrfs_ordered_extent *ordered;
+ int index;
+
+ inode = pages[0]->mapping->host;
+ while (1) {
+ lock_extent(tree, start, end);
+ ordered = btrfs_lookup_ordered_range(inode, start,
+ end - start + 1);
+ if (!ordered)
+ break;
+ unlock_extent(tree, start, end);
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+ }
+
+ for (index = 0; index < nr_pages; index++) {
+ __do_readpage(tree, pages[index], get_extent, em_cached, bio,
+ mirror_num, bio_flags, rw);
+ page_cache_release(pages[index]);
+ }
+}
+
+static void __extent_readpages(struct extent_io_tree *tree,
+ struct page *pages[],
+ int nr_pages, get_extent_t *get_extent,
+ struct extent_map **em_cached,
+ struct bio **bio, int mirror_num,
+ unsigned long *bio_flags, int rw)
+{
+ u64 start = 0;
+ u64 end = 0;
+ u64 page_start;
+ int index;
+ int first_index = 0;
+
+ for (index = 0; index < nr_pages; index++) {
+ page_start = page_offset(pages[index]);
+ if (!end) {
+ start = page_start;
+ end = start + PAGE_CACHE_SIZE - 1;
+ first_index = index;
+ } else if (end + 1 == page_start) {
+ end += PAGE_CACHE_SIZE;
+ } else {
+ __do_contiguous_readpages(tree, &pages[first_index],
+ index - first_index, start,
+ end, get_extent, em_cached,
+ bio, mirror_num, bio_flags,
+ rw);
+ start = page_start;
+ end = start + PAGE_CACHE_SIZE - 1;
+ first_index = index;
+ }
+ }
+
+ if (end)
+ __do_contiguous_readpages(tree, &pages[first_index],
+ index - first_index, start,
+ end, get_extent, em_cached, bio,
+ mirror_num, bio_flags, rw);
+}
+
+static int __extent_read_full_page(struct extent_io_tree *tree,
+ struct page *page,
+ get_extent_t *get_extent,
+ struct bio **bio, int mirror_num,
+ unsigned long *bio_flags, int rw)
+{
+ struct inode *inode = page->mapping->host;
+ struct btrfs_ordered_extent *ordered;
+ u64 start = page_offset(page);
+ u64 end = start + PAGE_CACHE_SIZE - 1;
+ int ret;
+
+ while (1) {
+ lock_extent(tree, start, end);
+ ordered = btrfs_lookup_ordered_extent(inode, start);
+ if (!ordered)
+ break;
+ unlock_extent(tree, start, end);
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+ }
+
+ ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
+ bio_flags, rw);
+ return ret;
+}
+
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num)
{
@@ -2933,6 +3027,20 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret;
}
+int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
+ get_extent_t *get_extent, int mirror_num)
+{
+ struct bio *bio = NULL;
+ unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
+ int ret;
+
+ ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
+ &bio_flags, READ);
+ if (bio)
+ ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
+ return ret;
+}
+
static noinline void update_nr_written(struct page *page,
struct writeback_control *wbc,
unsigned long nr_written)
@@ -3189,8 +3297,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (!PageWriteback(page)) {
printk(KERN_ERR "btrfs warning page %lu not "
"writeback, cur %llu end %llu\n",
- page->index, (unsigned long long)cur,
- (unsigned long long)end);
+ page->index, cur, end);
}
ret = submit_extent_page(write_flags, tree, page,
@@ -3769,7 +3876,7 @@ int extent_readpages(struct extent_io_tree *tree,
unsigned long bio_flags = 0;
struct page *pagepool[16];
struct page *page;
- int i = 0;
+ struct extent_map *em_cached = NULL;
int nr = 0;
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
@@ -3786,18 +3893,16 @@ int extent_readpages(struct extent_io_tree *tree,
pagepool[nr++] = page;
if (nr < ARRAY_SIZE(pagepool))
continue;
- for (i = 0; i < nr; i++) {
- __extent_read_full_page(tree, pagepool[i], get_extent,
- &bio, 0, &bio_flags, READ);
- page_cache_release(pagepool[i]);
- }
+ __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
+ &bio, 0, &bio_flags, READ);
nr = 0;
}
- for (i = 0; i < nr; i++) {
- __extent_read_full_page(tree, pagepool[i], get_extent,
- &bio, 0, &bio_flags, READ);
- page_cache_release(pagepool[i]);
- }
+ if (nr)
+ __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
+ &bio, 0, &bio_flags, READ);
+
+ if (em_cached)
+ free_extent_map(em_cached);
BUG_ON(!list_empty(pages));
if (bio)
@@ -4136,6 +4241,76 @@ static void __free_extent_buffer(struct extent_buffer *eb)
kmem_cache_free(extent_buffer_cache, eb);
}
+static int extent_buffer_under_io(struct extent_buffer *eb)
+{
+ return (atomic_read(&eb->io_pages) ||
+ test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
+ test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+}
+
+/*
+ * Helper for releasing extent buffer page.
+ */
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
+ unsigned long start_idx)
+{
+ unsigned long index;
+ unsigned long num_pages;
+ struct page *page;
+ int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+
+ BUG_ON(extent_buffer_under_io(eb));
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ index = start_idx + num_pages;
+ if (start_idx >= index)
+ return;
+
+ do {
+ index--;
+ page = extent_buffer_page(eb, index);
+ if (page && mapped) {
+ spin_lock(&page->mapping->private_lock);
+ /*
+ * We do this since we'll remove the pages after we've
+ * removed the eb from the radix tree, so we could race
+ * and have this page now attached to the new eb. So
+ * only clear page_private if it's still connected to
+ * this eb.
+ */
+ if (PagePrivate(page) &&
+ page->private == (unsigned long)eb) {
+ BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+ BUG_ON(PageDirty(page));
+ BUG_ON(PageWriteback(page));
+ /*
+ * We need to make sure we haven't be attached
+ * to a new eb.
+ */
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ /* One for the page private */
+ page_cache_release(page);
+ }
+ spin_unlock(&page->mapping->private_lock);
+
+ }
+ if (page) {
+ /* One for when we alloced the page */
+ page_cache_release(page);
+ }
+ } while (index != start_idx);
+}
+
+/*
+ * Helper for releasing the extent buffer.
+ */
+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
+{
+ btrfs_release_extent_buffer_page(eb, 0);
+ __free_extent_buffer(eb);
+}
+
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
u64 start,
unsigned long len,
@@ -4184,13 +4359,16 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
struct extent_buffer *new;
unsigned long num_pages = num_extent_pages(src->start, src->len);
- new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC);
+ new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS);
if (new == NULL)
return NULL;
for (i = 0; i < num_pages; i++) {
- p = alloc_page(GFP_ATOMIC);
- BUG_ON(!p);
+ p = alloc_page(GFP_NOFS);
+ if (!p) {
+ btrfs_release_extent_buffer(new);
+ return NULL;
+ }
attach_extent_buffer_page(new, p);
WARN_ON(PageDirty(p));
SetPageUptodate(p);
@@ -4210,12 +4388,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
unsigned long num_pages = num_extent_pages(0, len);
unsigned long i;
- eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC);
+ eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS);
if (!eb)
return NULL;
for (i = 0; i < num_pages; i++) {
- eb->pages[i] = alloc_page(GFP_ATOMIC);
+ eb->pages[i] = alloc_page(GFP_NOFS);
if (!eb->pages[i])
goto err;
}
@@ -4231,76 +4409,6 @@ err:
return NULL;
}
-static int extent_buffer_under_io(struct extent_buffer *eb)
-{
- return (atomic_read(&eb->io_pages) ||
- test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
- test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
-}
-
-/*
- * Helper for releasing extent buffer page.
- */
-static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
- unsigned long start_idx)
-{
- unsigned long index;
- unsigned long num_pages;
- struct page *page;
- int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
-
- BUG_ON(extent_buffer_under_io(eb));
-
- num_pages = num_extent_pages(eb->start, eb->len);
- index = start_idx + num_pages;
- if (start_idx >= index)
- return;
-
- do {
- index--;
- page = extent_buffer_page(eb, index);
- if (page && mapped) {
- spin_lock(&page->mapping->private_lock);
- /*
- * We do this since we'll remove the pages after we've
- * removed the eb from the radix tree, so we could race
- * and have this page now attached to the new eb. So
- * only clear page_private if it's still connected to
- * this eb.
- */
- if (PagePrivate(page) &&
- page->private == (unsigned long)eb) {
- BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
- BUG_ON(PageDirty(page));
- BUG_ON(PageWriteback(page));
- /*
- * We need to make sure we haven't be attached
- * to a new eb.
- */
- ClearPagePrivate(page);
- set_page_private(page, 0);
- /* One for the page private */
- page_cache_release(page);
- }
- spin_unlock(&page->mapping->private_lock);
-
- }
- if (page) {
- /* One for when we alloced the page */
- page_cache_release(page);
- }
- } while (index != start_idx);
-}
-
-/*
- * Helper for releasing the extent buffer.
- */
-static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
-{
- btrfs_release_extent_buffer_page(eb, 0);
- __free_extent_buffer(eb);
-}
-
static void check_buffer_tree_ref(struct extent_buffer *eb)
{
int refs;
@@ -4771,7 +4879,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
while (len > 0) {
page = extent_buffer_page(eb, i);
@@ -4813,8 +4921,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
if (start + min_len > eb->len) {
WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
- "wanted %lu %lu\n", (unsigned long long)eb->start,
- eb->len, start, min_len);
+ "wanted %lu %lu\n",
+ eb->start, eb->len, start, min_len);
return -EINVAL;
}
@@ -4841,7 +4949,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
while (len > 0) {
page = extent_buffer_page(eb, i);
@@ -4875,7 +4983,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
while (len > 0) {
page = extent_buffer_page(eb, i);
@@ -4905,7 +5013,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
while (len > 0) {
page = extent_buffer_page(eb, i);
@@ -4936,7 +5044,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
WARN_ON(src->len != dst_len);
offset = (start_offset + dst_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
+ (PAGE_CACHE_SIZE - 1);
while (len > 0) {
page = extent_buffer_page(dst, i);
@@ -5022,9 +5130,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
while (len > 0) {
dst_off_in_page = (start_offset + dst_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
+ (PAGE_CACHE_SIZE - 1);
src_off_in_page = (start_offset + src_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
+ (PAGE_CACHE_SIZE - 1);
dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
@@ -5075,9 +5183,9 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
dst_off_in_page = (start_offset + dst_end) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
+ (PAGE_CACHE_SIZE - 1);
src_off_in_page = (start_offset + src_end) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
+ (PAGE_CACHE_SIZE - 1);
cur = min_t(unsigned long, len, src_off_in_page + 1);
cur = min(cur, dst_off_in_page + 1);