Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--  fs/btrfs/inode.c  397
1 file changed, 213 insertions(+), 184 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ee582a36653d..a0546401bc0a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -30,6 +30,7 @@
#include <linux/swap.h>
#include <linux/sched/mm.h>
#include <asm/unaligned.h>
+#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -46,8 +47,8 @@
#include "backref.h"
#include "props.h"
#include "qgroup.h"
-#include "dedupe.h"
#include "delalloc-space.h"
+#include "block-group.h"
struct btrfs_iget_args {
struct btrfs_key *location;
@@ -74,15 +75,15 @@ static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
+struct kmem_cache *btrfs_free_space_bitmap_cachep;
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
- u64 start, u64 end, u64 delalloc_end,
- int *page_started, unsigned long *nr_written,
- int unlock, struct btrfs_dedupe_hash *hash);
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written, int unlock);
static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
u64 orig_start, u64 block_start,
u64 block_len, u64 orig_block_len,
@@ -178,6 +179,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
size_t cur_size = size;
unsigned long offset;
+ ASSERT((compressed_size > 0 && compressed_pages) ||
+ (compressed_size == 0 && !compressed_pages));
+
if (compressed_size && compressed_pages)
cur_size = compressed_size;
@@ -462,8 +466,7 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
* are written in the same order that the flusher thread sent them
* down.
*/
-static noinline void compress_file_range(struct async_chunk *async_chunk,
- int *num_added)
+static noinline int compress_file_range(struct async_chunk *async_chunk)
{
struct inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -479,6 +482,7 @@ static noinline void compress_file_range(struct async_chunk *async_chunk,
int i;
int will_compress;
int compress_type = fs_info->compress_type;
+ int compressed_extents = 0;
int redirty = 0;
inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
@@ -615,14 +619,21 @@ cont:
* our outstanding extent for clearing delalloc for this
* range.
*/
- extent_clear_unlock_delalloc(inode, start, end, end,
- NULL, clear_flags,
+ extent_clear_unlock_delalloc(inode, start, end, NULL,
+ clear_flags,
PAGE_UNLOCK |
PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK |
page_error_op |
PAGE_END_WRITEBACK);
- goto free_pages_out;
+
+ for (i = 0; i < nr_pages; i++) {
+ WARN_ON(pages[i]->mapping);
+ put_page(pages[i]);
+ }
+ kfree(pages);
+
+ return 0;
}
}
@@ -641,7 +652,7 @@ cont:
*/
total_in = ALIGN(total_in, PAGE_SIZE);
if (total_compressed + blocksize <= total_in) {
- *num_added += 1;
+ compressed_extents++;
/*
* The async work queues will take care of doing actual
@@ -658,7 +669,7 @@ cont:
cond_resched();
goto again;
}
- return;
+ return compressed_extents;
}
}
if (pages) {
@@ -697,16 +708,9 @@ cleanup_and_bail_uncompressed:
extent_range_redirty_for_io(inode, start, end);
add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
BTRFS_COMPRESS_NONE);
- *num_added += 1;
-
- return;
+ compressed_extents++;
-free_pages_out:
- for (i = 0; i < nr_pages; i++) {
- WARN_ON(pages[i]->mapping);
- put_page(pages[i]);
- }
- kfree(pages);
+ return compressed_extents;
}
static void free_async_extent_pages(struct async_extent *async_extent)
@@ -762,10 +766,7 @@ retry:
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- async_extent->start +
- async_extent->ram_size - 1,
- &page_started, &nr_written, 0,
- NULL);
+ &page_started, &nr_written, 0);
/* JDM XXX */
@@ -855,8 +856,6 @@ retry:
extent_clear_unlock_delalloc(inode, async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- async_extent->start +
- async_extent->ram_size - 1,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK);
@@ -875,7 +874,7 @@ retry:
btrfs_writepage_endio_finish_ordered(p, start, end, 0);
p->mapping = NULL;
- extent_clear_unlock_delalloc(inode, start, end, end,
+ extent_clear_unlock_delalloc(inode, start, end,
NULL, 0,
PAGE_END_WRITEBACK |
PAGE_SET_ERROR);
@@ -893,8 +892,6 @@ out_free:
extent_clear_unlock_delalloc(inode, async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- async_extent->start +
- async_extent->ram_size - 1,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
@@ -953,9 +950,8 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
*/
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
- u64 start, u64 end, u64 delalloc_end,
- int *page_started, unsigned long *nr_written,
- int unlock, struct btrfs_dedupe_hash *hash)
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written, int unlock)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -994,8 +990,7 @@ static noinline int cow_file_range(struct inode *inode,
* our outstanding extent for clearing delalloc for this
* range.
*/
- extent_clear_unlock_delalloc(inode, start, end,
- delalloc_end, NULL,
+ extent_clear_unlock_delalloc(inode, start, end, NULL,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
@@ -1078,7 +1073,7 @@ static noinline int cow_file_range(struct inode *inode,
extent_clear_unlock_delalloc(inode, start,
start + ram_size - 1,
- delalloc_end, locked_page,
+ locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
page_ops);
if (num_bytes < cur_alloc_size)
@@ -1123,7 +1118,6 @@ out_unlock:
if (extent_reserved) {
extent_clear_unlock_delalloc(inode, start,
start + cur_alloc_size,
- start + cur_alloc_size,
locked_page,
clear_bits,
page_ops);
@@ -1131,8 +1125,7 @@ out_unlock:
if (start >= end)
goto out;
}
- extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
- locked_page,
+ extent_clear_unlock_delalloc(inode, start, end, locked_page,
clear_bits | EXTENT_CLEAR_DATA_RESV,
page_ops);
goto out;
@@ -1144,12 +1137,12 @@ out_unlock:
static noinline void async_cow_start(struct btrfs_work *work)
{
struct async_chunk *async_chunk;
- int num_added = 0;
+ int compressed_extents;
async_chunk = container_of(work, struct async_chunk, work);
- compress_file_range(async_chunk, &num_added);
- if (num_added == 0) {
+ compressed_extents = compress_file_range(async_chunk);
+ if (compressed_extents == 0) {
btrfs_add_delayed_iput(async_chunk->inode);
async_chunk->inode = NULL;
}
@@ -1235,7 +1228,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
PAGE_SET_ERROR;
- extent_clear_unlock_delalloc(inode, start, end, 0, locked_page,
+ extent_clear_unlock_delalloc(inode, start, end, locked_page,
clear_bits, page_ops);
return -ENOMEM;
}
@@ -1310,36 +1303,25 @@ static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
*/
static noinline int run_delalloc_nocow(struct inode *inode,
struct page *locked_page,
- u64 start, u64 end, int *page_started, int force,
- unsigned long *nr_written)
+ const u64 start, const u64 end,
+ int *page_started, int force,
+ unsigned long *nr_written)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_buffer *leaf;
struct btrfs_path *path;
- struct btrfs_file_extent_item *fi;
- struct btrfs_key found_key;
- struct extent_map *em;
- u64 cow_start;
- u64 cur_offset;
- u64 extent_end;
- u64 extent_offset;
- u64 disk_bytenr;
- u64 num_bytes;
- u64 disk_num_bytes;
- u64 ram_bytes;
- int extent_type;
+ u64 cow_start = (u64)-1;
+ u64 cur_offset = start;
int ret;
- int type;
- int nocow;
- int check_prev = 1;
- bool nolock;
+ bool check_prev = true;
+ const bool freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));
u64 ino = btrfs_ino(BTRFS_I(inode));
+ bool nocow = false;
+ u64 disk_bytenr = 0;
path = btrfs_alloc_path();
if (!path) {
- extent_clear_unlock_delalloc(inode, start, end, end,
- locked_page,
+ extent_clear_unlock_delalloc(inode, start, end, locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, PAGE_UNLOCK |
@@ -1349,15 +1331,29 @@ static noinline int run_delalloc_nocow(struct inode *inode,
return -ENOMEM;
}
- nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
-
- cow_start = (u64)-1;
- cur_offset = start;
while (1) {
+ struct btrfs_key found_key;
+ struct btrfs_file_extent_item *fi;
+ struct extent_buffer *leaf;
+ u64 extent_end;
+ u64 extent_offset;
+ u64 num_bytes = 0;
+ u64 disk_num_bytes;
+ u64 ram_bytes;
+ int extent_type;
+
+ nocow = false;
+
ret = btrfs_lookup_file_extent(NULL, root, path, ino,
cur_offset, 0);
if (ret < 0)
goto error;
+
+ /*
+ * If there is no extent for our range when doing the initial
+ * search, then go back to the previous slot as it will be the
+ * one containing the search offset
+ */
if (ret > 0 && path->slots[0] > 0 && check_prev) {
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1366,8 +1362,9 @@ static noinline int run_delalloc_nocow(struct inode *inode,
found_key.type == BTRFS_EXTENT_DATA_KEY)
path->slots[0]--;
}
- check_prev = 0;
+ check_prev = false;
next_slot:
+ /* Go to next leaf if we have exhausted the current one */
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
@@ -1381,28 +1378,40 @@ next_slot:
leaf = path->nodes[0];
}
- nocow = 0;
- disk_bytenr = 0;
- num_bytes = 0;
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ /* Didn't find anything for our INO */
if (found_key.objectid > ino)
break;
+ /*
+ * Keep searching until we find an EXTENT_ITEM or there are no
+ * more extents for this inode
+ */
if (WARN_ON_ONCE(found_key.objectid < ino) ||
found_key.type < BTRFS_EXTENT_DATA_KEY) {
path->slots[0]++;
goto next_slot;
}
+
+ /* Found key is not EXTENT_DATA_KEY or starts after req range */
if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
found_key.offset > end)
break;
+ /*
+		 * If the found extent starts after the requested offset, then
+ * adjust extent_end to be right before this extent begins
+ */
if (found_key.offset > cur_offset) {
extent_end = found_key.offset;
extent_type = 0;
goto out_check;
}
+ /*
+		 * Found an extent which begins before our range and
+		 * potentially intersects it
+ */
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(leaf, fi);
@@ -1416,26 +1425,36 @@ next_slot:
btrfs_file_extent_num_bytes(leaf, fi);
disk_num_bytes =
btrfs_file_extent_disk_num_bytes(leaf, fi);
+ /*
+			 * If the extent we got ends before our range starts,
+			 * skip to the next extent
+ */
if (extent_end <= start) {
path->slots[0]++;
goto next_slot;
}
+ /* Skip holes */
if (disk_bytenr == 0)
goto out_check;
+ /* Skip compressed/encrypted/encoded extents */
if (btrfs_file_extent_compression(leaf, fi) ||
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
goto out_check;
/*
- * Do the same check as in btrfs_cross_ref_exist but
- * without the unnecessary search.
+			 * If the extent was created before the last snapshot
+			 * of this root, it is shared, hence we can't do
+ * nocow. This is the same check as in
+ * btrfs_cross_ref_exist but without calling
+ * btrfs_search_slot.
*/
- if (!nolock &&
+ if (!freespace_inode &&
btrfs_file_extent_generation(leaf, fi) <=
btrfs_root_last_snapshot(&root->root_item))
goto out_check;
if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
goto out_check;
+ /* If extent is RO, we must COW it */
if (btrfs_extent_readonly(fs_info, disk_bytenr))
goto out_check;
ret = btrfs_cross_ref_exist(root, ino,
@@ -1452,17 +1471,17 @@ next_slot:
goto error;
}
- WARN_ON_ONCE(nolock);
+ WARN_ON_ONCE(freespace_inode);
goto out_check;
}
disk_bytenr += extent_offset;
disk_bytenr += cur_offset - found_key.offset;
num_bytes = min(end + 1, extent_end) - cur_offset;
/*
- * if there are pending snapshots for this root,
- * we fall into common COW way.
+			 * If there are pending snapshots for this root, we
+			 * fall back to the common COW path
*/
- if (!nolock && atomic_read(&root->snapshot_force_cow))
+ if (!freespace_inode && atomic_read(&root->snapshot_force_cow))
goto out_check;
/*
* force cow if csum exists in the range.
@@ -1481,27 +1500,29 @@ next_slot:
cur_offset = cow_start;
goto error;
}
- WARN_ON_ONCE(nolock);
+ WARN_ON_ONCE(freespace_inode);
goto out_check;
}
if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
goto out_check;
- nocow = 1;
+ nocow = true;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
- extent_end = found_key.offset +
- btrfs_file_extent_ram_bytes(leaf, fi);
- extent_end = ALIGN(extent_end,
- fs_info->sectorsize);
+ extent_end = found_key.offset + ram_bytes;
+ extent_end = ALIGN(extent_end, fs_info->sectorsize);
+ /* Skip extents outside of our requested range */
+ if (extent_end <= start) {
+ path->slots[0]++;
+ goto next_slot;
+ }
} else {
+			/* If this triggers then we have memory corruption */
BUG();
}
out_check:
- if (extent_end <= start) {
- path->slots[0]++;
- if (nocow)
- btrfs_dec_nocow_writers(fs_info, disk_bytenr);
- goto next_slot;
- }
+ /*
+ * If nocow is false then record the beginning of the range
+ * that needs to be COWed
+ */
if (!nocow) {
if (cow_start == (u64)-1)
cow_start = cur_offset;
@@ -1513,11 +1534,16 @@ out_check:
}
btrfs_release_path(path);
+
+ /*
+ * COW range from cow_start to found_key.offset - 1. As the key
+		 * COW the range from cow_start to found_key.offset - 1. The
+		 * found key marks the start of the first extent that can be
+		 * NOCOW, which follows a range that must be COWed
if (cow_start != (u64)-1) {
ret = cow_file_range(inode, locked_page,
cow_start, found_key.offset - 1,
- end, page_started, nr_written, 1,
- NULL);
+ page_started, nr_written, 1);
if (ret) {
if (nocow)
btrfs_dec_nocow_writers(fs_info,
@@ -1529,6 +1555,7 @@ out_check:
if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
u64 orig_start = found_key.offset - extent_offset;
+ struct extent_map *em;
em = create_io_em(inode, cur_offset, num_bytes,
orig_start,
@@ -1545,19 +1572,29 @@ out_check:
goto error;
}
free_extent_map(em);
- }
-
- if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
- type = BTRFS_ORDERED_PREALLOC;
+ ret = btrfs_add_ordered_extent(inode, cur_offset,
+ disk_bytenr, num_bytes,
+ num_bytes,
+ BTRFS_ORDERED_PREALLOC);
+ if (ret) {
+ btrfs_drop_extent_cache(BTRFS_I(inode),
+ cur_offset,
+ cur_offset + num_bytes - 1,
+ 0);
+ goto error;
+ }
} else {
- type = BTRFS_ORDERED_NOCOW;
+ ret = btrfs_add_ordered_extent(inode, cur_offset,
+ disk_bytenr, num_bytes,
+ num_bytes,
+ BTRFS_ORDERED_NOCOW);
+ if (ret)
+ goto error;
}
- ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
- num_bytes, num_bytes, type);
if (nocow)
btrfs_dec_nocow_writers(fs_info, disk_bytenr);
- BUG_ON(ret); /* -ENOMEM */
+ nocow = false;
if (root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID)
@@ -1570,7 +1607,7 @@ out_check:
num_bytes);
extent_clear_unlock_delalloc(inode, cur_offset,
- cur_offset + num_bytes - 1, end,
+ cur_offset + num_bytes - 1,
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC |
EXTENT_CLEAR_DATA_RESV,
@@ -1595,15 +1632,18 @@ out_check:
if (cow_start != (u64)-1) {
cur_offset = end;
- ret = cow_file_range(inode, locked_page, cow_start, end, end,
- page_started, nr_written, 1, NULL);
+ ret = cow_file_range(inode, locked_page, cow_start, end,
+ page_started, nr_written, 1);
if (ret)
goto error;
}
error:
+ if (nocow)
+ btrfs_dec_nocow_writers(fs_info, disk_bytenr);
+
if (ret && cur_offset < end)
- extent_clear_unlock_delalloc(inode, cur_offset, end, end,
+ extent_clear_unlock_delalloc(inode, cur_offset, end,
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
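
The error handling above also closes a leak: btrfs_add_ordered_extent()
failures in run_delalloc_nocow() used to trip BUG_ON(ret) and could leave a
NOCOW writer reference held, whereas now every btrfs_inc_nocow_writers() is
paired with a btrfs_dec_nocow_writers(), including on the error path. A
condensed sketch of the resulting pattern (illustrative only; arguments and
surrounding code abbreviated from the hunks above):

	if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
		goto out_check;
	nocow = true;
	/* ... create the extent map, then the ordered extent ... */
	ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
				       num_bytes, num_bytes,
				       BTRFS_ORDERED_NOCOW);
	if (ret)
		goto error;		/* writer reference still held */
	btrfs_dec_nocow_writers(fs_info, disk_bytenr);
	nocow = false;			/* reference dropped for this extent */
	/* ... */
error:
	if (nocow)			/* failed with the reference held */
		btrfs_dec_nocow_writers(fs_info, disk_bytenr);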
@@ -1654,8 +1694,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
page_started, 0, nr_written);
} else if (!inode_can_compress(inode) ||
!inode_need_compress(inode, start, end)) {
- ret = cow_file_range(inode, locked_page, start, end, end,
- page_started, nr_written, 1, NULL);
+ ret = cow_file_range(inode, locked_page, start, end,
+ page_started, nr_written, 1);
} else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags);
@@ -2090,7 +2130,7 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
unsigned int extra_bits,
- struct extent_state **cached_state, int dedupe)
+ struct extent_state **cached_state)
{
WARN_ON(PAGE_ALIGNED(end));
return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
@@ -2156,7 +2196,7 @@ again:
}
ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
- &cached_state, 0);
+ &cached_state);
if (ret) {
mapping_set_error(page->mapping, ret);
end_extent_writepage(page, ret, page_start, page_end);
@@ -3850,7 +3890,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
{
struct btrfs_map_token token;
- btrfs_init_map_token(&token);
+ btrfs_init_map_token(&token, leaf);
btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
@@ -4946,12 +4986,11 @@ again:
}
clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
- 0, 0, &cached_state);
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
+ 0, 0, &cached_state);
ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
- &cached_state, 0);
+ &cached_state);
if (ret) {
unlock_extent_cached(io_tree, block_start, block_end,
&cached_state);
@@ -5332,9 +5371,9 @@ static void evict_inode_truncate_pages(struct inode *inode)
btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
clear_extent_bit(io_tree, start, end,
- EXTENT_LOCKED | EXTENT_DIRTY |
- EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, 1, 1, &cached_state);
+ EXTENT_LOCKED | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
+ &cached_state);
cond_resched();
spin_lock(&io_tree->lock);
@@ -5347,59 +5386,50 @@ static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- u64 delayed_refs_extra = btrfs_calc_trans_metadata_size(fs_info, 1);
- int failures = 0;
-
- for (;;) {
- struct btrfs_trans_handle *trans;
- int ret;
-
- ret = btrfs_block_rsv_refill(root, rsv,
- rsv->size + delayed_refs_extra,
- BTRFS_RESERVE_FLUSH_LIMIT);
-
- if (ret && ++failures > 2) {
- btrfs_warn(fs_info,
- "could not allocate space for a delete; will truncate on mount");
- return ERR_PTR(-ENOSPC);
- }
-
- /*
- * Evict can generate a large amount of delayed refs without
- * having a way to add space back since we exhaust our temporary
- * block rsv. We aren't allowed to do FLUSH_ALL in this case
- * because we could deadlock with so many things in the flushing
- * code, so we have to try and hold some extra space to
- * compensate for our delayed ref generation. If we can't get
- * that space then we need see if we can steal our minimum from
- * the global reserve. We will be ratelimited by the amount of
- * space we have for the delayed refs rsv, so we'll end up
- * committing and trying again.
- */
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans) || !ret) {
- if (!IS_ERR(trans)) {
- trans->block_rsv = &fs_info->trans_block_rsv;
- trans->bytes_reserved = delayed_refs_extra;
- btrfs_block_rsv_migrate(rsv, trans->block_rsv,
- delayed_refs_extra, 1);
- }
- return trans;
- }
+ struct btrfs_trans_handle *trans;
+ u64 delayed_refs_extra = btrfs_calc_insert_metadata_size(fs_info, 1);
+ int ret;
+ /*
+	 * Eviction should be taking place somewhere safe because of our
+	 * delayed iputs. However the normal flushing code will run delayed
+	 * iputs, so we cannot use FLUSH_ALL, otherwise we'll deadlock.
+ *
+ * We reserve the delayed_refs_extra here again because we can't use
+ * btrfs_start_transaction(root, 0) for the same deadlocky reason as
+ * above. We reserve our extra bit here because we generate a ton of
+ * delayed refs activity by truncating.
+ *
+ * If we cannot make our reservation we'll attempt to steal from the
+ * global reserve, because we really want to be able to free up space.
+ */
+ ret = btrfs_block_rsv_refill(root, rsv, rsv->size + delayed_refs_extra,
+ BTRFS_RESERVE_FLUSH_EVICT);
+ if (ret) {
/*
* Try to steal from the global reserve if there is space for
* it.
*/
- if (!btrfs_check_space_for_delayed_refs(fs_info) &&
- !btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0))
- return trans;
+ if (btrfs_check_space_for_delayed_refs(fs_info) ||
+ btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0)) {
+ btrfs_warn(fs_info,
+ "could not allocate space for delete; will truncate on mount");
+ return ERR_PTR(-ENOSPC);
+ }
+ delayed_refs_extra = 0;
+ }
- /* If not, commit and try again. */
- ret = btrfs_commit_transaction(trans);
- if (ret)
- return ERR_PTR(ret);
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans))
+ return trans;
+
+ if (delayed_refs_extra) {
+ trans->block_rsv = &fs_info->trans_block_rsv;
+ trans->bytes_reserved = delayed_refs_extra;
+ btrfs_block_rsv_migrate(rsv, trans->block_rsv,
+ delayed_refs_extra, 1);
}
+ return trans;
}
void btrfs_evict_inode(struct inode *inode)
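
evict_refill_and_join() now makes a single reservation attempt with
BTRFS_RESERVE_FLUSH_EVICT, stealing from the global reserve as a fallback,
instead of looping over refill/commit cycles. A minimal sketch of how
btrfs_evict_inode() consumes the helper (abridged; the truncation retry loop
around it is only hinted at):

	trans = evict_refill_and_join(root, rsv);
	if (IS_ERR(trans))
		goto free_rsv;	/* no space: inode is truncated on next mount */

	trans->block_rsv = rsv;
	/* ... drop one batch of the doomed inode's items ... */
	btrfs_end_transaction(trans);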
@@ -5446,7 +5476,7 @@ void btrfs_evict_inode(struct inode *inode)
rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
if (!rsv)
goto no_delete;
- rsv->size = btrfs_calc_trunc_metadata_size(fs_info, 1);
+ rsv->size = btrfs_calc_metadata_size(fs_info, 1);
rsv->failfast = 1;
btrfs_i_size_write(BTRFS_I(inode), 0);
@@ -7701,12 +7731,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend;
u64 len = bh_result->b_size;
- int unlock_bits = EXTENT_LOCKED;
int ret = 0;
- if (create)
- unlock_bits |= EXTENT_DIRTY;
- else
+ if (!create)
len = min_t(u64, len, fs_info->sectorsize);
lockstart = start;
@@ -7765,9 +7792,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (ret < 0)
goto unlock_err;
- /* clear and unlock the entire range */
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- unlock_bits, 1, 0, &cached_state);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, &cached_state);
} else {
ret = btrfs_get_blocks_direct_read(em, bh_result, inode,
start, len);
@@ -7783,9 +7809,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
*/
lockstart = start + bh_result->b_size;
if (lockstart < lockend) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, unlock_bits, 1, 0,
- &cached_state);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+ lockstart, lockend, &cached_state);
} else {
free_extent_state(cached_state);
}
@@ -7796,8 +7821,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
return 0;
unlock_err:
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- unlock_bits, 1, 0, &cached_state);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ &cached_state);
err:
if (dio_data)
current->journal_info = dio_data;
@@ -8812,8 +8837,7 @@ again:
*/
if (!inode_evicting)
clear_extent_bit(tree, start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DELALLOC_NEW |
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 0, &cached_state);
/*
@@ -8868,8 +8892,7 @@ again:
if (PageDirty(page))
btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
- clear_extent_bit(tree, page_start, page_end,
- EXTENT_LOCKED | EXTENT_DIRTY |
+ clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
&cached_state);
@@ -8997,12 +9020,11 @@ again:
* reserve data&meta space before lock_page() (see above comments).
*/
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
- 0, 0, &cached_state);
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 0, 0, &cached_state);
ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0,
- &cached_state, 0);
+ &cached_state);
if (ret2) {
unlock_extent_cached(io_tree, page_start, page_end,
&cached_state);
@@ -9060,7 +9082,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
int ret;
struct btrfs_trans_handle *trans;
u64 mask = fs_info->sectorsize - 1;
- u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);
+ u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
if (!skip_writeback) {
ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
@@ -9380,6 +9402,7 @@ void __cold btrfs_destroy_cachep(void)
kmem_cache_destroy(btrfs_trans_handle_cachep);
kmem_cache_destroy(btrfs_path_cachep);
kmem_cache_destroy(btrfs_free_space_cachep);
+ kmem_cache_destroy(btrfs_free_space_bitmap_cachep);
}
int __init btrfs_init_cachep(void)
@@ -9409,6 +9432,12 @@ int __init btrfs_init_cachep(void)
if (!btrfs_free_space_cachep)
goto fail;
+ btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
+ PAGE_SIZE, PAGE_SIZE,
+ SLAB_RED_ZONE, NULL);
+ if (!btrfs_free_space_bitmap_cachep)
+ goto fail;
+
return 0;
fail:
btrfs_destroy_cachep();
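
The new btrfs_free_space_bitmap cache hands out page-sized, page-aligned
objects with redzoning, replacing bare page allocations for free space
bitmaps. A hedged sketch of the consumer side (the actual conversion lives
in free-space-cache.c, not in this file; error handling abbreviated):

	unsigned long *bitmap;

	bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
	if (!bitmap)
		return -ENOMEM;

	/* ... populate and query the bitmap ... */

	kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap);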