aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--fs/btrfs/volumes.c300
1 files changed, 189 insertions, 111 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 17ed76d18eb6..bd0f45fb38c4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -108,7 +108,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
},
};
-const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
+const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
[BTRFS_RAID_RAID1] = BTRFS_BLOCK_GROUP_RAID1,
[BTRFS_RAID_DUP] = BTRFS_BLOCK_GROUP_DUP,
@@ -125,6 +125,7 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
+static void btrfs_close_one_device(struct btrfs_device *device);
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
@@ -137,7 +138,7 @@ static struct btrfs_fs_devices *__alloc_fs_devices(void)
{
struct btrfs_fs_devices *fs_devs;
- fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS);
+ fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
if (!fs_devs)
return ERR_PTR(-ENOMEM);
@@ -219,7 +220,7 @@ static struct btrfs_device *__alloc_device(void)
{
struct btrfs_device *dev;
- dev = kzalloc(sizeof(*dev), GFP_NOFS);
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return ERR_PTR(-ENOMEM);
@@ -232,8 +233,9 @@ static struct btrfs_device *__alloc_device(void)
spin_lock_init(&dev->reada_lock);
atomic_set(&dev->reada_in_flight, 0);
atomic_set(&dev->dev_stats_ccnt, 0);
- INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
- INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
+ btrfs_device_data_ordered_init(dev);
+ INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+ INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
return dev;
}
@@ -731,7 +733,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
* uuid mutex so nothing we touch in here is going to disappear.
*/
if (orig_dev->name) {
- name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
+ name = rcu_string_strdup(orig_dev->name->str,
+ GFP_KERNEL);
if (!name) {
kfree(device);
goto error;
@@ -1022,16 +1025,16 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
}
/* make sure our super fits in the device */
- if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode))
+ if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
goto error_bdev_put;
/* make sure our super fits in the page */
- if (sizeof(*disk_super) > PAGE_CACHE_SIZE)
+ if (sizeof(*disk_super) > PAGE_SIZE)
goto error_bdev_put;
/* make sure our super doesn't straddle pages on disk */
- index = bytenr >> PAGE_CACHE_SHIFT;
- if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index)
+ index = bytenr >> PAGE_SHIFT;
+ if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
goto error_bdev_put;
/* pull in the page with our super */
@@ -1044,7 +1047,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
p = kmap(page);
/* align our pointer to the offset of the super block */
- disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
+ disk_super = p + (bytenr & ~PAGE_MASK);
if (btrfs_super_bytenr(disk_super) != bytenr ||
btrfs_super_magic(disk_super) != BTRFS_MAGIC)
@@ -1072,7 +1075,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
error_unmap:
kunmap(page);
- page_cache_release(page);
+ put_page(page);
error_bdev_put:
blkdev_put(bdev, flags);
@@ -1102,7 +1105,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->reada = 2;
+ path->reada = READA_FORWARD;
key.objectid = device->devid;
key.offset = start;
@@ -1182,7 +1185,7 @@ again:
struct map_lookup *map;
int i;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
u64 end;
@@ -1257,6 +1260,15 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
int ret;
int slot;
struct extent_buffer *l;
+ u64 min_search_start;
+
+ /*
+ * We don't want to overwrite the superblock on the drive nor any area
+ * used by the boot loader (grub for example), so we make sure to start
+ * at an offset of at least 1MB.
+ */
+ min_search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
+ search_start = max(search_start, min_search_start);
path = btrfs_alloc_path();
if (!path)
@@ -1271,7 +1283,7 @@ again:
goto out;
}
- path->reada = 2;
+ path->reada = READA_FORWARD;
path->search_commit_root = 1;
path->skip_locking = 1;
@@ -1397,18 +1409,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *len)
{
- struct btrfs_root *root = device->dev_root;
- u64 search_start;
-
/* FIXME use last free of some kind */
-
- /*
- * we don't want to overwrite the superblock on the drive,
- * so we make sure to start at an offset of at least 1MB
- */
- search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
return find_free_dev_extent_start(trans->transaction, device,
- num_bytes, search_start, start, len);
+ num_bytes, 0, start, len);
}
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
@@ -1642,7 +1645,6 @@ static void update_dev_time(char *path_name)
return;
file_update_time(filp);
filp_close(filp, NULL);
- return;
}
static int btrfs_rm_dev_item(struct btrfs_root *root,
@@ -1713,12 +1715,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
} while (read_seqretry(&root->fs_info->profiles_lock, seq));
num_devices = root->fs_info->fs_devices->num_devices;
- btrfs_dev_replace_lock(&root->fs_info->dev_replace);
+ btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
num_devices--;
}
- btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0);
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
@@ -1973,8 +1975,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
if (srcdev->writeable) {
fs_devices->rw_devices--;
/* zero out the old super if it is writable */
- btrfs_scratch_superblocks(srcdev->bdev,
- rcu_str_deref(srcdev->name));
+ btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
}
if (srcdev->bdev)
@@ -2024,8 +2025,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
if (tgtdev->bdev) {
- btrfs_scratch_superblocks(tgtdev->bdev,
- rcu_str_deref(tgtdev->name));
+ btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
fs_info->fs_devices->open_devices--;
}
fs_info->fs_devices->num_devices--;
@@ -2288,7 +2288,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
goto error;
}
- name = rcu_string_strdup(device_path, GFP_NOFS);
+ name = rcu_string_strdup(device_path, GFP_KERNEL);
if (!name) {
kfree(device);
ret = -ENOMEM;
@@ -2749,7 +2749,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
em->start + em->len < chunk_offset) {
/*
* This is a logic error, but we don't want to just rely on the
- * user having built with ASSERT enabled, so if ASSERT doens't
+ * user having built with ASSERT enabled, so if ASSERT doesn't
* do anything we still error out.
*/
ASSERT(0);
@@ -2757,7 +2757,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
free_extent_map(em);
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
lock_chunks(root->fs_info->chunk_root);
check_system_chunk(trans, extent_root, map->type);
unlock_chunks(root->fs_info->chunk_root);
@@ -2853,7 +2853,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
if (ret)
return ret;
- trans = btrfs_start_transaction(root, 0);
+ trans = btrfs_start_trans_remove_block_group(root->fs_info,
+ chunk_offset);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
btrfs_std_error(root->fs_info, ret, NULL);
@@ -2966,7 +2967,7 @@ static int insert_balance_item(struct btrfs_root *root,
}
key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.type = BTRFS_TEMPORARY_ITEM_KEY;
key.offset = 0;
ret = btrfs_insert_empty_item(trans, root, path, &key,
@@ -3015,7 +3016,7 @@ static int del_balance_item(struct btrfs_root *root)
}
key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.type = BTRFS_TEMPORARY_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
@@ -3123,7 +3124,7 @@ static int chunk_profiles_filter(u64 chunk_type,
return 1;
}
-static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
struct btrfs_balance_args *bargs)
{
struct btrfs_block_group_cache *cache;
@@ -3156,7 +3157,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
return ret;
}
-static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info,
+static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
u64 chunk_offset, struct btrfs_balance_args *bargs)
{
struct btrfs_block_group_cache *cache;
@@ -3400,13 +3401,14 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
u32 count_data = 0;
u32 count_meta = 0;
u32 count_sys = 0;
+ int chunk_reserved = 0;
/* step one make some room on all the devices */
devices = &fs_info->fs_devices->devices;
list_for_each_entry(device, devices, dev_list) {
old_size = btrfs_device_get_total_bytes(device);
size_to_free = div_factor(old_size, 1);
- size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
+ size_to_free = min_t(u64, size_to_free, SZ_1M);
if (!device->writeable ||
btrfs_device_get_total_bytes(device) -
btrfs_device_get_bytes_used(device) > size_to_free ||
@@ -3501,6 +3503,7 @@ again:
ret = should_balance_chunk(chunk_root, leaf, chunk,
found_key.offset);
+
btrfs_release_path(path);
if (!ret) {
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
@@ -3537,6 +3540,24 @@ again:
goto loop;
}
+ if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) {
+ trans = btrfs_start_transaction(chunk_root, 0);
+ if (IS_ERR(trans)) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ ret = PTR_ERR(trans);
+ goto error;
+ }
+
+ ret = btrfs_force_chunk_alloc(trans, chunk_root,
+ BTRFS_BLOCK_GROUP_DATA);
+ btrfs_end_transaction(trans, chunk_root);
+ if (ret < 0) {
+ mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+ goto error;
+ }
+ chunk_reserved = 1;
+ }
+
ret = btrfs_relocate_chunk(chunk_root,
found_key.offset);
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
@@ -3666,12 +3687,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
}
num_devices = fs_info->fs_devices->num_devices;
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
BUG_ON(num_devices < 1);
num_devices--;
}
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (num_devices == 1)
allowed |= BTRFS_BLOCK_GROUP_DUP;
@@ -3704,14 +3725,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
goto out;
}
- /* allow dup'ed data chunks only in mixed mode */
- if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
- (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
- btrfs_err(fs_info, "dup for data is not allowed");
- ret = -EINVAL;
- goto out;
- }
-
/* allow to reduce meta or sys integrity only if force set */
allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10 |
@@ -3737,6 +3750,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
}
} while (read_seqretry(&fs_info->profiles_lock, seq));
+ if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
+ btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
+ btrfs_warn(fs_info,
+ "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
+ bctl->meta.target, bctl->data.target);
+ }
+
if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
fs_info->num_tolerated_disk_barrier_failures = min(
btrfs_calc_num_tolerated_disk_barrier_failures(fs_info),
@@ -3848,7 +3868,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
return -ENOMEM;
key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.type = BTRFS_TEMPORARY_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
@@ -4099,7 +4119,7 @@ out:
* Callback for btrfs_uuid_tree_iterate().
* returns:
* 0 check succeeded, the entry is not outdated.
- * < 0 if an error occured.
+ * < 0 if an error occurred.
* > 0 if the check failed, which means the caller shall remove the entry.
*/
static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
@@ -4249,7 +4269,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
if (!path)
return -ENOMEM;
- path->reada = 2;
+ path->reada = READA_FORWARD;
lock_chunks(root);
@@ -4441,7 +4461,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
{
/* TODO allow them to set a preferred stripe size */
- return 64 * 1024;
+ return SZ_64K;
}
static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
@@ -4509,21 +4529,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
ncopies = btrfs_raid_array[index].ncopies;
if (type & BTRFS_BLOCK_GROUP_DATA) {
- max_stripe_size = 1024 * 1024 * 1024;
+ max_stripe_size = SZ_1G;
max_chunk_size = 10 * max_stripe_size;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info->chunk_root);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
/* for larger filesystems, use larger metadata chunks */
- if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024)
- max_stripe_size = 1024 * 1024 * 1024;
+ if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
+ max_stripe_size = SZ_1G;
else
- max_stripe_size = 256 * 1024 * 1024;
+ max_stripe_size = SZ_256M;
max_chunk_size = max_stripe_size;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info->chunk_root);
} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
- max_stripe_size = 32 * 1024 * 1024;
+ max_stripe_size = SZ_32M;
max_chunk_size = 2 * max_stripe_size;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS_SYS_CHUNK;
@@ -4700,7 +4720,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
goto error;
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = start;
em->len = num_bytes;
em->block_start = 0;
@@ -4774,7 +4794,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
u64 dev_offset;
u64 stripe_size;
int i = 0;
- int ret;
+ int ret = 0;
em_tree = &extent_root->fs_info->mapping_tree.map_tree;
read_lock(&em_tree->lock);
@@ -4795,7 +4815,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
item_size = btrfs_chunk_item_size(map->num_stripes);
stripe_size = em->orig_block_len;
@@ -4805,20 +4825,32 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
goto out;
}
+ /*
+ * Take the device list mutex to prevent races with the final phase of
+ * a device replace operation that replaces the device object associated
+ * with the map's stripes, because the device object's id can change
+ * at any time during that final phase of the device replace operation
+ * (dev-replace.c:btrfs_dev_replace_finishing()).
+ */
+ mutex_lock(&chunk_root->fs_info->fs_devices->device_list_mutex);
for (i = 0; i < map->num_stripes; i++) {
device = map->stripes[i].dev;
dev_offset = map->stripes[i].physical;
ret = btrfs_update_device(trans, device);
if (ret)
- goto out;
+ break;
ret = btrfs_alloc_dev_extent(trans, device,
chunk_root->root_key.objectid,
BTRFS_FIRST_CHUNK_TREE_OBJECTID,
chunk_offset, dev_offset,
stripe_size);
if (ret)
- goto out;
+ break;
+ }
+ if (ret) {
+ mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
+ goto out;
}
stripe = &chunk->stripe;
@@ -4831,6 +4863,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
stripe++;
}
+ mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
btrfs_set_stack_chunk_length(chunk, chunk_size);
btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
@@ -4937,7 +4970,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
if (!em)
return 1;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
if (map->stripes[i].dev->missing) {
miss_ndevs++;
@@ -5017,7 +5050,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
return 1;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
ret = map->num_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
@@ -5030,10 +5063,10 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
ret = 1;
free_extent_map(em);
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
ret++;
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
return ret;
}
@@ -5053,7 +5086,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
len = map->stripe_len * nr_data_stripes(map);
free_extent_map(em);
@@ -5074,7 +5107,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
ret = 1;
free_extent_map(em);
@@ -5233,7 +5266,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
offset = logical - em->start;
stripe_len = map->stripe_len;
@@ -5293,10 +5326,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (!bbio_ret)
goto out;
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 0);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
if (!dev_replace_is_ongoing)
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
+ else
+ btrfs_dev_replace_set_lock_blocking(dev_replace);
if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
@@ -5347,35 +5382,33 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
* target drive.
*/
for (i = 0; i < tmp_num_stripes; i++) {
- if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
- /*
- * In case of DUP, in order to keep it
- * simple, only add the mirror with the
- * lowest physical address
- */
- if (found &&
- physical_of_found <=
- tmp_bbio->stripes[i].physical)
- continue;
- index_srcdev = i;
- found = 1;
- physical_of_found =
- tmp_bbio->stripes[i].physical;
- }
+ if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
+ continue;
+
+ /*
+ * In case of DUP, in order to keep it simple, only add
+ * the mirror with the lowest physical address
+ */
+ if (found &&
+ physical_of_found <= tmp_bbio->stripes[i].physical)
+ continue;
+
+ index_srcdev = i;
+ found = 1;
+ physical_of_found = tmp_bbio->stripes[i].physical;
}
- if (found) {
- mirror_num = index_srcdev + 1;
- patch_the_first_stripe_for_dev_replace = 1;
- physical_to_patch_in_first_stripe = physical_of_found;
- } else {
+ btrfs_put_bbio(tmp_bbio);
+
+ if (!found) {
WARN_ON(1);
ret = -EIO;
- btrfs_put_bbio(tmp_bbio);
goto out;
}
- btrfs_put_bbio(tmp_bbio);
+ mirror_num = index_srcdev + 1;
+ patch_the_first_stripe_for_dev_replace = 1;
+ physical_to_patch_in_first_stripe = physical_of_found;
} else if (mirror_num > map->num_stripes) {
mirror_num = 0;
}
@@ -5721,8 +5754,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
bbio->mirror_num = map->num_stripes + 1;
}
out:
- if (dev_replace_is_ongoing)
- btrfs_dev_replace_unlock(dev_replace);
+ if (dev_replace_is_ongoing) {
+ btrfs_dev_replace_clear_lock_blocking(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
+ }
free_extent_map(em);
return ret;
}
@@ -5775,7 +5810,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
free_extent_map(em);
return -EIO;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
length = em->len;
rmap_len = map->stripe_len;
@@ -6038,7 +6073,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
bbio->fs_info = root->fs_info;
atomic_set(&bbio->stripes_pending, bbio->num_stripes);
- if (bbio->raid_map) {
+ if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
+ ((rw & WRITE) || (mirror_num > 1))) {
/* In this case, map_length has been set to the length of
a single stripe; not the whole write */
if (rw & WRITE) {
@@ -6179,6 +6215,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
struct extent_map *em;
u64 logical;
u64 length;
+ u64 stripe_len;
u64 devid;
u8 uuid[BTRFS_UUID_SIZE];
int num_stripes;
@@ -6187,6 +6224,37 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ /* Validation check */
+ if (!num_stripes) {
+ btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
+ num_stripes);
+ return -EIO;
+ }
+ if (!IS_ALIGNED(logical, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk logical %llu", logical);
+ return -EIO;
+ }
+ if (!length || !IS_ALIGNED(length, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk length %llu", length);
+ return -EIO;
+ }
+ if (!is_power_of_2(stripe_len)) {
+ btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
+ stripe_len);
+ return -EIO;
+ }
+ if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ btrfs_chunk_type(leaf, chunk)) {
+ btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
+ ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+ BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ btrfs_chunk_type(leaf, chunk));
+ return -EIO;
+ }
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6203,7 +6271,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
em = alloc_extent_map();
if (!em)
return -ENOMEM;
- num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
if (!map) {
free_extent_map(em);
@@ -6211,7 +6278,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = logical;
em->len = length;
em->orig_start = 0;
@@ -6446,11 +6513,11 @@ int btrfs_read_sys_array(struct btrfs_root *root)
sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
if (!sb)
return -ENOMEM;
- btrfs_set_buffer_uptodate(sb);
+ set_extent_buffer_uptodate(sb);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
/*
* The sb extent buffer is artifical and just used to read the system array.
- * btrfs_set_buffer_uptodate() call does not properly mark all it's
+ * set_extent_buffer_uptodate() call does not properly mark all it's
* pages up-to-date when the page is larger: extent does not cover the
* whole page and consequently check_page_uptodate does not find all
* the page's extents up-to-date (the hole beyond sb),
@@ -6460,7 +6527,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
* but sb spans only this function. Add an explicit SetPageUptodate call
* to silence the warning eg. on PowerPC 64.
*/
- if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
+ if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
SetPageUptodate(sb->pages[0]);
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
@@ -6493,6 +6560,14 @@ int btrfs_read_sys_array(struct btrfs_root *root)
goto out_short_read;
num_stripes = btrfs_chunk_num_stripes(sb, chunk);
+ if (!num_stripes) {
+ printk(KERN_ERR
+ "BTRFS: invalid number of stripes %u in sys_array at offset %u\n",
+ num_stripes, cur_offset);
+ ret = -EIO;
+ break;
+ }
+
len = btrfs_chunk_item_size(num_stripes);
if (cur_offset + len > array_size)
goto out_short_read;
@@ -6501,6 +6576,9 @@ int btrfs_read_sys_array(struct btrfs_root *root)
if (ret)
break;
} else {
+ printk(KERN_ERR
+ "BTRFS: unexpected item type %u in sys_array at offset %u\n",
+ (u32)key.type, cur_offset);
ret = -EIO;
break;
}
@@ -6632,8 +6710,8 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
int item_size;
struct btrfs_dev_stats_item *ptr;
- key.objectid = 0;
- key.type = BTRFS_DEV_STATS_KEY;
+ key.objectid = BTRFS_DEV_STATS_OBJECTID;
+ key.type = BTRFS_PERSISTENT_ITEM_KEY;
key.offset = device->devid;
ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
if (ret) {
@@ -6680,8 +6758,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
int ret;
int i;
- key.objectid = 0;
- key.type = BTRFS_DEV_STATS_KEY;
+ key.objectid = BTRFS_DEV_STATS_OBJECTID;
+ key.type = BTRFS_PERSISTENT_ITEM_KEY;
key.offset = device->devid;
path = btrfs_alloc_path();
@@ -6902,7 +6980,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
/* In order to kick the device replace finish process */
lock_chunks(root);
list_for_each_entry(em, &transaction->pending_chunks, list) {
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
dev = map->stripes[i].dev;
@@ -6930,7 +7008,7 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
}
}
-void btrfs_close_one_device(struct btrfs_device *device)
+static void btrfs_close_one_device(struct btrfs_device *device)
{
struct btrfs_fs_devices *fs_devices = device->fs_devices;
struct btrfs_device *new_device;