aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/super.c')
-rw-r--r--fs/btrfs/super.c303
1 files changed, 239 insertions, 64 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a1c54a2c787c..5942b9384088 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -23,7 +23,6 @@
#include <linux/miscdevice.h>
#include <linux/magic.h>
#include <linux/slab.h>
-#include <linux/cleancache.h>
#include <linux/ratelimit.h>
#include <linux/crc32c.h>
#include <linux/btrfs.h>
@@ -49,6 +48,7 @@
#include "block-group.h"
#include "discard.h"
#include "qgroup.h"
+#include "raid56.h"
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>
@@ -67,6 +67,52 @@ static struct file_system_type btrfs_root_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
+#ifdef CONFIG_PRINTK
+
+#define STATE_STRING_PREFACE ": state "
+#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
+
+/*
+ * Characters to print to indicate error conditions or uncommon filesystem state.
+ * RO is not an error.
+ */
+static const char fs_state_chars[] = {
+ [BTRFS_FS_STATE_ERROR] = 'E',
+ [BTRFS_FS_STATE_REMOUNTING] = 'M',
+ [BTRFS_FS_STATE_RO] = 0,
+ [BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
+ [BTRFS_FS_STATE_DEV_REPLACING] = 'R',
+ [BTRFS_FS_STATE_DUMMY_FS_INFO] = 0,
+ [BTRFS_FS_STATE_NO_CSUMS] = 'C',
+ [BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L',
+};
+
+static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+{
+ unsigned int bit;
+ bool states_printed = false;
+ unsigned long fs_state = READ_ONCE(info->fs_state);
+ char *curr = buf;
+
+ memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
+ curr += sizeof(STATE_STRING_PREFACE) - 1;
+
+ for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
+ WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
+ if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
+ *curr++ = fs_state_chars[bit];
+ states_printed = true;
+ }
+ }
+
+ /* If no states were printed, reset the buffer */
+ if (!states_printed)
+ curr = buf;
+
+ *curr++ = 0;
+}
+#endif
+
/*
* Generally the error codes correspond to their respective errors, but there
* are a few special cases.
@@ -129,6 +175,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
{
struct super_block *sb = fs_info->sb;
#ifdef CONFIG_PRINTK
+ char statestr[STATE_STRING_BUF_LEN];
const char *errstr;
#endif
@@ -141,6 +188,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
#ifdef CONFIG_PRINTK
errstr = btrfs_decode_error(errno);
+ btrfs_state_to_string(fs_info, statestr);
if (fmt) {
struct va_format vaf;
va_list args;
@@ -149,12 +197,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
vaf.fmt = fmt;
vaf.va = &args;
- pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
- sb->s_id, function, line, errno, errstr, &vaf);
+ pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
+ sb->s_id, statestr, function, line, errno, errstr, &vaf);
va_end(args);
} else {
- pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
- sb->s_id, function, line, errno, errstr);
+ pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
+ sb->s_id, statestr, function, line, errno, errstr);
}
#endif
@@ -214,7 +262,7 @@ static struct ratelimit_state printk_limits[] = {
RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
};
-void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
+void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
struct va_format vaf;
@@ -241,11 +289,15 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
vaf.va = &args;
if (__ratelimit(ratelimit)) {
- if (fs_info)
- printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
- fs_info->sb->s_id, &vaf);
- else
- printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+ if (fs_info) {
+ char statestr[STATE_STRING_BUF_LEN];
+
+ btrfs_state_to_string(fs_info, statestr);
+ _printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
+ fs_info->sb->s_id, statestr, &vaf);
+ } else {
+ _printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+ }
}
va_end(args);
@@ -294,12 +346,14 @@ void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
const char *function,
- unsigned int line, int errno)
+ unsigned int line, int errno, bool first_hit)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
WRITE_ONCE(trans->aborted, errno);
WRITE_ONCE(trans->transaction->aborted, errno);
+ if (first_hit && errno == -ENOSPC)
+ btrfs_dump_space_info_for_trans_abort(fs_info);
/* Wake up anybody who may be waiting on this transaction */
wake_up(&fs_info->transaction_wait);
wake_up(&fs_info->transaction_blocked_wait);
@@ -574,6 +628,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
int saved_compress_level;
bool saved_compress_force;
int no_compress = 0;
+ const bool remounting = test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state);
if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
@@ -712,6 +767,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
compress_force = false;
no_compress++;
} else {
+ btrfs_err(info, "unrecognized compression value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
@@ -770,8 +827,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_thread_pool:
ret = match_int(&args[0], &intarg);
if (ret) {
+ btrfs_err(info, "unrecognized thread_pool value %s",
+ args[0].from);
goto out;
} else if (intarg == 0) {
+ btrfs_err(info, "invalid value 0 for thread_pool");
ret = -EINVAL;
goto out;
}
@@ -832,8 +892,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_ratio:
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info, "unrecognized metadata_ratio value %s",
+ args[0].from);
goto out;
+ }
info->metadata_ratio = intarg;
btrfs_info(info, "metadata ratio %u",
info->metadata_ratio);
@@ -850,6 +913,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_and_info(info, DISCARD_ASYNC,
"turning on async discard");
} else {
+ btrfs_err(info, "unrecognized discard mode value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
@@ -862,6 +927,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_space_cache:
case Opt_space_cache_version:
+ /*
+ * We already set FREE_SPACE_TREE above because we have
+ * compat_ro(FREE_SPACE_TREE) set, and we aren't going
+ * to allow v1 to be set for extent tree v2, simply
+ * ignore this setting if we're extent tree v2.
+ */
+ if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+ break;
if (token == Opt_space_cache ||
strcmp(args[0].from, "v1") == 0) {
btrfs_clear_opt(info->mount_opt,
@@ -874,6 +947,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_and_info(info, FREE_SPACE_TREE,
"enabling free space tree");
} else {
+ btrfs_err(info, "unrecognized space_cache value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
@@ -882,6 +957,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
break;
case Opt_no_space_cache:
+ /*
+ * We cannot operate without the free space tree with
+ * extent tree v2, ignore this option.
+ */
+ if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+ break;
if (btrfs_test_opt(info, SPACE_CACHE)) {
btrfs_clear_and_info(info, SPACE_CACHE,
"disabling disk space caching");
@@ -897,6 +978,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
"the 'inode_cache' option is deprecated and has no effect since 5.11");
break;
case Opt_clear_cache:
+ /*
+ * We cannot clear the free space tree with extent tree
+ * v2, ignore this option.
+ */
+ if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+ break;
btrfs_set_and_info(info, CLEAR_CACHE,
"force clearing of disk cache");
break;
@@ -943,8 +1030,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_check_integrity_print_mask:
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info,
+ "unrecognized check_integrity_print_mask value %s",
+ args[0].from);
goto out;
+ }
info->check_integrity_print_mask = intarg;
btrfs_info(info, "check_integrity_print_mask 0x%x",
info->check_integrity_print_mask);
@@ -959,13 +1050,15 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
goto out;
#endif
case Opt_fatal_errors:
- if (strcmp(args[0].from, "panic") == 0)
+ if (strcmp(args[0].from, "panic") == 0) {
btrfs_set_opt(info->mount_opt,
PANIC_ON_FATAL_ERROR);
- else if (strcmp(args[0].from, "bug") == 0)
+ } else if (strcmp(args[0].from, "bug") == 0) {
btrfs_clear_opt(info->mount_opt,
PANIC_ON_FATAL_ERROR);
- else {
+ } else {
+ btrfs_err(info, "unrecognized fatal_errors value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
@@ -973,8 +1066,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_commit_interval:
intarg = 0;
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info, "unrecognized commit_interval value %s",
+ args[0].from);
+ ret = -EINVAL;
goto out;
+ }
if (intarg == 0) {
btrfs_info(info,
"using default commit interval %us",
@@ -988,8 +1085,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
case Opt_rescue:
ret = parse_rescue_options(info, args[0].from);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_err(info, "unrecognized rescue value %s",
+ args[0].from);
goto out;
+ }
break;
#ifdef CONFIG_BTRFS_DEBUG
case Opt_fragment_all:
@@ -1040,10 +1140,12 @@ out:
}
if (!ret)
ret = btrfs_check_mountopts_zoned(info);
- if (!ret && btrfs_test_opt(info, SPACE_CACHE))
- btrfs_info(info, "disk space caching is enabled");
- if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
- btrfs_info(info, "using free space tree");
+ if (!ret && !remounting) {
+ if (btrfs_test_opt(info, SPACE_CACHE))
+ btrfs_info(info, "disk space caching is enabled");
+ if (btrfs_test_opt(info, FREE_SPACE_TREE))
+ btrfs_info(info, "using free space tree");
+ }
return ret;
}
@@ -1374,7 +1476,6 @@ static int btrfs_fill_super(struct super_block *sb,
goto fail_close;
}
- cleancache_init_fs(sb);
sb->s_flags |= SB_ACTIVE;
return 0;
@@ -1720,6 +1821,8 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
error = -EBUSY;
} else {
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
+ shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name,
+ s->s_id);
btrfs_sb(s)->bdev_holder = fs_type;
if (!strstr(crc32c_impl(), "generic"))
set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
@@ -1833,18 +1936,12 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
old_pool_size, new_pool_size);
btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
+ btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
- btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
- btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
- btrfs_workqueue_set_max(fs_info->endio_meta_write_workers,
- new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
- btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size);
- btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers,
- new_pool_size);
}
static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
@@ -1917,6 +2014,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
+ ret = btrfs_check_features(fs_info, sb);
+ if (ret < 0)
+ goto restore;
+
btrfs_remount_begin(fs_info, old_opts, *flags);
btrfs_resize_thread_pool(fs_info,
fs_info->thread_pool_size, old_thread_pool_size);
@@ -2145,12 +2246,8 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
if (type & BTRFS_BLOCK_GROUP_RAID0)
num_stripes = nr_devices;
- else if (type & BTRFS_BLOCK_GROUP_RAID1)
- num_stripes = 2;
- else if (type & BTRFS_BLOCK_GROUP_RAID1C3)
- num_stripes = 3;
- else if (type & BTRFS_BLOCK_GROUP_RAID1C4)
- num_stripes = 4;
+ else if (type & BTRFS_BLOCK_GROUP_RAID1_MASK)
+ num_stripes = rattr->ncopies;
else if (type & BTRFS_BLOCK_GROUP_RAID10)
num_stripes = 4;
@@ -2174,17 +2271,13 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN);
/*
- * In order to avoid overwriting the superblock on the drive,
- * btrfs starts at an offset of at least 1MB when doing chunk
- * allocation.
- *
- * This ensures we have at least min_stripe_size free space
- * after excluding 1MB.
+ * Ensure we have at least min_stripe_size on top of the
+ * reserved space on the device.
*/
- if (avail_space <= SZ_1M + min_stripe_size)
+ if (avail_space <= BTRFS_DEVICE_RANGE_RESERVED + min_stripe_size)
continue;
- avail_space -= SZ_1M;
+ avail_space -= BTRFS_DEVICE_RANGE_RESERVED;
devices_info[i].dev = device;
devices_info[i].max_avail = avail_space;
@@ -2386,6 +2479,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
{
struct btrfs_ioctl_vol_args *vol;
struct btrfs_device *device = NULL;
+ dev_t devt = 0;
int ret = -ENOTTY;
if (!capable(CAP_SYS_ADMIN))
@@ -2405,7 +2499,12 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
mutex_unlock(&uuid_mutex);
break;
case BTRFS_IOC_FORGET_DEV:
- ret = btrfs_forget_devices(vol->name);
+ if (vol->name[0] != 0) {
+ ret = lookup_bdev(vol->name, &devt);
+ if (ret)
+ break;
+ }
+ ret = btrfs_forget_devices(devt);
break;
case BTRFS_IOC_DEVICES_READY:
mutex_lock(&uuid_mutex);
@@ -2452,11 +2551,87 @@ static int btrfs_freeze(struct super_block *sb)
return btrfs_commit_transaction(trans);
}
+static int check_dev_super(struct btrfs_device *dev)
+{
+ struct btrfs_fs_info *fs_info = dev->fs_info;
+ struct btrfs_super_block *sb;
+ u16 csum_type;
+ int ret = 0;
+
+ /* This should be called with fs still frozen. */
+ ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags));
+
+ /* Missing dev, no need to check. */
+ if (!dev->bdev)
+ return 0;
+
+ /* Only need to check the primary super block. */
+ sb = btrfs_read_dev_one_super(dev->bdev, 0, true);
+ if (IS_ERR(sb))
+ return PTR_ERR(sb);
+
+ /* Verify the checksum. */
+ csum_type = btrfs_super_csum_type(sb);
+ if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) {
+ btrfs_err(fs_info, "csum type changed, has %u expect %u",
+ csum_type, btrfs_super_csum_type(fs_info->super_copy));
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ if (btrfs_check_super_csum(fs_info, sb)) {
+ btrfs_err(fs_info, "csum for on-disk super block no longer matches");
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ /* Btrfs_validate_super() includes fsid check against super->fsid. */
+ ret = btrfs_validate_super(fs_info, sb, 0);
+ if (ret < 0)
+ goto out;
+
+ if (btrfs_super_generation(sb) != fs_info->last_trans_committed) {
+ btrfs_err(fs_info, "transid mismatch, has %llu expect %llu",
+ btrfs_super_generation(sb),
+ fs_info->last_trans_committed);
+ ret = -EUCLEAN;
+ goto out;
+ }
+out:
+ btrfs_release_disk_super(sb);
+ return ret;
+}
+
static int btrfs_unfreeze(struct super_block *sb)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
+ struct btrfs_device *device;
+ int ret = 0;
+ /*
+ * Make sure the fs is not changed by accident (like hibernation then
+ * modified by other OS).
+ * If we found anything wrong, we mark the fs error immediately.
+ *
+ * And since the fs is frozen, no one can modify the fs yet, thus
+ * we don't need to hold device_list_mutex.
+ */
+ list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
+ ret = check_dev_super(device);
+ if (ret < 0) {
+ btrfs_handle_fs_error(fs_info, ret,
+ "super block on devid %llu got modified unexpectedly",
+ device->devid);
+ break;
+ }
+ }
clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
+
+ /*
+ * We still return 0, to allow VFS layer to unfreeze the fs even the
+ * above checks failed. Since the fs is either fine or read-only, we're
+ * safe to continue, without causing further damage.
+ */
return 0;
}
@@ -2564,17 +2739,21 @@ static int __init init_btrfs_fs(void)
if (err)
goto free_compress;
- err = extent_io_init();
+ err = extent_state_init_cachep();
if (err)
goto free_cachep;
- err = extent_state_cache_init();
+ err = extent_buffer_init_cachep();
+ if (err)
+ goto free_extent_cachep;
+
+ err = btrfs_bioset_init();
if (err)
- goto free_extent_io;
+ goto free_eb_cachep;
err = extent_map_init();
if (err)
- goto free_extent_state_cache;
+ goto free_bioset;
err = ordered_data_init();
if (err)
@@ -2596,13 +2775,9 @@ static int __init init_btrfs_fs(void)
if (err)
goto free_delayed_ref;
- err = btrfs_end_io_wq_init();
- if (err)
- goto free_prelim_ref;
-
err = btrfs_interface_init();
if (err)
- goto free_end_io_wq;
+ goto free_prelim_ref;
btrfs_print_mod_info();
@@ -2618,8 +2793,6 @@ static int __init init_btrfs_fs(void)
unregister_ioctl:
btrfs_interface_exit();
-free_end_io_wq:
- btrfs_end_io_wq_exit();
free_prelim_ref:
btrfs_prelim_ref_exit();
free_delayed_ref:
@@ -2632,10 +2805,12 @@ free_ordered_data:
ordered_data_exit();
free_extent_map:
extent_map_exit();
-free_extent_state_cache:
- extent_state_cache_exit();
-free_extent_io:
- extent_io_exit();
+free_bioset:
+ btrfs_bioset_exit();
+free_eb_cachep:
+ extent_buffer_free_cachep();
+free_extent_cachep:
+ extent_state_free_cachep();
free_cachep:
btrfs_destroy_cachep();
free_compress:
@@ -2654,10 +2829,10 @@ static void __exit exit_btrfs_fs(void)
btrfs_prelim_ref_exit();
ordered_data_exit();
extent_map_exit();
- extent_state_cache_exit();
- extent_io_exit();
+ btrfs_bioset_exit();
+ extent_state_free_cachep();
+ extent_buffer_free_cachep();
btrfs_interface_exit();
- btrfs_end_io_wq_exit();
unregister_filesystem(&btrfs_fs_type);
btrfs_exit_sysfs();
btrfs_cleanup_fs_uuids();