aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-12-27 16:44:40 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-27 16:44:40 -0800
commit32ee34eddad13cd44ad0cb3e659fe6fd49143b62 (patch)
tree4c392ff23c0be6d00fbf67e7d8b13ce0d6d14999 /fs/btrfs/ctree.h
parentMerge tag 'gfs2-4.21.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2 (diff)
parentbtrfs: Fix typos in comments and strings (diff)
downloadlinux-dev-32ee34eddad13cd44ad0cb3e659fe6fd49143b62.tar.xz
linux-dev-32ee34eddad13cd44ad0cb3e659fe6fd49143b62.zip
Merge tag 'for-4.21-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "New features: - swapfile support - after a long time it's here, with some limitations where COW design does not work well with the swap implementation (nodatacow file, no compression, cannot be snapshotted, not possible on multiple devices, ...), as this is the most restricted but working setup, we'll try to improve that in the future - metadata uuid - an optional incompat feature to assign a new filesystem UUID without overwriting all metadata blocks, stored only in superblock - more balance messages are printed to system log, initial is in the format of the command line that would be used to start it Fixes: - tag pages of a snapshot to better separate pages that are involved in the snapshot (and need to get synced) from newly dirtied pages that could slow down or even livelock the snapshot operation - improved check of filesystem id associated with a device during scan to detect duplicate devices that could be mixed up during mount - fix device replace state transitions, eg. when it ends up interrupted and reboot tries to restart balance too, or when start/cancel ioctls race - fix a crash due to a race when quotas are enabled during snapshot creation - GFP_NOFS/memalloc_nofs_* fixes due to GFP_KERNEL allocations in transaction context - fix fsync of files with multiple hard links in new directories - fix race of send with transaction commits that create snapshots Core changes: - cleanups: * further removals of now-dead fsync code * core function for finding free extent has been split and provides a base for further cleanups to make the logic more understandable * removed lot of indirect callbacks for data and metadata inodes * simplified refcounting and locking for cloned extent buffers * removed redundant function arguments * defines converted to enums where appropriate - separate reserve for delayed refs from global reserve, update logic to do less trickery and ad-hoc heuristics, move out some related expensive operations from transaction commit or file truncate - dev-replace switched from custom locking scheme to semaphore - remove first phase of balance that tried to make some space for the relocation by calling shrink and grow, this did not work as expected and only introduced more error states due to potential resize failures, slightly improves the runtime as the chunks on all devices are not needlessly enumerated - clone and deduplication now use generic helper that adds a few more checks that were missing from the original btrfs implementation of the ioctls" * tag 'for-4.21-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (125 commits) btrfs: Fix typos in comments and strings btrfs: improve error handling of btrfs_add_link Btrfs: use generic_remap_file_range_prep() for cloning and deduplication btrfs: Refactor main loop in extent_readpages btrfs: Remove 1st shrink/grow phase from balance Btrfs: send, fix race with transaction commits that create snapshots Btrfs: use nofs context when initializing security xattrs to avoid deadlock btrfs: run delayed items before dropping the snapshot btrfs: catch cow on deleting snapshots btrfs: extent-tree: cleanup one-shot usage of @blocksize in do_walk_down Btrfs: scrub, move setup of nofs contexts higher in the stack btrfs: scrub: move scrub_setup_ctx allocation out of device_list_mutex btrfs: scrub: pass fs_info to scrub_setup_ctx btrfs: fix truncate throttling btrfs: don't run delayed refs in the end transaction logic btrfs: rework btrfs_check_space_for_delayed_refs btrfs: add new flushing states for the delayed refs rsv btrfs: update may_commit_transaction to use the delayed refs rsv btrfs: introduce delayed_refs_rsv btrfs: only track ref_heads in delayed_ref_updates ...
Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r--fs/btrfs/ctree.h263
1 files changed, 174 insertions, 89 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 68f322f600a0..f031a447a047 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -109,13 +109,26 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
}
/*
- * File system states
+ * Runtime (in-memory) states of filesystem
*/
-#define BTRFS_FS_STATE_ERROR 0
-#define BTRFS_FS_STATE_REMOUNTING 1
-#define BTRFS_FS_STATE_TRANS_ABORTED 2
-#define BTRFS_FS_STATE_DEV_REPLACING 3
-#define BTRFS_FS_STATE_DUMMY_FS_INFO 4
+enum {
+ /* Global indicator of serious filesystem errors */
+ BTRFS_FS_STATE_ERROR,
+ /*
+ * Filesystem is being remounted, allow to skip some operations, like
+ * defrag
+ */
+ BTRFS_FS_STATE_REMOUNTING,
+ /* Track if a transaction abort has been reported on this filesystem */
+ BTRFS_FS_STATE_TRANS_ABORTED,
+ /*
+ * Bio operations should be blocked on this filesystem because a source
+ * or target device is being destroyed as part of a device replace
+ */
+ BTRFS_FS_STATE_DEV_REPLACING,
+ /* The btrfs_fs_info created for self-tests */
+ BTRFS_FS_STATE_DUMMY_FS_INFO,
+};
#define BTRFS_BACKREF_REV_MAX 256
#define BTRFS_BACKREF_REV_SHIFT 56
@@ -195,9 +208,10 @@ struct btrfs_root_backup {
* it currently lacks any block count etc etc
*/
struct btrfs_super_block {
- u8 csum[BTRFS_CSUM_SIZE];
/* the first 4 fields must match struct btrfs_header */
- u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+ u8 csum[BTRFS_CSUM_SIZE];
+ /* FS specific UUID, visible to user */
+ u8 fsid[BTRFS_FSID_SIZE];
__le64 bytenr; /* this block number */
__le64 flags;
@@ -234,8 +248,11 @@ struct btrfs_super_block {
__le64 cache_generation;
__le64 uuid_tree_generation;
+ /* the UUID written into btree blocks */
+ u8 metadata_uuid[BTRFS_FSID_SIZE];
+
/* future expansion */
- __le64 reserved[30];
+ __le64 reserved[28];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
} __attribute__ ((__packed__));
@@ -265,7 +282,8 @@ struct btrfs_super_block {
BTRFS_FEATURE_INCOMPAT_RAID56 | \
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
- BTRFS_FEATURE_INCOMPAT_NO_HOLES)
+ BTRFS_FEATURE_INCOMPAT_NO_HOLES | \
+ BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
@@ -316,7 +334,7 @@ struct btrfs_node {
* The slots array records the index of the item or block pointer
* used while walking the tree.
*/
-enum { READA_NONE = 0, READA_BACK, READA_FORWARD };
+enum { READA_NONE, READA_BACK, READA_FORWARD };
struct btrfs_path {
struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
int slots[BTRFS_MAX_LEVEL];
@@ -360,9 +378,7 @@ struct btrfs_dev_replace {
struct btrfs_device *tgtdev;
struct mutex lock_finishing_cancel_unmount;
- rwlock_t lock;
- atomic_t blocking_readers;
- wait_queue_head_t read_lock_wq;
+ struct rw_semaphore rwsem;
struct btrfs_scrub_progress scrub_progress;
@@ -443,13 +459,19 @@ struct btrfs_space_info {
struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
};
-#define BTRFS_BLOCK_RSV_GLOBAL 1
-#define BTRFS_BLOCK_RSV_DELALLOC 2
-#define BTRFS_BLOCK_RSV_TRANS 3
-#define BTRFS_BLOCK_RSV_CHUNK 4
-#define BTRFS_BLOCK_RSV_DELOPS 5
-#define BTRFS_BLOCK_RSV_EMPTY 6
-#define BTRFS_BLOCK_RSV_TEMP 7
+/*
+ * Types of block reserves
+ */
+enum {
+ BTRFS_BLOCK_RSV_GLOBAL,
+ BTRFS_BLOCK_RSV_DELALLOC,
+ BTRFS_BLOCK_RSV_TRANS,
+ BTRFS_BLOCK_RSV_CHUNK,
+ BTRFS_BLOCK_RSV_DELOPS,
+ BTRFS_BLOCK_RSV_DELREFS,
+ BTRFS_BLOCK_RSV_EMPTY,
+ BTRFS_BLOCK_RSV_TEMP,
+};
struct btrfs_block_rsv {
u64 size;
@@ -509,18 +531,18 @@ struct btrfs_free_cluster {
};
enum btrfs_caching_type {
- BTRFS_CACHE_NO = 0,
- BTRFS_CACHE_STARTED = 1,
- BTRFS_CACHE_FAST = 2,
- BTRFS_CACHE_FINISHED = 3,
- BTRFS_CACHE_ERROR = 4,
+ BTRFS_CACHE_NO,
+ BTRFS_CACHE_STARTED,
+ BTRFS_CACHE_FAST,
+ BTRFS_CACHE_FINISHED,
+ BTRFS_CACHE_ERROR,
};
enum btrfs_disk_cache_state {
- BTRFS_DC_WRITTEN = 0,
- BTRFS_DC_ERROR = 1,
- BTRFS_DC_CLEAR = 2,
- BTRFS_DC_SETUP = 3,
+ BTRFS_DC_WRITTEN,
+ BTRFS_DC_ERROR,
+ BTRFS_DC_CLEAR,
+ BTRFS_DC_SETUP,
};
struct btrfs_caching_control {
@@ -712,41 +734,61 @@ struct btrfs_fs_devices;
struct btrfs_balance_control;
struct btrfs_delayed_root;
-#define BTRFS_FS_BARRIER 1
-#define BTRFS_FS_CLOSING_START 2
-#define BTRFS_FS_CLOSING_DONE 3
-#define BTRFS_FS_LOG_RECOVERING 4
-#define BTRFS_FS_OPEN 5
-#define BTRFS_FS_QUOTA_ENABLED 6
-#define BTRFS_FS_UPDATE_UUID_TREE_GEN 9
-#define BTRFS_FS_CREATING_FREE_SPACE_TREE 10
-#define BTRFS_FS_BTREE_ERR 11
-#define BTRFS_FS_LOG1_ERR 12
-#define BTRFS_FS_LOG2_ERR 13
-#define BTRFS_FS_QUOTA_OVERRIDE 14
-/* Used to record internally whether fs has been frozen */
-#define BTRFS_FS_FROZEN 15
-
-/*
- * Indicate that a whole-filesystem exclusive operation is running
- * (device replace, resize, device add/delete, balance)
- */
-#define BTRFS_FS_EXCL_OP 16
-
/*
- * To info transaction_kthread we need an immediate commit so it doesn't
- * need to wait for commit_interval
+ * Block group or device which contains an active swapfile. Used for preventing
+ * unsafe operations while a swapfile is active.
+ *
+ * These are sorted on (ptr, inode) (note that a block group or device can
+ * contain more than one swapfile). We compare the pointer values because we
+ * don't actually care what the object is, we just need a quick check whether
+ * the object exists in the rbtree.
*/
-#define BTRFS_FS_NEED_ASYNC_COMMIT 17
+struct btrfs_swapfile_pin {
+ struct rb_node node;
+ void *ptr;
+ struct inode *inode;
+ /*
+ * If true, ptr points to a struct btrfs_block_group_cache. Otherwise,
+ * ptr points to a struct btrfs_device.
+ */
+ bool is_block_group;
+};
-/*
- * Indicate that balance has been set up from the ioctl and is in the main
- * phase. The fs_info::balance_ctl is initialized.
- */
-#define BTRFS_FS_BALANCE_RUNNING 18
+bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
+
+enum {
+ BTRFS_FS_BARRIER,
+ BTRFS_FS_CLOSING_START,
+ BTRFS_FS_CLOSING_DONE,
+ BTRFS_FS_LOG_RECOVERING,
+ BTRFS_FS_OPEN,
+ BTRFS_FS_QUOTA_ENABLED,
+ BTRFS_FS_UPDATE_UUID_TREE_GEN,
+ BTRFS_FS_CREATING_FREE_SPACE_TREE,
+ BTRFS_FS_BTREE_ERR,
+ BTRFS_FS_LOG1_ERR,
+ BTRFS_FS_LOG2_ERR,
+ BTRFS_FS_QUOTA_OVERRIDE,
+ /* Used to record internally whether fs has been frozen */
+ BTRFS_FS_FROZEN,
+ /*
+ * Indicate that a whole-filesystem exclusive operation is running
+ * (device replace, resize, device add/delete, balance)
+ */
+ BTRFS_FS_EXCL_OP,
+ /*
+ * To info transaction_kthread we need an immediate commit so it
+ * doesn't need to wait for commit_interval
+ */
+ BTRFS_FS_NEED_ASYNC_COMMIT,
+ /*
+ * Indicate that balance has been set up from the ioctl and is in the
+ * main phase. The fs_info::balance_ctl is initialized.
+ */
+ BTRFS_FS_BALANCE_RUNNING,
+};
struct btrfs_fs_info {
- u8 fsid[BTRFS_FSID_SIZE];
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
unsigned long flags;
struct btrfs_root *extent_root;
@@ -790,6 +832,8 @@ struct btrfs_fs_info {
struct btrfs_block_rsv chunk_block_rsv;
/* block reservation for delayed operations */
struct btrfs_block_rsv delayed_block_rsv;
+ /* block reservation for delayed refs */
+ struct btrfs_block_rsv delayed_refs_rsv;
struct btrfs_block_rsv empty_block_rsv;
@@ -1114,6 +1158,10 @@ struct btrfs_fs_info {
u32 sectorsize;
u32 stripesize;
+ /* Block groups and devices containing active swapfiles. */
+ spinlock_t swapfile_pins_lock;
+ struct rb_root swapfile_pins;
+
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
struct rb_root block_tree;
@@ -1133,22 +1181,24 @@ struct btrfs_subvolume_writers {
/*
* The state of btrfs root
*/
-/*
- * btrfs_record_root_in_trans is a multi-step process,
- * and it can race with the balancing code. But the
- * race is very small, and only the first time the root
- * is added to each transaction. So IN_TRANS_SETUP
- * is used to tell us when more checks are required
- */
-#define BTRFS_ROOT_IN_TRANS_SETUP 0
-#define BTRFS_ROOT_REF_COWS 1
-#define BTRFS_ROOT_TRACK_DIRTY 2
-#define BTRFS_ROOT_IN_RADIX 3
-#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED 4
-#define BTRFS_ROOT_DEFRAG_RUNNING 5
-#define BTRFS_ROOT_FORCE_COW 6
-#define BTRFS_ROOT_MULTI_LOG_TASKS 7
-#define BTRFS_ROOT_DIRTY 8
+enum {
+ /*
+ * btrfs_record_root_in_trans is a multi-step process, and it can race
+ * with the balancing code. But the race is very small, and only the
+ * first time the root is added to each transaction. So IN_TRANS_SETUP
+ * is used to tell us when more checks are required
+ */
+ BTRFS_ROOT_IN_TRANS_SETUP,
+ BTRFS_ROOT_REF_COWS,
+ BTRFS_ROOT_TRACK_DIRTY,
+ BTRFS_ROOT_IN_RADIX,
+ BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
+ BTRFS_ROOT_DEFRAG_RUNNING,
+ BTRFS_ROOT_FORCE_COW,
+ BTRFS_ROOT_MULTI_LOG_TASKS,
+ BTRFS_ROOT_DIRTY,
+ BTRFS_ROOT_DELETING,
+};
/*
* in ram representation of the tree. extent_root is used for all allocations
@@ -1274,6 +1324,9 @@ struct btrfs_root {
u64 qgroup_meta_rsv_pertrans;
u64 qgroup_meta_rsv_prealloc;
+ /* Number of active swapfiles */
+ atomic_t nr_swapfiles;
+
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
u64 alloc_bytenr;
#endif
@@ -2570,10 +2623,10 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
/* extent-tree.c */
enum btrfs_inline_ref_type {
- BTRFS_REF_TYPE_INVALID = 0,
- BTRFS_REF_TYPE_BLOCK = 1,
- BTRFS_REF_TYPE_DATA = 2,
- BTRFS_REF_TYPE_ANY = 3,
+ BTRFS_REF_TYPE_INVALID,
+ BTRFS_REF_TYPE_BLOCK,
+ BTRFS_REF_TYPE_DATA,
+ BTRFS_REF_TYPE_ANY,
};
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
@@ -2599,7 +2652,7 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info,
}
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans);
-int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans);
+bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
const u64 start);
void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg);
@@ -2713,10 +2766,12 @@ enum btrfs_reserve_flush_enum {
enum btrfs_flush_state {
FLUSH_DELAYED_ITEMS_NR = 1,
FLUSH_DELAYED_ITEMS = 2,
- FLUSH_DELALLOC = 3,
- FLUSH_DELALLOC_WAIT = 4,
- ALLOC_CHUNK = 5,
- COMMIT_TRANS = 6,
+ FLUSH_DELAYED_REFS_NR = 3,
+ FLUSH_DELAYED_REFS = 4,
+ FLUSH_DELALLOC = 5,
+ FLUSH_DELALLOC_WAIT = 6,
+ ALLOC_CHUNK = 7,
+ COMMIT_TRANS = 8,
};
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
@@ -2767,6 +2822,13 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
+void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr);
+void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
+int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
+ enum btrfs_reserve_flush_enum flush);
+void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *src,
+ u64 num_bytes);
int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
@@ -3141,7 +3203,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct inode *inode, u64 new_size,
u32 min_type);
-int btrfs_start_delalloc_inodes(struct btrfs_root *root);
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
unsigned int extra_bits,
@@ -3150,9 +3212,16 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root,
struct btrfs_root *parent_root,
u64 new_dirid);
-int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
- size_t size, struct bio *bio,
- unsigned long bio_flags);
+ void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
+ unsigned *bits);
+void btrfs_clear_delalloc_extent(struct inode *inode,
+ struct extent_state *state, unsigned *bits);
+void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+ struct extent_state *other);
+void btrfs_split_delalloc_extent(struct inode *inode,
+ struct extent_state *orig, u64 split);
+int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
+ unsigned long bio_flags);
void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end);
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
@@ -3189,6 +3258,12 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
struct btrfs_trans_handle *trans, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
+int btrfs_run_delalloc_range(void *private_data, struct page *locked_page,
+ u64 start, u64 end, int *page_started, unsigned long *nr_written,
+ struct writeback_control *wbc);
+int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
+void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
+ u64 end, int uptodate);
extern const struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */
@@ -3428,6 +3503,16 @@ static inline void assfail(const char *expr, const char *file, int line)
#define ASSERT(expr) ((void)0)
#endif
+/*
+ * Use that for functions that are conditionally exported for sanity tests but
+ * otherwise static
+ */
+#ifndef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+#define EXPORT_FOR_TESTS static
+#else
+#define EXPORT_FOR_TESTS
+#endif
+
__cold
static inline void btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
{