diff options
Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r-- | fs/btrfs/ctree.h | 1725 |
1 files changed, 1187 insertions, 538 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 36df977b64d9..9e6d48ff4597 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -17,7 +17,6 @@ #include <linux/wait.h> #include <linux/slab.h> #include <trace/events/btrfs.h> -#include <asm/kmap_types.h> #include <asm/unaligned.h> #include <linux/pagemap.h> #include <linux/btrfs.h> @@ -28,11 +27,13 @@ #include <linux/dynamic_debug.h> #include <linux/refcount.h> #include <linux/crc32c.h> +#include <linux/iomap.h> #include "extent-io-tree.h" #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" #include "block-rsv.h" +#include "locking.h" struct btrfs_trans_handle; struct btrfs_transaction; @@ -41,12 +42,18 @@ struct btrfs_delayed_ref_root; struct btrfs_space_info; struct btrfs_block_group; extern struct kmem_cache *btrfs_trans_handle_cachep; -extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; extern struct kmem_cache *btrfs_free_space_cachep; extern struct kmem_cache *btrfs_free_space_bitmap_cachep; struct btrfs_ordered_sum; struct btrfs_ref; +struct btrfs_bio; +struct btrfs_ioctl_encoded_io_args; +struct btrfs_device; +struct btrfs_fs_devices; +struct btrfs_balance_control; +struct btrfs_delayed_root; +struct reloc_control; #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ @@ -66,12 +73,6 @@ struct btrfs_ref; #define BTRFS_OLDEST_GENERATION 0ULL /* - * the max metadata block size. This limit is somewhat artificial, - * but the memmove costs go through the roof for larger blocks. - */ -#define BTRFS_MAX_METADATA_BLOCKSIZE 65536 - -/* * we can actually store much bigger names, but lets not confuse the rest * of linux */ @@ -110,14 +111,6 @@ struct btrfs_ref; #define BTRFS_STAT_CURR 0 #define BTRFS_STAT_PREV 1 -/* - * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size - */ -static inline u32 count_max_extents(u64 size) -{ - return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); -} - static inline unsigned long btrfs_chunk_item_size(int num_stripes) { BUG_ON(num_stripes == 0); @@ -136,6 +129,8 @@ enum { * defrag */ BTRFS_FS_STATE_REMOUNTING, + /* Filesystem in RO mode */ + BTRFS_FS_STATE_RO, /* Track if a transaction abort has been reported on this filesystem */ BTRFS_FS_STATE_TRANS_ABORTED, /* @@ -145,6 +140,13 @@ enum { BTRFS_FS_STATE_DEV_REPLACING, /* The btrfs_fs_info created for self-tests */ BTRFS_FS_STATE_DUMMY_FS_INFO, + + BTRFS_FS_STATE_NO_CSUMS, + + /* Indicates there was an error cleaning up a log tree. */ + BTRFS_FS_STATE_LOG_CLEANUP_ERROR, + + BTRFS_FS_STATE_COUNT }; #define BTRFS_BACKREF_REV_MAX 256 @@ -220,6 +222,16 @@ struct btrfs_root_backup { u8 unused_8[10]; } __attribute__ ((__packed__)); +#define BTRFS_SUPER_INFO_OFFSET SZ_64K +#define BTRFS_SUPER_INFO_SIZE 4096 + +/* + * The reserved space at the beginning of each device. + * It covers the primary super block and leaves space for potential use by other + * tools like bootloaders or to lower potential damage of accidental overwrite. + */ +#define BTRFS_DEVICE_RANGE_RESERVED (SZ_1M) + /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -239,8 +251,12 @@ struct btrfs_super_block { __le64 chunk_root; __le64 log_root; - /* this will help find the new super based on the log root */ - __le64 log_root_transid; + /* + * This member has never been utilized since the very beginning, thus + * it's always 0 regardless of kernel version. We always use + * generation + 1 to read log tree root. So here we mark it deprecated. + */ + __le64 __unused_log_root_transid; __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; @@ -269,10 +285,15 @@ struct btrfs_super_block { u8 metadata_uuid[BTRFS_FSID_SIZE]; /* future expansion */ - __le64 reserved[28]; + u8 reserved8[8]; + __le64 reserved[27]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; + + /* Padded to 4096 bytes */ + u8 padding[565]; } __attribute__ ((__packed__)); +static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE); /* * Compat flags that we support. If any incompat flags are set other than the @@ -284,11 +305,33 @@ struct btrfs_super_block { #define BTRFS_FEATURE_COMPAT_RO_SUPP \ (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \ - BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID) + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \ + BTRFS_FEATURE_COMPAT_RO_VERITY | \ + BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE) #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL +#ifdef CONFIG_BTRFS_DEBUG +/* + * Extent tree v2 supported only with CONFIG_BTRFS_DEBUG + */ +#define BTRFS_FEATURE_INCOMPAT_SUPP \ + (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ + BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ + BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ + BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ + BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ + BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \ + BTRFS_FEATURE_INCOMPAT_RAID56 | \ + BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ + BTRFS_FEATURE_INCOMPAT_NO_HOLES | \ + BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \ + BTRFS_FEATURE_INCOMPAT_RAID1C34 | \ + BTRFS_FEATURE_INCOMPAT_ZONED | \ + BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2) +#else #define BTRFS_FEATURE_INCOMPAT_SUPP \ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ @@ -301,7 +344,9 @@ struct btrfs_super_block { BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ BTRFS_FEATURE_INCOMPAT_NO_HOLES | \ BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \ - BTRFS_FEATURE_INCOMPAT_RAID1C34) + BTRFS_FEATURE_INCOMPAT_RAID1C34 | \ + BTRFS_FEATURE_INCOMPAT_ZONED) +#endif #define BTRFS_FEATURE_INCOMPAT_SAFE_SET \ (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) @@ -344,6 +389,27 @@ struct btrfs_node { struct btrfs_key_ptr ptrs[]; } __attribute__ ((__packed__)); +/* Read ahead values for struct btrfs_path.reada */ +enum { + READA_NONE, + READA_BACK, + READA_FORWARD, + /* + * Similar to READA_FORWARD but unlike it: + * + * 1) It will trigger readahead even for leaves that are not close to + * each other on disk; + * 2) It also triggers readahead for nodes; + * 3) During a search, even when a node or leaf is already in memory, it + * will still trigger readahead for other nodes and leaves that follow + * it. + * + * This is meant to be used only when we know we are iterating over the + * entire tree or a very large part of it. + */ + READA_FORWARD_ALWAYS, +}; + /* * btrfs_paths remember the path taken from the root down to the leaf. * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point @@ -352,7 +418,6 @@ struct btrfs_node { * The slots array records the index of the item or block pointer * used while walking the tree. */ -enum { READA_NONE, READA_BACK, READA_FORWARD }; struct btrfs_path { struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; @@ -369,13 +434,19 @@ struct btrfs_path { unsigned int search_for_split:1; unsigned int keep_locks:1; unsigned int skip_locking:1; - unsigned int leave_spinning:1; unsigned int search_commit_root:1; unsigned int need_commit_sem:1; unsigned int skip_release_on_error:1; + /* + * Indicate that new item (btrfs_search_slot) is extending already + * existing item and ins_len contains only the data size and not item + * header (ie. sizeof(struct btrfs_item) is not included). + */ + unsigned int search_for_extension:1; + /* Stop search if any locks need to be taken (for read) */ + unsigned int nowait:1; }; -#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \ - sizeof(struct btrfs_item)) + struct btrfs_dev_replace { u64 replace_state; /* see #define above */ time64_t time_started; /* seconds since 1-Jan-1970 */ @@ -432,22 +503,6 @@ struct btrfs_free_cluster { struct list_head block_group_list; }; -enum btrfs_caching_type { - BTRFS_CACHE_NO, - BTRFS_CACHE_STARTED, - BTRFS_CACHE_FAST, - BTRFS_CACHE_FINISHED, - BTRFS_CACHE_ERROR, -}; - -/* - * Tree to record all locked full stripes of a RAID5/6 block group - */ -struct btrfs_full_stripe_locks_tree { - struct rb_root root; - struct mutex lock; -}; - /* Discard control. */ /* * Async discard uses multiple lists to differentiate the discard filter @@ -467,10 +522,11 @@ struct btrfs_discard_ctl { struct btrfs_block_group *block_group; struct list_head discard_list[BTRFS_NR_DISCARD_LISTS]; u64 prev_discard; + u64 prev_discard_time; atomic_t discardable_extents; atomic64_t discardable_bytes; u64 max_discard_size; - unsigned long delay; + u64 delay_ms; u32 iops_limit; u32 kbps_limit; u64 discard_extent_bytes; @@ -478,54 +534,7 @@ struct btrfs_discard_ctl { atomic64_t discard_bytes_saved; }; -/* delayed seq elem */ -struct seq_list { - struct list_head list; - u64 seq; -}; - -#define SEQ_LIST_INIT(name) { .list = LIST_HEAD_INIT((name).list), .seq = 0 } - -#define SEQ_LAST ((u64)-1) - -enum btrfs_orphan_cleanup_state { - ORPHAN_CLEANUP_STARTED = 1, - ORPHAN_CLEANUP_DONE = 2, -}; - -void btrfs_init_async_reclaim_work(struct work_struct *work); - -/* fs_info */ -struct reloc_control; -struct btrfs_device; -struct btrfs_fs_devices; -struct btrfs_balance_control; -struct btrfs_delayed_root; - -/* - * Block group or device which contains an active swapfile. Used for preventing - * unsafe operations while a swapfile is active. - * - * These are sorted on (ptr, inode) (note that a block group or device can - * contain more than one swapfile). We compare the pointer values because we - * don't actually care what the object is, we just need a quick check whether - * the object exists in the rbtree. - */ -struct btrfs_swapfile_pin { - struct rb_node node; - void *ptr; - struct inode *inode; - /* - * If true, ptr points to a struct btrfs_block_group. Otherwise, ptr - * points to a struct btrfs_device. - */ - bool is_block_group; -}; - -bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr); - enum { - BTRFS_FS_BARRIER, BTRFS_FS_CLOSING_START, BTRFS_FS_CLOSING_DONE, BTRFS_FS_LOG_RECOVERING, @@ -540,22 +549,17 @@ enum { /* Used to record internally whether fs has been frozen */ BTRFS_FS_FROZEN, /* - * Indicate that a whole-filesystem exclusive operation is running - * (device replace, resize, device add/delete, balance) - */ - BTRFS_FS_EXCL_OP, - /* - * To info transaction_kthread we need an immediate commit so it - * doesn't need to wait for commit_interval - */ - BTRFS_FS_NEED_ASYNC_COMMIT, - /* * Indicate that balance has been set up from the ioctl and is in the * main phase. The fs_info::balance_ctl is initialized. - * Set and cleared while holding fs_info::balance_mutex. */ BTRFS_FS_BALANCE_RUNNING, + /* + * Indicate that relocation of a chunk has started, it's set per chunk + * and is toggled between chunks. + */ + BTRFS_FS_RELOC_RUNNING, + /* Indicate that the cleaner thread is awake and doing something. */ BTRFS_FS_CLEANER_RUNNING, @@ -567,37 +571,89 @@ enum { /* Indicate that the discard workqueue can service discards. */ BTRFS_FS_DISCARD_RUNNING, + + /* Indicate that we need to cleanup space cache v1 */ + BTRFS_FS_CLEANUP_SPACE_CACHE_V1, + + /* Indicate that we can't trust the free space tree for caching yet */ + BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, + + /* Indicate whether there are any tree modification log users */ + BTRFS_FS_TREE_MOD_LOG_USERS, + + /* Indicate that we want the transaction kthread to commit right now. */ + BTRFS_FS_COMMIT_TRANS, + + /* Indicate we have half completed snapshot deletions pending. */ + BTRFS_FS_UNFINISHED_DROPS, + + /* Indicate we have to finish a zone to do next allocation. */ + BTRFS_FS_NEED_ZONE_FINISH, + +#if BITS_PER_LONG == 32 + /* Indicate if we have error/warn message printed on 32bit systems */ + BTRFS_FS_32BIT_ERROR, + BTRFS_FS_32BIT_WARN, +#endif +}; + +/* + * Exclusive operations (device replace, resize, device add/remove, balance) + */ +enum btrfs_exclusive_operation { + BTRFS_EXCLOP_NONE, + BTRFS_EXCLOP_BALANCE_PAUSED, + BTRFS_EXCLOP_BALANCE, + BTRFS_EXCLOP_DEV_ADD, + BTRFS_EXCLOP_DEV_REMOVE, + BTRFS_EXCLOP_DEV_REPLACE, + BTRFS_EXCLOP_RESIZE, + BTRFS_EXCLOP_SWAP_ACTIVATE, +}; + +/* Store data about transaction commits, exported via sysfs. */ +struct btrfs_commit_stats { + /* Total number of commits */ + u64 commit_count; + /* The maximum commit duration so far in ns */ + u64 max_commit_dur; + /* The last commit duration in ns */ + u64 last_commit_dur; + /* The total commit duration in ns */ + u64 total_commit_dur; }; struct btrfs_fs_info { u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; unsigned long flags; - struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *chunk_root; struct btrfs_root *dev_root; struct btrfs_root *fs_root; - struct btrfs_root *csum_root; struct btrfs_root *quota_root; struct btrfs_root *uuid_root; - struct btrfs_root *free_space_root; + struct btrfs_root *data_reloc_root; + struct btrfs_root *block_group_root; /* the log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; + /* The tree that holds the global roots (csum, extent, etc) */ + rwlock_t global_root_lock; + struct rb_root global_root_tree; + spinlock_t fs_roots_radix_lock; struct radix_tree_root fs_roots_radix; /* block group cache stuff */ - spinlock_t block_group_cache_lock; - u64 first_logical_byte; - struct rb_root block_group_cache_tree; + rwlock_t block_group_cache_lock; + struct rb_root_cached block_group_cache_tree; /* keep track of unallocated space */ atomic64_t free_chunk_space; - struct extent_io_tree freed_extents[2]; - struct extent_io_tree *pinned_extents; + /* Track ranges which are used by log trees blocks/logged data extents */ + struct extent_io_tree excluded_extents; /* logical->physical extent mapping */ struct extent_map_tree mapping_tree; @@ -620,6 +676,12 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; + /* + * Generation of the last transaction used for block group relocation + * since the filesystem was last mounted (or 0 if none happened yet). + * Must be written and read while holding btrfs_fs_info::commit_root_sem. + */ + u64 last_reloc_trans; u64 avg_delayed_ref_runtime; /* @@ -696,7 +758,6 @@ struct btrfs_fs_info { struct rw_semaphore cleanup_work_sem; struct rw_semaphore subvol_sem; - struct srcu_struct subvol_srcu; spinlock_t trans_lock; /* @@ -753,18 +814,17 @@ struct btrfs_fs_info { * two */ struct btrfs_workqueue *workers; + struct btrfs_workqueue *hipri_workers; struct btrfs_workqueue *delalloc_workers; struct btrfs_workqueue *flush_workers; - struct btrfs_workqueue *endio_workers; - struct btrfs_workqueue *endio_meta_workers; - struct btrfs_workqueue *endio_raid56_workers; - struct btrfs_workqueue *endio_repair_workers; - struct btrfs_workqueue *rmw_workers; - struct btrfs_workqueue *endio_meta_write_workers; + struct workqueue_struct *endio_workers; + struct workqueue_struct *endio_meta_workers; + struct workqueue_struct *endio_raid56_workers; + struct workqueue_struct *rmw_workers; + struct workqueue_struct *compressed_write_workers; struct btrfs_workqueue *endio_write_workers; struct btrfs_workqueue *endio_freespace_worker; struct btrfs_workqueue *caching_workers; - struct btrfs_workqueue *readahead_workers; /* * fixup workers take dirty pages that didn't properly go through @@ -779,13 +839,13 @@ struct btrfs_fs_info { u32 thread_pool_size; struct kobject *space_info_kobj; - - u64 total_pinned; + struct kobject *qgroups_kobj; + struct kobject *discard_kobj; /* used to keep from writing metadata until there is a nice batch */ struct percpu_counter dirty_metadata_bytes; struct percpu_counter delalloc_bytes; - struct percpu_counter dio_bytes; + struct percpu_counter ordered_bytes; s32 dirty_metadata_batch; s32 delalloc_batch; @@ -834,6 +894,9 @@ struct btrfs_fs_info { struct btrfs_balance_control *balance_ctl; wait_queue_head_t balance_wait_q; + /* Cancellation requests for chunk relocation */ + atomic_t reloc_cancel_req; + u32 data_chunk_allocations; u32 metadata_ratio; @@ -851,9 +914,10 @@ struct btrfs_fs_info { * running. */ refcount_t scrub_workers_refcnt; - struct btrfs_workqueue *scrub_workers; - struct btrfs_workqueue *scrub_wr_completion_workers; - struct btrfs_workqueue *scrub_parity_workers; + struct workqueue_struct *scrub_workers; + struct workqueue_struct *scrub_wr_completion_workers; + struct workqueue_struct *scrub_parity_workers; + struct btrfs_subpage_info *subpage_info; struct btrfs_discard_ctl discard_ctl; @@ -873,7 +937,10 @@ struct btrfs_fs_info { */ struct ulist *qgroup_ulist; - /* protect user change for quota operations */ + /* + * Protect user change for quota operations. If a transaction is needed, + * it must be started before locking this lock. + */ struct mutex qgroup_ioctl_lock; /* list of dirty qgroups to be written at next commit */ @@ -889,21 +956,16 @@ struct btrfs_fs_info { struct completion qgroup_rescan_completion; struct btrfs_work qgroup_rescan_work; bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */ + u8 qgroup_drop_subtree_thres; /* filesystem state */ unsigned long fs_state; struct btrfs_delayed_root *delayed_root; - /* readahead tree */ - spinlock_t reada_lock; - struct radix_tree_root reada_tree; - - /* readahead works cnt */ - atomic_t reada_works_cnt; - /* Extent buffer radix tree */ spinlock_t buffer_lock; + /* Entries are eb->start / sectorsize */ struct radix_tree_root buffer_radix; /* next backup root to be overwritten */ @@ -916,28 +978,88 @@ struct btrfs_fs_info { /* Used to reclaim the metadata space in the background. */ struct work_struct async_reclaim_work; + struct work_struct async_data_reclaim_work; + struct work_struct preempt_reclaim_work; + + /* Reclaim partially filled block groups in the background */ + struct work_struct reclaim_bgs_work; + struct list_head reclaim_bgs; + int bg_reclaim_threshold; spinlock_t unused_bgs_lock; struct list_head unused_bgs; struct mutex unused_bg_unpin_mutex; - struct mutex delete_unused_bgs_mutex; + /* Protect block groups that are going to be deleted */ + struct mutex reclaim_bgs_lock; /* Cached block sizes */ u32 nodesize; u32 sectorsize; + /* ilog2 of sectorsize, use to avoid 64bit division */ + u32 sectorsize_bits; + u32 csum_size; + u32 csums_per_leaf; u32 stripesize; + /* + * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular + * filesystem, on zoned it depends on the device constraints. + */ + u64 max_extent_size; + /* Block groups and devices containing active swapfiles. */ spinlock_t swapfile_pins_lock; struct rb_root swapfile_pins; struct crypto_shash *csum_shash; + /* Type of exclusive operation running, protected by super_lock */ + enum btrfs_exclusive_operation exclusive_operation; + /* - * Number of send operations in progress. - * Updated while holding fs_info::balance_mutex. + * Zone size > 0 when in ZONED mode, otherwise it's used for a check + * if the mode is enabled */ - int send_in_progress; + u64 zone_size; + + /* Max size to emit ZONE_APPEND write command */ + u64 max_zone_append_size; + struct mutex zoned_meta_io_lock; + spinlock_t treelog_bg_lock; + u64 treelog_bg; + + /* + * Start of the dedicated data relocation block group, protected by + * relocation_bg_lock. + */ + spinlock_t relocation_bg_lock; + u64 data_reloc_bg; + struct mutex zoned_data_reloc_io_lock; + + u64 nr_global_roots; + + spinlock_t zone_active_bgs_lock; + struct list_head zone_active_bgs; + + /* Updates are not protected by any lock */ + struct btrfs_commit_stats commit_stats; + + /* + * Last generation where we dropped a non-relocation root. + * Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen() + * to change it and to read it, respectively. + */ + u64 last_root_drop_gen; + + /* + * Annotations for transaction events (structures are empty when + * compiled without lockdep). + */ + struct lockdep_map btrfs_trans_num_writers_map; + struct lockdep_map btrfs_trans_num_extwriters_map; + struct lockdep_map btrfs_state_change_map[4]; + struct lockdep_map btrfs_trans_pending_ordered_map; + struct lockdep_map btrfs_ordered_extent_map; #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; @@ -946,19 +1068,90 @@ struct btrfs_fs_info { #ifdef CONFIG_BTRFS_DEBUG struct kobject *debug_kobj; - struct kobject *discard_debug_kobj; + struct list_head allocated_roots; + + spinlock_t eb_leak_lock; + struct list_head allocated_ebs; #endif }; +static inline void btrfs_set_last_root_drop_gen(struct btrfs_fs_info *fs_info, + u64 gen) +{ + WRITE_ONCE(fs_info->last_root_drop_gen, gen); +} + +static inline u64 btrfs_get_last_root_drop_gen(const struct btrfs_fs_info *fs_info) +{ + return READ_ONCE(fs_info->last_root_drop_gen); +} + static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; } -struct btrfs_subvolume_writers { - struct percpu_counter counter; - wait_queue_head_t wait; -}; +/* + * Take the number of bytes to be checksummed and figure out how many leaves + * it would require to store the csums for that many bytes. + */ +static inline u64 btrfs_csum_bytes_to_leaves( + const struct btrfs_fs_info *fs_info, u64 csum_bytes) +{ + const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits; + + return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf); +} + +/* + * Use this if we would be adding new items, as we could split nodes as we cow + * down the tree. + */ +static inline u64 btrfs_calc_insert_metadata_size(struct btrfs_fs_info *fs_info, + unsigned num_items) +{ + return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; +} + +/* + * Doing a truncate or a modification won't result in new nodes or leaves, just + * what we need for COW. + */ +static inline u64 btrfs_calc_metadata_size(struct btrfs_fs_info *fs_info, + unsigned num_items) +{ + return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items; +} + +#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \ + sizeof(struct btrfs_item)) + +static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info) +{ + return fs_info->zone_size > 0; +} + +/* + * Count how many fs_info->max_extent_size cover the @size + */ +static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size) +{ +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + if (!fs_info) + return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); +#endif + + return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size); +} + +bool btrfs_exclop_start(struct btrfs_fs_info *fs_info, + enum btrfs_exclusive_operation type); +bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info, + enum btrfs_exclusive_operation type); +void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info); +void btrfs_exclop_finish(struct btrfs_fs_info *fs_info); +void btrfs_exclop_balance(struct btrfs_fs_info *fs_info, + enum btrfs_exclusive_operation op); /* * The state of btrfs root @@ -971,7 +1164,28 @@ enum { * is used to tell us when more checks are required */ BTRFS_ROOT_IN_TRANS_SETUP, - BTRFS_ROOT_REF_COWS, + + /* + * Set if tree blocks of this root can be shared by other roots. + * Only subvolume trees and their reloc trees have this bit set. + * Conflicts with TRACK_DIRTY bit. + * + * This affects two things: + * + * - How balance works + * For shareable roots, we need to use reloc tree and do path + * replacement for balance, and need various pre/post hooks for + * snapshot creation to handle them. + * + * While for non-shareable trees, we just simply do a tree search + * with COW. + * + * - How dirty roots are tracked + * For shareable roots, btrfs_record_root_in_trans() is needed to + * track them, while non-subvolume roots have TRACK_DIRTY bit, they + * don't need to set this manually. + */ + BTRFS_ROOT_SHAREABLE, BTRFS_ROOT_TRACK_DIRTY, BTRFS_ROOT_IN_RADIX, BTRFS_ROOT_ORPHAN_ITEM_INSERTED, @@ -989,9 +1203,100 @@ enum { BTRFS_ROOT_DEAD_RELOC_TREE, /* Mark dead root stored on device whose cleanup needs to be resumed */ BTRFS_ROOT_DEAD_TREE, + /* The root has a log tree. Used for subvolume roots and the tree root. */ + BTRFS_ROOT_HAS_LOG_TREE, + /* Qgroup flushing is in progress */ + BTRFS_ROOT_QGROUP_FLUSHING, + /* We started the orphan cleanup for this root. */ + BTRFS_ROOT_ORPHAN_CLEANUP, + /* This root has a drop operation that was started previously. */ + BTRFS_ROOT_UNFINISHED_DROP, + /* This reloc root needs to have its buffers lockdep class reset. */ + BTRFS_ROOT_RESET_LOCKDEP_CLASS, +}; + +enum btrfs_lockdep_trans_states { + BTRFS_LOCKDEP_TRANS_COMMIT_START, + BTRFS_LOCKDEP_TRANS_UNBLOCKED, + BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED, + BTRFS_LOCKDEP_TRANS_COMPLETED, }; /* + * Lockdep annotation for wait events. + * + * @owner: The struct where the lockdep map is defined + * @lock: The lockdep map corresponding to a wait event + * + * This macro is used to annotate a wait event. In this case a thread acquires + * the lockdep map as writer (exclusive lock) because it has to block until all + * the threads that hold the lock as readers signal the condition for the wait + * event and release their locks. + */ +#define btrfs_might_wait_for_event(owner, lock) \ + do { \ + rwsem_acquire(&owner->lock##_map, 0, 0, _THIS_IP_); \ + rwsem_release(&owner->lock##_map, _THIS_IP_); \ + } while (0) + +/* + * Protection for the resource/condition of a wait event. + * + * @owner: The struct where the lockdep map is defined + * @lock: The lockdep map corresponding to a wait event + * + * Many threads can modify the condition for the wait event at the same time + * and signal the threads that block on the wait event. The threads that modify + * the condition and do the signaling acquire the lock as readers (shared + * lock). + */ +#define btrfs_lockdep_acquire(owner, lock) \ + rwsem_acquire_read(&owner->lock##_map, 0, 0, _THIS_IP_) + +/* + * Used after signaling the condition for a wait event to release the lockdep + * map held by a reader thread. + */ +#define btrfs_lockdep_release(owner, lock) \ + rwsem_release(&owner->lock##_map, _THIS_IP_) + +/* + * Macros for the transaction states wait events, similar to the generic wait + * event macros. + */ +#define btrfs_might_wait_for_state(owner, i) \ + do { \ + rwsem_acquire(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_); \ + rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_); \ + } while (0) + +#define btrfs_trans_state_lockdep_acquire(owner, i) \ + rwsem_acquire_read(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_) + +#define btrfs_trans_state_lockdep_release(owner, i) \ + rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_) + +/* Initialization of the lockdep map */ +#define btrfs_lockdep_init_map(owner, lock) \ + do { \ + static struct lock_class_key lock##_key; \ + lockdep_init_map(&owner->lock##_map, #lock, &lock##_key, 0); \ + } while (0) + +/* Initialization of the transaction states lockdep maps. */ +#define btrfs_state_lockdep_init_map(owner, lock, state) \ + do { \ + static struct lock_class_key lock##_key; \ + lockdep_init_map(&owner->btrfs_state_change_map[state], #lock, \ + &lock##_key, 0); \ + } while (0) + +static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info) +{ + clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags); +} + +/* * Record swapped tree blocks of a subvolume tree for delayed subtree trace * code. For detail check comment in fs/btrfs/qgroup.c. */ @@ -1007,6 +1312,8 @@ struct btrfs_qgroup_swapped_blocks { * and for the extent tree extent_root root. */ struct btrfs_root { + struct rb_node rb_node; + struct extent_buffer *node; struct extent_buffer *commit_root; @@ -1024,21 +1331,14 @@ struct btrfs_root { spinlock_t accounting_lock; struct btrfs_block_rsv *block_rsv; - /* free ino cache stuff */ - struct btrfs_free_space_ctl *free_ino_ctl; - enum btrfs_caching_type ino_cache_state; - spinlock_t ino_cache_lock; - wait_queue_head_t ino_cache_wait; - struct btrfs_free_space_ctl *free_ino_pinned; - u64 ino_cache_progress; - struct inode *ino_cache_inode; - struct mutex log_mutex; wait_queue_head_t log_writer_wait; wait_queue_head_t log_commit_wait[2]; struct list_head log_ctxs[2]; + /* Used only for log trees of subvolumes, not for the log root tree */ atomic_t log_writers; atomic_t log_commit[2]; + /* Used only for log trees of subvolumes, not for the log root tree */ atomic_t log_batch; int log_transid; /* No matter the commit succeeds or not*/ @@ -1051,13 +1351,12 @@ struct btrfs_root { u32 type; - u64 highest_objectid; + u64 free_objectid; - u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; - /* the dirty list is only used by non-reference counted roots */ + /* The dirty list is only used by non-shareable roots */ struct list_head dirty_list; struct list_head root_list; @@ -1065,8 +1364,6 @@ struct btrfs_root { spinlock_t log_extents_lock[2]; struct list_head logged_list[2]; - int orphan_cleanup_state; - spinlock_t inode_lock; /* red-black tree that keeps track of in-memory inodes */ struct rb_root inode_tree; @@ -1131,14 +1428,16 @@ struct btrfs_root { * root_item_lock. */ int dedupe_in_progress; - struct btrfs_subvolume_writers *subv_writers; - atomic_t will_be_snapshotted; + /* For exclusion of snapshot creation and nocow writes */ + struct btrfs_drew_lock snapshot_lock; + atomic_t snapshot_force_cow; /* For qgroup metadata reserved space */ spinlock_t qgroup_meta_rsv_lock; u64 qgroup_meta_rsv_pertrans; u64 qgroup_meta_rsv_prealloc; + wait_queue_head_t qgroup_flush_wait; /* Number of active swapfiles */ atomic_t nr_swapfiles; @@ -1146,29 +1445,111 @@ struct btrfs_root { /* Record pairs of swapped blocks for qgroup */ struct btrfs_qgroup_swapped_blocks swapped_blocks; + /* Used only by log trees, when logging csum items */ + struct extent_io_tree log_csum_range; + #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS u64 alloc_bytenr; #endif + +#ifdef CONFIG_BTRFS_DEBUG + struct list_head leak_list; +#endif }; -struct btrfs_clone_extent_info { +/* + * Structure that conveys information about an extent that is going to replace + * all the extents in a file range. + */ +struct btrfs_replace_extent_info { u64 disk_offset; u64 disk_len; u64 data_offset; u64 data_len; u64 file_offset; + /* Pointer to a file extent item of type regular or prealloc. */ char *extent_buf; - u32 item_size; + /* + * Set to true when attempting to replace a file range with a new extent + * described by this structure, set to false when attempting to clone an + * existing extent into a file range. + */ + bool is_new_extent; + /* Indicate if we should update the inode's mtime and ctime. */ + bool update_times; + /* Meaningful only if is_new_extent is true. */ + int qgroup_reserved; + /* + * Meaningful only if is_new_extent is true. + * Used to track how many extent items we have already inserted in a + * subvolume tree that refer to the extent described by this structure, + * so that we know when to create a new delayed ref or update an existing + * one. + */ + int insertions; +}; + +/* Arguments for btrfs_drop_extents() */ +struct btrfs_drop_extents_args { + /* Input parameters */ + + /* + * If NULL, btrfs_drop_extents() will allocate and free its own path. + * If 'replace_extent' is true, this must not be NULL. Also the path + * is always released except if 'replace_extent' is true and + * btrfs_drop_extents() sets 'extent_inserted' to true, in which case + * the path is kept locked. + */ + struct btrfs_path *path; + /* Start offset of the range to drop extents from */ + u64 start; + /* End (exclusive, last byte + 1) of the range to drop extents from */ + u64 end; + /* If true drop all the extent maps in the range */ + bool drop_cache; + /* + * If true it means we want to insert a new extent after dropping all + * the extents in the range. If this is true, the 'extent_item_size' + * parameter must be set as well and the 'extent_inserted' field will + * be set to true by btrfs_drop_extents() if it could insert the new + * extent. + * Note: when this is set to true the path must not be NULL. + */ + bool replace_extent; + /* + * Used if 'replace_extent' is true. Size of the file extent item to + * insert after dropping all existing extents in the range + */ + u32 extent_item_size; + + /* Output parameters */ + + /* + * Set to the minimum between the input parameter 'end' and the end + * (exclusive, last byte + 1) of the last dropped extent. This is always + * set even if btrfs_drop_extents() returns an error. + */ + u64 drop_end; + /* + * The number of allocated bytes found in the range. This can be smaller + * than the range's length when there are holes in the range. + */ + u64 bytes_found; + /* + * Only set if 'replace_extent' is true. Set to true if we were able + * to insert a replacement extent after dropping all extents in the + * range, otherwise set to false by btrfs_drop_extents(). + * Also, if btrfs_drop_extents() has set this to true it means it + * returned with the path locked, otherwise if it has set this to + * false it has returned with the path released. + */ + bool extent_inserted; }; struct btrfs_file_private { void *filldir_buf; }; -static inline u32 btrfs_inode_sectorsize(const struct inode *inode) -{ - return btrfs_sb(inode->i_sb)->sectorsize; -} static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) { @@ -1206,36 +1587,39 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) * * Note: don't forget to add new options to btrfs_show_options() */ -#define BTRFS_MOUNT_NODATASUM (1 << 0) -#define BTRFS_MOUNT_NODATACOW (1 << 1) -#define BTRFS_MOUNT_NOBARRIER (1 << 2) -#define BTRFS_MOUNT_SSD (1 << 3) -#define BTRFS_MOUNT_DEGRADED (1 << 4) -#define BTRFS_MOUNT_COMPRESS (1 << 5) -#define BTRFS_MOUNT_NOTREELOG (1 << 6) -#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) -#define BTRFS_MOUNT_SSD_SPREAD (1 << 8) -#define BTRFS_MOUNT_NOSSD (1 << 9) -#define BTRFS_MOUNT_DISCARD_SYNC (1 << 10) -#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) -#define BTRFS_MOUNT_SPACE_CACHE (1 << 12) -#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) -#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) -#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) -#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) -#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) -#define BTRFS_MOUNT_USEBACKUPROOT (1 << 18) -#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19) -#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) -#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) -#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) -#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23) -#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24) -#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25) -#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26) -#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27) -#define BTRFS_MOUNT_REF_VERIFY (1 << 28) -#define BTRFS_MOUNT_DISCARD_ASYNC (1 << 29) +enum { + BTRFS_MOUNT_NODATASUM = (1UL << 0), + BTRFS_MOUNT_NODATACOW = (1UL << 1), + BTRFS_MOUNT_NOBARRIER = (1UL << 2), + BTRFS_MOUNT_SSD = (1UL << 3), + BTRFS_MOUNT_DEGRADED = (1UL << 4), + BTRFS_MOUNT_COMPRESS = (1UL << 5), + BTRFS_MOUNT_NOTREELOG = (1UL << 6), + BTRFS_MOUNT_FLUSHONCOMMIT = (1UL << 7), + BTRFS_MOUNT_SSD_SPREAD = (1UL << 8), + BTRFS_MOUNT_NOSSD = (1UL << 9), + BTRFS_MOUNT_DISCARD_SYNC = (1UL << 10), + BTRFS_MOUNT_FORCE_COMPRESS = (1UL << 11), + BTRFS_MOUNT_SPACE_CACHE = (1UL << 12), + BTRFS_MOUNT_CLEAR_CACHE = (1UL << 13), + BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED = (1UL << 14), + BTRFS_MOUNT_ENOSPC_DEBUG = (1UL << 15), + BTRFS_MOUNT_AUTO_DEFRAG = (1UL << 16), + BTRFS_MOUNT_USEBACKUPROOT = (1UL << 17), + BTRFS_MOUNT_SKIP_BALANCE = (1UL << 18), + BTRFS_MOUNT_CHECK_INTEGRITY = (1UL << 19), + BTRFS_MOUNT_CHECK_INTEGRITY_DATA = (1UL << 20), + BTRFS_MOUNT_PANIC_ON_FATAL_ERROR = (1UL << 21), + BTRFS_MOUNT_RESCAN_UUID_TREE = (1UL << 22), + BTRFS_MOUNT_FRAGMENT_DATA = (1UL << 23), + BTRFS_MOUNT_FRAGMENT_METADATA = (1UL << 24), + BTRFS_MOUNT_FREE_SPACE_TREE = (1UL << 25), + BTRFS_MOUNT_NOLOGREPLAY = (1UL << 26), + BTRFS_MOUNT_REF_VERIFY = (1UL << 27), + BTRFS_MOUNT_DISCARD_ASYNC = (1UL << 28), + BTRFS_MOUNT_IGNOREBADROOTS = (1UL << 29), + BTRFS_MOUNT_IGNOREDATACSUMS = (1UL << 30), +}; #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) #define BTRFS_DEFAULT_MAX_INLINE (2048) @@ -1247,18 +1631,18 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) BTRFS_MOUNT_##opt) #define btrfs_set_and_info(fs_info, opt, fmt, args...) \ -{ \ +do { \ if (!btrfs_test_opt(fs_info, opt)) \ btrfs_info(fs_info, fmt, ##args); \ btrfs_set_opt(fs_info->mount_opt, opt); \ -} +} while (0) #define btrfs_clear_and_info(fs_info, opt, fmt, args...) \ -{ \ +do { \ if (btrfs_test_opt(fs_info, opt)) \ btrfs_info(fs_info, fmt, ##args); \ btrfs_clear_opt(fs_info->mount_opt, opt); \ -} +} while (0) /* * Requests for changes that need to be done during transaction commit. @@ -1268,9 +1652,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) * transaction commit) */ -#define BTRFS_PENDING_SET_INODE_MAP_CACHE (0) -#define BTRFS_PENDING_CLEAR_INODE_MAP_CACHE (1) -#define BTRFS_PENDING_COMMIT (2) +#define BTRFS_PENDING_COMMIT (0) #define btrfs_test_pending(info, opt) \ test_bit(BTRFS_PENDING_##opt, &(info)->pending_changes) @@ -1306,20 +1688,20 @@ do { \ /* * Inode flags */ -#define BTRFS_INODE_NODATASUM (1 << 0) -#define BTRFS_INODE_NODATACOW (1 << 1) -#define BTRFS_INODE_READONLY (1 << 2) -#define BTRFS_INODE_NOCOMPRESS (1 << 3) -#define BTRFS_INODE_PREALLOC (1 << 4) -#define BTRFS_INODE_SYNC (1 << 5) -#define BTRFS_INODE_IMMUTABLE (1 << 6) -#define BTRFS_INODE_APPEND (1 << 7) -#define BTRFS_INODE_NODUMP (1 << 8) -#define BTRFS_INODE_NOATIME (1 << 9) -#define BTRFS_INODE_DIRSYNC (1 << 10) -#define BTRFS_INODE_COMPRESS (1 << 11) - -#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) +#define BTRFS_INODE_NODATASUM (1U << 0) +#define BTRFS_INODE_NODATACOW (1U << 1) +#define BTRFS_INODE_READONLY (1U << 2) +#define BTRFS_INODE_NOCOMPRESS (1U << 3) +#define BTRFS_INODE_PREALLOC (1U << 4) +#define BTRFS_INODE_SYNC (1U << 5) +#define BTRFS_INODE_IMMUTABLE (1U << 6) +#define BTRFS_INODE_APPEND (1U << 7) +#define BTRFS_INODE_NODUMP (1U << 8) +#define BTRFS_INODE_NOATIME (1U << 9) +#define BTRFS_INODE_DIRSYNC (1U << 10) +#define BTRFS_INODE_COMPRESS (1U << 11) + +#define BTRFS_INODE_ROOT_ITEM_INIT (1U << 31) #define BTRFS_INODE_FLAG_MASK \ (BTRFS_INODE_NODATASUM | \ @@ -1336,20 +1718,25 @@ do { \ BTRFS_INODE_COMPRESS | \ BTRFS_INODE_ROOT_ITEM_INIT) +#define BTRFS_INODE_RO_VERITY (1U << 0) + +#define BTRFS_INODE_RO_FLAG_MASK (BTRFS_INODE_RO_VERITY) + struct btrfs_map_token { - const struct extent_buffer *eb; + struct extent_buffer *eb; char *kaddr; unsigned long offset; }; #define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \ - ((bytes) >> (fs_info)->sb->s_blocksize_bits) + ((bytes) >> (fs_info)->sectorsize_bits) static inline void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb) { token->eb = eb; - token->kaddr = NULL; + token->kaddr = page_address(eb->pages[0]); + token->offset = 0; } /* some macros to generate set/get functions for the struct fields. This @@ -1360,6 +1747,16 @@ static inline void btrfs_init_map_token(struct btrfs_map_token *token, #define cpu_to_le8(v) (v) #define __le8 u8 +static inline u8 get_unaligned_le8(const void *p) +{ + return *(u8 *)p; +} + +static inline void put_unaligned_le8(u8 val, void *p) +{ + *(u8 *)p = val; +} + #define read_eb_member(eb, ptr, type, member, result) (\ read_extent_buffer(eb, (char *)(result), \ ((unsigned long)(ptr)) + \ @@ -1373,15 +1770,14 @@ static inline void btrfs_init_map_token(struct btrfs_map_token *token, sizeof(((type *)0)->member))) #define DECLARE_BTRFS_SETGET_BITS(bits) \ -u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \ - const void *ptr, unsigned long off, \ - struct btrfs_map_token *token); \ -void btrfs_set_token_##bits(struct extent_buffer *eb, const void *ptr, \ - unsigned long off, u##bits val, \ - struct btrfs_map_token *token); \ +u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \ + const void *ptr, unsigned long off); \ +void btrfs_set_token_##bits(struct btrfs_map_token *token, \ + const void *ptr, unsigned long off, \ + u##bits val); \ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \ const void *ptr, unsigned long off); \ -void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \ +void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \ unsigned long off, u##bits val); DECLARE_BTRFS_SETGET_BITS(8) @@ -1393,69 +1789,66 @@ DECLARE_BTRFS_SETGET_BITS(64) static inline u##bits btrfs_##name(const struct extent_buffer *eb, \ const type *s) \ { \ - BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ + static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ return btrfs_get_##bits(eb, s, offsetof(type, member)); \ } \ -static inline void btrfs_set_##name(struct extent_buffer *eb, type *s, \ +static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \ u##bits val) \ { \ - BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ + static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ btrfs_set_##bits(eb, s, offsetof(type, member), val); \ } \ -static inline u##bits btrfs_token_##name(const struct extent_buffer *eb,\ - const type *s, \ - struct btrfs_map_token *token) \ +static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \ + const type *s) \ { \ - BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ - return btrfs_get_token_##bits(eb, s, offsetof(type, member), token); \ + static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ + return btrfs_get_token_##bits(token, s, offsetof(type, member));\ } \ -static inline void btrfs_set_token_##name(struct extent_buffer *eb, \ - type *s, u##bits val, \ - struct btrfs_map_token *token) \ +static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\ + type *s, u##bits val) \ { \ - BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \ - btrfs_set_token_##bits(eb, s, offsetof(type, member), val, token); \ + static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ + btrfs_set_token_##bits(token, s, offsetof(type, member), val); \ } #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(const struct extent_buffer *eb) \ { \ - const type *p = page_address(eb->pages[0]); \ - u##bits res = le##bits##_to_cpu(p->member); \ - return res; \ + const type *p = page_address(eb->pages[0]) + \ + offset_in_page(eb->start); \ + return get_unaligned_le##bits(&p->member); \ } \ -static inline void btrfs_set_##name(struct extent_buffer *eb, \ +static inline void btrfs_set_##name(const struct extent_buffer *eb, \ u##bits val) \ { \ - type *p = page_address(eb->pages[0]); \ - p->member = cpu_to_le##bits(val); \ + type *p = page_address(eb->pages[0]) + offset_in_page(eb->start); \ + put_unaligned_le##bits(val, &p->member); \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(const type *s) \ { \ - return le##bits##_to_cpu(s->member); \ + return get_unaligned_le##bits(&s->member); \ } \ static inline void btrfs_set_##name(type *s, u##bits val) \ { \ - s->member = cpu_to_le##bits(val); \ + put_unaligned_le##bits(val, &s->member); \ } - -static inline u64 btrfs_device_total_bytes(struct extent_buffer *eb, +static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb, struct btrfs_dev_item *s) { - BUILD_BUG_ON(sizeof(u64) != - sizeof(((struct btrfs_dev_item *)0))->total_bytes); + static_assert(sizeof(u64) == + sizeof(((struct btrfs_dev_item *)0))->total_bytes); return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes)); } -static inline void btrfs_set_device_total_bytes(struct extent_buffer *eb, +static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb, struct btrfs_dev_item *s, u64 val) { - BUILD_BUG_ON(sizeof(u64) != - sizeof(((struct btrfs_dev_item *)0))->total_bytes); + static_assert(sizeof(u64) == + sizeof(((struct btrfs_dev_item *)0))->total_bytes); WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize)); btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val); } @@ -1554,13 +1947,13 @@ static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr) return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr)); } -static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb, +static inline u64 btrfs_stripe_offset_nr(const struct extent_buffer *eb, struct btrfs_chunk *c, int nr) { return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); } -static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, +static inline u64 btrfs_stripe_devid_nr(const struct extent_buffer *eb, struct btrfs_chunk *c, int nr) { return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); @@ -1640,31 +2033,21 @@ BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent, BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, chunk_offset, 64); BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); - -static inline unsigned long btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) -{ - unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid); - return (unsigned long)dev + ptr; -} - BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item, generation, 64); BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64); -BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32); - - BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8); -static inline void btrfs_tree_block_key(struct extent_buffer *eb, +static inline void btrfs_tree_block_key(const struct extent_buffer *eb, struct btrfs_tree_block_info *item, struct btrfs_disk_key *key) { read_eb_member(eb, item, struct btrfs_tree_block_info, key, key); } -static inline void btrfs_set_tree_block_key(struct extent_buffer *eb, +static inline void btrfs_set_tree_block_key(const struct extent_buffer *eb, struct btrfs_tree_block_info *item, struct btrfs_disk_key *key) { @@ -1702,12 +2085,6 @@ static inline u32 btrfs_extent_inline_ref_size(int type) return 0; } -BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64); -BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0, - generation, 64); -BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64); -BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32); - /* struct btrfs_node */ BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); @@ -1716,7 +2093,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr, BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr, generation, 64); -static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) +static inline u64 btrfs_node_blockptr(const struct extent_buffer *eb, int nr) { unsigned long ptr; ptr = offsetof(struct btrfs_node, ptrs) + @@ -1724,7 +2101,7 @@ static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); } -static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, +static inline void btrfs_set_node_blockptr(const struct extent_buffer *eb, int nr, u64 val) { unsigned long ptr; @@ -1733,7 +2110,7 @@ static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); } -static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr) +static inline u64 btrfs_node_ptr_generation(const struct extent_buffer *eb, int nr) { unsigned long ptr; ptr = offsetof(struct btrfs_node, ptrs) + @@ -1741,7 +2118,7 @@ static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr) return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr); } -static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb, +static inline void btrfs_set_node_ptr_generation(const struct extent_buffer *eb, int nr, u64 val) { unsigned long ptr; @@ -1759,7 +2136,7 @@ static inline unsigned long btrfs_node_key_ptr_offset(int nr) void btrfs_node_key(const struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr); -static inline void btrfs_set_node_key(struct extent_buffer *eb, +static inline void btrfs_set_node_key(const struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { unsigned long ptr; @@ -1769,8 +2146,8 @@ static inline void btrfs_set_node_key(struct extent_buffer *eb, } /* struct btrfs_item */ -BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); -BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); +BTRFS_SETGET_FUNCS(raw_item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_FUNCS(raw_item_size, struct btrfs_item, size, 32); BTRFS_SETGET_STACK_FUNCS(stack_item_offset, struct btrfs_item, offset, 32); BTRFS_SETGET_STACK_FUNCS(stack_item_size, struct btrfs_item, size, 32); @@ -1785,25 +2162,36 @@ static inline struct btrfs_item *btrfs_item_nr(int nr) return (struct btrfs_item *)btrfs_item_nr_offset(nr); } -static inline u32 btrfs_item_end(const struct extent_buffer *eb, - struct btrfs_item *item) -{ - return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); +#define BTRFS_ITEM_SETGET_FUNCS(member) \ +static inline u32 btrfs_item_##member(const struct extent_buffer *eb, \ + int slot) \ +{ \ + return btrfs_raw_item_##member(eb, btrfs_item_nr(slot)); \ +} \ +static inline void btrfs_set_item_##member(const struct extent_buffer *eb, \ + int slot, u32 val) \ +{ \ + btrfs_set_raw_item_##member(eb, btrfs_item_nr(slot), val); \ +} \ +static inline u32 btrfs_token_item_##member(struct btrfs_map_token *token, \ + int slot) \ +{ \ + struct btrfs_item *item = btrfs_item_nr(slot); \ + return btrfs_token_raw_item_##member(token, item); \ +} \ +static inline void btrfs_set_token_item_##member(struct btrfs_map_token *token, \ + int slot, u32 val) \ +{ \ + struct btrfs_item *item = btrfs_item_nr(slot); \ + btrfs_set_token_raw_item_##member(token, item, val); \ } -static inline u32 btrfs_item_end_nr(const struct extent_buffer *eb, int nr) -{ - return btrfs_item_end(eb, btrfs_item_nr(nr)); -} - -static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr) -{ - return btrfs_item_offset(eb, btrfs_item_nr(nr)); -} +BTRFS_ITEM_SETGET_FUNCS(offset) +BTRFS_ITEM_SETGET_FUNCS(size); -static inline u32 btrfs_item_size_nr(const struct extent_buffer *eb, int nr) +static inline u32 btrfs_item_data_end(const struct extent_buffer *eb, int nr) { - return btrfs_item_size(eb, btrfs_item_nr(nr)); + return btrfs_item_offset(eb, nr) + btrfs_item_size(eb, nr); } static inline void btrfs_item_key(const struct extent_buffer *eb, @@ -1883,6 +2271,52 @@ BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); +#ifdef __LITTLE_ENDIAN + +/* + * Optimized helpers for little-endian architectures where CPU and on-disk + * structures have the same endianness and we can skip conversions. + */ + +static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu_key, + const struct btrfs_disk_key *disk_key) +{ + memcpy(cpu_key, disk_key, sizeof(struct btrfs_key)); +} + +static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk_key, + const struct btrfs_key *cpu_key) +{ + memcpy(disk_key, cpu_key, sizeof(struct btrfs_key)); +} + +static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb, + struct btrfs_key *cpu_key, int nr) +{ + struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; + + btrfs_node_key(eb, disk_key, nr); +} + +static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb, + struct btrfs_key *cpu_key, int nr) +{ + struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; + + btrfs_item_key(eb, disk_key, nr); +} + +static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb, + const struct btrfs_dir_item *item, + struct btrfs_key *cpu_key) +{ + struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; + + btrfs_dir_item_key(eb, item, disk_key); +} + +#else + static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, const struct btrfs_disk_key *disk) { @@ -1924,6 +2358,8 @@ static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb, btrfs_disk_key_to_cpu(key, &disk_key); } +#endif + /* struct btrfs_header */ BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64); BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, @@ -1971,16 +2407,6 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, btrfs_set_header_flags(eb, flags); } -static inline unsigned long btrfs_header_fsid(void) -{ - return offsetof(struct btrfs_header, fsid); -} - -static inline unsigned long btrfs_header_chunk_tree_uuid(const struct extent_buffer *eb) -{ - return offsetof(struct btrfs_header, chunk_tree_uuid); -} - static inline int btrfs_is_leaf(const struct extent_buffer *eb) { return btrfs_header_level(eb) == 0; @@ -1996,6 +2422,7 @@ BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8); BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item, generation, 64); BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(root_drop_level, struct btrfs_root_item, drop_level, 8); BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8); BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); @@ -2017,14 +2444,21 @@ BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, static inline bool btrfs_root_readonly(const struct btrfs_root *root) { + /* Byte-swap the constant at compile time, root_item::flags is LE */ return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0; } static inline bool btrfs_root_dead(const struct btrfs_root *root) { + /* Byte-swap the constant at compile time, root_item::flags is LE */ return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; } +static inline u64 btrfs_root_id(const struct btrfs_root *root) +{ + return root->root_key.objectid; +} + /* struct btrfs_root_backup */ BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, tree_root, 64); @@ -2177,8 +2611,6 @@ BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, chunk_root_level, 8); BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block, log_root, 64); -BTRFS_SETGET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block, - log_root_transid, 64); BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block, log_root_level, 8); BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, @@ -2212,7 +2644,7 @@ BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, int btrfs_super_csum_size(const struct btrfs_super_block *s); const char *btrfs_super_csum_name(u16 csum_type); const char *btrfs_super_csum_driver(u16 csum_type); -size_t __const btrfs_get_num_csums(void); +size_t __attribute_const__ btrfs_get_num_csums(void); /* @@ -2226,11 +2658,12 @@ static inline unsigned int leaf_data_end(const struct extent_buffer *leaf) if (nr == 0) return BTRFS_LEAF_DATA_SIZE(leaf->fs_info); - return btrfs_item_offset_nr(leaf, nr - 1); + return btrfs_item_offset(leaf, nr - 1); } /* struct btrfs_file_extent_item */ -BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, + type, 8); BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, struct btrfs_file_extent_item, disk_bytenr, 64); BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, @@ -2239,6 +2672,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, struct btrfs_file_extent_item, generation, 64); BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, struct btrfs_file_extent_item, num_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes, + struct btrfs_file_extent_item, ram_bytes, 64); BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_num_bytes, struct btrfs_file_extent_item, disk_num_bytes, 64); BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, @@ -2255,6 +2690,7 @@ static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) return BTRFS_FILE_EXTENT_INLINE_DATA_START + datasize; } +BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, disk_bytenr, 64); BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, @@ -2281,9 +2717,9 @@ BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, */ static inline u32 btrfs_file_extent_inline_item_len( const struct extent_buffer *eb, - struct btrfs_item *e) + int nr) { - return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; + return btrfs_item_size(eb, nr) - BTRFS_FILE_EXTENT_INLINE_DATA_START; } /* btrfs_qgroup_status_item */ @@ -2375,11 +2811,11 @@ BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right, /* helper function to cast into the data area of the leaf. */ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(BTRFS_LEAF_DATA_OFFSET + \ - btrfs_item_offset_nr(leaf, slot))) + btrfs_item_offset(leaf, slot))) #define btrfs_item_ptr_offset(leaf, slot) \ ((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \ - btrfs_item_offset_nr(leaf, slot))) + btrfs_item_offset(leaf, slot))) static inline u32 btrfs_crc32c(u32 crc, const void *address, unsigned length) { @@ -2424,27 +2860,6 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, enum btrfs_inline_ref_type is_data); u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset); -u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes); - -/* - * Use this if we would be adding new items, as we could split nodes as we cow - * down the tree. - */ -static inline u64 btrfs_calc_insert_metadata_size(struct btrfs_fs_info *fs_info, - unsigned num_items) -{ - return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; -} - -/* - * Doing a truncate or a modification won't result in new nodes or leaves, just - * what we need for COW. - */ -static inline u64 btrfs_calc_metadata_size(struct btrfs_fs_info *fs_info, - unsigned num_items) -{ - return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items; -} int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info, u64 start, u64 num_bytes); @@ -2458,21 +2873,23 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 offset, int metadata, u64 *refs, u64 *flags); -int btrfs_pin_extent(struct btrfs_fs_info *fs_info, - u64 bytenr, u64 num, int reserved); -int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info, +int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num, + int reserved); +int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes); int btrfs_exclude_logged_extents(struct extent_buffer *eb); int btrfs_cross_ref_exist(struct btrfs_root *root, - u64 objectid, u64 offset, u64 bytenr); + u64 objectid, u64 offset, u64 bytenr, bool strict, + struct btrfs_path *path); struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, const struct btrfs_disk_key *key, int level, u64 hint, - u64 empty_size); + u64 empty_size, + enum btrfs_lock_nesting nest); void btrfs_free_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + u64 root_id, struct extent_buffer *buf, u64 parent, int last_ref); int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, @@ -2490,34 +2907,62 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, int full_backref); int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, u64 flags, - int level, int is_data); + struct extent_buffer *eb, u64 flags, int level); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref); int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len, int delalloc); -int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, +int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start, u64 len); -void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref); -int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr); -void btrfs_get_block_group_trimming(struct btrfs_block_group *cache); -void btrfs_put_block_group_trimming(struct btrfs_block_group *cache); void btrfs_clear_space_info_full(struct btrfs_fs_info *info); +/* + * Different levels for to flush space when doing space reservations. + * + * The higher the level, the more methods we try to reclaim space. + */ enum btrfs_reserve_flush_enum { /* If we are in the transaction, we can't flush anything.*/ BTRFS_RESERVE_NO_FLUSH, + /* - * Flushing delalloc may cause deadlock somewhere, in this - * case, use FLUSH LIMIT + * Flush space by: + * - Running delayed inode items + * - Allocating a new chunk */ BTRFS_RESERVE_FLUSH_LIMIT, + + /* + * Flush space by: + * - Running delayed inode items + * - Running delayed refs + * - Running delalloc and waiting for ordered extents + * - Allocating a new chunk + */ BTRFS_RESERVE_FLUSH_EVICT, + + /* + * Flush space by above mentioned methods and by: + * - Running delayed iputs + * - Committing transaction + * + * Can be interrupted by a fatal signal. + */ + BTRFS_RESERVE_FLUSH_DATA, + BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE, BTRFS_RESERVE_FLUSH_ALL, + + /* + * Pretty much the same as FLUSH_ALL, but can also steal space from + * global rsv. + * + * Can be interrupted by a fatal signal. + */ + BTRFS_RESERVE_FLUSH_ALL_STEAL, }; enum btrfs_flush_state { @@ -2527,20 +2972,22 @@ enum btrfs_flush_state { FLUSH_DELAYED_REFS = 4, FLUSH_DELALLOC = 5, FLUSH_DELALLOC_WAIT = 6, - ALLOC_CHUNK = 7, - ALLOC_CHUNK_FORCE = 8, - RUN_DELAYED_IPUTS = 9, - COMMIT_TRANS = 10, + FLUSH_DELALLOC_FULL = 7, + ALLOC_CHUNK = 8, + ALLOC_CHUNK_FORCE = 9, + RUN_DELAYED_IPUTS = 10, + COMMIT_TRANS = 11, }; int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int nitems, bool use_global_rsv); -void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, +void btrfs_subvolume_release_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv); void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes); -int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); +int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, + u64 disk_num_bytes, bool noflush); u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end); @@ -2557,7 +3004,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, - int level, int *slot); + int *slot); int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2); int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, @@ -2568,8 +3015,6 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, struct btrfs_path *path, const struct btrfs_key *new_key); struct extent_buffer *btrfs_root_node(struct btrfs_root *root); -struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); -struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root); int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, int lowest_level, u64 min_trans); @@ -2582,7 +3027,8 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent, int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, - struct extent_buffer **cow_ret); + struct extent_buffer **cow_ret, + enum btrfs_lock_nesting nest); int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -2628,16 +3074,42 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans, return btrfs_del_items(trans, root, path, path->slots[0], 1); } -void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, - const struct btrfs_key *cpu_key, u32 *data_size, - u32 total_data, u32 total_size, int nr); +/* + * Describes a batch of items to insert in a btree. This is used by + * btrfs_insert_empty_items(). + */ +struct btrfs_item_batch { + /* + * Pointer to an array containing the keys of the items to insert (in + * sorted order). + */ + const struct btrfs_key *keys; + /* Pointer to an array containing the data size for each item to insert. */ + const u32 *data_sizes; + /* + * The sum of data sizes for all items. The caller can compute this while + * setting up the data_sizes array, so it ends up being more efficient + * than having btrfs_insert_empty_items() or setup_item_for_insert() + * doing it, as it would avoid an extra loop over a potentially large + * array, and in the case of setup_item_for_insert(), we would be doing + * it while holding a write lock on a leaf and often on upper level nodes + * too, unnecessarily increasing the size of a critical section. + */ + u32 total_data_size; + /* Size of the keys and data_sizes arrays (number of items in the batch). */ + int nr; +}; + +void btrfs_setup_item_for_insert(struct btrfs_root *root, + struct btrfs_path *path, + const struct btrfs_key *key, + u32 data_size); int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *key, void *data, u32 data_size); int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - const struct btrfs_key *cpu_key, u32 *data_size, - int nr); + const struct btrfs_item_batch *batch); static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -2645,13 +3117,52 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, const struct btrfs_key *key, u32 data_size) { - return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1); + struct btrfs_item_batch batch; + + batch.keys = key; + batch.data_sizes = &data_size; + batch.total_data_size = data_size; + batch.nr = 1; + + return btrfs_insert_empty_items(trans, root, path, &batch); } -int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq); + +int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_path *path); + +int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key, + struct btrfs_path *path); + +/* + * Search in @root for a given @key, and store the slot found in @found_key. + * + * @root: The root node of the tree. + * @key: The key we are looking for. + * @found_key: Will hold the found item. + * @path: Holds the current slot/leaf. + * @iter_ret: Contains the value returned from btrfs_search_slot or + * btrfs_get_next_valid_item, whichever was executed last. + * + * The @iter_ret is an output variable that will contain the return value of + * btrfs_search_slot, if it encountered an error, or the value returned from + * btrfs_get_next_valid_item otherwise. That return value can be 0, if a valid + * slot was found, 1 if there were no more leaves, and <0 if there was an error. + * + * It's recommended to use a separate variable for iter_ret and then use it to + * set the function return value so there's no confusion of the 0/1/errno + * values stemming from btrfs_search_slot. + */ +#define btrfs_for_each_slot(root, key, found_key, path, iter_ret) \ + for (iter_ret = btrfs_search_slot(NULL, (root), (key), (path), 0, 0); \ + (iter_ret) >= 0 && \ + (iter_ret = btrfs_get_next_valid_item((root), (found_key), (path))) == 0; \ + (path)->slots[0]++ \ + ) + static inline int btrfs_next_old_item(struct btrfs_root *root, struct btrfs_path *p, u64 time_seq) { @@ -2660,14 +3171,25 @@ static inline int btrfs_next_old_item(struct btrfs_root *root, return btrfs_next_old_leaf(root, p, time_seq); return 0; } + +/* + * Search the tree again to find a leaf with greater keys. + * + * Returns 0 if it found something or 1 if there are no greater leaves. + * Returns < 0 on error. + */ +static inline int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) +{ + return btrfs_next_old_leaf(root, path, 0); +} + static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) { return btrfs_next_old_item(root, p, 0); } int btrfs_leaf_free_space(struct extent_buffer *leaf); -int __must_check btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, - int update_ref, int for_reloc); +int __must_check btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, + int for_reloc); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, @@ -2689,35 +3211,27 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) * If we remount the fs to be R/O or umount the fs, the cleaner needn't do * anything except sleeping. This function is used to check the status of * the fs. + * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount, + * since setting and checking for SB_RDONLY in the superblock's flags is not + * atomic. */ static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info) { - return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info); + return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) || + btrfs_fs_closing(fs_info); } -static inline void free_fs_info(struct btrfs_fs_info *fs_info) +static inline void btrfs_set_sb_rdonly(struct super_block *sb) { - kfree(fs_info->balance_ctl); - kfree(fs_info->delayed_root); - kfree(fs_info->extent_root); - kfree(fs_info->tree_root); - kfree(fs_info->chunk_root); - kfree(fs_info->dev_root); - kfree(fs_info->csum_root); - kfree(fs_info->quota_root); - kfree(fs_info->uuid_root); - kfree(fs_info->free_space_root); - kfree(fs_info->super_copy); - kfree(fs_info->super_for_commit); - kvfree(fs_info); + sb->s_flags |= SB_RDONLY; + set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); } -/* tree mod log functions from ctree.c */ -u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, - struct seq_list *elem); -void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, - struct seq_list *elem); -int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); +static inline void btrfs_clear_sb_rdonly(struct super_block *sb) +{ + sb->s_flags &= ~SB_RDONLY; + clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); +} /* root-item.c */ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, @@ -2750,9 +3264,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, u64 subid); int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, u64 subid); -int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, - int (*check_func)(struct btrfs_fs_info *, u8 *, u8, - u64)); +int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info); /* dir-item.c */ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, @@ -2769,7 +3281,7 @@ struct btrfs_dir_item * btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, - u64 objectid, const char *name, int name_len, + u64 index, const char *name, int name_len, int mod); struct btrfs_dir_item * btrfs_search_dir_index_item(struct btrfs_root *root, @@ -2801,48 +3313,13 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 offset); int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset); -/* inode-item.c */ -int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - const char *name, int name_len, - u64 inode_objectid, u64 ref_objectid, u64 index); -int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - const char *name, int name_len, - u64 inode_objectid, u64 ref_objectid, u64 *index); -int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 objectid); -int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, - struct btrfs_key *location, int mod); - -struct btrfs_inode_extref * -btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - const char *name, int name_len, - u64 inode_objectid, u64 ref_objectid, int ins_len, - int cow); - -struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, - int slot, const char *name, - int name_len); -struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( - struct extent_buffer *leaf, int slot, u64 ref_objectid, - const char *name, int name_len); /* file-item.c */ -struct btrfs_dio_private; int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 len); -blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, - u64 offset, u8 *dst); -int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 pos, - u64 disk_offset, u64 disk_num_bytes, - u64 num_bytes, u64 offset, u64 ram_bytes, - u8 compression, u8 encryption, u16 other_encoding); +blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst); +int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid, u64 pos, + u64 num_bytes); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, @@ -2850,88 +3327,113 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_ordered_sum *sums); -blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, - u64 file_start, int contig); +blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio, + u64 offset, bool one_ordered); int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, - struct list_head *list, int search_commit); + struct list_head *list, int search_commit, + bool nowait); void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, const struct btrfs_path *path, struct btrfs_file_extent_item *fi, const bool new_inline, struct extent_map *em); +int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start, + u64 len); +int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start, + u64 len); +void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size); +u64 btrfs_file_extent_end(const struct btrfs_path *path); /* inode.c */ -struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, - u64 start, u64 len); +void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirror_num); +void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio, + int mirror_num, enum btrfs_compression_type compress_type); +int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page, + u32 pgoff, u8 *csum, const u8 * const csum_expected); +int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio, + u32 bio_offset, struct page *page, u32 pgoff); +unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, + u32 bio_offset, struct page *page, + u64 start, u64 end); +int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio, + u32 bio_offset, struct page *page, u32 pgoff); noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, - u64 *ram_bytes); + u64 *ram_bytes, bool nowait, bool strict); void __btrfs_del_delalloc_inode(struct btrfs_root *root, struct btrfs_inode *inode); struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_inode *dir, struct btrfs_inode *inode, const char *name, int name_len); int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_inode *parent_inode, struct btrfs_inode *inode, const char *name, int name_len, int add_backref, u64 index); int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry); -int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, - int front); -int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, u64 new_size, - u32 min_type); - -int btrfs_start_delalloc_snapshot(struct btrfs_root *root); -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr); -int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, +int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, + int front); + +int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context); +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, + bool in_reclaim_context); +int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, unsigned int extra_bits, struct extent_state **cached_state); -int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, - struct btrfs_root *new_root, - struct btrfs_root *parent_root, - u64 new_dirid); +struct btrfs_new_inode_args { + /* Input */ + struct inode *dir; + struct dentry *dentry; + struct inode *inode; + bool orphan; + bool subvol; + + /* + * Output from btrfs_new_inode_prepare(), input to + * btrfs_create_new_inode(). + */ + struct posix_acl *default_acl; + struct posix_acl *acl; +}; +int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args, + unsigned int *trans_num_items); +int btrfs_create_new_inode(struct btrfs_trans_handle *trans, + struct btrfs_new_inode_args *args); +void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args); +struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns, + struct inode *dir); void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state, - unsigned *bits); + u32 bits); void btrfs_clear_delalloc_extent(struct inode *inode, - struct extent_state *state, unsigned *bits); + struct extent_state *state, u32 bits); void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new, struct extent_state *other); void btrfs_split_delalloc_extent(struct inode *inode, struct extent_state *orig, u64 split); -int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio, - unsigned long bio_flags); -void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end); +void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end); vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf); -int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); -int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); void btrfs_free_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); int __init btrfs_init_cachep(void); void __cold btrfs_destroy_cachep(void); -struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, +struct inode *btrfs_iget_path(struct super_block *s, u64 ino, struct btrfs_root *root, struct btrfs_path *path); -struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root); +struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root); struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 end); int btrfs_update_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode); + struct btrfs_root *root, struct btrfs_inode *inode); int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode); + struct btrfs_root *root, struct btrfs_inode *inode); int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct btrfs_inode *inode); int btrfs_orphan_cleanup(struct btrfs_root *root); -int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); +int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size); void btrfs_add_delayed_iput(struct inode *inode); void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info); int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info); @@ -2942,23 +3444,55 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, struct btrfs_trans_handle *trans, int mode, u64 start, u64 num_bytes, u64 min_size, loff_t actual_len, u64 *alloc_hint); -int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page, +int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written, struct writeback_control *wbc); -int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end); -void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, - u64 end, int uptodate); +int btrfs_writepage_cow_fixup(struct page *page); +void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode, + struct page *page, u64 start, + u64 end, bool uptodate); +int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info, + int compress_type); +int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode, + u64 file_offset, u64 disk_bytenr, + u64 disk_io_size, + struct page **pages); +ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter, + struct btrfs_ioctl_encoded_io_args *encoded); +ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, + const struct btrfs_ioctl_encoded_io_args *encoded); + +ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, + size_t done_before); +struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, + size_t done_before); + extern const struct dentry_operations btrfs_dentry_operations; +/* Inode locking type flags, by default the exclusive lock is taken */ +#define BTRFS_ILOCK_SHARED (1U << 0) +#define BTRFS_ILOCK_TRY (1U << 1) +#define BTRFS_ILOCK_MMAP (1U << 2) + +int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags); +void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags); +void btrfs_update_inode_bytes(struct btrfs_inode *inode, + const u64 add_bytes, + const u64 del_bytes); +void btrfs_assert_inode_range_clean(struct btrfs_inode *inode, u64 start, u64 end); + /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int btrfs_fileattr_set(struct user_namespace *mnt_userns, + struct dentry *dentry, struct fileattr *fa); int btrfs_ioctl_get_supported_features(void __user *arg); void btrfs_sync_inode_flags_to_i_flags(struct inode *inode); int __pure btrfs_is_empty_uuid(u8 *uuid); -int btrfs_defrag_file(struct inode *inode, struct file *file, +int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, struct btrfs_ioctl_defrag_range_args *range, - u64 newer_than, unsigned long max_pages); + u64 newer_than, unsigned long max_to_defrag); void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, @@ -2968,37 +3502,33 @@ void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int __init btrfs_auto_defrag_init(void); void __cold btrfs_auto_defrag_exit(void); int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, - struct btrfs_inode *inode); + struct btrfs_inode *inode, u32 extent_thresh); int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); -void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end, - int skip_pinned); extern const struct file_operations btrfs_file_operations; -int __btrfs_drop_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - struct btrfs_path *path, u64 start, u64 end, - u64 *drop_end, int drop_cache, - int replace_extent, - u32 extent_item_size, - int *key_inserted); int btrfs_drop_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, u64 start, - u64 end, int drop_cache); -int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, - const u64 start, const u64 end, - struct btrfs_clone_extent_info *clone_info, + struct btrfs_root *root, struct btrfs_inode *inode, + struct btrfs_drop_extents_args *args); +int btrfs_replace_file_extents(struct btrfs_inode *inode, + struct btrfs_path *path, const u64 start, + const u64 end, + struct btrfs_replace_extent_info *extent_info, struct btrfs_trans_handle **trans_out); int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u64 start, u64 end); +ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from, + const struct btrfs_ioctl_encoded_io_args *encoded); int btrfs_release_file(struct inode *inode, struct file *file); -int btrfs_dirty_pages(struct inode *inode, struct page **pages, +int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, - struct extent_state **cached); + struct extent_state **cached, bool noreserve); int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); -loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags); +int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, + size_t *write_bytes, bool nowait); +void btrfs_check_nocow_unlock(struct btrfs_inode *inode); +bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end, + u64 *delalloc_start_ret, u64 *delalloc_end_ret); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, @@ -3008,17 +3538,37 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int btrfs_parse_options(struct btrfs_fs_info *info, char *options, unsigned long new_flags); int btrfs_sync_fs(struct super_block *sb, int wait); +char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, + u64 subvol_objectid); static inline __printf(2, 3) __cold void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) { } -#ifdef CONFIG_PRINTK +#ifdef CONFIG_PRINTK_INDEX + +#define btrfs_printk(fs_info, fmt, args...) \ +do { \ + printk_index_subsys_emit("%sBTRFS %s (device %s): ", NULL, fmt); \ + _btrfs_printk(fs_info, fmt, ##args); \ +} while (0) + +__printf(2, 3) +__cold +void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...); + +#elif defined(CONFIG_PRINTK) + +#define btrfs_printk(fs_info, fmt, args...) \ + _btrfs_printk(fs_info, fmt, ##args) + __printf(2, 3) __cold -void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...); +void _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...); + #else + #define btrfs_printk(fs_info, fmt, args...) \ btrfs_no_printk(fs_info, fmt, ##args) #endif @@ -3171,6 +3721,52 @@ static inline void assertfail(const char *expr, const char* file, int line) { } #define ASSERT(expr) (void)(expr) #endif +#if BITS_PER_LONG == 32 +#define BTRFS_32BIT_MAX_FILE_SIZE (((u64)ULONG_MAX + 1) << PAGE_SHIFT) +/* + * The warning threshold is 5/8th of the MAX_LFS_FILESIZE that limits the logical + * addresses of extents. + * + * For 4K page size it's about 10T, for 64K it's 160T. + */ +#define BTRFS_32BIT_EARLY_WARN_THRESHOLD (BTRFS_32BIT_MAX_FILE_SIZE * 5 / 8) +void btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info); +void btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info); +#endif + +/* + * Get the correct offset inside the page of extent buffer. + * + * @eb: target extent buffer + * @start: offset inside the extent buffer + * + * Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases. + */ +static inline size_t get_eb_offset_in_page(const struct extent_buffer *eb, + unsigned long offset) +{ + /* + * For sectorsize == PAGE_SIZE case, eb->start will always be aligned + * to PAGE_SIZE, thus adding it won't cause any difference. + * + * For sectorsize < PAGE_SIZE, we must only read the data that belongs + * to the eb, thus we have to take the eb->start into consideration. + */ + return offset_in_page(offset + eb->start); +} + +static inline unsigned long get_eb_page_index(unsigned long offset) +{ + /* + * For sectorsize == PAGE_SIZE case, plain >> PAGE_SHIFT is enough. + * + * For sectorsize < PAGE_SIZE case, we only support 64K PAGE_SIZE, + * and have ensured that all tree blocks are contained in one page, + * thus we always get index == 0. + */ + return offset >> PAGE_SHIFT; +} + /* * Use that for functions that are conditionally exported for sanity tests but * otherwise static @@ -3198,21 +3794,26 @@ const char * __attribute_const__ btrfs_decode_error(int errno); __cold void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, const char *function, - unsigned int line, int errno); + unsigned int line, int errno, bool first_hit); + +bool __cold abort_should_print_stack(int errno); /* * Call btrfs_abort_transaction as early as possible when an error condition is - * detected, that way the exact line number is reported. + * detected, that way the exact stack trace is reported for some errors. */ #define btrfs_abort_transaction(trans, errno) \ do { \ + bool first = false; \ /* Report first abort since mount */ \ if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ &((trans)->fs_info->fs_state))) { \ - if ((errno) != -EIO) { \ - WARN(1, KERN_DEBUG \ + first = true; \ + if (WARN(abort_should_print_stack(errno), \ + KERN_DEBUG \ "BTRFS: Transaction aborted (error %d)\n", \ - (errno)); \ + (errno))) { \ + /* Stack trace printed. */ \ } else { \ btrfs_debug((trans)->fs_info, \ "Transaction aborted (error %d)", \ @@ -3220,15 +3821,34 @@ do { \ } \ } \ __btrfs_abort_transaction((trans), __func__, \ - __LINE__, (errno)); \ + __LINE__, (errno), first); \ } while (0) +#ifdef CONFIG_PRINTK_INDEX + #define btrfs_handle_fs_error(fs_info, errno, fmt, args...) \ -do { \ - __btrfs_handle_fs_error((fs_info), __func__, __LINE__, \ - (errno), fmt, ##args); \ +do { \ + printk_index_subsys_emit( \ + "BTRFS: error (device %s%s) in %s:%d: errno=%d %s", \ + KERN_CRIT, fmt); \ + __btrfs_handle_fs_error((fs_info), __func__, __LINE__, \ + (errno), fmt, ##args); \ } while (0) +#else + +#define btrfs_handle_fs_error(fs_info, errno, fmt, args...) \ + __btrfs_handle_fs_error((fs_info), __func__, __LINE__, \ + (errno), fmt, ##args) + +#endif + +#define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \ + &(fs_info)->fs_state))) +#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \ + (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \ + &(fs_info)->fs_state))) + __printf(5, 6) __cold void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, @@ -3372,17 +3992,19 @@ static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL -struct posix_acl *btrfs_get_acl(struct inode *inode, int type); -int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); -int btrfs_init_acl(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *dir); +struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu); +int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, + struct posix_acl *acl, int type); +int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode, + struct posix_acl *acl, int type); #else #define btrfs_get_acl NULL #define btrfs_set_acl NULL -static inline int btrfs_init_acl(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *dir) +static inline int __btrfs_set_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct posix_acl *acl, + int type) { - return 0; + return -EOPNOTSUPP; } #endif @@ -3392,8 +4014,8 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_recover_relocation(struct btrfs_root *root); -int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); +int btrfs_recover_relocation(struct btrfs_fs_info *fs_info); +int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len); int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *cow); @@ -3401,6 +4023,10 @@ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, u64 *bytes_to_reserve); int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, struct btrfs_pending_snapshot *pending); +int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info); +struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, + u64 bytenr); +int btrfs_should_ignore_reloc_root(struct btrfs_root *root); /* scrub.c */ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, @@ -3412,16 +4038,9 @@ int btrfs_scrub_cancel(struct btrfs_fs_info *info); int btrfs_scrub_cancel_dev(struct btrfs_device *dev); int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, struct btrfs_scrub_progress *progress); -static inline void btrfs_init_full_stripe_locks_tree( - struct btrfs_full_stripe_locks_tree *locks_root) -{ - locks_root->root = RB_ROOT; - mutex_init(&locks_root->lock); -} /* dev-replace.c */ void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); -void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info); void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount); static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) @@ -3429,21 +4048,6 @@ static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) btrfs_bio_counter_sub(fs_info, 1); } -/* reada.c */ -struct reada_control { - struct btrfs_fs_info *fs_info; /* tree to prefetch */ - struct btrfs_key key_start; - struct btrfs_key key_end; /* exclusive */ - atomic_t elems; - struct kref refcnt; - wait_queue_head_t wait; -}; -struct reada_control *btrfs_reada_add(struct btrfs_root *root, - struct btrfs_key *start, struct btrfs_key *end); -int btrfs_reada_wait(void *handle); -void btrfs_reada_detach(void *handle); -int btree_readahead_hook(struct extent_buffer *eb, int err); - static inline int is_fstree(u64 rootid) { if (rootid == BTRFS_FS_TREE_OBJECTID || @@ -3458,13 +4062,40 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info) return signal_pending(current); } -#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) +/* verity.c */ +#ifdef CONFIG_FS_VERITY + +extern const struct fsverity_operations btrfs_verityops; +int btrfs_drop_verity_items(struct btrfs_inode *inode); +int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size); + +BTRFS_SETGET_FUNCS(verity_descriptor_encryption, struct btrfs_verity_descriptor_item, + encryption, 8); +BTRFS_SETGET_FUNCS(verity_descriptor_size, struct btrfs_verity_descriptor_item, + size, 64); +BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_encryption, + struct btrfs_verity_descriptor_item, encryption, 8); +BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_size, + struct btrfs_verity_descriptor_item, size, 64); + +#else + +static inline int btrfs_drop_verity_items(struct btrfs_inode *inode) +{ + return 0; +} + +static inline int btrfs_get_verity_descriptor(struct inode *inode, void *buf, + size_t buf_size) +{ + return -EPERM; +} + +#endif /* Sanity test specific functions */ #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -void btrfs_test_inode_set_ops(struct inode *inode); void btrfs_test_destroy_inode(struct inode *inode); - static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info) { return test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state); @@ -3476,4 +4107,22 @@ static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info) } #endif +static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root) +{ + return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID; +} + +/* + * We use page status Private2 to indicate there is an ordered extent with + * unfinished IO. + * + * Rename the Private2 accessors to Ordered, to improve readability. + */ +#define PageOrdered(page) PagePrivate2(page) +#define SetPageOrdered(page) SetPagePrivate2(page) +#define ClearPageOrdered(page) ClearPagePrivate2(page) +#define folio_test_ordered(folio) folio_test_private_2(folio) +#define folio_set_ordered(folio) folio_set_private_2(folio) +#define folio_clear_ordered(folio) folio_clear_private_2(folio) + #endif |