aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-03 14:54:52 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-03 14:54:52 -0700
commit353767e4aaeb7bc818273dfacbb01dd36a9db47a (patch)
tree8ad42b0ce92f1eef0e3b0f61149532f855aa6211 /include
parentMerge tag 'efi-efivars-removal-for-v5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi (diff)
parentbtrfs: don't call btrfs_page_set_checked in finish_compressed_bio_read (diff)
downloadwireguard-linux-353767e4aaeb7bc818273dfacbb01dd36a9db47a.tar.xz
wireguard-linux-353767e4aaeb7bc818273dfacbb01dd36a9db47a.zip
Merge tag 'for-5.20-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "This brings some long awaited changes, the send protocol bump, otherwise lots of small improvements and fixes. The main core part is reworking bio handling, cleaning up the submission and endio and improving error handling. There are some changes outside of btrfs adding helpers or updating API, listed at the end of the changelog. Features: - sysfs: - export chunk size, in debug mode add tunable for setting its size - show zoned among features (was only in debug mode) - show commit stats (number, last/max/total duration) - send protocol updated to 2 - new commands: - ability write larger data chunks than 64K - send raw compressed extents (uses the encoded data ioctls), ie. no decompression on send side, no compression needed on receive side if supported - send 'otime' (inode creation time) among other timestamps - send file attributes (a.k.a file flags and xflags) - this is first version bump, backward compatibility on send and receive side is provided - there are still some known and wanted commands that will be implemented in the near future, another version bump will be needed, however we want to minimize that to avoid causing usability issues - print checksum type and implementation at mount time - don't print some messages at mount (mentioned as people asked about it), we want to print messages namely for new features so let's make some space for that - big metadata - this has been supported for a long time and is not a feature that's worth mentioning - skinny metadata - same reason, set by default by mkfs Performance improvements: - reduced amount of reserved metadata for delayed items - when inserted items can be batched into one leaf - when deleting batched directory index items - when deleting delayed items used for deletion - overall improved count of files/sec, decreased subvolume lock contention - metadata item access bounds checker micro-optimized, with a few percent of improved runtime for metadata-heavy operations - increase direct io limit for read to 256 sectors, improved throughput by 3x on sample workload Notable fixes: - raid56 - reduce parity writes, skip sectors of stripe when there are no data updates - restore reading from on-disk data instead of using stripe cache, this reduces chances to damage correct data due to RMW cycle - refuse to replay log with unknown incompat read-only feature bit set - zoned - fix page locking when COW fails in the middle of allocation - improved tracking of active zones, ZNS drives may limit the number and there are ENOSPC errors due to that limit and not actual lack of space - adjust maximum extent size for zone append so it does not cause late ENOSPC due to underreservation - mirror reading error messages show the mirror number - don't fallback to buffered IO for NOWAIT direct IO writes, we don't have the NOWAIT semantics for buffered io yet - send, fix sending link commands for existing file paths when there are deleted and created hardlinks for same files - repair all mirrors for profiles with more than 1 copy (raid1c34) - fix repair of compressed extents, unify where error detection and repair happen Core changes: - bio completion cleanups - don't double defer compression bios - simplify endio workqueues - add more data to btrfs_bio to avoid allocation for read requests - rework bio error handling so it's same what block layer does, the submission works and errors are consumed in endio - when asynchronous bio offload fails fall back to synchronous checksum calculation to avoid errors under writeback or memory pressure - new trace points - raid56 events - ordered extent operations - super block log_root_transid deprecated (never used) - mixed_backref and big_metadata sysfs feature files removed, they've been default for sufficiently long time, there are no known users and mixed_backref could be confused with mixed_groups Non-btrfs changes, API updates: - minor highmem API update to cover const arguments - switch all kmap/kmap_atomic to kmap_local - remove redundant flush_dcache_page() - address_space_operations::writepage callback removed - add bdev_max_segments() helper" * tag 'for-5.20-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (163 commits) btrfs: don't call btrfs_page_set_checked in finish_compressed_bio_read btrfs: fix repair of compressed extents btrfs: remove the start argument to check_data_csum and export btrfs: pass a btrfs_bio to btrfs_repair_one_sector btrfs: simplify the pending I/O counting in struct compressed_bio btrfs: repair all known bad mirrors btrfs: merge btrfs_dev_stat_print_on_error with its only caller btrfs: join running log transaction when logging new name btrfs: simplify error handling in btrfs_lookup_dentry btrfs: send: always use the rbtree based inode ref management infrastructure btrfs: send: fix sending link commands for existing file paths btrfs: send: introduce recorded_ref_alloc and recorded_ref_free btrfs: zoned: wait until zone is finished when allocation didn't progress btrfs: zoned: write out partially allocated region btrfs: zoned: activate necessary block group btrfs: zoned: activate metadata block group on flush_space btrfs: zoned: disable metadata overcommit for zoned btrfs: zoned: introduce space_info->active_total_bytes btrfs: zoned: finish least available block group on data bg allocation btrfs: let can_allocate_chunk return error ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/blkdev.h5
-rw-r--r--include/linux/highmem-internal.h10
-rw-r--r--include/trace/events/btrfs.h158
-rw-r--r--include/uapi/linux/btrfs.h10
4 files changed, 177 insertions, 6 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d04bdf549efa..dccdf1551c62 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1167,6 +1167,11 @@ bdev_max_zone_append_sectors(struct block_device *bdev)
return queue_max_zone_append_sectors(bdev_get_queue(bdev));
}
+static inline unsigned int bdev_max_segments(struct block_device *bdev)
+{
+ return queue_max_segments(bdev_get_queue(bdev));
+}
+
static inline unsigned queue_logical_block_size(const struct request_queue *q)
{
int retval = 512;
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index cddb42ff0473..034b1106d022 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -8,7 +8,7 @@
#ifdef CONFIG_KMAP_LOCAL
void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
void *__kmap_local_page_prot(struct page *page, pgprot_t prot);
-void kunmap_local_indexed(void *vaddr);
+void kunmap_local_indexed(const void *vaddr);
void kmap_local_fork(struct task_struct *tsk);
void __kmap_local_sched_out(void);
void __kmap_local_sched_in(void);
@@ -89,7 +89,7 @@ static inline void *kmap_local_pfn(unsigned long pfn)
return __kmap_local_pfn_prot(pfn, kmap_prot);
}
-static inline void __kunmap_local(void *vaddr)
+static inline void __kunmap_local(const void *vaddr)
{
kunmap_local_indexed(vaddr);
}
@@ -121,7 +121,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn)
return __kmap_local_pfn_prot(pfn, kmap_prot);
}
-static inline void __kunmap_atomic(void *addr)
+static inline void __kunmap_atomic(const void *addr)
{
kunmap_local_indexed(addr);
pagefault_enable();
@@ -197,7 +197,7 @@ static inline void *kmap_local_pfn(unsigned long pfn)
return kmap_local_page(pfn_to_page(pfn));
}
-static inline void __kunmap_local(void *addr)
+static inline void __kunmap_local(const void *addr)
{
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
kunmap_flush_on_unmap(addr);
@@ -224,7 +224,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn)
return kmap_atomic(pfn_to_page(pfn));
}
-static inline void __kunmap_atomic(void *addr)
+static inline void __kunmap_atomic(const void *addr)
{
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
kunmap_flush_on_unmap(addr);
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 9ae94ef3e270..73df80d462dc 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -30,6 +30,8 @@ struct btrfs_qgroup;
struct extent_io_tree;
struct prelim_ref;
struct btrfs_space_info;
+struct btrfs_raid_bio;
+struct raid56_bio_trace_info;
#define show_ref_type(type) \
__print_symbolic(type, \
@@ -596,6 +598,70 @@ DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_put,
TP_ARGS(inode, ordered)
);
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_lookup,
+
+ TP_PROTO(const struct btrfs_inode *inode,
+ const struct btrfs_ordered_extent *ordered),
+
+ TP_ARGS(inode, ordered)
+);
+
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_lookup_range,
+
+ TP_PROTO(const struct btrfs_inode *inode,
+ const struct btrfs_ordered_extent *ordered),
+
+ TP_ARGS(inode, ordered)
+);
+
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_lookup_first_range,
+
+ TP_PROTO(const struct btrfs_inode *inode,
+ const struct btrfs_ordered_extent *ordered),
+
+ TP_ARGS(inode, ordered)
+);
+
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_lookup_for_logging,
+
+ TP_PROTO(const struct btrfs_inode *inode,
+ const struct btrfs_ordered_extent *ordered),
+
+ TP_ARGS(inode, ordered)
+);
+
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_lookup_first,
+
+ TP_PROTO(const struct btrfs_inode *inode,
+ const struct btrfs_ordered_extent *ordered),
+
+ TP_ARGS(inode, ordered)
+);
+
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_split,
+
+ TP_PROTO(const struct btrfs_inode *inode,
+ const struct btrfs_ordered_extent *ordered),
+
+ TP_ARGS(inode, ordered)
+);
+
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_dec_test_pending,
+
+ TP_PROTO(const struct btrfs_inode *inode,
+ const struct btrfs_ordered_extent *ordered),
+
+ TP_ARGS(inode, ordered)
+);
+
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_mark_finished,
+
+ TP_PROTO(const struct btrfs_inode *inode,
+ const struct btrfs_ordered_extent *ordered),
+
+ TP_ARGS(inode, ordered)
+);
+
DECLARE_EVENT_CLASS(btrfs__writepage,
TP_PROTO(const struct page *page, const struct inode *inode,
@@ -2258,6 +2324,98 @@ DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned,
TP_ARGS(fs_info, sinfo, old, diff)
);
+DECLARE_EVENT_CLASS(btrfs_raid56_bio,
+
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, full_stripe )
+ __field( u64, physical )
+ __field( u64, devid )
+ __field( u32, offset )
+ __field( u32, len )
+ __field( u8, opf )
+ __field( u8, total_stripes )
+ __field( u8, real_stripes )
+ __field( u8, nr_data )
+ __field( u8, stripe_nr )
+ ),
+
+ TP_fast_assign_btrfs(rbio->bioc->fs_info,
+ __entry->full_stripe = rbio->bioc->raid_map[0];
+ __entry->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+ __entry->len = bio->bi_iter.bi_size;
+ __entry->opf = bio_op(bio);
+ __entry->devid = trace_info->devid;
+ __entry->offset = trace_info->offset;
+ __entry->stripe_nr = trace_info->stripe_nr;
+ __entry->total_stripes = rbio->bioc->num_stripes;
+ __entry->real_stripes = rbio->real_stripes;
+ __entry->nr_data = rbio->nr_data;
+ ),
+ /*
+ * For type output, we need to output things like "DATA1"
+ * (the first data stripe), "DATA2" (the second data stripe),
+ * "PQ1" (P stripe),"PQ2" (Q stripe), "REPLACE0" (replace target device).
+ */
+ TP_printk_btrfs(
+"full_stripe=%llu devid=%lld type=%s%d offset=%d opf=0x%x physical=%llu len=%u",
+ __entry->full_stripe, __entry->devid,
+ (__entry->stripe_nr < __entry->nr_data) ? "DATA" :
+ ((__entry->stripe_nr < __entry->real_stripes) ? "PQ" :
+ "REPLACE"),
+ (__entry->stripe_nr < __entry->nr_data) ?
+ (__entry->stripe_nr + 1) :
+ ((__entry->stripe_nr < __entry->real_stripes) ?
+ (__entry->stripe_nr - __entry->nr_data + 1) : 0),
+ __entry->offset, __entry->opf, __entry->physical, __entry->len)
+);
+
+DEFINE_EVENT(btrfs_raid56_bio, raid56_read_partial,
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info)
+);
+
+DEFINE_EVENT(btrfs_raid56_bio, raid56_write_stripe,
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info)
+);
+
+
+DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_write_stripe,
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info)
+);
+
+DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read,
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info)
+);
+
+DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read_recover,
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info)
+);
+
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 3d0edbe3b991..7ada84e4a3ed 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -777,11 +777,19 @@ struct btrfs_ioctl_received_subvol_args {
*/
#define BTRFS_SEND_FLAG_VERSION 0x8
+/*
+ * Send compressed data using the ENCODED_WRITE command instead of decompressing
+ * the data and sending it with the WRITE command. This requires protocol
+ * version >= 2.
+ */
+#define BTRFS_SEND_FLAG_COMPRESSED 0x10
+
#define BTRFS_SEND_FLAG_MASK \
(BTRFS_SEND_FLAG_NO_FILE_DATA | \
BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
BTRFS_SEND_FLAG_OMIT_END_CMD | \
- BTRFS_SEND_FLAG_VERSION)
+ BTRFS_SEND_FLAG_VERSION | \
+ BTRFS_SEND_FLAG_COMPRESSED)
struct btrfs_ioctl_send_args {
__s64 send_fd; /* in */