aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/acpi/platform/acgcc.h9
-rw-r--r--include/asm-generic/cacheflush.h6
-rw-r--r--include/asm-generic/io.h2
-rw-r--r--include/clocksource/arm_arch_timer.h2
-rw-r--r--include/kunit/test.h6
-rw-r--r--include/linux/acpi.h5
-rw-r--r--include/linux/arch_topology.h5
-rw-r--r--include/linux/arm-smccc.h10
-rw-r--r--include/linux/ata.h1
-rw-r--r--include/linux/backing-dev.h25
-rw-r--r--include/linux/bio.h147
-rw-r--r--include/linux/blk-crypto-profile.h166
-rw-r--r--include/linux/blk-integrity.h183
-rw-r--r--include/linux/blk-mq.h583
-rw-r--r--include/linux/blk_types.h55
-rw-r--r--include/linux/blkdev.h949
-rw-r--r--include/linux/blktrace_api.h2
-rw-r--r--include/linux/bpf.h7
-rw-r--r--include/linux/bpf_types.h8
-rw-r--r--include/linux/bvec.h2
-rw-r--r--include/linux/cc_platform.h88
-rw-r--r--include/linux/cdrom.h1
-rw-r--r--include/linux/compiler-gcc.h10
-rw-r--r--include/linux/compiler_types.h5
-rw-r--r--include/linux/context_tracking.h2
-rw-r--r--include/linux/cpuhotplug.h4
-rw-r--r--include/linux/debug_locks.h2
-rw-r--r--include/linux/device-mapper.h4
-rw-r--r--include/linux/dma-resv.h2
-rw-r--r--include/linux/dsa/mv88e6xxx.h13
-rw-r--r--include/linux/dsa/ocelot.h49
-rw-r--r--include/linux/dsa/sja1105.h40
-rw-r--r--include/linux/elevator.h181
-rw-r--r--include/linux/elfcore.h2
-rw-r--r--include/linux/etherdevice.h2
-rw-r--r--include/linux/filter.h7
-rw-r--r--include/linux/firewire.h11
-rw-r--r--include/linux/flex_proportions.h9
-rw-r--r--include/linux/fortify-string.h77
-rw-r--r--include/linux/fs.h14
-rw-r--r--include/linux/fscrypt.h3
-rw-r--r--include/linux/genhd.h39
-rw-r--r--include/linux/gfp.h22
-rw-r--r--include/linux/highmem-internal.h11
-rw-r--r--include/linux/highmem.h37
-rw-r--r--include/linux/huge_mm.h15
-rw-r--r--include/linux/ieee80211.h30
-rw-r--r--include/linux/iomap.h5
-rw-r--r--include/linux/irq.h6
-rw-r--r--include/linux/irq_work.h8
-rw-r--r--include/linux/irqchip.h20
-rw-r--r--include/linux/irqdesc.h9
-rw-r--r--include/linux/kasan.h9
-rw-r--r--include/linux/kernel.h13
-rw-r--r--include/linux/kernel_stat.h1
-rw-r--r--include/linux/keyslot-manager.h120
-rw-r--r--include/linux/ksm.h4
-rw-r--r--include/linux/libata.h15
-rw-r--r--include/linux/lockdep.h17
-rw-r--r--include/linux/lockdep_types.h2
-rw-r--r--include/linux/mem_encrypt.h4
-rw-r--r--include/linux/memcontrol.h264
-rw-r--r--include/linux/memory.h5
-rw-r--r--include/linux/migrate.h4
-rw-r--r--include/linux/mlx5/driver.h1
-rw-r--r--include/linux/mlx5/mlx5_ifc.h10
-rw-r--r--include/linux/mm.h239
-rw-r--r--include/linux/mm_inline.h103
-rw-r--r--include/linux/mm_types.h81
-rw-r--r--include/linux/mmc/host.h4
-rw-r--r--include/linux/mmdebug.h20
-rw-r--r--include/linux/netfs.h77
-rw-r--r--include/linux/nvme-fc-driver.h7
-rw-r--r--include/linux/nvme-rdma.h2
-rw-r--r--include/linux/nvme.h30
-rw-r--r--include/linux/of_fdt.h1
-rw-r--r--include/linux/page-flags.h290
-rw-r--r--include/linux/page_idle.h99
-rw-r--r--include/linux/page_owner.h8
-rw-r--r--include/linux/page_ref.h158
-rw-r--r--include/linux/pagemap.h585
-rw-r--r--include/linux/part_stat.h1
-rw-r--r--include/linux/percpu-refcount.h33
-rw-r--r--include/linux/perf_event.h1
-rw-r--r--include/linux/platform_data/usb-omap1.h2
-rw-r--r--include/linux/preempt.h5
-rw-r--r--include/linux/qcom_scm.h71
-rw-r--r--include/linux/rmap.h10
-rw-r--r--include/linux/rwlock.h15
-rw-r--r--include/linux/rwlock_api_smp.h6
-rw-r--r--include/linux/sbitmap.h24
-rw-r--r--include/linux/sched.h65
-rw-r--r--include/linux/sched/idle.h4
-rw-r--r--include/linux/sched/mm.h29
-rw-r--r--include/linux/sched/task.h3
-rw-r--r--include/linux/sched/task_stack.h4
-rw-r--r--include/linux/sched/topology.h9
-rw-r--r--include/linux/secretmem.h2
-rw-r--r--include/linux/signal.h6
-rw-r--r--include/linux/skmsg.h1
-rw-r--r--include/linux/spi/spi.h3
-rw-r--r--include/linux/spinlock.h13
-rw-r--r--include/linux/spinlock_api_smp.h9
-rw-r--r--include/linux/spinlock_up.h1
-rw-r--r--include/linux/stddef.h65
-rw-r--r--include/linux/string.h44
-rw-r--r--include/linux/swap.h17
-rw-r--r--include/linux/syscalls.h7
-rw-r--r--include/linux/t10-pi.h2
-rw-r--r--include/linux/thread_info.h2
-rw-r--r--include/linux/topology.h13
-rw-r--r--include/linux/tpm.h1
-rw-r--r--include/linux/trace_recursion.h49
-rw-r--r--include/linux/user_namespace.h2
-rw-r--r--include/linux/vmstat.h113
-rw-r--r--include/linux/wait.h3
-rw-r--r--include/linux/workqueue.h5
-rw-r--r--include/linux/writeback.h23
-rw-r--r--include/linux/ww_mutex.h15
-rw-r--r--include/linux/xz.h106
-rw-r--r--include/net/cfg80211.h2
-rw-r--r--include/net/mctp.h2
-rw-r--r--include/net/mptcp.h4
-rw-r--r--include/net/netfilter/ipv6/nf_defrag_ipv6.h1
-rw-r--r--include/net/netfilter/nf_tables.h2
-rw-r--r--include/net/netns/netfilter.h6
-rw-r--r--include/net/sctp/sm.h6
-rw-r--r--include/net/sock.h9
-rw-r--r--include/net/tcp.h5
-rw-r--r--include/net/tls.h11
-rw-r--r--include/net/udp.h5
-rw-r--r--include/scsi/sas.h12
-rw-r--r--include/scsi/scsi_cmnd.h3
-rw-r--r--include/scsi/scsi_device.h2
-rw-r--r--include/soc/arc/timers.h4
-rw-r--r--include/soc/mscc/ocelot.h55
-rw-r--r--include/soc/mscc/ocelot_ptp.h3
-rw-r--r--include/soc/mscc/ocelot_vcap.h4
-rw-r--r--include/sound/hda_codec.h1
-rw-r--r--include/trace/events/block.h6
-rw-r--r--include/trace/events/cachefiles.h4
-rw-r--r--include/trace/events/erofs.h2
-rw-r--r--include/trace/events/io_uring.h61
-rw-r--r--include/trace/events/kyber.h19
-rw-r--r--include/trace/events/pagemap.h46
-rw-r--r--include/trace/events/writeback.h28
-rw-r--r--include/uapi/asm-generic/fcntl.h4
-rw-r--r--include/uapi/asm-generic/unistd.h5
-rw-r--r--include/uapi/drm/mga_drm.h22
-rw-r--r--include/uapi/linux/bcache.h445
-rw-r--r--include/uapi/linux/btrfs.h11
-rw-r--r--include/uapi/linux/cdrom.h19
-rw-r--r--include/uapi/linux/dlm_device.h4
-rw-r--r--include/uapi/linux/futex.h25
-rw-r--r--include/uapi/linux/hyperv.h2
-rw-r--r--include/uapi/linux/io_uring.h1
-rw-r--r--include/uapi/linux/mctp.h7
-rw-r--r--include/uapi/linux/perf_event.h34
-rw-r--r--include/uapi/linux/stddef.h37
-rw-r--r--include/uapi/linux/xfrm.h15
-rw-r--r--include/uapi/misc/habanalabs.h6
-rw-r--r--include/uapi/rdma/rdma_user_rxe.h4
-rw-r--r--include/uapi/sound/asoc.h4
-rw-r--r--include/xen/xen-ops.h15
164 files changed, 3881 insertions, 2940 deletions
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index fb172a03a753..20ecb004f5a4 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -22,9 +22,14 @@ typedef __builtin_va_list va_list;
#define va_arg(v, l) __builtin_va_arg(v, l)
#define va_copy(d, s) __builtin_va_copy(d, s)
#else
+#ifdef __KERNEL__
#include <linux/stdarg.h>
-#endif
-#endif
+#else
+/* Used to build acpi tools */
+#include <stdarg.h>
+#endif /* __KERNEL__ */
+#endif /* ACPI_USE_BUILTIN_STDARG */
+#endif /* ! va_arg */
#define ACPI_INLINE __inline__
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index 4a674db4e1fa..fedc0dfa4877 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -49,9 +49,15 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
static inline void flush_dcache_page(struct page *page)
{
}
+
+static inline void flush_dcache_folio(struct folio *folio) { }
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
#endif
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
+void flush_dcache_folio(struct folio *folio);
+#endif
#ifndef flush_dcache_mmap_lock
static inline void flush_dcache_mmap_lock(struct address_space *mapping)
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index cc7338f9e0d1..7ce93aaf69f8 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -957,7 +957,7 @@ static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
#ifndef iounmap
#define iounmap iounmap
-static inline void iounmap(void __iomem *addr)
+static inline void iounmap(volatile void __iomem *addr)
{
}
#endif
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 73c7139c866f..e715bdb720d5 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -24,7 +24,7 @@
enum arch_timer_reg {
ARCH_TIMER_REG_CTRL,
- ARCH_TIMER_REG_TVAL,
+ ARCH_TIMER_REG_CVAL,
};
enum arch_timer_ppi_nr {
diff --git a/include/kunit/test.h b/include/kunit/test.h
index 24b40e5c160b..018e776a34b9 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -613,7 +613,7 @@ void kunit_remove_resource(struct kunit *test, struct kunit_resource *res);
* and is automatically cleaned up after the test case concludes. See &struct
* kunit_resource for more information.
*/
-void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t flags);
+void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t gfp);
/**
* kunit_kmalloc() - Like kmalloc() except the allocation is *test managed*.
@@ -657,9 +657,9 @@ static inline void *kunit_kzalloc(struct kunit *test, size_t size, gfp_t gfp)
*
* See kcalloc() and kunit_kmalloc_array() for more information.
*/
-static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t flags)
+static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t gfp)
{
- return kunit_kmalloc_array(test, n, size, flags | __GFP_ZERO);
+ return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO);
}
void kunit_cleanup(struct kunit *test);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 974d497a897d..fbc2146050a4 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1353,6 +1353,7 @@ static inline int lpit_read_residency_count_address(u64 *address)
#ifdef CONFIG_ACPI_PPTT
int acpi_pptt_cpu_is_thread(unsigned int cpu);
int find_acpi_cpu_topology(unsigned int cpu, int level);
+int find_acpi_cpu_topology_cluster(unsigned int cpu);
int find_acpi_cpu_topology_package(unsigned int cpu);
int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
@@ -1365,6 +1366,10 @@ static inline int find_acpi_cpu_topology(unsigned int cpu, int level)
{
return -EINVAL;
}
+static inline int find_acpi_cpu_topology_cluster(unsigned int cpu)
+{
+ return -EINVAL;
+}
static inline int find_acpi_cpu_topology_package(unsigned int cpu)
{
return -EINVAL;
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index f180240dc95f..b97cea83b25e 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -62,10 +62,12 @@ void topology_set_thermal_pressure(const struct cpumask *cpus,
struct cpu_topology {
int thread_id;
int core_id;
+ int cluster_id;
int package_id;
int llc_id;
cpumask_t thread_sibling;
cpumask_t core_sibling;
+ cpumask_t cluster_sibling;
cpumask_t llc_sibling;
};
@@ -73,13 +75,16 @@ struct cpu_topology {
extern struct cpu_topology cpu_topology[NR_CPUS];
#define topology_physical_package_id(cpu) (cpu_topology[cpu].package_id)
+#define topology_cluster_id(cpu) (cpu_topology[cpu].cluster_id)
#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
+#define topology_cluster_cpumask(cpu) (&cpu_topology[cpu].cluster_sibling)
#define topology_llc_cpumask(cpu) (&cpu_topology[cpu].llc_sibling)
void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
+const struct cpumask *cpu_clustergroup_mask(int cpu);
void update_siblings_masks(unsigned int cpu);
void remove_cpu_topology(unsigned int cpuid);
void reset_cpu_topology(void);
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 7d1cabe15262..63ccb5252190 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -321,10 +321,20 @@ asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0);
* from register 0 to 3 on return from the SMC instruction. An optional
* quirk structure provides vendor specific behavior.
*/
+#ifdef CONFIG_HAVE_ARM_SMCCC
asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1,
unsigned long a2, unsigned long a3, unsigned long a4,
unsigned long a5, unsigned long a6, unsigned long a7,
struct arm_smccc_res *res, struct arm_smccc_quirk *quirk);
+#else
+static inline void __arm_smccc_smc(unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3, unsigned long a4,
+ unsigned long a5, unsigned long a6, unsigned long a7,
+ struct arm_smccc_res *res, struct arm_smccc_quirk *quirk)
+{
+ *res = (struct arm_smccc_res){};
+}
+#endif
/**
* __arm_smccc_hvc() - make HVC calls
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 1b44f40c7700..199e47e97d64 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -329,6 +329,7 @@ enum {
ATA_LOG_SECURITY = 0x06,
ATA_LOG_SATA_SETTINGS = 0x08,
ATA_LOG_ZONED_INFORMATION = 0x09,
+ ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47,
/* Identify device SATA settings log:*/
ATA_LOG_DEVSLP_OFFSET = 0x30,
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index ac7f231b8825..9c14f0a8dbe5 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -12,13 +12,13 @@
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/sched.h>
-#include <linux/blkdev.h>
#include <linux/device.h>
#include <linux/writeback.h>
-#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
#include <linux/slab.h>
+struct blkcg;
+
static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
{
kref_get(&bdi->refcnt);
@@ -64,7 +64,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
return atomic_long_read(&bdi->tot_write_bandwidth);
}
-static inline void __add_wb_stat(struct bdi_writeback *wb,
+static inline void wb_stat_mod(struct bdi_writeback *wb,
enum wb_stat_item item, s64 amount)
{
percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
@@ -72,12 +72,12 @@ static inline void __add_wb_stat(struct bdi_writeback *wb,
static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
- __add_wb_stat(wb, item, 1);
+ wb_stat_mod(wb, item, 1);
}
static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
- __add_wb_stat(wb, item, -1);
+ wb_stat_mod(wb, item, -1);
}
static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
@@ -133,20 +133,7 @@ static inline bool writeback_in_progress(struct bdi_writeback *wb)
return test_bit(WB_writeback_running, &wb->state);
}
-static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
-{
- struct super_block *sb;
-
- if (!inode)
- return &noop_backing_dev_info;
-
- sb = inode->i_sb;
-#ifdef CONFIG_BLOCK
- if (sb_is_blkdev_sb(sb))
- return I_BDEV(inode)->bd_disk->bdi;
-#endif
- return sb->s_bdi;
-}
+struct backing_dev_info *inode_to_bdi(struct inode *inode);
static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
{
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 00952e92eae1..fe6bdfbbef66 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -6,19 +6,10 @@
#define __LINUX_BIO_H
#include <linux/mempool.h>
-#include <linux/ioprio.h>
/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
#include <linux/blk_types.h>
#include <linux/uio.h>
-#define BIO_DEBUG
-
-#ifdef BIO_DEBUG
-#define BIO_BUG_ON BUG_ON
-#else
-#define BIO_BUG_ON
-#endif
-
#define BIO_MAX_VECS 256U
static inline unsigned int bio_max_segs(unsigned int nr_segs)
@@ -78,22 +69,6 @@ static inline bool bio_no_advance_iter(const struct bio *bio)
bio_op(bio) == REQ_OP_WRITE_ZEROES;
}
-static inline bool bio_mergeable(struct bio *bio)
-{
- if (bio->bi_opf & REQ_NOMERGE_FLAGS)
- return false;
-
- return true;
-}
-
-static inline unsigned int bio_cur_bytes(struct bio *bio)
-{
- if (bio_has_data(bio))
- return bio_iovec(bio).bv_len;
- else /* dataless requests such as discard */
- return bio->bi_iter.bi_size;
-}
-
static inline void *bio_data(struct bio *bio)
{
if (bio_has_data(bio))
@@ -102,25 +77,6 @@ static inline void *bio_data(struct bio *bio)
return NULL;
}
-/**
- * bio_full - check if the bio is full
- * @bio: bio to check
- * @len: length of one segment to be added
- *
- * Return true if @bio is full and one segment with @len bytes can't be
- * added to the bio, otherwise return false
- */
-static inline bool bio_full(struct bio *bio, unsigned len)
-{
- if (bio->bi_vcnt >= bio->bi_max_vecs)
- return true;
-
- if (bio->bi_iter.bi_size > UINT_MAX - len)
- return true;
-
- return false;
-}
-
static inline bool bio_next_segment(const struct bio *bio,
struct bvec_iter_all *iter)
{
@@ -163,6 +119,28 @@ static inline void bio_advance_iter_single(const struct bio *bio,
bvec_iter_advance_single(bio->bi_io_vec, iter, bytes);
}
+void __bio_advance(struct bio *, unsigned bytes);
+
+/**
+ * bio_advance - increment/complete a bio by some number of bytes
+ * @bio: bio to advance
+ * @bytes: number of bytes to complete
+ *
+ * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
+ * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
+ * be updated on the last bvec as well.
+ *
+ * @bio will then represent the remaining, uncompleted portion of the io.
+ */
+static inline void bio_advance(struct bio *bio, unsigned int nbytes)
+{
+ if (nbytes == bio->bi_iter.bi_size) {
+ bio->bi_iter.bi_size = 0;
+ return;
+ }
+ __bio_advance(bio, nbytes);
+}
+
#define __bio_for_each_segment(bvl, bio, iter, start) \
for (iter = (start); \
(iter).bi_size && \
@@ -265,37 +243,6 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
bio->bi_flags &= ~(1U << bit);
}
-static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
-{
- *bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
-}
-
-static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
-{
- struct bvec_iter iter = bio->bi_iter;
- int idx;
-
- bio_get_first_bvec(bio, bv);
- if (bv->bv_len == bio->bi_iter.bi_size)
- return; /* this bio only has a single bvec */
-
- bio_advance_iter(bio, &iter, iter.bi_size);
-
- if (!iter.bi_bvec_done)
- idx = iter.bi_idx - 1;
- else /* in the middle of bvec */
- idx = iter.bi_idx;
-
- *bv = bio->bi_io_vec[idx];
-
- /*
- * iter.bi_bvec_done records actual length of the last bvec
- * if this bio ends in the middle of one io vector
- */
- if (iter.bi_bvec_done)
- bv->bv_len = iter.bi_bvec_done;
-}
-
static inline struct bio_vec *bio_first_bvec_all(struct bio *bio)
{
WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
@@ -424,7 +371,7 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs)
return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set);
}
-extern blk_qc_t submit_bio(struct bio *);
+void submit_bio(struct bio *bio);
extern void bio_endio(struct bio *);
@@ -456,8 +403,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
struct request_queue;
extern int submit_bio_wait(struct bio *bio);
-extern void bio_advance(struct bio *, unsigned);
-
extern void bio_init(struct bio *bio, struct bio_vec *table,
unsigned short max_vecs);
extern void bio_uninit(struct bio *);
@@ -469,12 +414,11 @@ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
int bio_add_zone_append_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset);
-bool __bio_try_merge_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int off, bool *same_page);
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
-void bio_release_pages(struct bio *bio, bool mark_dirty);
+void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter);
+void __bio_release_pages(struct bio *bio, bool mark_dirty);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
@@ -482,27 +426,16 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio *src, struct bvec_iter *src_iter);
extern void bio_copy_data(struct bio *dst, struct bio *src);
extern void bio_free_pages(struct bio *bio);
-void bio_truncate(struct bio *bio, unsigned new_size);
void guard_bio_eod(struct bio *bio);
void zero_fill_bio(struct bio *bio);
-extern const char *bio_devname(struct bio *bio, char *buffer);
+static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
+{
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+ __bio_release_pages(bio, mark_dirty);
+}
-#define bio_set_dev(bio, bdev) \
-do { \
- bio_clear_flag(bio, BIO_REMAPPED); \
- if ((bio)->bi_bdev != (bdev)) \
- bio_clear_flag(bio, BIO_THROTTLED); \
- (bio)->bi_bdev = (bdev); \
- bio_associate_blkg(bio); \
-} while (0)
-
-#define bio_copy_dev(dst, src) \
-do { \
- bio_clear_flag(dst, BIO_REMAPPED); \
- (dst)->bi_bdev = (src)->bi_bdev; \
- bio_clone_blkg_association(dst, src); \
-} while (0)
+extern const char *bio_devname(struct bio *bio, char *buffer);
#define bio_dev(bio) \
disk_devt((bio)->bi_bdev->bd_disk)
@@ -521,6 +454,22 @@ static inline void bio_clone_blkg_association(struct bio *dst,
struct bio *src) { }
#endif /* CONFIG_BLK_CGROUP */
+static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
+{
+ bio_clear_flag(bio, BIO_REMAPPED);
+ if (bio->bi_bdev != bdev)
+ bio_clear_flag(bio, BIO_THROTTLED);
+ bio->bi_bdev = bdev;
+ bio_associate_blkg(bio);
+}
+
+static inline void bio_copy_dev(struct bio *dst, struct bio *src)
+{
+ bio_clear_flag(dst, BIO_REMAPPED);
+ dst->bi_bdev = src->bi_bdev;
+ bio_clone_blkg_association(dst, src);
+}
+
/*
* BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
*
@@ -784,7 +733,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
*/
static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
{
- bio->bi_opf |= REQ_HIPRI;
+ bio->bi_opf |= REQ_POLLED;
if (!is_sync_kiocb(kiocb))
bio->bi_opf |= REQ_NOWAIT;
}
diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h
new file mode 100644
index 000000000000..bbab65bd5428
--- /dev/null
+++ b/include/linux/blk-crypto-profile.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2019 Google LLC
+ */
+
+#ifndef __LINUX_BLK_CRYPTO_PROFILE_H
+#define __LINUX_BLK_CRYPTO_PROFILE_H
+
+#include <linux/bio.h>
+#include <linux/blk-crypto.h>
+
+struct blk_crypto_profile;
+
+/**
+ * struct blk_crypto_ll_ops - functions to control inline encryption hardware
+ *
+ * Low-level operations for controlling inline encryption hardware. This
+ * interface must be implemented by storage drivers that support inline
+ * encryption. All functions may sleep, are serialized by profile->lock, and
+ * are never called while profile->dev (if set) is runtime-suspended.
+ */
+struct blk_crypto_ll_ops {
+
+ /**
+ * @keyslot_program: Program a key into the inline encryption hardware.
+ *
+ * Program @key into the specified @slot in the inline encryption
+ * hardware, overwriting any key that the keyslot may already contain.
+ * The keyslot is guaranteed to not be in-use by any I/O.
+ *
+ * This is required if the device has keyslots. Otherwise (i.e. if the
+ * device is a layered device, or if the device is real hardware that
+ * simply doesn't have the concept of keyslots) it is never called.
+ *
+ * Must return 0 on success, or -errno on failure.
+ */
+ int (*keyslot_program)(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key,
+ unsigned int slot);
+
+ /**
+ * @keyslot_evict: Evict a key from the inline encryption hardware.
+ *
+ * If the device has keyslots, this function must evict the key from the
+ * specified @slot. The slot will contain @key, but there should be no
+ * need for the @key argument to be used as @slot should be sufficient.
+ * The keyslot is guaranteed to not be in-use by any I/O.
+ *
+ * If the device doesn't have keyslots itself, this function must evict
+ * @key from any underlying devices. @slot won't be valid in this case.
+ *
+ * If there are no keyslots and no underlying devices, this function
+ * isn't required.
+ *
+ * Must return 0 on success, or -errno on failure.
+ */
+ int (*keyslot_evict)(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key,
+ unsigned int slot);
+};
+
+/**
+ * struct blk_crypto_profile - inline encryption profile for a device
+ *
+ * This struct contains a storage device's inline encryption capabilities (e.g.
+ * the supported crypto algorithms), driver-provided functions to control the
+ * inline encryption hardware (e.g. programming and evicting keys), and optional
+ * device-independent keyslot management data.
+ */
+struct blk_crypto_profile {
+
+ /* public: Drivers must initialize the following fields. */
+
+ /**
+ * @ll_ops: Driver-provided functions to control the inline encryption
+ * hardware, e.g. program and evict keys.
+ */
+ struct blk_crypto_ll_ops ll_ops;
+
+ /**
+ * @max_dun_bytes_supported: The maximum number of bytes supported for
+ * specifying the data unit number (DUN). Specifically, the range of
+ * supported DUNs is 0 through (1 << (8 * max_dun_bytes_supported)) - 1.
+ */
+ unsigned int max_dun_bytes_supported;
+
+ /**
+ * @modes_supported: Array of bitmasks that specifies whether each
+ * combination of crypto mode and data unit size is supported.
+ * Specifically, the i'th bit of modes_supported[crypto_mode] is set if
+ * crypto_mode can be used with a data unit size of (1 << i). Note that
+ * only data unit sizes that are powers of 2 can be supported.
+ */
+ unsigned int modes_supported[BLK_ENCRYPTION_MODE_MAX];
+
+ /**
+ * @dev: An optional device for runtime power management. If the driver
+ * provides this device, it will be runtime-resumed before any function
+ * in @ll_ops is called and will remain resumed during the call.
+ */
+ struct device *dev;
+
+ /* private: The following fields shouldn't be accessed by drivers. */
+
+ /* Number of keyslots, or 0 if not applicable */
+ unsigned int num_slots;
+
+ /*
+ * Serializes all calls to functions in @ll_ops as well as all changes
+ * to @slot_hashtable. This can also be taken in read mode to look up
+ * keyslots while ensuring that they can't be changed concurrently.
+ */
+ struct rw_semaphore lock;
+
+ /* List of idle slots, with least recently used slot at front */
+ wait_queue_head_t idle_slots_wait_queue;
+ struct list_head idle_slots;
+ spinlock_t idle_slots_lock;
+
+ /*
+ * Hash table which maps struct *blk_crypto_key to keyslots, so that we
+ * can find a key's keyslot in O(1) time rather than O(num_slots).
+ * Protected by 'lock'.
+ */
+ struct hlist_head *slot_hashtable;
+ unsigned int log_slot_ht_size;
+
+ /* Per-keyslot data */
+ struct blk_crypto_keyslot *slots;
+};
+
+int blk_crypto_profile_init(struct blk_crypto_profile *profile,
+ unsigned int num_slots);
+
+int devm_blk_crypto_profile_init(struct device *dev,
+ struct blk_crypto_profile *profile,
+ unsigned int num_slots);
+
+unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot);
+
+blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key,
+ struct blk_crypto_keyslot **slot_ptr);
+
+void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot);
+
+bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
+ const struct blk_crypto_config *cfg);
+
+int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key);
+
+void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile);
+
+void blk_crypto_profile_destroy(struct blk_crypto_profile *profile);
+
+void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent,
+ const struct blk_crypto_profile *child);
+
+bool blk_crypto_has_capabilities(const struct blk_crypto_profile *target,
+ const struct blk_crypto_profile *reference);
+
+void blk_crypto_update_capabilities(struct blk_crypto_profile *dst,
+ const struct blk_crypto_profile *src);
+
+#endif /* __LINUX_BLK_CRYPTO_PROFILE_H */
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
new file mode 100644
index 000000000000..8a038ea0717e
--- /dev/null
+++ b/include/linux/blk-integrity.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BLK_INTEGRITY_H
+#define _LINUX_BLK_INTEGRITY_H
+
+#include <linux/blk-mq.h>
+
+struct request;
+
+enum blk_integrity_flags {
+ BLK_INTEGRITY_VERIFY = 1 << 0,
+ BLK_INTEGRITY_GENERATE = 1 << 1,
+ BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2,
+ BLK_INTEGRITY_IP_CHECKSUM = 1 << 3,
+};
+
+struct blk_integrity_iter {
+ void *prot_buf;
+ void *data_buf;
+ sector_t seed;
+ unsigned int data_size;
+ unsigned short interval;
+ const char *disk_name;
+};
+
+typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef void (integrity_prepare_fn) (struct request *);
+typedef void (integrity_complete_fn) (struct request *, unsigned int);
+
+struct blk_integrity_profile {
+ integrity_processing_fn *generate_fn;
+ integrity_processing_fn *verify_fn;
+ integrity_prepare_fn *prepare_fn;
+ integrity_complete_fn *complete_fn;
+ const char *name;
+};
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+void blk_integrity_register(struct gendisk *, struct blk_integrity *);
+void blk_integrity_unregister(struct gendisk *);
+int blk_integrity_compare(struct gendisk *, struct gendisk *);
+int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
+ struct scatterlist *);
+int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
+
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+{
+ struct blk_integrity *bi = &disk->queue->integrity;
+
+ if (!bi->profile)
+ return NULL;
+
+ return bi;
+}
+
+static inline struct blk_integrity *
+bdev_get_integrity(struct block_device *bdev)
+{
+ return blk_get_integrity(bdev->bd_disk);
+}
+
+static inline bool
+blk_integrity_queue_supports_integrity(struct request_queue *q)
+{
+ return q->integrity.profile;
+}
+
+static inline void blk_queue_max_integrity_segments(struct request_queue *q,
+ unsigned int segs)
+{
+ q->limits.max_integrity_segments = segs;
+}
+
+static inline unsigned short
+queue_max_integrity_segments(const struct request_queue *q)
+{
+ return q->limits.max_integrity_segments;
+}
+
+/**
+ * bio_integrity_intervals - Return number of integrity intervals for a bio
+ * @bi: blk_integrity profile for device
+ * @sectors: Size of the bio in 512-byte sectors
+ *
+ * Description: The block layer calculates everything in 512 byte
+ * sectors but integrity metadata is done in terms of the data integrity
+ * interval size of the storage device. Convert the block layer sectors
+ * to the appropriate number of integrity intervals.
+ */
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+ unsigned int sectors)
+{
+ return sectors >> (bi->interval_exp - 9);
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+ unsigned int sectors)
+{
+ return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
+}
+
+static inline bool blk_integrity_rq(struct request *rq)
+{
+ return rq->cmd_flags & REQ_INTEGRITY;
+}
+
+/*
+ * Return the first bvec that contains integrity data. Only drivers that are
+ * limited to a single integrity segment should use this helper.
+ */
+static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+{
+ if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
+ return NULL;
+ return rq->bio->bi_integrity->bip_vec;
+}
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+static inline int blk_rq_count_integrity_sg(struct request_queue *q,
+ struct bio *b)
+{
+ return 0;
+}
+static inline int blk_rq_map_integrity_sg(struct request_queue *q,
+ struct bio *b,
+ struct scatterlist *s)
+{
+ return 0;
+}
+static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
+{
+ return NULL;
+}
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+{
+ return NULL;
+}
+static inline bool
+blk_integrity_queue_supports_integrity(struct request_queue *q)
+{
+ return false;
+}
+static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
+{
+ return 0;
+}
+static inline void blk_integrity_register(struct gendisk *d,
+ struct blk_integrity *b)
+{
+}
+static inline void blk_integrity_unregister(struct gendisk *d)
+{
+}
+static inline void blk_queue_max_integrity_segments(struct request_queue *q,
+ unsigned int segs)
+{
+}
+static inline unsigned short
+queue_max_integrity_segments(const struct request_queue *q)
+{
+ return 0;
+}
+
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+ unsigned int sectors)
+{
+ return 0;
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+ unsigned int sectors)
+{
+ return 0;
+}
+static inline int blk_integrity_rq(struct request *rq)
+{
+ return 0;
+}
+
+static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+{
+ return NULL;
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+#endif /* _LINUX_BLK_INTEGRITY_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 13ba1861e688..8682663e7368 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -6,10 +6,226 @@
#include <linux/sbitmap.h>
#include <linux/srcu.h>
#include <linux/lockdep.h>
+#include <linux/scatterlist.h>
+#include <linux/prefetch.h>
struct blk_mq_tags;
struct blk_flush_queue;
+#define BLKDEV_MIN_RQ 4
+#define BLKDEV_DEFAULT_RQ 128
+
+typedef void (rq_end_io_fn)(struct request *, blk_status_t);
+
+/*
+ * request flags */
+typedef __u32 __bitwise req_flags_t;
+
+/* drive already may have started this one */
+#define RQF_STARTED ((__force req_flags_t)(1 << 1))
+/* may not be passed by ioscheduler */
+#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3))
+/* request for flush sequence */
+#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4))
+/* merge of different types, fail separately */
+#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5))
+/* track inflight for MQ */
+#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6))
+/* don't call prep for this one */
+#define RQF_DONTPREP ((__force req_flags_t)(1 << 7))
+/* vaguely specified driver internal error. Ignored by the block layer */
+#define RQF_FAILED ((__force req_flags_t)(1 << 10))
+/* don't warn about errors */
+#define RQF_QUIET ((__force req_flags_t)(1 << 11))
+/* elevator private data attached */
+#define RQF_ELVPRIV ((__force req_flags_t)(1 << 12))
+/* account into disk and partition IO statistics */
+#define RQF_IO_STAT ((__force req_flags_t)(1 << 13))
+/* runtime pm request */
+#define RQF_PM ((__force req_flags_t)(1 << 15))
+/* on IO scheduler merge hash */
+#define RQF_HASHED ((__force req_flags_t)(1 << 16))
+/* track IO completion time */
+#define RQF_STATS ((__force req_flags_t)(1 << 17))
+/* Look at ->special_vec for the actual data payload instead of the
+ bio chain. */
+#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
+/* The per-zone write lock is held for this request */
+#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
+/* already slept for hybrid poll */
+#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20))
+/* ->timeout has been called, don't expire again */
+#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
+/* queue has elevator attached */
+#define RQF_ELV ((__force req_flags_t)(1 << 22))
+
+/* flags that prevent us from merging requests: */
+#define RQF_NOMERGE_FLAGS \
+ (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
+
+enum mq_rq_state {
+ MQ_RQ_IDLE = 0,
+ MQ_RQ_IN_FLIGHT = 1,
+ MQ_RQ_COMPLETE = 2,
+};
+
+/*
+ * Try to put the fields that are referenced together in the same cacheline.
+ *
+ * If you modify this structure, make sure to update blk_rq_init() and
+ * especially blk_mq_rq_ctx_init() to take care of the added fields.
+ */
+struct request {
+ struct request_queue *q;
+ struct blk_mq_ctx *mq_ctx;
+ struct blk_mq_hw_ctx *mq_hctx;
+
+ unsigned int cmd_flags; /* op and common flags */
+ req_flags_t rq_flags;
+
+ int tag;
+ int internal_tag;
+
+ unsigned int timeout;
+
+ /* the following two fields are internal, NEVER access directly */
+ unsigned int __data_len; /* total data len */
+ sector_t __sector; /* sector cursor */
+
+ struct bio *bio;
+ struct bio *biotail;
+
+ union {
+ struct list_head queuelist;
+ struct request *rq_next;
+ };
+
+ struct gendisk *rq_disk;
+ struct block_device *part;
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+ /* Time that the first bio started allocating this request. */
+ u64 alloc_time_ns;
+#endif
+ /* Time that this request was allocated for this IO. */
+ u64 start_time_ns;
+ /* Time that I/O was submitted to the device. */
+ u64 io_start_time_ns;
+
+#ifdef CONFIG_BLK_WBT
+ unsigned short wbt_flags;
+#endif
+ /*
+ * rq sectors used for blk stats. It has the same value
+ * with blk_rq_sectors(rq), except that it never be zeroed
+ * by completion.
+ */
+ unsigned short stats_sectors;
+
+ /*
+ * Number of scatter-gather DMA addr+len pairs after
+ * physical address coalescing is performed.
+ */
+ unsigned short nr_phys_segments;
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ unsigned short nr_integrity_segments;
+#endif
+
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+ struct bio_crypt_ctx *crypt_ctx;
+ struct blk_crypto_keyslot *crypt_keyslot;
+#endif
+
+ unsigned short write_hint;
+ unsigned short ioprio;
+
+ enum mq_rq_state state;
+ refcount_t ref;
+
+ unsigned long deadline;
+
+ /*
+ * The hash is used inside the scheduler, and killed once the
+ * request reaches the dispatch list. The ipi_list is only used
+ * to queue the request for softirq completion, which is long
+ * after the request has been unhashed (and even removed from
+ * the dispatch list).
+ */
+ union {
+ struct hlist_node hash; /* merge hash */
+ struct llist_node ipi_list;
+ };
+
+ /*
+ * The rb_node is only used inside the io scheduler, requests
+ * are pruned when moved to the dispatch queue. So let the
+ * completion_data share space with the rb_node.
+ */
+ union {
+ struct rb_node rb_node; /* sort/lookup */
+ struct bio_vec special_vec;
+ void *completion_data;
+ int error_count; /* for legacy drivers, don't use */
+ };
+
+
+ /*
+ * Three pointers are available for the IO schedulers, if they need
+ * more they have to dynamically allocate it. Flush requests are
+ * never put on the IO scheduler. So let the flush fields share
+ * space with the elevator data.
+ */
+ union {
+ struct {
+ struct io_cq *icq;
+ void *priv[2];
+ } elv;
+
+ struct {
+ unsigned int seq;
+ struct list_head list;
+ rq_end_io_fn *saved_end_io;
+ } flush;
+ };
+
+ union {
+ struct __call_single_data csd;
+ u64 fifo_time;
+ };
+
+ /*
+ * completion callback.
+ */
+ rq_end_io_fn *end_io;
+ void *end_io_data;
+};
+
+#define req_op(req) \
+ ((req)->cmd_flags & REQ_OP_MASK)
+
+static inline bool blk_rq_is_passthrough(struct request *rq)
+{
+ return blk_op_is_passthrough(req_op(rq));
+}
+
+static inline unsigned short req_get_ioprio(struct request *req)
+{
+ return req->ioprio;
+}
+
+#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ)
+
+#define rq_dma_dir(rq) \
+ (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)
+
+enum blk_eh_timer_return {
+ BLK_EH_DONE, /* drivers has completed the command */
+ BLK_EH_RESET_TIMER, /* reset timer and try again */
+};
+
+#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
+#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
+
/**
* struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
* block device
@@ -126,9 +342,6 @@ struct blk_mq_hw_ctx {
unsigned long queued;
/** @run: Number of dispatched requests. */
unsigned long run;
-#define BLK_MQ_MAX_DISPATCH_ORDER 7
- /** @dispatched: Number of dispatch requests by queue. */
- unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
/** @numa_node: NUMA node the storage adapter has been connected to. */
unsigned int numa_node;
@@ -148,13 +361,6 @@ struct blk_mq_hw_ctx {
/** @kobj: Kernel object for sysfs. */
struct kobject kobj;
- /** @poll_considered: Count times blk_poll() was called. */
- unsigned long poll_considered;
- /** @poll_invoked: Count how many requests blk_poll() polled. */
- unsigned long poll_invoked;
- /** @poll_success: Count how many polled requests were completed. */
- unsigned long poll_success;
-
#ifdef CONFIG_BLK_DEBUG_FS
/**
* @debugfs_dir: debugfs directory for this hardware queue. Named
@@ -232,13 +438,11 @@ enum hctx_type {
* @flags: Zero or more BLK_MQ_F_* flags.
* @driver_data: Pointer to data owned by the block driver that created this
* tag set.
- * @active_queues_shared_sbitmap:
- * number of active request queues per tag set.
- * @__bitmap_tags: A shared tags sbitmap, used over all hctx's
- * @__breserved_tags:
- * A shared reserved tags sbitmap, used over all hctx's
* @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues
* elements.
+ * @shared_tags:
+ * Shared set of tags. Has @nr_hw_queues elements. If set,
+ * shared by all @tags.
* @tag_list_lock: Serializes tag_list accesses.
* @tag_list: List of the request queues that use this tag set. See also
* request_queue.tag_set_list.
@@ -255,12 +459,11 @@ struct blk_mq_tag_set {
unsigned int timeout;
unsigned int flags;
void *driver_data;
- atomic_t active_queues_shared_sbitmap;
- struct sbitmap_queue __bitmap_tags;
- struct sbitmap_queue __breserved_tags;
struct blk_mq_tags **tags;
+ struct blk_mq_tags *shared_tags;
+
struct mutex tag_list_lock;
struct list_head tag_list;
};
@@ -330,7 +533,7 @@ struct blk_mq_ops {
/**
* @poll: Called to poll for completion of a specific tag.
*/
- int (*poll)(struct blk_mq_hw_ctx *);
+ int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *);
/**
* @complete: Mark the request as complete.
@@ -364,11 +567,6 @@ struct blk_mq_ops {
unsigned int);
/**
- * @initialize_rq_fn: Called from inside blk_get_request().
- */
- void (*initialize_rq_fn)(struct request *rq);
-
- /**
* @cleanup_rq: Called before freeing one request which isn't completed
* yet, and usually for freeing the driver private data.
*/
@@ -432,6 +630,8 @@ enum {
((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
+#define BLK_MQ_NO_HCTX_IDX (-1U)
+
struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
struct lock_class_key *lkclass);
#define blk_mq_alloc_disk(set, queuedata) \
@@ -451,8 +651,6 @@ int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
unsigned int set_flags);
void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
-void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
-
void blk_mq_free_request(struct request *rq);
bool blk_mq_queue_inflight(struct request_queue *q);
@@ -471,7 +669,40 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
unsigned int op, blk_mq_req_flags_t flags,
unsigned int hctx_idx);
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
+
+/*
+ * Tag address space map.
+ */
+struct blk_mq_tags {
+ unsigned int nr_tags;
+ unsigned int nr_reserved_tags;
+
+ atomic_t active_queues;
+
+ struct sbitmap_queue bitmap_tags;
+ struct sbitmap_queue breserved_tags;
+
+ struct request **rqs;
+ struct request **static_rqs;
+ struct list_head page_list;
+
+ /*
+ * used to clear request reference in rqs[] before freeing one
+ * request pool
+ */
+ spinlock_t lock;
+};
+
+static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags,
+ unsigned int tag)
+{
+ if (tag < tags->nr_tags) {
+ prefetch(tags->rqs[tag]);
+ return tags->rqs[tag];
+ }
+
+ return NULL;
+}
enum {
BLK_MQ_UNIQUE_TAG_BITS = 16,
@@ -524,6 +755,35 @@ static inline void blk_mq_set_request_complete(struct request *rq)
void blk_mq_start_request(struct request *rq);
void blk_mq_end_request(struct request *rq, blk_status_t error);
void __blk_mq_end_request(struct request *rq, blk_status_t error);
+void blk_mq_end_request_batch(struct io_comp_batch *ib);
+
+/*
+ * Only need start/end time stamping if we have iostat or
+ * blk stats enabled, or using an IO scheduler.
+ */
+static inline bool blk_mq_need_time_stamp(struct request *rq)
+{
+ return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
+}
+
+/*
+ * Batched completions only work when there is no I/O error and no special
+ * ->end_io handler.
+ */
+static inline bool blk_mq_add_to_batch(struct request *req,
+ struct io_comp_batch *iob, int ioerror,
+ void (*complete)(struct io_comp_batch *))
+{
+ if (!iob || (req->rq_flags & RQF_ELV) || req->end_io || ioerror)
+ return false;
+ if (!iob->complete)
+ iob->complete = complete;
+ else if (iob->complete != complete)
+ return false;
+ iob->need_ts |= blk_mq_need_time_stamp(req);
+ rq_list_add(&iob->req_list, req);
+ return true;
+}
void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
@@ -605,16 +865,6 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
for ((i) = 0; (i) < (hctx)->nr_ctx && \
({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
-static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
-{
- if (rq->tag != -1)
- return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT);
-
- return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) |
- BLK_QC_T_INTERNAL;
-}
-
static inline void blk_mq_cleanup_rq(struct request *rq)
{
if (rq->q->mq_ops->cleanup_rq)
@@ -633,8 +883,265 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
rq->rq_disk = bio->bi_bdev->bd_disk;
}
-blk_qc_t blk_mq_submit_bio(struct bio *bio);
void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
struct lock_class_key *key);
+static inline bool rq_is_sync(struct request *rq)
+{
+ return op_is_sync(rq->cmd_flags);
+}
+
+void blk_rq_init(struct request_queue *q, struct request *rq);
+int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
+ struct bio_set *bs, gfp_t gfp_mask,
+ int (*bio_ctr)(struct bio *, struct bio *, void *), void *data);
+void blk_rq_unprep_clone(struct request *rq);
+blk_status_t blk_insert_cloned_request(struct request_queue *q,
+ struct request *rq);
+
+struct rq_map_data {
+ struct page **pages;
+ int page_order;
+ int nr_entries;
+ unsigned long offset;
+ int null_mapped;
+ int from_user;
+};
+
+int blk_rq_map_user(struct request_queue *, struct request *,
+ struct rq_map_data *, void __user *, unsigned long, gfp_t);
+int blk_rq_map_user_iov(struct request_queue *, struct request *,
+ struct rq_map_data *, const struct iov_iter *, gfp_t);
+int blk_rq_unmap_user(struct bio *);
+int blk_rq_map_kern(struct request_queue *, struct request *, void *,
+ unsigned int, gfp_t);
+int blk_rq_append_bio(struct request *rq, struct bio *bio);
+void blk_execute_rq_nowait(struct gendisk *, struct request *, int,
+ rq_end_io_fn *);
+blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq,
+ int at_head);
+
+struct req_iterator {
+ struct bvec_iter iter;
+ struct bio *bio;
+};
+
+#define __rq_for_each_bio(_bio, rq) \
+ if ((rq->bio)) \
+ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
+
+#define rq_for_each_segment(bvl, _rq, _iter) \
+ __rq_for_each_bio(_iter.bio, _rq) \
+ bio_for_each_segment(bvl, _iter.bio, _iter.iter)
+
+#define rq_for_each_bvec(bvl, _rq, _iter) \
+ __rq_for_each_bio(_iter.bio, _rq) \
+ bio_for_each_bvec(bvl, _iter.bio, _iter.iter)
+
+#define rq_iter_last(bvec, _iter) \
+ (_iter.bio->bi_next == NULL && \
+ bio_iter_last(bvec, _iter.iter))
+
+/*
+ * blk_rq_pos() : the current sector
+ * blk_rq_bytes() : bytes left in the entire request
+ * blk_rq_cur_bytes() : bytes left in the current segment
+ * blk_rq_err_bytes() : bytes left till the next error boundary
+ * blk_rq_sectors() : sectors left in the entire request
+ * blk_rq_cur_sectors() : sectors left in the current segment
+ * blk_rq_stats_sectors() : sectors of the entire request used for stats
+ */
+static inline sector_t blk_rq_pos(const struct request *rq)
+{
+ return rq->__sector;
+}
+
+static inline unsigned int blk_rq_bytes(const struct request *rq)
+{
+ return rq->__data_len;
+}
+
+static inline int blk_rq_cur_bytes(const struct request *rq)
+{
+ if (!rq->bio)
+ return 0;
+ if (!bio_has_data(rq->bio)) /* dataless requests such as discard */
+ return rq->bio->bi_iter.bi_size;
+ return bio_iovec(rq->bio).bv_len;
+}
+
+unsigned int blk_rq_err_bytes(const struct request *rq);
+
+static inline unsigned int blk_rq_sectors(const struct request *rq)
+{
+ return blk_rq_bytes(rq) >> SECTOR_SHIFT;
+}
+
+static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
+{
+ return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
+}
+
+static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
+{
+ return rq->stats_sectors;
+}
+
+/*
+ * Some commands like WRITE SAME have a payload or data transfer size which
+ * is different from the size of the request. Any driver that supports such
+ * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
+ * calculate the data transfer size.
+ */
+static inline unsigned int blk_rq_payload_bytes(struct request *rq)
+{
+ if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+ return rq->special_vec.bv_len;
+ return blk_rq_bytes(rq);
+}
+
+/*
+ * Return the first full biovec in the request. The caller needs to check that
+ * there are any bvecs before calling this helper.
+ */
+static inline struct bio_vec req_bvec(struct request *rq)
+{
+ if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+ return rq->special_vec;
+ return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter);
+}
+
+static inline unsigned int blk_rq_count_bios(struct request *rq)
+{
+ unsigned int nr_bios = 0;
+ struct bio *bio;
+
+ __rq_for_each_bio(bio, rq)
+ nr_bios++;
+
+ return nr_bios;
+}
+
+void blk_steal_bios(struct bio_list *list, struct request *rq);
+
+/*
+ * Request completion related functions.
+ *
+ * blk_update_request() completes given number of bytes and updates
+ * the request without completing it.
+ */
+bool blk_update_request(struct request *rq, blk_status_t error,
+ unsigned int nr_bytes);
+void blk_abort_request(struct request *);
+
+/*
+ * Number of physical segments as sent to the device.
+ *
+ * Normally this is the number of discontiguous data segments sent by the
+ * submitter. But for data-less command like discard we might have no
+ * actual data segments submitted, but the driver might have to add it's
+ * own special payload. In that case we still return 1 here so that this
+ * special payload will be mapped.
+ */
+static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
+{
+ if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+ return 1;
+ return rq->nr_phys_segments;
+}
+
+/*
+ * Number of discard segments (or ranges) the driver needs to fill in.
+ * Each discard bio merged into a request is counted as one segment.
+ */
+static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
+{
+ return max_t(unsigned short, rq->nr_phys_segments, 1);
+}
+
+int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
+ struct scatterlist *sglist, struct scatterlist **last_sg);
+static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
+ struct scatterlist *sglist)
+{
+ struct scatterlist *last_sg = NULL;
+
+ return __blk_rq_map_sg(q, rq, sglist, &last_sg);
+}
+void blk_dump_rq_flags(struct request *, char *);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+static inline unsigned int blk_rq_zone_no(struct request *rq)
+{
+ return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
+}
+
+static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
+{
+ return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
+}
+
+bool blk_req_needs_zone_write_lock(struct request *rq);
+bool blk_req_zone_write_trylock(struct request *rq);
+void __blk_req_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_unlock(struct request *rq);
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+ if (blk_req_needs_zone_write_lock(rq))
+ __blk_req_zone_write_lock(rq);
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+ if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
+ __blk_req_zone_write_unlock(rq);
+}
+
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+ return rq->q->seq_zones_wlock &&
+ test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+ if (!blk_req_needs_zone_write_lock(rq))
+ return true;
+ return !blk_req_zone_is_write_locked(rq);
+}
+#else /* CONFIG_BLK_DEV_ZONED */
+static inline bool blk_req_needs_zone_write_lock(struct request *rq)
+{
+ return false;
+}
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+}
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+ return false;
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+ return true;
+}
+#endif /* CONFIG_BLK_DEV_ZONED */
+
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
#endif
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+void rq_flush_dcache_pages(struct request *rq);
+#else
+static inline void rq_flush_dcache_pages(struct request *rq)
+{
+}
+#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */
+#endif /* BLK_MQ_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index be622b5a21ed..fe065c394fff 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -20,8 +20,26 @@ struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
struct bio_crypt_ctx;
+/*
+ * The basic unit of block I/O is a sector. It is used in a number of contexts
+ * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
+ * bytes. Variables of type sector_t represent an offset or size that is a
+ * multiple of 512 bytes. Hence these two constants.
+ */
+#ifndef SECTOR_SHIFT
+#define SECTOR_SHIFT 9
+#endif
+#ifndef SECTOR_SIZE
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#endif
+
+#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
+#define SECTOR_MASK (PAGE_SECTORS - 1)
+
struct block_device {
sector_t bd_start_sect;
+ sector_t bd_nr_sectors;
struct disk_stats __percpu *bd_stats;
unsigned long bd_stamp;
bool bd_read_only; /* read-only policy */
@@ -38,6 +56,7 @@ struct block_device {
u8 bd_partno;
spinlock_t bd_size_lock; /* for bd_inode->i_size updates */
struct gendisk * bd_disk;
+ struct request_queue * bd_queue;
/* The counter of freeze processes */
int bd_fsfreeze_count;
@@ -208,6 +227,9 @@ static inline void bio_issue_init(struct bio_issue *issue,
((u64)size << BIO_ISSUE_SIZE_SHIFT));
}
+typedef unsigned int blk_qc_t;
+#define BLK_QC_T_NONE -1U
+
/*
* main unit of I/O for the block layer and lower layers (ie drivers and
* stacking drivers)
@@ -227,8 +249,8 @@ struct bio {
struct bvec_iter bi_iter;
+ blk_qc_t bi_cookie;
bio_end_io_t *bi_end_io;
-
void *bi_private;
#ifdef CONFIG_BLK_CGROUP
/*
@@ -384,7 +406,7 @@ enum req_flag_bits {
/* command specific flags for REQ_OP_WRITE_ZEROES: */
__REQ_NOUNMAP, /* do not free blocks when zeroing */
- __REQ_HIPRI,
+ __REQ_POLLED, /* caller polls for completion using bio_poll */
/* for driver use */
__REQ_DRV,
@@ -409,7 +431,7 @@ enum req_flag_bits {
#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT)
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
-#define REQ_HIPRI (1ULL << __REQ_HIPRI)
+#define REQ_POLLED (1ULL << __REQ_POLLED)
#define REQ_DRV (1ULL << __REQ_DRV)
#define REQ_SWAP (1ULL << __REQ_SWAP)
@@ -431,8 +453,6 @@ enum stat_group {
#define bio_op(bio) \
((bio)->bi_opf & REQ_OP_MASK)
-#define req_op(req) \
- ((req)->cmd_flags & REQ_OP_MASK)
/* obsolete, don't use in new code */
static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
@@ -497,31 +517,6 @@ static inline int op_stat_group(unsigned int op)
return op_is_write(op);
}
-typedef unsigned int blk_qc_t;
-#define BLK_QC_T_NONE -1U
-#define BLK_QC_T_SHIFT 16
-#define BLK_QC_T_INTERNAL (1U << 31)
-
-static inline bool blk_qc_t_valid(blk_qc_t cookie)
-{
- return cookie != BLK_QC_T_NONE;
-}
-
-static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
-{
- return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
-}
-
-static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
-{
- return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
-}
-
-static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
-{
- return (cookie & BLK_QC_T_INTERNAL) != 0;
-}
-
struct blk_rq_stat {
u64 mean;
u64 min;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 12b9dbcc980e..bd4370baccca 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -3,8 +3,6 @@
#define _LINUX_BLKDEV_H
#include <linux/sched.h>
-#include <linux/sched/clock.h>
-#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/list.h>
#include <linux/llist.h>
@@ -12,17 +10,11 @@
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
-#include <linux/mempool.h>
-#include <linux/pfn.h>
#include <linux/bio.h>
-#include <linux/stringify.h>
#include <linux/gfp.h>
-#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/percpu-refcount.h>
-#include <linux/scatterlist.h>
#include <linux/blkzoned.h>
-#include <linux/pm.h>
#include <linux/sbitmap.h>
struct module;
@@ -33,14 +25,12 @@ struct request;
struct sg_io_hdr;
struct blkcg_gq;
struct blk_flush_queue;
+struct kiocb;
struct pr_ops;
struct rq_qos;
struct blk_queue_stats;
struct blk_stat_callback;
-struct blk_keyslot_manager;
-
-#define BLKDEV_MIN_RQ 4
-#define BLKDEV_MAX_RQ 128 /* Default maximum */
+struct blk_crypto_profile;
/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16
@@ -54,186 +44,13 @@ struct blk_keyslot_manager;
*/
#define BLKCG_MAX_POLS 6
-typedef void (rq_end_io_fn)(struct request *, blk_status_t);
-
-/*
- * request flags */
-typedef __u32 __bitwise req_flags_t;
-
-/* drive already may have started this one */
-#define RQF_STARTED ((__force req_flags_t)(1 << 1))
-/* may not be passed by ioscheduler */
-#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3))
-/* request for flush sequence */
-#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4))
-/* merge of different types, fail separately */
-#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5))
-/* track inflight for MQ */
-#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6))
-/* don't call prep for this one */
-#define RQF_DONTPREP ((__force req_flags_t)(1 << 7))
-/* vaguely specified driver internal error. Ignored by the block layer */
-#define RQF_FAILED ((__force req_flags_t)(1 << 10))
-/* don't warn about errors */
-#define RQF_QUIET ((__force req_flags_t)(1 << 11))
-/* elevator private data attached */
-#define RQF_ELVPRIV ((__force req_flags_t)(1 << 12))
-/* account into disk and partition IO statistics */
-#define RQF_IO_STAT ((__force req_flags_t)(1 << 13))
-/* runtime pm request */
-#define RQF_PM ((__force req_flags_t)(1 << 15))
-/* on IO scheduler merge hash */
-#define RQF_HASHED ((__force req_flags_t)(1 << 16))
-/* track IO completion time */
-#define RQF_STATS ((__force req_flags_t)(1 << 17))
-/* Look at ->special_vec for the actual data payload instead of the
- bio chain. */
-#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
-/* The per-zone write lock is held for this request */
-#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
-/* already slept for hybrid poll */
-#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20))
-/* ->timeout has been called, don't expire again */
-#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
-
-/* flags that prevent us from merging requests: */
-#define RQF_NOMERGE_FLAGS \
- (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
-
-/*
- * Request state for blk-mq.
- */
-enum mq_rq_state {
- MQ_RQ_IDLE = 0,
- MQ_RQ_IN_FLIGHT = 1,
- MQ_RQ_COMPLETE = 2,
-};
-
-/*
- * Try to put the fields that are referenced together in the same cacheline.
- *
- * If you modify this structure, make sure to update blk_rq_init() and
- * especially blk_mq_rq_ctx_init() to take care of the added fields.
- */
-struct request {
- struct request_queue *q;
- struct blk_mq_ctx *mq_ctx;
- struct blk_mq_hw_ctx *mq_hctx;
-
- unsigned int cmd_flags; /* op and common flags */
- req_flags_t rq_flags;
-
- int tag;
- int internal_tag;
-
- /* the following two fields are internal, NEVER access directly */
- unsigned int __data_len; /* total data len */
- sector_t __sector; /* sector cursor */
-
- struct bio *bio;
- struct bio *biotail;
-
- struct list_head queuelist;
-
- /*
- * The hash is used inside the scheduler, and killed once the
- * request reaches the dispatch list. The ipi_list is only used
- * to queue the request for softirq completion, which is long
- * after the request has been unhashed (and even removed from
- * the dispatch list).
- */
- union {
- struct hlist_node hash; /* merge hash */
- struct llist_node ipi_list;
- };
-
- /*
- * The rb_node is only used inside the io scheduler, requests
- * are pruned when moved to the dispatch queue. So let the
- * completion_data share space with the rb_node.
- */
- union {
- struct rb_node rb_node; /* sort/lookup */
- struct bio_vec special_vec;
- void *completion_data;
- int error_count; /* for legacy drivers, don't use */
- };
-
- /*
- * Three pointers are available for the IO schedulers, if they need
- * more they have to dynamically allocate it. Flush requests are
- * never put on the IO scheduler. So let the flush fields share
- * space with the elevator data.
- */
- union {
- struct {
- struct io_cq *icq;
- void *priv[2];
- } elv;
-
- struct {
- unsigned int seq;
- struct list_head list;
- rq_end_io_fn *saved_end_io;
- } flush;
- };
-
- struct gendisk *rq_disk;
- struct block_device *part;
-#ifdef CONFIG_BLK_RQ_ALLOC_TIME
- /* Time that the first bio started allocating this request. */
- u64 alloc_time_ns;
-#endif
- /* Time that this request was allocated for this IO. */
- u64 start_time_ns;
- /* Time that I/O was submitted to the device. */
- u64 io_start_time_ns;
-
-#ifdef CONFIG_BLK_WBT
- unsigned short wbt_flags;
-#endif
- /*
- * rq sectors used for blk stats. It has the same value
- * with blk_rq_sectors(rq), except that it never be zeroed
- * by completion.
- */
- unsigned short stats_sectors;
-
- /*
- * Number of scatter-gather DMA addr+len pairs after
- * physical address coalescing is performed.
- */
- unsigned short nr_phys_segments;
-
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
- unsigned short nr_integrity_segments;
-#endif
-
-#ifdef CONFIG_BLK_INLINE_ENCRYPTION
- struct bio_crypt_ctx *crypt_ctx;
- struct blk_ksm_keyslot *crypt_keyslot;
-#endif
-
- unsigned short write_hint;
- unsigned short ioprio;
-
- enum mq_rq_state state;
- refcount_t ref;
-
- unsigned int timeout;
- unsigned long deadline;
-
- union {
- struct __call_single_data csd;
- u64 fifo_time;
- };
+static inline int blk_validate_block_size(unsigned int bsize)
+{
+ if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
+ return -EINVAL;
- /*
- * completion callback.
- */
- rq_end_io_fn *end_io;
- void *end_io_data;
-};
+ return 0;
+}
static inline bool blk_op_is_passthrough(unsigned int op)
{
@@ -241,35 +58,6 @@ static inline bool blk_op_is_passthrough(unsigned int op)
return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
}
-static inline bool blk_rq_is_passthrough(struct request *rq)
-{
- return blk_op_is_passthrough(req_op(rq));
-}
-
-static inline unsigned short req_get_ioprio(struct request *req)
-{
- return req->ioprio;
-}
-
-#include <linux/elevator.h>
-
-struct blk_queue_ctx;
-
-struct bio_vec;
-
-enum blk_eh_timer_return {
- BLK_EH_DONE, /* drivers has completed the command */
- BLK_EH_RESET_TIMER, /* reset timer and try again */
-};
-
-enum blk_queue_state {
- Queue_down,
- Queue_up,
-};
-
-#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
-#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
-
/*
* Zoned block device models (zoned limit).
*
@@ -370,6 +158,34 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
#endif /* CONFIG_BLK_DEV_ZONED */
+/*
+ * Independent access ranges: struct blk_independent_access_range describes
+ * a range of contiguous sectors that can be accessed using device command
+ * execution resources that are independent from the resources used for
+ * other access ranges. This is typically found with single-LUN multi-actuator
+ * HDDs where each access range is served by a different set of heads.
+ * The set of independent ranges supported by the device is defined using
+ * struct blk_independent_access_ranges. The independent ranges must not overlap
+ * and must include all sectors within the disk capacity (no sector holes
+ * allowed).
+ * For a device with multiple ranges, requests targeting sectors in different
+ * ranges can be executed in parallel. A request can straddle an access range
+ * boundary.
+ */
+struct blk_independent_access_range {
+ struct kobject kobj;
+ struct request_queue *queue;
+ sector_t sector;
+ sector_t nr_sectors;
+};
+
+struct blk_independent_access_ranges {
+ struct kobject kobj;
+ bool sysfs_registered;
+ unsigned int nr_ia_ranges;
+ struct blk_independent_access_range ia_range[];
+};
+
struct request_queue {
struct request *last_merge;
struct elevator_queue *elevator;
@@ -444,8 +260,7 @@ struct request_queue {
unsigned int dma_alignment;
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
- /* Inline crypto capabilities */
- struct blk_keyslot_manager *ksm;
+ struct blk_crypto_profile *crypto_profile;
#endif
unsigned int rq_timeout;
@@ -457,10 +272,9 @@ struct request_queue {
struct timer_list timeout;
struct work_struct timeout_work;
- atomic_t nr_active_requests_shared_sbitmap;
+ atomic_t nr_active_requests_shared_tags;
- struct sbitmap_queue sched_bitmap_tags;
- struct sbitmap_queue sched_breserved_tags;
+ struct blk_mq_tags *sched_shared_tags;
struct list_head icq_list;
#ifdef CONFIG_BLK_CGROUP
@@ -536,6 +350,8 @@ struct request_queue {
*/
struct mutex mq_freeze_lock;
+ int quiesce_depth;
+
struct blk_mq_tag_set *tag_set;
struct list_head tag_set_list;
struct bio_set bio_split;
@@ -549,10 +365,14 @@ struct request_queue {
bool mq_sysfs_init_done;
- size_t cmd_size;
-
#define BLK_MAX_WRITE_HINTS 5
u64 write_hints[BLK_MAX_WRITE_HINTS];
+
+ /*
+ * Independent sector access ranges. This is always NULL for
+ * devices that do not have multiple independent access ranges.
+ */
+ struct blk_independent_access_ranges *ia_ranges;
};
/* Keep blk_queue_flag_name[] in sync with the definitions below */
@@ -579,7 +399,6 @@ struct request_queue {
#define QUEUE_FLAG_STATS 20 /* track IO start and completion times */
#define QUEUE_FLAG_POLL_STATS 21 /* collecting stats for hybrid polling */
#define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */
-#define QUEUE_FLAG_SCSI_PASSTHROUGH 23 /* queue supports SCSI commands */
#define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */
#define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */
#define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */
@@ -613,8 +432,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_secure_erase(q) \
(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
-#define blk_queue_scsi_passthrough(q) \
- test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
#define blk_queue_pci_p2pdma(q) \
test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
@@ -638,11 +455,6 @@ extern void blk_clear_pm_only(struct request_queue *q);
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
-#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ)
-
-#define rq_dma_dir(rq) \
- (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)
-
#define dma_map_bvec(dev, bv, dir, attrs) \
dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \
(dir), (attrs))
@@ -758,42 +570,6 @@ static inline unsigned int queue_max_active_zones(const struct request_queue *q)
}
#endif /* CONFIG_BLK_DEV_ZONED */
-static inline bool rq_is_sync(struct request *rq)
-{
- return op_is_sync(rq->cmd_flags);
-}
-
-static inline bool rq_mergeable(struct request *rq)
-{
- if (blk_rq_is_passthrough(rq))
- return false;
-
- if (req_op(rq) == REQ_OP_FLUSH)
- return false;
-
- if (req_op(rq) == REQ_OP_WRITE_ZEROES)
- return false;
-
- if (req_op(rq) == REQ_OP_ZONE_APPEND)
- return false;
-
- if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
- return false;
- if (rq->rq_flags & RQF_NOMERGE_FLAGS)
- return false;
-
- return true;
-}
-
-static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
-{
- if (bio_page(a) == bio_page(b) &&
- bio_offset(a) == bio_offset(b))
- return true;
-
- return false;
-}
-
static inline unsigned int blk_queue_depth(struct request_queue *q)
{
if (q->queue_depth)
@@ -808,83 +584,20 @@ static inline unsigned int blk_queue_depth(struct request_queue *q)
#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
#define BLK_MIN_SG_TIMEOUT (7 * HZ)
-struct rq_map_data {
- struct page **pages;
- int page_order;
- int nr_entries;
- unsigned long offset;
- int null_mapped;
- int from_user;
-};
-
-struct req_iterator {
- struct bvec_iter iter;
- struct bio *bio;
-};
-
/* This should not be used directly - use rq_for_each_segment */
#define for_each_bio(_bio) \
for (; _bio; _bio = _bio->bi_next)
-#define __rq_for_each_bio(_bio, rq) \
- if ((rq->bio)) \
- for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
-#define rq_for_each_segment(bvl, _rq, _iter) \
- __rq_for_each_bio(_iter.bio, _rq) \
- bio_for_each_segment(bvl, _iter.bio, _iter.iter)
-
-#define rq_for_each_bvec(bvl, _rq, _iter) \
- __rq_for_each_bio(_iter.bio, _rq) \
- bio_for_each_bvec(bvl, _iter.bio, _iter.iter)
-
-#define rq_iter_last(bvec, _iter) \
- (_iter.bio->bi_next == NULL && \
- bio_iter_last(bvec, _iter.iter))
-
-#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
-#endif
-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-extern void rq_flush_dcache_pages(struct request *rq);
-#else
-static inline void rq_flush_dcache_pages(struct request *rq)
-{
-}
-#endif
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
-blk_qc_t submit_bio_noacct(struct bio *bio);
-extern void blk_rq_init(struct request_queue *q, struct request *rq);
-extern void blk_put_request(struct request *);
-extern struct request *blk_get_request(struct request_queue *, unsigned int op,
- blk_mq_req_flags_t flags);
+void submit_bio_noacct(struct bio *bio);
+
extern int blk_lld_busy(struct request_queue *q);
-extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
- struct bio_set *bs, gfp_t gfp_mask,
- int (*bio_ctr)(struct bio *, struct bio *, void *),
- void *data);
-extern void blk_rq_unprep_clone(struct request *rq);
-extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
- struct request *rq);
-int blk_rq_append_bio(struct request *rq, struct bio *bio);
extern void blk_queue_split(struct bio **);
extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
extern void blk_queue_exit(struct request_queue *q);
extern void blk_sync_queue(struct request_queue *q);
-extern int blk_rq_map_user(struct request_queue *, struct request *,
- struct rq_map_data *, void __user *, unsigned long,
- gfp_t);
-extern int blk_rq_unmap_user(struct bio *);
-extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
-extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
- struct rq_map_data *, const struct iov_iter *,
- gfp_t);
-extern void blk_execute_rq_nowait(struct gendisk *,
- struct request *, int, rq_end_io_fn *);
-
-blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq,
- int at_head);
/* Helper to convert REQ_OP_XXX to its string format XXX */
extern const char *blk_op_str(unsigned int op);
@@ -892,69 +605,17 @@ extern const char *blk_op_str(unsigned int op);
int blk_status_to_errno(blk_status_t status);
blk_status_t errno_to_blk_status(int errno);
-int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin);
+/* only poll the hardware once, don't continue until a completion was found */
+#define BLK_POLL_ONESHOT (1 << 0)
+/* do not sleep to wait for the expected completion time */
+#define BLK_POLL_NOSLEEP (1 << 1)
+int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags);
+int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob,
+ unsigned int flags);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
{
- return bdev->bd_disk->queue; /* this is never NULL */
-}
-
-/*
- * The basic unit of block I/O is a sector. It is used in a number of contexts
- * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
- * bytes. Variables of type sector_t represent an offset or size that is a
- * multiple of 512 bytes. Hence these two constants.
- */
-#ifndef SECTOR_SHIFT
-#define SECTOR_SHIFT 9
-#endif
-#ifndef SECTOR_SIZE
-#define SECTOR_SIZE (1 << SECTOR_SHIFT)
-#endif
-
-#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
-#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
-#define SECTOR_MASK (PAGE_SECTORS - 1)
-
-/*
- * blk_rq_pos() : the current sector
- * blk_rq_bytes() : bytes left in the entire request
- * blk_rq_cur_bytes() : bytes left in the current segment
- * blk_rq_err_bytes() : bytes left till the next error boundary
- * blk_rq_sectors() : sectors left in the entire request
- * blk_rq_cur_sectors() : sectors left in the current segment
- * blk_rq_stats_sectors() : sectors of the entire request used for stats
- */
-static inline sector_t blk_rq_pos(const struct request *rq)
-{
- return rq->__sector;
-}
-
-static inline unsigned int blk_rq_bytes(const struct request *rq)
-{
- return rq->__data_len;
-}
-
-static inline int blk_rq_cur_bytes(const struct request *rq)
-{
- return rq->bio ? bio_cur_bytes(rq->bio) : 0;
-}
-
-extern unsigned int blk_rq_err_bytes(const struct request *rq);
-
-static inline unsigned int blk_rq_sectors(const struct request *rq)
-{
- return blk_rq_bytes(rq) >> SECTOR_SHIFT;
-}
-
-static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
-{
- return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
-}
-
-static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
-{
- return rq->stats_sectors;
+ return bdev->bd_queue; /* this is never NULL */
}
#ifdef CONFIG_BLK_DEV_ZONED
@@ -973,42 +634,8 @@ static inline unsigned int bio_zone_is_seq(struct bio *bio)
return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev),
bio->bi_iter.bi_sector);
}
-
-static inline unsigned int blk_rq_zone_no(struct request *rq)
-{
- return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
-}
-
-static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
-{
- return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
-}
#endif /* CONFIG_BLK_DEV_ZONED */
-/*
- * Some commands like WRITE SAME have a payload or data transfer size which
- * is different from the size of the request. Any driver that supports such
- * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
- * calculate the data transfer size.
- */
-static inline unsigned int blk_rq_payload_bytes(struct request *rq)
-{
- if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
- return rq->special_vec.bv_len;
- return blk_rq_bytes(rq);
-}
-
-/*
- * Return the first full biovec in the request. The caller needs to check that
- * there are any bvecs before calling this helper.
- */
-static inline struct bio_vec req_bvec(struct request *rq)
-{
- if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
- return rq->special_vec;
- return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter);
-}
-
static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
int op)
{
@@ -1048,47 +675,6 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q,
return min(q->limits.max_sectors, chunk_sectors);
}
-static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
- sector_t offset)
-{
- struct request_queue *q = rq->q;
-
- if (blk_rq_is_passthrough(rq))
- return q->limits.max_hw_sectors;
-
- if (!q->limits.chunk_sectors ||
- req_op(rq) == REQ_OP_DISCARD ||
- req_op(rq) == REQ_OP_SECURE_ERASE)
- return blk_queue_get_max_sectors(q, req_op(rq));
-
- return min(blk_max_size_offset(q, offset, 0),
- blk_queue_get_max_sectors(q, req_op(rq)));
-}
-
-static inline unsigned int blk_rq_count_bios(struct request *rq)
-{
- unsigned int nr_bios = 0;
- struct bio *bio;
-
- __rq_for_each_bio(bio, rq)
- nr_bios++;
-
- return nr_bios;
-}
-
-void blk_steal_bios(struct bio_list *list, struct request *rq);
-
-/*
- * Request completion related functions.
- *
- * blk_update_request() completes given number of bytes and updates
- * the request without completing it.
- */
-extern bool blk_update_request(struct request *rq, blk_status_t error,
- unsigned int nr_bytes);
-
-extern void blk_abort_request(struct request *);
-
/*
* Access functions for manipulating queue properties
*/
@@ -1133,46 +719,24 @@ extern void blk_queue_dma_alignment(struct request_queue *, int);
extern void blk_queue_update_dma_alignment(struct request_queue *, int);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
-extern void blk_queue_required_elevator_features(struct request_queue *q,
- unsigned int features);
-extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
- struct device *dev);
-/*
- * Number of physical segments as sent to the device.
- *
- * Normally this is the number of discontiguous data segments sent by the
- * submitter. But for data-less command like discard we might have no
- * actual data segments submitted, but the driver might have to add it's
- * own special payload. In that case we still return 1 here so that this
- * special payload will be mapped.
- */
-static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
-{
- if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
- return 1;
- return rq->nr_phys_segments;
-}
+struct blk_independent_access_ranges *
+disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges);
+void disk_set_independent_access_ranges(struct gendisk *disk,
+ struct blk_independent_access_ranges *iars);
/*
- * Number of discard segments (or ranges) the driver needs to fill in.
- * Each discard bio merged into a request is counted as one segment.
+ * Elevator features for blk_queue_required_elevator_features:
*/
-static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
-{
- return max_t(unsigned short, rq->nr_phys_segments, 1);
-}
-
-int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
- struct scatterlist *sglist, struct scatterlist **last_sg);
-static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
- struct scatterlist *sglist)
-{
- struct scatterlist *last_sg = NULL;
+/* Supports zoned block devices sequential write constraint */
+#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0)
+/* Supports scheduling on multiple hardware queues */
+#define ELEVATOR_F_MQ_AWARE (1U << 1)
- return __blk_rq_map_sg(q, rq, sglist, &last_sg);
-}
-extern void blk_dump_rq_flags(struct request *, char *);
+extern void blk_queue_required_elevator_features(struct request_queue *q,
+ unsigned int features);
+extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
+ struct device *dev);
bool __must_check blk_get_queue(struct request_queue *);
extern void blk_put_queue(struct request_queue *);
@@ -1187,19 +751,24 @@ extern void blk_set_queue_dying(struct request_queue *);
* as the lock contention for request_queue lock is reduced.
*
* It is ok not to disable preemption when adding the request to the plug list
- * or when attempting a merge, because blk_schedule_flush_list() will only flush
- * the plug list when the task sleeps by itself. For details, please see
- * schedule() where blk_schedule_flush_plug() is called.
+ * or when attempting a merge. For details, please see schedule() where
+ * blk_flush_plug() is called.
*/
struct blk_plug {
- struct list_head mq_list; /* blk-mq requests */
- struct list_head cb_list; /* md requires an unplug callback */
+ struct request *mq_list; /* blk-mq requests */
+
+ /* if ios_left is > 1, we can batch tag/rq allocations */
+ struct request *cached_rq;
+ unsigned short nr_ios;
+
unsigned short rq_count;
+
bool multiple_queues;
+ bool has_elevator;
bool nowait;
+
+ struct list_head cb_list; /* md requires an unplug callback */
};
-#define BLK_MAX_REQUEST_COUNT 16
-#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
struct blk_plug_cb;
typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
@@ -1211,32 +780,17 @@ struct blk_plug_cb {
extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug,
void *data, int size);
extern void blk_start_plug(struct blk_plug *);
+extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short);
extern void blk_finish_plug(struct blk_plug *);
-extern void blk_flush_plug_list(struct blk_plug *, bool);
-
-static inline void blk_flush_plug(struct task_struct *tsk)
-{
- struct blk_plug *plug = tsk->plug;
- if (plug)
- blk_flush_plug_list(plug, false);
-}
-
-static inline void blk_schedule_flush_plug(struct task_struct *tsk)
-{
- struct blk_plug *plug = tsk->plug;
-
- if (plug)
- blk_flush_plug_list(plug, true);
-}
+void blk_flush_plug(struct blk_plug *plug, bool from_schedule);
static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
struct blk_plug *plug = tsk->plug;
return plug &&
- (!list_empty(&plug->mq_list) ||
- !list_empty(&plug->cb_list));
+ (plug->mq_list || !list_empty(&plug->cb_list));
}
int blkdev_issue_flush(struct block_device *bdev);
@@ -1245,23 +799,23 @@ long nr_blockdev_pages(void);
struct blk_plug {
};
-static inline void blk_start_plug(struct blk_plug *plug)
+static inline void blk_start_plug_nr_ios(struct blk_plug *plug,
+ unsigned short nr_ios)
{
}
-static inline void blk_finish_plug(struct blk_plug *plug)
+static inline void blk_start_plug(struct blk_plug *plug)
{
}
-static inline void blk_flush_plug(struct task_struct *task)
+static inline void blk_finish_plug(struct blk_plug *plug)
{
}
-static inline void blk_schedule_flush_plug(struct task_struct *task)
+static inline void blk_flush_plug(struct blk_plug *plug, bool async)
{
}
-
static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
return false;
@@ -1499,22 +1053,6 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
return offset << SECTOR_SHIFT;
}
-/*
- * Two cases of handling DISCARD merge:
- * If max_discard_segments > 1, the driver takes every bio
- * as a range and send them to controller together. The ranges
- * needn't to be contiguous.
- * Otherwise, the bios/requests will be handled as same as
- * others which should be contiguous.
- */
-static inline bool blk_discard_mergable(struct request *req)
-{
- if (req_op(req) == REQ_OP_DISCARD &&
- queue_max_discard_segments(req->q) > 1)
- return true;
- return false;
-}
-
static inline int bdev_discard_alignment(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
@@ -1628,210 +1166,36 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned lo
#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
MODULE_ALIAS("block-major-" __stringify(major) "-*")
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-
-enum blk_integrity_flags {
- BLK_INTEGRITY_VERIFY = 1 << 0,
- BLK_INTEGRITY_GENERATE = 1 << 1,
- BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2,
- BLK_INTEGRITY_IP_CHECKSUM = 1 << 3,
-};
-
-struct blk_integrity_iter {
- void *prot_buf;
- void *data_buf;
- sector_t seed;
- unsigned int data_size;
- unsigned short interval;
- const char *disk_name;
-};
-
-typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
-typedef void (integrity_prepare_fn) (struct request *);
-typedef void (integrity_complete_fn) (struct request *, unsigned int);
-
-struct blk_integrity_profile {
- integrity_processing_fn *generate_fn;
- integrity_processing_fn *verify_fn;
- integrity_prepare_fn *prepare_fn;
- integrity_complete_fn *complete_fn;
- const char *name;
-};
-
-extern void blk_integrity_register(struct gendisk *, struct blk_integrity *);
-extern void blk_integrity_unregister(struct gendisk *);
-extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
-extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
- struct scatterlist *);
-extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
-
-static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
-{
- struct blk_integrity *bi = &disk->queue->integrity;
-
- if (!bi->profile)
- return NULL;
-
- return bi;
-}
-
-static inline
-struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
-{
- return blk_get_integrity(bdev->bd_disk);
-}
-
-static inline bool
-blk_integrity_queue_supports_integrity(struct request_queue *q)
-{
- return q->integrity.profile;
-}
-
-static inline bool blk_integrity_rq(struct request *rq)
-{
- return rq->cmd_flags & REQ_INTEGRITY;
-}
-
-static inline void blk_queue_max_integrity_segments(struct request_queue *q,
- unsigned int segs)
-{
- q->limits.max_integrity_segments = segs;
-}
-
-static inline unsigned short
-queue_max_integrity_segments(const struct request_queue *q)
-{
- return q->limits.max_integrity_segments;
-}
-
-/**
- * bio_integrity_intervals - Return number of integrity intervals for a bio
- * @bi: blk_integrity profile for device
- * @sectors: Size of the bio in 512-byte sectors
- *
- * Description: The block layer calculates everything in 512 byte
- * sectors but integrity metadata is done in terms of the data integrity
- * interval size of the storage device. Convert the block layer sectors
- * to the appropriate number of integrity intervals.
- */
-static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
- unsigned int sectors)
-{
- return sectors >> (bi->interval_exp - 9);
-}
-
-static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
- unsigned int sectors)
-{
- return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
-}
-
-/*
- * Return the first bvec that contains integrity data. Only drivers that are
- * limited to a single integrity segment should use this helper.
- */
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
-{
- if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
- return NULL;
- return rq->bio->bi_integrity->bip_vec;
-}
-
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-
-struct bio;
-struct block_device;
-struct gendisk;
-struct blk_integrity;
-
-static inline int blk_integrity_rq(struct request *rq)
-{
- return 0;
-}
-static inline int blk_rq_count_integrity_sg(struct request_queue *q,
- struct bio *b)
-{
- return 0;
-}
-static inline int blk_rq_map_integrity_sg(struct request_queue *q,
- struct bio *b,
- struct scatterlist *s)
-{
- return 0;
-}
-static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
-{
- return NULL;
-}
-static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
-{
- return NULL;
-}
-static inline bool
-blk_integrity_queue_supports_integrity(struct request_queue *q)
-{
- return false;
-}
-static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
-{
- return 0;
-}
-static inline void blk_integrity_register(struct gendisk *d,
- struct blk_integrity *b)
-{
-}
-static inline void blk_integrity_unregister(struct gendisk *d)
-{
-}
-static inline void blk_queue_max_integrity_segments(struct request_queue *q,
- unsigned int segs)
-{
-}
-static inline unsigned short queue_max_integrity_segments(const struct request_queue *q)
-{
- return 0;
-}
-
-static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
- unsigned int sectors)
-{
- return 0;
-}
-
-static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
- unsigned int sectors)
-{
- return 0;
-}
-
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
-{
- return NULL;
-}
-
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
-bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q);
+bool blk_crypto_register(struct blk_crypto_profile *profile,
+ struct request_queue *q);
-void blk_ksm_unregister(struct request_queue *q);
+void blk_crypto_unregister(struct request_queue *q);
#else /* CONFIG_BLK_INLINE_ENCRYPTION */
-static inline bool blk_ksm_register(struct blk_keyslot_manager *ksm,
- struct request_queue *q)
+static inline bool blk_crypto_register(struct blk_crypto_profile *profile,
+ struct request_queue *q)
{
return true;
}
-static inline void blk_ksm_unregister(struct request_queue *q) { }
+static inline void blk_crypto_unregister(struct request_queue *q) { }
#endif /* CONFIG_BLK_INLINE_ENCRYPTION */
+enum blk_unique_id {
+ /* these match the Designator Types specified in SPC */
+ BLK_UID_T10 = 1,
+ BLK_UID_EUI64 = 2,
+ BLK_UID_NAA = 3,
+};
+
+#define NFL4_UFLG_MASK 0x0000003F
struct block_device_operations {
- blk_qc_t (*submit_bio) (struct bio *bio);
+ void (*submit_bio)(struct bio *bio);
int (*open) (struct block_device *, fmode_t);
void (*release) (struct gendisk *, fmode_t);
int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
@@ -1847,6 +1211,9 @@ struct block_device_operations {
int (*report_zones)(struct gendisk *, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
char *(*devnode)(struct gendisk *disk, umode_t *mode);
+ /* returns the length of the identifier or a negative errno: */
+ int (*get_unique_id)(struct gendisk *disk, u8 id[16],
+ enum blk_unique_id id_type);
struct module *owner;
const struct pr_ops *pr_ops;
@@ -1869,60 +1236,6 @@ extern int bdev_read_page(struct block_device *, sector_t, struct page *);
extern int bdev_write_page(struct block_device *, sector_t, struct page *,
struct writeback_control *);
-#ifdef CONFIG_BLK_DEV_ZONED
-bool blk_req_needs_zone_write_lock(struct request *rq);
-bool blk_req_zone_write_trylock(struct request *rq);
-void __blk_req_zone_write_lock(struct request *rq);
-void __blk_req_zone_write_unlock(struct request *rq);
-
-static inline void blk_req_zone_write_lock(struct request *rq)
-{
- if (blk_req_needs_zone_write_lock(rq))
- __blk_req_zone_write_lock(rq);
-}
-
-static inline void blk_req_zone_write_unlock(struct request *rq)
-{
- if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
- __blk_req_zone_write_unlock(rq);
-}
-
-static inline bool blk_req_zone_is_write_locked(struct request *rq)
-{
- return rq->q->seq_zones_wlock &&
- test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
-}
-
-static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
-{
- if (!blk_req_needs_zone_write_lock(rq))
- return true;
- return !blk_req_zone_is_write_locked(rq);
-}
-#else
-static inline bool blk_req_needs_zone_write_lock(struct request *rq)
-{
- return false;
-}
-
-static inline void blk_req_zone_write_lock(struct request *rq)
-{
-}
-
-static inline void blk_req_zone_write_unlock(struct request *rq)
-{
-}
-static inline bool blk_req_zone_is_write_locked(struct request *rq)
-{
- return false;
-}
-
-static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
-{
- return true;
-}
-#endif /* CONFIG_BLK_DEV_ZONED */
-
static inline void blk_wake_io_task(struct task_struct *waiter)
{
/*
@@ -1991,6 +1304,8 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
#ifdef CONFIG_BLOCK
void invalidate_bdev(struct block_device *bdev);
int sync_blockdev(struct block_device *bdev);
+int sync_blockdev_nowait(struct block_device *bdev);
+void sync_bdevs(bool wait);
#else
static inline void invalidate_bdev(struct block_device *bdev)
{
@@ -1999,10 +1314,54 @@ static inline int sync_blockdev(struct block_device *bdev)
{
return 0;
}
+static inline int sync_blockdev_nowait(struct block_device *bdev)
+{
+ return 0;
+}
+static inline void sync_bdevs(bool wait)
+{
+}
#endif
int fsync_bdev(struct block_device *bdev);
int freeze_bdev(struct block_device *bdev);
int thaw_bdev(struct block_device *bdev);
+struct io_comp_batch {
+ struct request *req_list;
+ bool need_ts;
+ void (*complete)(struct io_comp_batch *);
+};
+
+#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { }
+
+#define rq_list_add(listptr, rq) do { \
+ (rq)->rq_next = *(listptr); \
+ *(listptr) = rq; \
+} while (0)
+
+#define rq_list_pop(listptr) \
+({ \
+ struct request *__req = NULL; \
+ if ((listptr) && *(listptr)) { \
+ __req = *(listptr); \
+ *(listptr) = __req->rq_next; \
+ } \
+ __req; \
+})
+
+#define rq_list_peek(listptr) \
+({ \
+ struct request *__req = NULL; \
+ if ((listptr) && *(listptr)) \
+ __req = *(listptr); \
+ __req; \
+})
+
+#define rq_list_for_each(listptr, pos) \
+ for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) \
+
+#define rq_list_next(rq) (rq)->rq_next
+#define rq_list_empty(list) ((list) == (struct request *) NULL)
+
#endif /* _LINUX_BLKDEV_H */
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index a083e15df608..22501a293fa5 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -2,7 +2,7 @@
#ifndef BLKTRACE_H
#define BLKTRACE_H
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/relay.h>
#include <linux/compat.h>
#include <uapi/linux/blktrace_api.h>
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 020a7d5bf470..3db6f6c95489 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -929,8 +929,11 @@ struct bpf_array_aux {
* stored in the map to make sure that all callers and callees have
* the same prog type and JITed flag.
*/
- enum bpf_prog_type type;
- bool jited;
+ struct {
+ spinlock_t lock;
+ enum bpf_prog_type type;
+ bool jited;
+ } owner;
/* Programs with direct jumps into programs part of this array. */
struct list_head poke_progs;
struct bpf_map *map;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 9c81724e4b98..bbe1eefa4c8a 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -101,14 +101,14 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
-#ifdef CONFIG_NET
-BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
#ifdef CONFIG_BPF_LSM
BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
+#ifdef CONFIG_NET
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 0e9bdd42dafb..35c25dff651a 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -44,7 +44,7 @@ struct bvec_iter {
unsigned int bi_bvec_done; /* number of bytes completed in
current bvec */
-};
+} __packed;
struct bvec_iter_all {
struct bio_vec bv;
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
new file mode 100644
index 000000000000..a075b70b9a70
--- /dev/null
+++ b/include/linux/cc_platform.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Confidential Computing Platform Capability checks
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ */
+
+#ifndef _LINUX_CC_PLATFORM_H
+#define _LINUX_CC_PLATFORM_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+
+/**
+ * enum cc_attr - Confidential computing attributes
+ *
+ * These attributes represent confidential computing features that are
+ * currently active.
+ */
+enum cc_attr {
+ /**
+ * @CC_ATTR_MEM_ENCRYPT: Memory encryption is active
+ *
+ * The platform/OS is running with active memory encryption. This
+ * includes running either as a bare-metal system or a hypervisor
+ * and actively using memory encryption or as a guest/virtual machine
+ * and actively using memory encryption.
+ *
+ * Examples include SME, SEV and SEV-ES.
+ */
+ CC_ATTR_MEM_ENCRYPT,
+
+ /**
+ * @CC_ATTR_HOST_MEM_ENCRYPT: Host memory encryption is active
+ *
+ * The platform/OS is running as a bare-metal system or a hypervisor
+ * and actively using memory encryption.
+ *
+ * Examples include SME.
+ */
+ CC_ATTR_HOST_MEM_ENCRYPT,
+
+ /**
+ * @CC_ATTR_GUEST_MEM_ENCRYPT: Guest memory encryption is active
+ *
+ * The platform/OS is running as a guest/virtual machine and actively
+ * using memory encryption.
+ *
+ * Examples include SEV and SEV-ES.
+ */
+ CC_ATTR_GUEST_MEM_ENCRYPT,
+
+ /**
+ * @CC_ATTR_GUEST_STATE_ENCRYPT: Guest state encryption is active
+ *
+ * The platform/OS is running as a guest/virtual machine and actively
+ * using memory encryption and register state encryption.
+ *
+ * Examples include SEV-ES.
+ */
+ CC_ATTR_GUEST_STATE_ENCRYPT,
+};
+
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+
+/**
+ * cc_platform_has() - Checks if the specified cc_attr attribute is active
+ * @attr: Confidential computing attribute to check
+ *
+ * The cc_platform_has() function will return an indicator as to whether the
+ * specified Confidential Computing attribute is currently active.
+ *
+ * Context: Any context
+ * Return:
+ * * TRUE - Specified Confidential Computing attribute is active
+ * * FALSE - Specified Confidential Computing attribute is not active
+ */
+bool cc_platform_has(enum cc_attr attr);
+
+#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */
+
+static inline bool cc_platform_has(enum cc_attr attr) { return false; }
+
+#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */
+
+#endif /* _LINUX_CC_PLATFORM_H */
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index c4fef00abdf3..0a89f111e00e 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -64,6 +64,7 @@ struct cdrom_device_info {
int for_data;
int (*exit)(struct cdrom_device_info *);
int mrw_mode_page;
+ __s64 last_media_change_ms;
};
struct cdrom_device_ops {
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index bd2b881c6b63..7bbd8df02532 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -41,8 +41,6 @@
#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
-#define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-
#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
#define __latent_entropy __attribute__((latent_entropy))
#endif
@@ -124,6 +122,14 @@
#endif
/*
+ * Treat __SANITIZE_HWADDRESS__ the same as __SANITIZE_ADDRESS__ in the kernel,
+ * matching the defines used by Clang.
+ */
+#ifdef __SANITIZE_HWADDRESS__
+#define __SANITIZE_ADDRESS__
+#endif
+
+/*
* Turn individual warnings and errors on and off locally, depending
* on version.
*/
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index b6ff83a714ca..05ceb2e92b0e 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -290,11 +290,6 @@ struct ftrace_likely_data {
(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
-/* Compile time object size, -1 for unknown */
-#ifndef __compiletime_object_size
-# define __compiletime_object_size(obj) -1
-#endif
-
#ifdef __OPTIMIZE__
# define __compiletime_assert(condition, msg, prefix, suffix) \
do { \
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 4d7fced3a39f..7a14807c9d1a 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -105,7 +105,7 @@ static inline void user_exit_irqoff(void) { }
static inline enum ctx_state exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
-static inline bool context_tracking_guest_enter(void) { return false; }
+static __always_inline bool context_tracking_guest_enter(void) { return false; }
static inline void context_tracking_guest_exit(void) { }
#endif /* !CONFIG_CONTEXT_TRACKING */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 832d8a74fa59..991911048857 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -72,6 +72,8 @@ enum cpuhp_state {
CPUHP_SLUB_DEAD,
CPUHP_DEBUG_OBJ_DEAD,
CPUHP_MM_WRITEBACK_DEAD,
+ /* Must be after CPUHP_MM_VMSTAT_DEAD */
+ CPUHP_MM_DEMOTION_DEAD,
CPUHP_MM_VMSTAT_DEAD,
CPUHP_SOFTIRQ_DEAD,
CPUHP_NET_MVNETA_DEAD,
@@ -240,6 +242,8 @@ enum cpuhp_state {
CPUHP_AP_BASE_CACHEINFO_ONLINE,
CPUHP_AP_ONLINE_DYN,
CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
+ /* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */
+ CPUHP_AP_MM_DEMOTION_ONLINE,
CPUHP_AP_X86_HPET_ONLINE,
CPUHP_AP_X86_KVM_CLK_ONLINE,
CPUHP_AP_DTPM_CPU_ONLINE,
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 3f49e65169c6..dbb409d77d4f 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -47,8 +47,6 @@ extern int debug_locks_off(void);
# define locking_selftest() do { } while (0)
#endif
-struct task_struct;
-
#ifdef CONFIG_LOCKDEP
extern void debug_show_all_locks(void);
extern void debug_show_held_locks(struct task_struct *task);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 114553b487ef..a7df155ea49b 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -576,9 +576,9 @@ struct dm_table *dm_swap_table(struct mapped_device *md,
struct dm_table *t);
/*
- * Table keyslot manager functions
+ * Table blk_crypto_profile functions
*/
-void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm);
+void dm_destroy_crypto_profile(struct blk_crypto_profile *profile);
/*-----------------------------------------------------------------
* Macros.
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index e1ca2080a1ff..39fefb86780b 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -173,7 +173,7 @@ static inline int dma_resv_lock_slow_interruptible(struct dma_resv *obj,
*/
static inline bool __must_check dma_resv_trylock(struct dma_resv *obj)
{
- return ww_mutex_trylock(&obj->lock);
+ return ww_mutex_trylock(&obj->lock, NULL);
}
/**
diff --git a/include/linux/dsa/mv88e6xxx.h b/include/linux/dsa/mv88e6xxx.h
new file mode 100644
index 000000000000..8c3d45eca46b
--- /dev/null
+++ b/include/linux/dsa/mv88e6xxx.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright 2021 NXP
+ */
+
+#ifndef _NET_DSA_TAG_MV88E6XXX_H
+#define _NET_DSA_TAG_MV88E6XXX_H
+
+#include <linux/if_vlan.h>
+
+#define MV88E6XXX_VID_STANDALONE 0
+#define MV88E6XXX_VID_BRIDGED (VLAN_N_VID - 1)
+
+#endif
diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index 435777a0073c..8ae999f587c4 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -5,7 +5,28 @@
#ifndef _NET_DSA_TAG_OCELOT_H
#define _NET_DSA_TAG_OCELOT_H
+#include <linux/kthread.h>
#include <linux/packing.h>
+#include <linux/skbuff.h>
+
+struct ocelot_skb_cb {
+ struct sk_buff *clone;
+ unsigned int ptp_class; /* valid only for clones */
+ u8 ptp_cmd;
+ u8 ts_id;
+};
+
+#define OCELOT_SKB_CB(skb) \
+ ((struct ocelot_skb_cb *)((skb)->cb))
+
+#define IFH_TAG_TYPE_C 0
+#define IFH_TAG_TYPE_S 1
+
+#define IFH_REW_OP_NOOP 0x0
+#define IFH_REW_OP_DSCP 0x1
+#define IFH_REW_OP_ONE_STEP_PTP 0x2
+#define IFH_REW_OP_TWO_STEP_PTP 0x3
+#define IFH_REW_OP_ORIGIN_PTP 0x5
#define OCELOT_TAG_LEN 16
#define OCELOT_SHORT_PREFIX_LEN 4
@@ -140,6 +161,17 @@
* +------+------+------+------+------+------+------+------+
*/
+struct felix_deferred_xmit_work {
+ struct dsa_port *dp;
+ struct sk_buff *skb;
+ struct kthread_work work;
+};
+
+struct felix_port {
+ void (*xmit_work_fn)(struct kthread_work *work);
+ struct kthread_worker *xmit_worker;
+};
+
static inline void ocelot_xfh_get_rew_val(void *extraction, u64 *rew_val)
{
packing(extraction, rew_val, 116, 85, OCELOT_TAG_LEN, UNPACK, 0);
@@ -215,4 +247,21 @@ static inline void ocelot_ifh_set_vid(void *injection, u64 vid)
packing(injection, &vid, 11, 0, OCELOT_TAG_LEN, PACK, 0);
}
+/* Determine the PTP REW_OP to use for injecting the given skb */
+static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb)
+{
+ struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;
+ u8 ptp_cmd = OCELOT_SKB_CB(skb)->ptp_cmd;
+ u32 rew_op = 0;
+
+ if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && clone) {
+ rew_op = ptp_cmd;
+ rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
+ } else if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) {
+ rew_op = ptp_cmd;
+ }
+
+ return rew_op;
+}
+
#endif
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 171106202fe5..9e07079528a5 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -48,6 +48,10 @@ struct sja1105_tagger_data {
spinlock_t meta_lock;
unsigned long state;
u8 ts_id;
+ /* Used on SJA1110 where meta frames are generated only for
+ * 2-step TX timestamps
+ */
+ struct sk_buff_head skb_txtstamp_queue;
};
struct sja1105_skb_cb {
@@ -69,42 +73,24 @@ struct sja1105_port {
bool hwts_tx_en;
};
-enum sja1110_meta_tstamp {
- SJA1110_META_TSTAMP_TX = 0,
- SJA1110_META_TSTAMP_RX = 1,
-};
-
-#if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP)
-
-void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id,
- enum sja1110_meta_tstamp dir, u64 tstamp);
-
-#else
+/* Timestamps are in units of 8 ns clock ticks (equivalent to
+ * a fixed 125 MHz clock).
+ */
+#define SJA1105_TICK_NS 8
-static inline void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
- u8 ts_id, enum sja1110_meta_tstamp dir,
- u64 tstamp)
+static inline s64 ns_to_sja1105_ticks(s64 ns)
{
+ return ns / SJA1105_TICK_NS;
}
-#endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */
-
-#if IS_ENABLED(CONFIG_NET_DSA_SJA1105)
-
-extern const struct dsa_switch_ops sja1105_switch_ops;
-
-static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
+static inline s64 sja1105_ticks_to_ns(s64 ticks)
{
- return dp->ds->ops == &sja1105_switch_ops;
+ return ticks * SJA1105_TICK_NS;
}
-#else
-
static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
{
- return false;
+ return true;
}
-#endif
-
#endif /* _NET_DSA_SJA1105_H */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
deleted file mode 100644
index ef9ceead3db1..000000000000
--- a/include/linux/elevator.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_ELEVATOR_H
-#define _LINUX_ELEVATOR_H
-
-#include <linux/percpu.h>
-#include <linux/hashtable.h>
-
-#ifdef CONFIG_BLOCK
-
-struct io_cq;
-struct elevator_type;
-#ifdef CONFIG_BLK_DEBUG_FS
-struct blk_mq_debugfs_attr;
-#endif
-
-/*
- * Return values from elevator merger
- */
-enum elv_merge {
- ELEVATOR_NO_MERGE = 0,
- ELEVATOR_FRONT_MERGE = 1,
- ELEVATOR_BACK_MERGE = 2,
- ELEVATOR_DISCARD_MERGE = 3,
-};
-
-struct blk_mq_alloc_data;
-struct blk_mq_hw_ctx;
-
-struct elevator_mq_ops {
- int (*init_sched)(struct request_queue *, struct elevator_type *);
- void (*exit_sched)(struct elevator_queue *);
- int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
- void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
- void (*depth_updated)(struct blk_mq_hw_ctx *);
-
- bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
- bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
- int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
- void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
- void (*requests_merged)(struct request_queue *, struct request *, struct request *);
- void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
- void (*prepare_request)(struct request *);
- void (*finish_request)(struct request *);
- void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
- struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
- bool (*has_work)(struct blk_mq_hw_ctx *);
- void (*completed_request)(struct request *, u64);
- void (*requeue_request)(struct request *);
- struct request *(*former_request)(struct request_queue *, struct request *);
- struct request *(*next_request)(struct request_queue *, struct request *);
- void (*init_icq)(struct io_cq *);
- void (*exit_icq)(struct io_cq *);
-};
-
-#define ELV_NAME_MAX (16)
-
-struct elv_fs_entry {
- struct attribute attr;
- ssize_t (*show)(struct elevator_queue *, char *);
- ssize_t (*store)(struct elevator_queue *, const char *, size_t);
-};
-
-/*
- * identifies an elevator type, such as AS or deadline
- */
-struct elevator_type
-{
- /* managed by elevator core */
- struct kmem_cache *icq_cache;
-
- /* fields provided by elevator implementation */
- struct elevator_mq_ops ops;
-
- size_t icq_size; /* see iocontext.h */
- size_t icq_align; /* ditto */
- struct elv_fs_entry *elevator_attrs;
- const char *elevator_name;
- const char *elevator_alias;
- const unsigned int elevator_features;
- struct module *elevator_owner;
-#ifdef CONFIG_BLK_DEBUG_FS
- const struct blk_mq_debugfs_attr *queue_debugfs_attrs;
- const struct blk_mq_debugfs_attr *hctx_debugfs_attrs;
-#endif
-
- /* managed by elevator core */
- char icq_cache_name[ELV_NAME_MAX + 6]; /* elvname + "_io_cq" */
- struct list_head list;
-};
-
-#define ELV_HASH_BITS 6
-
-void elv_rqhash_del(struct request_queue *q, struct request *rq);
-void elv_rqhash_add(struct request_queue *q, struct request *rq);
-void elv_rqhash_reposition(struct request_queue *q, struct request *rq);
-struct request *elv_rqhash_find(struct request_queue *q, sector_t offset);
-
-/*
- * each queue has an elevator_queue associated with it
- */
-struct elevator_queue
-{
- struct elevator_type *type;
- void *elevator_data;
- struct kobject kobj;
- struct mutex sysfs_lock;
- unsigned int registered:1;
- DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
-};
-
-/*
- * block elevator interface
- */
-extern enum elv_merge elv_merge(struct request_queue *, struct request **,
- struct bio *);
-extern void elv_merge_requests(struct request_queue *, struct request *,
- struct request *);
-extern void elv_merged_request(struct request_queue *, struct request *,
- enum elv_merge);
-extern bool elv_attempt_insert_merge(struct request_queue *, struct request *,
- struct list_head *);
-extern struct request *elv_former_request(struct request_queue *, struct request *);
-extern struct request *elv_latter_request(struct request_queue *, struct request *);
-void elevator_init_mq(struct request_queue *q);
-
-/*
- * io scheduler registration
- */
-extern int elv_register(struct elevator_type *);
-extern void elv_unregister(struct elevator_type *);
-
-/*
- * io scheduler sysfs switching
- */
-extern ssize_t elv_iosched_show(struct request_queue *, char *);
-extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
-
-extern bool elv_bio_merge_ok(struct request *, struct bio *);
-extern struct elevator_queue *elevator_alloc(struct request_queue *,
- struct elevator_type *);
-
-/*
- * Helper functions.
- */
-extern struct request *elv_rb_former_request(struct request_queue *, struct request *);
-extern struct request *elv_rb_latter_request(struct request_queue *, struct request *);
-
-/*
- * rb support functions.
- */
-extern void elv_rb_add(struct rb_root *, struct request *);
-extern void elv_rb_del(struct rb_root *, struct request *);
-extern struct request *elv_rb_find(struct rb_root *, sector_t);
-
-/*
- * Insertion selection
- */
-#define ELEVATOR_INSERT_FRONT 1
-#define ELEVATOR_INSERT_BACK 2
-#define ELEVATOR_INSERT_SORT 3
-#define ELEVATOR_INSERT_REQUEUE 4
-#define ELEVATOR_INSERT_FLUSH 5
-#define ELEVATOR_INSERT_SORT_MERGE 6
-
-#define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq))
-#define rb_entry_rq(node) rb_entry((node), struct request, rb_node)
-
-#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
-#define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist)
-
-/*
- * Elevator features.
- */
-
-/* Supports zoned block devices sequential write constraint */
-#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0)
-/* Supports scheduling on multiple hardware queues */
-#define ELEVATOR_F_MQ_AWARE (1U << 1)
-
-#endif /* CONFIG_BLOCK */
-#endif
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 2aaa15779d50..957ebec35aad 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -109,7 +109,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg
#endif
}
-#if defined(CONFIG_UM) || defined(CONFIG_IA64)
+#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64)
/*
* These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
* extra segments containing the gate DSO contents. Dumping its
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 928c411bd509..c58d50451485 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -308,7 +308,7 @@ static inline void ether_addr_copy(u8 *dst, const u8 *src)
*/
static inline void eth_hw_addr_set(struct net_device *dev, const u8 *addr)
{
- ether_addr_copy(dev->dev_addr, addr);
+ __dev_addr_set(dev, addr, ETH_ALEN);
}
/**
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4a93c12543ee..b956eeb0f9a8 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -586,8 +586,10 @@ struct bpf_prog {
struct bpf_prog_aux *aux; /* Auxiliary fields */
struct sock_fprog_kern *orig_prog; /* Original BPF program */
/* Instructions for interpreter */
- struct sock_filter insns[0];
- struct bpf_insn insnsi[];
+ union {
+ DECLARE_FLEX_ARRAY(struct sock_filter, insns);
+ DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi);
+ };
};
struct sk_filter {
@@ -1051,6 +1053,7 @@ extern int bpf_jit_enable;
extern int bpf_jit_harden;
extern int bpf_jit_kallsyms;
extern long bpf_jit_limit;
+extern long bpf_jit_limit_max;
typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index aec8f30ab200..07967a450eaa 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -436,6 +436,12 @@ typedef void (*fw_iso_callback_t)(struct fw_iso_context *context,
void *header, void *data);
typedef void (*fw_iso_mc_callback_t)(struct fw_iso_context *context,
dma_addr_t completed, void *data);
+
+union fw_iso_callback {
+ fw_iso_callback_t sc;
+ fw_iso_mc_callback_t mc;
+};
+
struct fw_iso_context {
struct fw_card *card;
int type;
@@ -443,10 +449,7 @@ struct fw_iso_context {
int speed;
bool drop_overflow_headers;
size_t header_size;
- union {
- fw_iso_callback_t sc;
- fw_iso_mc_callback_t mc;
- } callback;
+ union fw_iso_callback callback;
void *callback_data;
};
diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h
index c12df59d3f5f..3e378b1fb0bc 100644
--- a/include/linux/flex_proportions.h
+++ b/include/linux/flex_proportions.h
@@ -83,9 +83,10 @@ struct fprop_local_percpu {
int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp);
void fprop_local_destroy_percpu(struct fprop_local_percpu *pl);
-void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl);
-void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl,
- int max_frac);
+void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl,
+ long nr);
+void __fprop_add_percpu_max(struct fprop_global *p,
+ struct fprop_local_percpu *pl, int max_frac, long nr);
void fprop_fraction_percpu(struct fprop_global *p,
struct fprop_local_percpu *pl, unsigned long *numerator,
unsigned long *denominator);
@@ -96,7 +97,7 @@ void fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl)
unsigned long flags;
local_irq_save(flags);
- __fprop_inc_percpu(p, pl);
+ __fprop_add_percpu(p, pl, 1);
local_irq_restore(flags);
}
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index c1be37437e77..a6cd6815f249 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -2,6 +2,27 @@
#ifndef _LINUX_FORTIFY_STRING_H_
#define _LINUX_FORTIFY_STRING_H_
+#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline))
+#define __RENAME(x) __asm__(#x)
+
+void fortify_panic(const char *name) __noreturn __cold;
+void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)");
+void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)");
+void __write_overflow(void) __compiletime_error("detected write beyond size of object (1st parameter)");
+
+#define __compiletime_strlen(p) \
+({ \
+ unsigned char *__p = (unsigned char *)(p); \
+ size_t __ret = (size_t)-1; \
+ size_t __p_size = __builtin_object_size(p, 1); \
+ if (__p_size != (size_t)-1) { \
+ size_t __p_len = __p_size - 1; \
+ if (__builtin_constant_p(__p[__p_len]) && \
+ __p[__p_len] == '\0') \
+ __ret = __builtin_strlen(__p); \
+ } \
+ __ret; \
+})
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr);
@@ -49,28 +70,38 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q)
return p;
}
-__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
+extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
{
- __kernel_size_t ret;
size_t p_size = __builtin_object_size(p, 1);
+ size_t p_len = __compiletime_strlen(p);
+ size_t ret;
- /* Work around gcc excess stack consumption issue */
- if (p_size == (size_t)-1 ||
- (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0'))
- return __underlying_strlen(p);
- ret = strnlen(p, p_size);
- if (p_size <= ret)
+ /* We can take compile-time actions when maxlen is const. */
+ if (__builtin_constant_p(maxlen) && p_len != (size_t)-1) {
+ /* If p is const, we can use its compile-time-known len. */
+ if (maxlen >= p_size)
+ return p_len;
+ }
+
+ /* Do not check characters beyond the end of p. */
+ ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
+ if (p_size <= ret && maxlen != ret)
fortify_panic(__func__);
return ret;
}
-extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
-__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
+/* defined after fortified strnlen to reuse it. */
+__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
{
+ __kernel_size_t ret;
size_t p_size = __builtin_object_size(p, 1);
- __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
- if (p_size <= ret && maxlen != ret)
+ /* Give up if we don't know how large p is. */
+ if (p_size == (size_t)-1)
+ return __underlying_strlen(p);
+ ret = strnlen(p, p_size);
+ if (p_size <= ret)
fortify_panic(__func__);
return ret;
}
@@ -79,24 +110,27 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
{
- size_t ret;
size_t p_size = __builtin_object_size(p, 1);
size_t q_size = __builtin_object_size(q, 1);
+ size_t q_len; /* Full count of source string length. */
+ size_t len; /* Count of characters going into destination. */
if (p_size == (size_t)-1 && q_size == (size_t)-1)
return __real_strlcpy(p, q, size);
- ret = strlen(q);
- if (size) {
- size_t len = (ret >= size) ? size - 1 : ret;
-
- if (__builtin_constant_p(len) && len >= p_size)
+ q_len = strlen(q);
+ len = (q_len >= size) ? size - 1 : q_len;
+ if (__builtin_constant_p(size) && __builtin_constant_p(q_len) && size) {
+ /* Write size is always larger than destination. */
+ if (len >= p_size)
__write_overflow();
+ }
+ if (size) {
if (len >= p_size)
fortify_panic(__func__);
__underlying_memcpy(p, q, len);
p[len] = '\0';
}
- return ret;
+ return q_len;
}
/* defined after fortified strnlen to reuse it */
@@ -280,7 +314,10 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q)
if (p_size == (size_t)-1 && q_size == (size_t)-1)
return __underlying_strcpy(p, q);
size = strlen(q) + 1;
- /* test here to use the more stringent object size */
+ /* Compile-time check for const size overflow. */
+ if (__builtin_constant_p(size) && p_size < size)
+ __write_overflow();
+ /* Run-time check for dynamic size overflow. */
if (p_size < size)
fortify_panic(__func__);
memcpy(p, q, size);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e7a633353fd2..f3cfca5edc9a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -48,6 +48,7 @@
struct backing_dev_info;
struct bdi_writeback;
struct bio;
+struct io_comp_batch;
struct export_operations;
struct fiemap_extent_info;
struct hd_geometry;
@@ -329,16 +330,12 @@ struct kiocb {
randomized_struct_fields_start
loff_t ki_pos;
- void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
+ void (*ki_complete)(struct kiocb *iocb, long ret);
void *private;
int ki_flags;
u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */
- union {
- unsigned int ki_cookie; /* for ->iopoll */
- struct wait_page_queue *ki_waitq; /* for async buffered IO */
- };
-
+ struct wait_page_queue *ki_waitq; /* for async buffered IO */
randomized_struct_fields_end
};
@@ -2075,7 +2072,8 @@ struct file_operations {
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
- int (*iopoll)(struct kiocb *kiocb, bool spin);
+ int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
+ unsigned int flags);
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
@@ -2498,6 +2496,8 @@ enum file_time_flags {
extern bool atime_needs_update(const struct path *, struct inode *);
extern void touch_atime(const struct path *);
+int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
+
static inline void file_accessed(struct file *file)
{
if (!(file->f_flags & O_NOATIME))
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index e912ed9141d9..91ea9477e9bd 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -118,9 +118,6 @@ struct fscrypt_operations {
*/
bool (*empty_dir)(struct inode *inode);
- /* The filesystem's maximum ciphertext filename length, in bytes */
- unsigned int max_namelen;
-
/*
* Check whether the filesystem's inode numbers and UUID are stable,
* meaning that they will never be changed even by offline operations
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c68d83c87f83..59eabbc3a36b 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -12,12 +12,10 @@
#include <linux/types.h>
#include <linux/kdev_t.h>
-#include <linux/rcupdate.h>
-#include <linux/slab.h>
-#include <linux/percpu-refcount.h>
#include <linux/uuid.h>
#include <linux/blk_types.h>
-#include <asm/local.h>
+#include <linux/device.h>
+#include <linux/xarray.h>
extern const struct device_type disk_type;
extern struct device_type part_type;
@@ -26,14 +24,6 @@ extern struct class block_class;
#define DISK_MAX_PARTS 256
#define DISK_NAME_LEN 32
-#include <linux/major.h>
-#include <linux/device.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/workqueue.h>
-#include <linux/xarray.h>
-
#define PARTITION_META_INFO_VOLNAMELTH 64
/*
* Enough for the string representation of any kind of UUID plus NULL.
@@ -149,6 +139,7 @@ struct gendisk {
unsigned long state;
#define GD_NEED_PART_SCAN 0
#define GD_READ_ONLY 1
+#define GD_DEAD 2
struct mutex open_mutex; /* open/close mutex */
unsigned open_partitions; /* number of open partitions */
@@ -222,6 +213,8 @@ static inline int add_disk(struct gendisk *disk)
}
extern void del_gendisk(struct gendisk *gp);
+void invalidate_disk(struct gendisk *disk);
+
void set_disk_ro(struct gendisk *disk, bool read_only);
static inline int get_disk_ro(struct gendisk *disk)
@@ -230,6 +223,11 @@ static inline int get_disk_ro(struct gendisk *disk)
test_bit(GD_READ_ONLY, &disk->state);
}
+static inline int bdev_read_only(struct block_device *bdev)
+{
+ return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
+}
+
extern void disk_block_events(struct gendisk *disk);
extern void disk_unblock_events(struct gendisk *disk);
extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
@@ -247,7 +245,12 @@ static inline sector_t get_start_sect(struct block_device *bdev)
static inline sector_t bdev_nr_sectors(struct block_device *bdev)
{
- return i_size_read(bdev->bd_inode) >> 9;
+ return bdev->bd_nr_sectors;
+}
+
+static inline loff_t bdev_nr_bytes(struct block_device *bdev)
+{
+ return bdev_nr_sectors(bdev) << SECTOR_SHIFT;
}
static inline sector_t get_capacity(struct gendisk *disk)
@@ -255,6 +258,12 @@ static inline sector_t get_capacity(struct gendisk *disk)
return bdev_nr_sectors(disk->part0);
}
+static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
+{
+ return bdev_nr_sectors(sb->s_bdev) >>
+ (sb->s_blocksize_bits - SECTOR_SHIFT);
+}
+
int bdev_disk_changed(struct gendisk *disk, bool invalidate);
void blk_drop_partitions(struct gendisk *disk);
@@ -290,10 +299,6 @@ bool bdev_check_media_change(struct block_device *bdev);
int __invalidate_device(struct block_device *bdev, bool kill_dirty);
void set_capacity(struct gendisk *disk, sector_t size);
-/* for drivers/char/raw.c: */
-int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
-long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
-
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 55b2ec1f965a..3745efd21cf6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -520,15 +520,11 @@ static inline void arch_free_page(struct page *page, int order) { }
#ifndef HAVE_ARCH_ALLOC_PAGE
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
-#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
-static inline int arch_make_page_accessible(struct page *page)
-{
- return 0;
-}
-#endif
struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
nodemask_t *nodemask);
+struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid,
+ nodemask_t *nodemask);
unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
nodemask_t *nodemask, int nr_pages,
@@ -570,6 +566,15 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
return __alloc_pages(gfp_mask, order, nid, NULL);
}
+static inline
+struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
+{
+ VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
+ VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid));
+
+ return __folio_alloc(gfp, order, nid, NULL);
+}
+
/*
* Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE,
* prefer the current CPU's closest node. Otherwise node must be valid and
@@ -586,6 +591,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
#ifdef CONFIG_NUMA
struct page *alloc_pages(gfp_t gfp, unsigned int order);
+struct folio *folio_alloc(gfp_t gfp, unsigned order);
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
struct vm_area_struct *vma, unsigned long addr,
int node, bool hugepage);
@@ -596,6 +602,10 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
{
return alloc_pages_node(numa_node_id(), gfp_mask, order);
}
+static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
+{
+ return __folio_alloc_node(gfp, order, numa_node_id());
+}
#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
alloc_pages(gfp_mask, order)
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 4aa1031d3e4c..0a0b2b09b1b8 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -73,6 +73,12 @@ static inline void *kmap_local_page(struct page *page)
return __kmap_local_page_prot(page, kmap_prot);
}
+static inline void *kmap_local_folio(struct folio *folio, size_t offset)
+{
+ struct page *page = folio_page(folio, offset / PAGE_SIZE);
+ return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE;
+}
+
static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
{
return __kmap_local_page_prot(page, prot);
@@ -171,6 +177,11 @@ static inline void *kmap_local_page(struct page *page)
return page_address(page);
}
+static inline void *kmap_local_folio(struct folio *folio, size_t offset)
+{
+ return page_address(&folio->page) + offset;
+}
+
static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
{
return kmap_local_page(page);
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index b4c49f9cc379..27cdd715c5f9 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -97,6 +97,43 @@ static inline void kmap_flush_unused(void);
static inline void *kmap_local_page(struct page *page);
/**
+ * kmap_local_folio - Map a page in this folio for temporary usage
+ * @folio: The folio containing the page.
+ * @offset: The byte offset within the folio which identifies the page.
+ *
+ * Requires careful handling when nesting multiple mappings because the map
+ * management is stack based. The unmap has to be in the reverse order of
+ * the map operation::
+ *
+ * addr1 = kmap_local_folio(folio1, offset1);
+ * addr2 = kmap_local_folio(folio2, offset2);
+ * ...
+ * kunmap_local(addr2);
+ * kunmap_local(addr1);
+ *
+ * Unmapping addr1 before addr2 is invalid and causes malfunction.
+ *
+ * Contrary to kmap() mappings the mapping is only valid in the context of
+ * the caller and cannot be handed to other contexts.
+ *
+ * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the
+ * virtual address of the direct mapping. Only real highmem pages are
+ * temporarily mapped.
+ *
+ * While it is significantly faster than kmap() for the higmem case it
+ * comes with restrictions about the pointer validity. Only use when really
+ * necessary.
+ *
+ * On HIGHMEM enabled systems mapping a highmem page has the side effect of
+ * disabling migration in order to keep the virtual address stable across
+ * preemption. No caller of kmap_local_folio() can rely on this side effect.
+ *
+ * Context: Can be invoked from any context.
+ * Return: The virtual address of @offset.
+ */
+static inline void *kmap_local_folio(struct folio *folio, size_t offset);
+
+/**
* kmap_atomic - Atomically map a page for temporary usage - Deprecated!
* @page: Pointer to the page to be mapped
*
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f123e15d966e..f280f33ff223 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -251,15 +251,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
}
/**
- * thp_head - Head page of a transparent huge page.
- * @page: Any page (tail, head or regular) found in the page cache.
- */
-static inline struct page *thp_head(struct page *page)
-{
- return compound_head(page);
-}
-
-/**
* thp_order - Order of a transparent huge page.
* @page: Head page of a transparent huge page.
*/
@@ -336,12 +327,6 @@ static inline struct list_head *page_deferred_list(struct page *page)
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
-static inline struct page *thp_head(struct page *page)
-{
- VM_BUG_ON_PGFLAGS(PageTail(page), page);
- return page;
-}
-
static inline unsigned int thp_order(struct page *page)
{
VM_BUG_ON_PGFLAGS(PageTail(page), page);
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 694264503119..ada3dd79cd08 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1143,7 +1143,7 @@ struct ieee80211_mgmt {
__le16 auth_transaction;
__le16 status_code;
/* possibly followed by Challenge text */
- u8 variable[0];
+ u8 variable[];
} __packed auth;
struct {
__le16 reason_code;
@@ -1152,26 +1152,26 @@ struct ieee80211_mgmt {
__le16 capab_info;
__le16 listen_interval;
/* followed by SSID and Supported rates */
- u8 variable[0];
+ u8 variable[];
} __packed assoc_req;
struct {
__le16 capab_info;
__le16 status_code;
__le16 aid;
/* followed by Supported rates */
- u8 variable[0];
+ u8 variable[];
} __packed assoc_resp, reassoc_resp;
struct {
__le16 capab_info;
__le16 status_code;
- u8 variable[0];
+ u8 variable[];
} __packed s1g_assoc_resp, s1g_reassoc_resp;
struct {
__le16 capab_info;
__le16 listen_interval;
u8 current_ap[ETH_ALEN];
/* followed by SSID and Supported rates */
- u8 variable[0];
+ u8 variable[];
} __packed reassoc_req;
struct {
__le16 reason_code;
@@ -1182,11 +1182,11 @@ struct ieee80211_mgmt {
__le16 capab_info;
/* followed by some of SSID, Supported rates,
* FH Params, DS Params, CF Params, IBSS Params, TIM */
- u8 variable[0];
+ u8 variable[];
} __packed beacon;
struct {
/* only variable items: SSID, Supported rates */
- u8 variable[0];
+ DECLARE_FLEX_ARRAY(u8, variable);
} __packed probe_req;
struct {
__le64 timestamp;
@@ -1194,7 +1194,7 @@ struct ieee80211_mgmt {
__le16 capab_info;
/* followed by some of SSID, Supported rates,
* FH Params, DS Params, CF Params, IBSS Params */
- u8 variable[0];
+ u8 variable[];
} __packed probe_resp;
struct {
u8 category;
@@ -1203,16 +1203,16 @@ struct ieee80211_mgmt {
u8 action_code;
u8 dialog_token;
u8 status_code;
- u8 variable[0];
+ u8 variable[];
} __packed wme_action;
struct{
u8 action_code;
- u8 variable[0];
+ u8 variable[];
} __packed chan_switch;
struct{
u8 action_code;
struct ieee80211_ext_chansw_ie data;
- u8 variable[0];
+ u8 variable[];
} __packed ext_chan_switch;
struct{
u8 action_code;
@@ -1228,7 +1228,7 @@ struct ieee80211_mgmt {
__le16 timeout;
__le16 start_seq_num;
/* followed by BA Extension */
- u8 variable[0];
+ u8 variable[];
} __packed addba_req;
struct{
u8 action_code;
@@ -1244,11 +1244,11 @@ struct ieee80211_mgmt {
} __packed delba;
struct {
u8 action_code;
- u8 variable[0];
+ u8 variable[];
} __packed self_prot;
struct{
u8 action_code;
- u8 variable[0];
+ u8 variable[];
} __packed mesh_action;
struct {
u8 action;
@@ -1292,7 +1292,7 @@ struct ieee80211_mgmt {
u8 toa[6];
__le16 tod_error;
__le16 toa_error;
- u8 variable[0];
+ u8 variable[];
} __packed ftm;
struct {
u8 action_code;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 24f8489583ca..63f4ea4dac9b 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -313,8 +313,8 @@ int iomap_writepages(struct address_space *mapping,
struct iomap_dio_ops {
int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
unsigned flags);
- blk_qc_t (*submit_io)(const struct iomap_iter *iter, struct bio *bio,
- loff_t file_offset);
+ void (*submit_io)(const struct iomap_iter *iter, struct bio *bio,
+ loff_t file_offset);
};
/*
@@ -337,7 +337,6 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
unsigned int dio_flags);
ssize_t iomap_dio_complete(struct iomap_dio *dio);
-int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
#ifdef CONFIG_SWAP
struct file;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index c8293c817646..848e1e12c5c6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -524,9 +524,10 @@ struct irq_chip {
void (*irq_bus_lock)(struct irq_data *data);
void (*irq_bus_sync_unlock)(struct irq_data *data);
+#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE
void (*irq_cpu_online)(struct irq_data *data);
void (*irq_cpu_offline)(struct irq_data *data);
-
+#endif
void (*irq_suspend)(struct irq_data *data);
void (*irq_resume)(struct irq_data *data);
void (*irq_pm_shutdown)(struct irq_data *data);
@@ -606,8 +607,10 @@ struct irqaction;
extern int setup_percpu_irq(unsigned int irq, struct irqaction *new);
extern void remove_percpu_irq(unsigned int irq, struct irqaction *act);
+#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE
extern void irq_cpu_online(void);
extern void irq_cpu_offline(void);
+#endif
extern int irq_set_affinity_locked(struct irq_data *data,
const struct cpumask *cpumask, bool force);
extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info);
@@ -1261,6 +1264,7 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *));
* top-level IRQ handler.
*/
extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init;
+asmlinkage void generic_handle_arch_irq(struct pt_regs *regs);
#else
#ifndef set_handle_irq
#define set_handle_irq(handle_irq) \
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index ec2a47a81e42..8cd11a223260 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -3,6 +3,7 @@
#define _LINUX_IRQ_WORK_H
#include <linux/smp_types.h>
+#include <linux/rcuwait.h>
/*
* An entry can be in one of four states:
@@ -16,11 +17,13 @@
struct irq_work {
struct __call_single_node node;
void (*func)(struct irq_work *);
+ struct rcuwait irqwait;
};
#define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){ \
.node = { .u_flags = (_flags), }, \
.func = (_func), \
+ .irqwait = __RCUWAIT_INITIALIZER(irqwait), \
}
#define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0)
@@ -46,6 +49,11 @@ static inline bool irq_work_is_busy(struct irq_work *work)
return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY;
}
+static inline bool irq_work_is_hard(struct irq_work *work)
+{
+ return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ;
+}
+
bool irq_work_queue(struct irq_work *work);
bool irq_work_queue_on(struct irq_work *work, int cpu);
diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h
index 67351aac65ef..3a091d0710ae 100644
--- a/include/linux/irqchip.h
+++ b/include/linux/irqchip.h
@@ -14,8 +14,15 @@
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_irq.h>
#include <linux/platform_device.h>
+/* Undefined on purpose */
+extern of_irq_init_cb_t typecheck_irq_init_cb;
+
+#define typecheck_irq_init_cb(fn) \
+ (__typecheck(typecheck_irq_init_cb, &fn) ? fn : fn)
+
/*
* This macro must be used by the different irqchip drivers to declare
* the association between their DT compatible string and their
@@ -23,24 +30,27 @@
*
* @name: name that must be unique across all IRQCHIP_DECLARE of the
* same file.
- * @compstr: compatible string of the irqchip driver
+ * @compat: compatible string of the irqchip driver
* @fn: initialization function
*/
-#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn)
+#define IRQCHIP_DECLARE(name, compat, fn) \
+ OF_DECLARE_2(irqchip, name, compat, typecheck_irq_init_cb(fn))
extern int platform_irqchip_probe(struct platform_device *pdev);
#define IRQCHIP_PLATFORM_DRIVER_BEGIN(drv_name) \
static const struct of_device_id drv_name##_irqchip_match_table[] = {
-#define IRQCHIP_MATCH(compat, fn) { .compatible = compat, .data = fn },
+#define IRQCHIP_MATCH(compat, fn) { .compatible = compat, \
+ .data = typecheck_irq_init_cb(fn), },
#define IRQCHIP_PLATFORM_DRIVER_END(drv_name) \
{}, \
}; \
MODULE_DEVICE_TABLE(of, drv_name##_irqchip_match_table); \
-static struct platform_driver drv_name##_driver = { \
- .probe = platform_irqchip_probe, \
+static struct platform_driver drv_name##_driver = { \
+ .probe = IS_ENABLED(CONFIG_IRQCHIP) ? \
+ platform_irqchip_probe : NULL, \
.driver = { \
.name = #drv_name, \
.owner = THIS_MODULE, \
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 59aea39785bf..93d270ca0c56 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -168,14 +168,7 @@ int generic_handle_irq(unsigned int irq);
* conversion failed.
*/
int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq);
-
-#ifdef CONFIG_HANDLE_DOMAIN_IRQ
-int handle_domain_irq(struct irq_domain *domain,
- unsigned int hwirq, struct pt_regs *regs);
-
-int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
- struct pt_regs *regs);
-#endif
+int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq);
#endif
/* Test to see if a driver has successfully requested an irq */
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index dd874a1ee862..de5f5913374d 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -89,7 +89,7 @@ static __always_inline bool kasan_enabled(void)
return static_branch_likely(&kasan_flag_enabled);
}
-static inline bool kasan_has_integrated_init(void)
+static inline bool kasan_hw_tags_enabled(void)
{
return kasan_enabled();
}
@@ -104,7 +104,7 @@ static inline bool kasan_enabled(void)
return IS_ENABLED(CONFIG_KASAN);
}
-static inline bool kasan_has_integrated_init(void)
+static inline bool kasan_hw_tags_enabled(void)
{
return false;
}
@@ -125,6 +125,11 @@ static __always_inline void kasan_free_pages(struct page *page,
#endif /* CONFIG_KASAN_HW_TAGS */
+static inline bool kasan_has_integrated_init(void)
+{
+ return kasan_hw_tags_enabled();
+}
+
#ifdef CONFIG_KASAN
struct kasan_cache {
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2776423a587e..e8696e4a45aa 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -111,8 +111,8 @@ static __always_inline void might_resched(void)
#endif /* CONFIG_PREEMPT_* */
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-extern void ___might_sleep(const char *file, int line, int preempt_offset);
-extern void __might_sleep(const char *file, int line, int preempt_offset);
+extern void __might_resched(const char *file, int line, unsigned int offsets);
+extern void __might_sleep(const char *file, int line);
extern void __cant_sleep(const char *file, int line, int preempt_offset);
extern void __cant_migrate(const char *file, int line);
@@ -129,7 +129,7 @@ extern void __cant_migrate(const char *file, int line);
* supposed to.
*/
# define might_sleep() \
- do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+ do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
/**
* cant_sleep - annotation for functions that cannot sleep
*
@@ -168,10 +168,9 @@ extern void __cant_migrate(const char *file, int line);
*/
# define non_block_end() WARN_ON(current->non_block_count-- == 0)
#else
- static inline void ___might_sleep(const char *file, int line,
- int preempt_offset) { }
- static inline void __might_sleep(const char *file, int line,
- int preempt_offset) { }
+ static inline void __might_resched(const char *file, int line,
+ unsigned int offsets) { }
+static inline void __might_sleep(const char *file, int line) { }
# define might_sleep() do { might_resched(); } while (0)
# define cant_sleep() do { } while (0)
# define cant_migrate() do { } while (0)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 44ae1a7eb9e3..69ae6b278464 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -102,6 +102,7 @@ extern void account_system_index_time(struct task_struct *, u64,
enum cpu_usage_stat);
extern void account_steal_time(u64);
extern void account_idle_time(u64);
+extern u64 get_idle_time(struct kernel_cpustat *kcs, int cpu);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h
deleted file mode 100644
index a27605e2f826..000000000000
--- a/include/linux/keyslot-manager.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright 2019 Google LLC
- */
-
-#ifndef __LINUX_KEYSLOT_MANAGER_H
-#define __LINUX_KEYSLOT_MANAGER_H
-
-#include <linux/bio.h>
-#include <linux/blk-crypto.h>
-
-struct blk_keyslot_manager;
-
-/**
- * struct blk_ksm_ll_ops - functions to manage keyslots in hardware
- * @keyslot_program: Program the specified key into the specified slot in the
- * inline encryption hardware.
- * @keyslot_evict: Evict key from the specified keyslot in the hardware.
- * The key is provided so that e.g. dm layers can evict
- * keys from the devices that they map over.
- * Returns 0 on success, -errno otherwise.
- *
- * This structure should be provided by storage device drivers when they set up
- * a keyslot manager - this structure holds the function ptrs that the keyslot
- * manager will use to manipulate keyslots in the hardware.
- */
-struct blk_ksm_ll_ops {
- int (*keyslot_program)(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key,
- unsigned int slot);
- int (*keyslot_evict)(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key,
- unsigned int slot);
-};
-
-struct blk_keyslot_manager {
- /*
- * The struct blk_ksm_ll_ops that this keyslot manager will use
- * to perform operations like programming and evicting keys on the
- * device
- */
- struct blk_ksm_ll_ops ksm_ll_ops;
-
- /*
- * The maximum number of bytes supported for specifying the data unit
- * number.
- */
- unsigned int max_dun_bytes_supported;
-
- /*
- * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents
- * whether a crypto mode and data unit size are supported. The i'th
- * bit of crypto_mode_supported[crypto_mode] is set iff a data unit
- * size of (1 << i) is supported. We only support data unit sizes
- * that are powers of 2.
- */
- unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX];
-
- /* Device for runtime power management (NULL if none) */
- struct device *dev;
-
- /* Here onwards are *private* fields for internal keyslot manager use */
-
- unsigned int num_slots;
-
- /* Protects programming and evicting keys from the device */
- struct rw_semaphore lock;
-
- /* List of idle slots, with least recently used slot at front */
- wait_queue_head_t idle_slots_wait_queue;
- struct list_head idle_slots;
- spinlock_t idle_slots_lock;
-
- /*
- * Hash table which maps struct *blk_crypto_key to keyslots, so that we
- * can find a key's keyslot in O(1) time rather than O(num_slots).
- * Protected by 'lock'.
- */
- struct hlist_head *slot_hashtable;
- unsigned int log_slot_ht_size;
-
- /* Per-keyslot data */
- struct blk_ksm_keyslot *slots;
-};
-
-int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots);
-
-int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm,
- unsigned int num_slots);
-
-blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key,
- struct blk_ksm_keyslot **slot_ptr);
-
-unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot);
-
-void blk_ksm_put_slot(struct blk_ksm_keyslot *slot);
-
-bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_config *cfg);
-
-int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key);
-
-void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm);
-
-void blk_ksm_destroy(struct blk_keyslot_manager *ksm);
-
-void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent,
- const struct blk_keyslot_manager *child);
-
-void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm);
-
-bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset,
- struct blk_keyslot_manager *ksm_subset);
-
-void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm,
- struct blk_keyslot_manager *reference_ksm);
-
-#endif /* __LINUX_KEYSLOT_MANAGER_H */
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 161e8164abcf..a38a5bca1ba5 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -52,7 +52,7 @@ struct page *ksm_might_need_to_copy(struct page *page,
struct vm_area_struct *vma, unsigned long address);
void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
-void ksm_migrate_page(struct page *newpage, struct page *oldpage);
+void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
#else /* !CONFIG_KSM */
@@ -83,7 +83,7 @@ static inline void rmap_walk_ksm(struct page *page,
{
}
-static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old)
{
}
#endif /* CONFIG_MMU */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c0c64f03e107..236ec689056a 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -676,6 +676,18 @@ struct ata_ering {
struct ata_ering_entry ring[ATA_ERING_SIZE];
};
+struct ata_cpr {
+ u8 num;
+ u8 num_storage_elements;
+ u64 start_lba;
+ u64 num_lbas;
+};
+
+struct ata_cpr_log {
+ u8 nr_cpr;
+ struct ata_cpr cpr[];
+};
+
struct ata_device {
struct ata_link *link;
unsigned int devno; /* 0 or 1 */
@@ -735,6 +747,9 @@ struct ata_device {
u32 zac_zones_optimal_nonseq;
u32 zac_zones_max_open;
+ /* Concurrent positioning ranges */
+ struct ata_cpr_log *cpr_log;
+
/* error history */
int spdn_cnt;
/* ering is CLEAR_END, read comment above CLEAR_END */
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 9fe165beb0f9..467b94257105 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -481,23 +481,6 @@ do { \
#endif /* CONFIG_LOCK_STAT */
-#ifdef CONFIG_LOCKDEP
-
-/*
- * On lockdep we dont want the hand-coded irq-enable of
- * _raw_*_lock_flags() code, because lockdep assumes
- * that interrupts are not re-enabled during lock-acquire:
- */
-#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \
- LOCK_CONTENDED((_lock), (try), (lock))
-
-#else /* CONFIG_LOCKDEP */
-
-#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \
- lockfl((_lock), (flags))
-
-#endif /* CONFIG_LOCKDEP */
-
#ifdef CONFIG_PROVE_LOCKING
extern void print_irqtrace_events(struct task_struct *curr);
#else
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index 3e726ace5c62..d22430840b53 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -21,7 +21,7 @@ enum lockdep_wait_type {
LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */
#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
- LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
+ LD_WAIT_CONFIG, /* preemptible in PREEMPT_RT, spinlock_t etc.. */
#else
LD_WAIT_CONFIG = LD_WAIT_SPIN,
#endif
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
index 5c4a18a91f89..ae4526389261 100644
--- a/include/linux/mem_encrypt.h
+++ b/include/linux/mem_encrypt.h
@@ -16,10 +16,6 @@
#include <asm/mem_encrypt.h>
-#else /* !CONFIG_ARCH_HAS_MEM_ENCRYPT */
-
-static inline bool mem_encrypt_active(void) { return false; }
-
#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */
#ifdef CONFIG_AMD_MEM_ENCRYPT
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3096c9a0ee01..e34bf0cbdf55 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -369,7 +369,7 @@ enum page_memcg_data_flags {
#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
-static inline bool PageMemcgKmem(struct page *page);
+static inline bool folio_memcg_kmem(struct folio *folio);
/*
* After the initialization objcg->memcg is always pointing at
@@ -384,89 +384,95 @@ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
}
/*
- * __page_memcg - get the memory cgroup associated with a non-kmem page
- * @page: a pointer to the page struct
+ * __folio_memcg - Get the memory cgroup associated with a non-kmem folio
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the memory cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
* proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages or
- * kmem pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios or
+ * kmem folios.
*/
-static inline struct mem_cgroup *__page_memcg(struct page *page)
+static inline struct mem_cgroup *__folio_memcg(struct folio *folio)
{
- unsigned long memcg_data = page->memcg_data;
+ unsigned long memcg_data = folio->memcg_data;
- VM_BUG_ON_PAGE(PageSlab(page), page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
+ VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
+ VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
+ VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio);
return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
- * __page_objcg - get the object cgroup associated with a kmem page
- * @page: a pointer to the page struct
+ * __folio_objcg - get the object cgroup associated with a kmem folio.
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the object cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the object cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
* proper object cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages or
- * LRU pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios or
+ * LRU folios.
*/
-static inline struct obj_cgroup *__page_objcg(struct page *page)
+static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
{
- unsigned long memcg_data = page->memcg_data;
+ unsigned long memcg_data = folio->memcg_data;
- VM_BUG_ON_PAGE(PageSlab(page), page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
- VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page);
+ VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
+ VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
+ VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio);
return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
- * page_memcg - get the memory cgroup associated with a page
- * @page: a pointer to the page struct
+ * folio_memcg - Get the memory cgroup associated with a folio.
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the memory cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
* proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios.
*
- * For a non-kmem page any of the following ensures page and memcg binding
+ * For a non-kmem folio any of the following ensures folio and memcg binding
* stability:
*
- * - the page lock
+ * - the folio lock
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
*
- * For a kmem page a caller should hold an rcu read lock to protect memcg
- * associated with a kmem page from being released.
+ * For a kmem folio a caller should hold an rcu read lock to protect memcg
+ * associated with a kmem folio from being released.
*/
+static inline struct mem_cgroup *folio_memcg(struct folio *folio)
+{
+ if (folio_memcg_kmem(folio))
+ return obj_cgroup_memcg(__folio_objcg(folio));
+ return __folio_memcg(folio);
+}
+
static inline struct mem_cgroup *page_memcg(struct page *page)
{
- if (PageMemcgKmem(page))
- return obj_cgroup_memcg(__page_objcg(page));
- else
- return __page_memcg(page);
+ return folio_memcg(page_folio(page));
}
-/*
- * page_memcg_rcu - locklessly get the memory cgroup associated with a page
- * @page: a pointer to the page struct
+/**
+ * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio.
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * This function assumes that the folio is known to have a
* proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios.
+ *
+ * Return: A pointer to the memory cgroup associated with the folio,
+ * or NULL.
*/
-static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
{
- unsigned long memcg_data = READ_ONCE(page->memcg_data);
+ unsigned long memcg_data = READ_ONCE(folio->memcg_data);
- VM_BUG_ON_PAGE(PageSlab(page), page);
+ VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
WARN_ON_ONCE(!rcu_read_lock_held());
if (memcg_data & MEMCG_DATA_KMEM) {
@@ -523,17 +529,18 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
#ifdef CONFIG_MEMCG_KMEM
/*
- * PageMemcgKmem - check if the page has MemcgKmem flag set
- * @page: a pointer to the page struct
+ * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set.
+ * @folio: Pointer to the folio.
*
- * Checks if the page has MemcgKmem flag set. The caller must ensure that
- * the page has an associated memory cgroup. It's not safe to call this function
- * against some types of pages, e.g. slab pages.
+ * Checks if the folio has MemcgKmem flag set. The caller must ensure
+ * that the folio has an associated memory cgroup. It's not safe to call
+ * this function against some types of folios, e.g. slab folios.
*/
-static inline bool PageMemcgKmem(struct page *page)
+static inline bool folio_memcg_kmem(struct folio *folio)
{
- VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page);
- return page->memcg_data & MEMCG_DATA_KMEM;
+ VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page);
+ VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio);
+ return folio->memcg_data & MEMCG_DATA_KMEM;
}
/*
@@ -577,7 +584,7 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
}
#else
-static inline bool PageMemcgKmem(struct page *page)
+static inline bool folio_memcg_kmem(struct folio *folio)
{
return false;
}
@@ -593,6 +600,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
}
#endif
+static inline bool PageMemcgKmem(struct page *page)
+{
+ return folio_memcg_kmem(page_folio(page));
+}
+
static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
{
return (memcg == root_mem_cgroup);
@@ -684,26 +696,47 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
page_counter_read(&memcg->memory);
}
-int __mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask);
-static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
+int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
+
+/**
+ * mem_cgroup_charge - Charge a newly allocated folio to a cgroup.
+ * @folio: Folio to charge.
+ * @mm: mm context of the allocating task.
+ * @gfp: Reclaim mode.
+ *
+ * Try to charge @folio to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp if necessary. If @mm is NULL, try to
+ * charge to the active memcg.
+ *
+ * Do not use this for folios allocated for swapin.
+ *
+ * Return: 0 on success. Otherwise, an error code is returned.
+ */
+static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
+ gfp_t gfp)
{
if (mem_cgroup_disabled())
return 0;
- return __mem_cgroup_charge(page, mm, gfp_mask);
+ return __mem_cgroup_charge(folio, mm, gfp);
}
int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
gfp_t gfp, swp_entry_t entry);
void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
-void __mem_cgroup_uncharge(struct page *page);
-static inline void mem_cgroup_uncharge(struct page *page)
+void __mem_cgroup_uncharge(struct folio *folio);
+
+/**
+ * mem_cgroup_uncharge - Uncharge a folio.
+ * @folio: Folio to uncharge.
+ *
+ * Uncharge a folio previously charged with mem_cgroup_charge().
+ */
+static inline void mem_cgroup_uncharge(struct folio *folio)
{
if (mem_cgroup_disabled())
return;
- __mem_cgroup_uncharge(page);
+ __mem_cgroup_uncharge(folio);
}
void __mem_cgroup_uncharge_list(struct list_head *page_list);
@@ -714,7 +747,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
__mem_cgroup_uncharge_list(page_list);
}
-void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
+void mem_cgroup_migrate(struct folio *old, struct folio *new);
/**
* mem_cgroup_lruvec - get the lru list vector for a memcg & node
@@ -753,33 +786,33 @@ out:
}
/**
- * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
- * @page: the page
+ * folio_lruvec - return lruvec for isolating/putting an LRU folio
+ * @folio: Pointer to the folio.
*
- * This function relies on page->mem_cgroup being stable.
+ * This function relies on folio->mem_cgroup being stable.
*/
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec(struct folio *folio)
{
- pg_data_t *pgdat = page_pgdat(page);
- struct mem_cgroup *memcg = page_memcg(page);
+ struct mem_cgroup *memcg = folio_memcg(folio);
- VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page);
- return mem_cgroup_lruvec(memcg, pgdat);
+ VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled(), folio);
+ return mem_cgroup_lruvec(memcg, folio_pgdat(folio));
}
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
-struct lruvec *lock_page_lruvec(struct page *page);
-struct lruvec *lock_page_lruvec_irq(struct page *page);
-struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+struct lruvec *folio_lruvec_lock(struct folio *folio);
+struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
+struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
unsigned long *flags);
#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page);
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
#else
-static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+static inline
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
{
}
#endif
@@ -947,6 +980,8 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
extern bool cgroup_memory_noswap;
#endif
+void folio_memcg_lock(struct folio *folio);
+void folio_memcg_unlock(struct folio *folio);
void lock_page_memcg(struct page *page);
void unlock_page_memcg(struct page *page);
@@ -1115,12 +1150,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
#define MEM_CGROUP_ID_SHIFT 0
#define MEM_CGROUP_ID_MAX 0
+static inline struct mem_cgroup *folio_memcg(struct folio *folio)
+{
+ return NULL;
+}
+
static inline struct mem_cgroup *page_memcg(struct page *page)
{
return NULL;
}
-static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
{
WARN_ON_ONCE(!rcu_read_lock_held());
return NULL;
@@ -1131,6 +1171,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
return NULL;
}
+static inline bool folio_memcg_kmem(struct folio *folio)
+{
+ return false;
+}
+
static inline bool PageMemcgKmem(struct page *page)
{
return false;
@@ -1179,8 +1224,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
return false;
}
-static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
+static inline int mem_cgroup_charge(struct folio *folio,
+ struct mm_struct *mm, gfp_t gfp)
{
return 0;
}
@@ -1195,7 +1240,7 @@ static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry)
{
}
-static inline void mem_cgroup_uncharge(struct page *page)
+static inline void mem_cgroup_uncharge(struct folio *folio)
{
}
@@ -1203,7 +1248,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}
-static inline void mem_cgroup_migrate(struct page *old, struct page *new)
+static inline void mem_cgroup_migrate(struct folio *old, struct folio *new)
{
}
@@ -1213,14 +1258,14 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
return &pgdat->__lruvec;
}
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec(struct folio *folio)
{
- pg_data_t *pgdat = page_pgdat(page);
-
+ struct pglist_data *pgdat = folio_pgdat(folio);
return &pgdat->__lruvec;
}
-static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+static inline
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
{
}
@@ -1250,26 +1295,26 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
-static inline struct lruvec *lock_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec_lock(struct folio *folio)
{
- struct pglist_data *pgdat = page_pgdat(page);
+ struct pglist_data *pgdat = folio_pgdat(folio);
spin_lock(&pgdat->__lruvec.lru_lock);
return &pgdat->__lruvec;
}
-static inline struct lruvec *lock_page_lruvec_irq(struct page *page)
+static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
{
- struct pglist_data *pgdat = page_pgdat(page);
+ struct pglist_data *pgdat = folio_pgdat(folio);
spin_lock_irq(&pgdat->__lruvec.lru_lock);
return &pgdat->__lruvec;
}
-static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
unsigned long *flagsp)
{
- struct pglist_data *pgdat = page_pgdat(page);
+ struct pglist_data *pgdat = folio_pgdat(folio);
spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
return &pgdat->__lruvec;
@@ -1356,6 +1401,14 @@ static inline void unlock_page_memcg(struct page *page)
{
}
+static inline void folio_memcg_lock(struct folio *folio)
+{
+}
+
+static inline void folio_memcg_unlock(struct folio *folio)
+{
+}
+
static inline void mem_cgroup_handle_over_high(void)
{
}
@@ -1517,38 +1570,39 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
}
/* Test requires a stable page->memcg binding, see page_memcg() */
-static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec)
+static inline bool folio_matches_lruvec(struct folio *folio,
+ struct lruvec *lruvec)
{
- return lruvec_pgdat(lruvec) == page_pgdat(page) &&
- lruvec_memcg(lruvec) == page_memcg(page);
+ return lruvec_pgdat(lruvec) == folio_pgdat(folio) &&
+ lruvec_memcg(lruvec) == folio_memcg(folio);
}
/* Don't lock again iff page's lruvec locked */
-static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
+static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio,
struct lruvec *locked_lruvec)
{
if (locked_lruvec) {
- if (page_matches_lruvec(page, locked_lruvec))
+ if (folio_matches_lruvec(folio, locked_lruvec))
return locked_lruvec;
unlock_page_lruvec_irq(locked_lruvec);
}
- return lock_page_lruvec_irq(page);
+ return folio_lruvec_lock_irq(folio);
}
/* Don't lock again iff page's lruvec locked */
-static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
+static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio,
struct lruvec *locked_lruvec, unsigned long *flags)
{
if (locked_lruvec) {
- if (page_matches_lruvec(page, locked_lruvec))
+ if (folio_matches_lruvec(folio, locked_lruvec))
return locked_lruvec;
unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
}
- return lock_page_lruvec_irqsave(page, flags);
+ return folio_lruvec_lock_irqsave(folio, flags);
}
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -1558,17 +1612,17 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
unsigned long *pheadroom, unsigned long *pdirty,
unsigned long *pwriteback);
-void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
+void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
struct bdi_writeback *wb);
-static inline void mem_cgroup_track_foreign_dirty(struct page *page,
+static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
if (mem_cgroup_disabled())
return;
- if (unlikely(&page_memcg(page)->css != wb->memcg_css))
- mem_cgroup_track_foreign_dirty_slowpath(page, wb);
+ if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
+ mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
}
void mem_cgroup_flush_foreign(struct bdi_writeback *wb);
@@ -1588,7 +1642,7 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
{
}
-static inline void mem_cgroup_track_foreign_dirty(struct page *page,
+static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 7efc0a7c14c9..182c606adb06 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -160,7 +160,10 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
#define register_hotmemory_notifier(nb) register_memory_notifier(nb)
#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb)
#else
-#define hotplug_memory_notifier(fn, pri) ({ 0; })
+static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
+{
+ return 0;
+}
/* These aren't inline functions due to a GCC bug. */
#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; })
#define unregister_hotmemory_notifier(nb) ({ (void)(nb); })
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index c8077e936691..0d2aeb9b0f66 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -57,6 +57,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page);
extern int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page, int extra_count);
+void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
+void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
+int folio_migrate_mapping(struct address_space *mapping,
+ struct folio *newfolio, struct folio *folio, int extra_count);
#else
static inline void putback_movable_pages(struct list_head *l) {}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e23417424373..f17d2101af7a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1138,7 +1138,6 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
-bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f3638d09ba77..993204a6c1a1 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -9475,16 +9475,22 @@ struct mlx5_ifc_pcmr_reg_bits {
u8 reserved_at_0[0x8];
u8 local_port[0x8];
u8 reserved_at_10[0x10];
+
u8 entropy_force_cap[0x1];
u8 entropy_calc_cap[0x1];
u8 entropy_gre_calc_cap[0x1];
- u8 reserved_at_23[0x1b];
+ u8 reserved_at_23[0xf];
+ u8 rx_ts_over_crc_cap[0x1];
+ u8 reserved_at_33[0xb];
u8 fcs_cap[0x1];
u8 reserved_at_3f[0x1];
+
u8 entropy_force[0x1];
u8 entropy_calc[0x1];
u8 entropy_gre_calc[0x1];
- u8 reserved_at_43[0x1b];
+ u8 reserved_at_43[0xf];
+ u8 rx_ts_over_crc[0x1];
+ u8 reserved_at_53[0xb];
u8 fcs_chk[0x1];
u8 reserved_at_5f[0x1];
};
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f..40ff114aaf9e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -36,10 +36,7 @@
struct mempolicy;
struct anon_vma;
struct anon_vma_chain;
-struct file_ra_state;
struct user_struct;
-struct writeback_control;
-struct bdi_writeback;
struct pt_regs;
extern int sysctl_page_lock_unfairness;
@@ -216,13 +213,6 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
-/*
- * Any attempt to mark this function as static leads to build failure
- * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked()
- * is referred to by BPF code. This must be visible for error injection.
- */
-int __add_to_page_cache_locked(struct page *page, struct address_space *mapping,
- pgoff_t index, gfp_t gfp, void **shadowp);
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
@@ -748,13 +738,18 @@ static inline int put_page_testzero(struct page *page)
return page_ref_dec_and_test(page);
}
+static inline int folio_put_testzero(struct folio *folio)
+{
+ return put_page_testzero(&folio->page);
+}
+
/*
* Try to grab a ref unless the page has a refcount of zero, return false if
* that is the case.
* This can be called when MMU is off so it must not access
* any of the virtual mappings.
*/
-static inline int get_page_unless_zero(struct page *page)
+static inline bool get_page_unless_zero(struct page *page)
{
return page_ref_add_unless(page, 1, 0);
}
@@ -907,7 +902,7 @@ void __put_page(struct page *page);
void put_pages_list(struct list_head *pages);
void split_page(struct page *page, unsigned int order);
-void copy_huge_page(struct page *dst, struct page *src);
+void folio_copy(struct folio *dst, struct folio *src);
/*
* Compound pages have a destructor function. Provide a
@@ -950,6 +945,20 @@ static inline unsigned int compound_order(struct page *page)
return page[1].compound_order;
}
+/**
+ * folio_order - The allocation order of a folio.
+ * @folio: The folio.
+ *
+ * A folio is composed of 2^order pages. See get_order() for the definition
+ * of order.
+ *
+ * Return: The order of the folio.
+ */
+static inline unsigned int folio_order(struct folio *folio)
+{
+ return compound_order(&folio->page);
+}
+
static inline bool hpage_pincount_available(struct page *page)
{
/*
@@ -1131,6 +1140,11 @@ static inline enum zone_type page_zonenum(const struct page *page)
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
+static inline enum zone_type folio_zonenum(const struct folio *folio)
+{
+ return page_zonenum(&folio->page);
+}
+
#ifdef CONFIG_ZONE_DEVICE
static inline bool is_zone_device_page(const struct page *page)
{
@@ -1200,18 +1214,26 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
}
/* 127: arbitrary random number, small enough to assemble well */
-#define page_ref_zero_or_close_to_overflow(page) \
- ((unsigned int) page_ref_count(page) + 127u <= 127u)
+#define folio_ref_zero_or_close_to_overflow(folio) \
+ ((unsigned int) folio_ref_count(folio) + 127u <= 127u)
+
+/**
+ * folio_get - Increment the reference count on a folio.
+ * @folio: The folio.
+ *
+ * Context: May be called in any context, as long as you know that
+ * you have a refcount on the folio. If you do not already have one,
+ * folio_try_get() may be the right interface for you to use.
+ */
+static inline void folio_get(struct folio *folio)
+{
+ VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio);
+ folio_ref_inc(folio);
+}
static inline void get_page(struct page *page)
{
- page = compound_head(page);
- /*
- * Getting a normal page or the head of a compound page
- * requires to already have an elevated page->_refcount.
- */
- VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
- page_ref_inc(page);
+ folio_get(page_folio(page));
}
bool __must_check try_grab_page(struct page *page, unsigned int flags);
@@ -1228,9 +1250,28 @@ static inline __must_check bool try_get_page(struct page *page)
return true;
}
+/**
+ * folio_put - Decrement the reference count on a folio.
+ * @folio: The folio.
+ *
+ * If the folio's reference count reaches zero, the memory will be
+ * released back to the page allocator and may be used by another
+ * allocation immediately. Do not access the memory or the struct folio
+ * after calling folio_put() unless you can be sure that it wasn't the
+ * last reference.
+ *
+ * Context: May be called in process or interrupt context, but not in NMI
+ * context. May be called while holding a spinlock.
+ */
+static inline void folio_put(struct folio *folio)
+{
+ if (folio_put_testzero(folio))
+ __put_page(&folio->page);
+}
+
static inline void put_page(struct page *page)
{
- page = compound_head(page);
+ struct folio *folio = page_folio(page);
/*
* For devmap managed pages we need to catch refcount transition from
@@ -1238,13 +1279,12 @@ static inline void put_page(struct page *page)
* need to inform the device driver through callback. See
* include/linux/memremap.h and HMM for details.
*/
- if (page_is_devmap_managed(page)) {
- put_devmap_managed_page(page);
+ if (page_is_devmap_managed(&folio->page)) {
+ put_devmap_managed_page(&folio->page);
return;
}
- if (put_page_testzero(page))
- __put_page(page);
+ folio_put(folio);
}
/*
@@ -1379,6 +1419,11 @@ static inline int page_to_nid(const struct page *page)
}
#endif
+static inline int folio_nid(const struct folio *folio)
+{
+ return page_to_nid(&folio->page);
+}
+
#ifdef CONFIG_NUMA_BALANCING
static inline int cpu_pid_to_cpupid(int cpu, int pid)
{
@@ -1546,6 +1591,16 @@ static inline pg_data_t *page_pgdat(const struct page *page)
return NODE_DATA(page_to_nid(page));
}
+static inline struct zone *folio_zone(const struct folio *folio)
+{
+ return page_zone(&folio->page);
+}
+
+static inline pg_data_t *folio_pgdat(const struct folio *folio)
+{
+ return page_pgdat(&folio->page);
+}
+
#ifdef SECTION_IN_PAGE_FLAGS
static inline void set_page_section(struct page *page, unsigned long section)
{
@@ -1559,6 +1614,20 @@ static inline unsigned long page_to_section(const struct page *page)
}
#endif
+/**
+ * folio_pfn - Return the Page Frame Number of a folio.
+ * @folio: The folio.
+ *
+ * A folio may contain multiple pages. The pages have consecutive
+ * Page Frame Numbers.
+ *
+ * Return: The Page Frame Number of the first page in the folio.
+ */
+static inline unsigned long folio_pfn(struct folio *folio)
+{
+ return page_to_pfn(&folio->page);
+}
+
/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
#ifdef CONFIG_MIGRATION
static inline bool is_pinnable_page(struct page *page)
@@ -1595,6 +1664,89 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
#endif
}
+/**
+ * folio_nr_pages - The number of pages in the folio.
+ * @folio: The folio.
+ *
+ * Return: A positive power of two.
+ */
+static inline long folio_nr_pages(struct folio *folio)
+{
+ return compound_nr(&folio->page);
+}
+
+/**
+ * folio_next - Move to the next physical folio.
+ * @folio: The folio we're currently operating on.
+ *
+ * If you have physically contiguous memory which may span more than
+ * one folio (eg a &struct bio_vec), use this function to move from one
+ * folio to the next. Do not use it if the memory is only virtually
+ * contiguous as the folios are almost certainly not adjacent to each
+ * other. This is the folio equivalent to writing ``page++``.
+ *
+ * Context: We assume that the folios are refcounted and/or locked at a
+ * higher level and do not adjust the reference counts.
+ * Return: The next struct folio.
+ */
+static inline struct folio *folio_next(struct folio *folio)
+{
+ return (struct folio *)folio_page(folio, folio_nr_pages(folio));
+}
+
+/**
+ * folio_shift - The size of the memory described by this folio.
+ * @folio: The folio.
+ *
+ * A folio represents a number of bytes which is a power-of-two in size.
+ * This function tells you which power-of-two the folio is. See also
+ * folio_size() and folio_order().
+ *
+ * Context: The caller should have a reference on the folio to prevent
+ * it from being split. It is not necessary for the folio to be locked.
+ * Return: The base-2 logarithm of the size of this folio.
+ */
+static inline unsigned int folio_shift(struct folio *folio)
+{
+ return PAGE_SHIFT + folio_order(folio);
+}
+
+/**
+ * folio_size - The number of bytes in a folio.
+ * @folio: The folio.
+ *
+ * Context: The caller should have a reference on the folio to prevent
+ * it from being split. It is not necessary for the folio to be locked.
+ * Return: The number of bytes in this folio.
+ */
+static inline size_t folio_size(struct folio *folio)
+{
+ return PAGE_SIZE << folio_order(folio);
+}
+
+#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+static inline int arch_make_page_accessible(struct page *page)
+{
+ return 0;
+}
+#endif
+
+#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
+static inline int arch_make_folio_accessible(struct folio *folio)
+{
+ int ret;
+ long i, nr = folio_nr_pages(folio);
+
+ for (i = 0; i < nr; i++) {
+ ret = arch_make_page_accessible(folio_page(folio, i));
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+#endif
+
/*
* Some inline functions in vmstat.h depend on page_zone()
*/
@@ -1635,19 +1787,6 @@ void page_address_init(void);
extern void *page_rmapping(struct page *page);
extern struct anon_vma *page_anon_vma(struct page *page);
-extern struct address_space *page_mapping(struct page *page);
-
-extern struct address_space *__page_file_mapping(struct page *);
-
-static inline
-struct address_space *page_file_mapping(struct page *page)
-{
- if (unlikely(PageSwapCache(page)))
- return __page_file_mapping(page);
-
- return page->mapping;
-}
-
extern pgoff_t __page_file_index(struct page *page);
/*
@@ -1662,7 +1801,7 @@ static inline pgoff_t page_index(struct page *page)
}
bool page_mapped(struct page *page);
-struct address_space *page_mapping(struct page *page);
+bool folio_mapped(struct folio *folio);
/*
* Return true only if the page has been allocated with
@@ -1700,6 +1839,7 @@ extern void pagefault_out_of_memory(void);
#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1))
+#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1))
/*
* Flags passed to show_mem() and show_free_areas() to suppress output in
@@ -1854,20 +1994,9 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
extern void do_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
-int redirty_page_for_writepage(struct writeback_control *wbc,
- struct page *page);
-void account_page_cleaned(struct page *page, struct address_space *mapping,
- struct bdi_writeback *wb);
-int set_page_dirty(struct page *page);
+bool folio_mark_dirty(struct folio *folio);
+bool set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
-void __cancel_dirty_page(struct page *page);
-static inline void cancel_dirty_page(struct page *page)
-{
- /* Avoid atomic ops, locking, etc. when not actually needed. */
- if (PageDirty(page))
- __cancel_dirty_page(page);
-}
-int clear_page_dirty_for_io(struct page *page);
int get_cmdline(struct task_struct *task, char *buffer, int buflen);
@@ -2659,10 +2788,6 @@ extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
-/* mm/page-writeback.c */
-int __must_check write_one_page(struct page *page);
-void task_dirty_inc(struct task_struct *tsk);
-
extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 355ea1ee32bd..e2ec68b0515c 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -6,27 +6,33 @@
#include <linux/swap.h>
/**
- * page_is_file_lru - should the page be on a file LRU or anon LRU?
- * @page: the page to test
- *
- * Returns 1 if @page is a regular filesystem backed page cache page or a lazily
- * freed anonymous page (e.g. via MADV_FREE). Returns 0 if @page is a normal
- * anonymous page, a tmpfs page or otherwise ram or swap backed page. Used by
- * functions that manipulate the LRU lists, to sort a page onto the right LRU
- * list.
+ * folio_is_file_lru - Should the folio be on a file LRU or anon LRU?
+ * @folio: The folio to test.
*
* We would like to get this info without a page flag, but the state
- * needs to survive until the page is last deleted from the LRU, which
+ * needs to survive until the folio is last deleted from the LRU, which
* could be as far down as __page_cache_release.
+ *
+ * Return: An integer (not a boolean!) used to sort a folio onto the
+ * right LRU list and to account folios correctly.
+ * 1 if @folio is a regular filesystem backed page cache folio
+ * or a lazily freed anonymous folio (e.g. via MADV_FREE).
+ * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise
+ * ram or swap backed folio.
*/
+static inline int folio_is_file_lru(struct folio *folio)
+{
+ return !folio_test_swapbacked(folio);
+}
+
static inline int page_is_file_lru(struct page *page)
{
- return !PageSwapBacked(page);
+ return folio_is_file_lru(page_folio(page));
}
static __always_inline void update_lru_size(struct lruvec *lruvec,
enum lru_list lru, enum zone_type zid,
- int nr_pages)
+ long nr_pages)
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -39,69 +45,94 @@ static __always_inline void update_lru_size(struct lruvec *lruvec,
}
/**
- * __clear_page_lru_flags - clear page lru flags before releasing a page
- * @page: the page that was on lru and now has a zero reference
+ * __folio_clear_lru_flags - Clear page lru flags before releasing a page.
+ * @folio: The folio that was on lru and now has a zero reference.
*/
-static __always_inline void __clear_page_lru_flags(struct page *page)
+static __always_inline void __folio_clear_lru_flags(struct folio *folio)
{
- VM_BUG_ON_PAGE(!PageLRU(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_lru(folio), folio);
- __ClearPageLRU(page);
+ __folio_clear_lru(folio);
/* this shouldn't happen, so leave the flags to bad_page() */
- if (PageActive(page) && PageUnevictable(page))
+ if (folio_test_active(folio) && folio_test_unevictable(folio))
return;
- __ClearPageActive(page);
- __ClearPageUnevictable(page);
+ __folio_clear_active(folio);
+ __folio_clear_unevictable(folio);
+}
+
+static __always_inline void __clear_page_lru_flags(struct page *page)
+{
+ __folio_clear_lru_flags(page_folio(page));
}
/**
- * page_lru - which LRU list should a page be on?
- * @page: the page to test
+ * folio_lru_list - Which LRU list should a folio be on?
+ * @folio: The folio to test.
*
- * Returns the LRU list a page should be on, as an index
+ * Return: The LRU list a folio should be on, as an index
* into the array of LRU lists.
*/
-static __always_inline enum lru_list page_lru(struct page *page)
+static __always_inline enum lru_list folio_lru_list(struct folio *folio)
{
enum lru_list lru;
- VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
+ VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio);
- if (PageUnevictable(page))
+ if (folio_test_unevictable(folio))
return LRU_UNEVICTABLE;
- lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
- if (PageActive(page))
+ lru = folio_is_file_lru(folio) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
+ if (folio_test_active(folio))
lru += LRU_ACTIVE;
return lru;
}
+static __always_inline
+void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
+{
+ enum lru_list lru = folio_lru_list(folio);
+
+ update_lru_size(lruvec, lru, folio_zonenum(folio),
+ folio_nr_pages(folio));
+ list_add(&folio->lru, &lruvec->lists[lru]);
+}
+
static __always_inline void add_page_to_lru_list(struct page *page,
struct lruvec *lruvec)
{
- enum lru_list lru = page_lru(page);
+ lruvec_add_folio(lruvec, page_folio(page));
+}
- update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
- list_add(&page->lru, &lruvec->lists[lru]);
+static __always_inline
+void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
+{
+ enum lru_list lru = folio_lru_list(folio);
+
+ update_lru_size(lruvec, lru, folio_zonenum(folio),
+ folio_nr_pages(folio));
+ list_add_tail(&folio->lru, &lruvec->lists[lru]);
}
static __always_inline void add_page_to_lru_list_tail(struct page *page,
struct lruvec *lruvec)
{
- enum lru_list lru = page_lru(page);
+ lruvec_add_folio_tail(lruvec, page_folio(page));
+}
- update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
- list_add_tail(&page->lru, &lruvec->lists[lru]);
+static __always_inline
+void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
+{
+ list_del(&folio->lru);
+ update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio),
+ -folio_nr_pages(folio));
}
static __always_inline void del_page_from_lru_list(struct page *page,
struct lruvec *lruvec)
{
- list_del(&page->lru);
- update_lru_size(lruvec, page_lru(page), page_zonenum(page),
- -thp_nr_pages(page));
+ lruvec_del_folio(lruvec, page_folio(page));
}
#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f8ee09c711f..f92686cf2cdb 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -12,6 +12,7 @@
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <linux/uprobes.h>
+#include <linux/rcupdate.h>
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
#include <linux/seqlock.h>
@@ -239,6 +240,72 @@ struct page {
#endif
} _struct_page_alignment;
+/**
+ * struct folio - Represents a contiguous set of bytes.
+ * @flags: Identical to the page flags.
+ * @lru: Least Recently Used list; tracks how recently this folio was used.
+ * @mapping: The file this page belongs to, or refers to the anon_vma for
+ * anonymous memory.
+ * @index: Offset within the file, in units of pages. For anonymous memory,
+ * this is the index from the beginning of the mmap.
+ * @private: Filesystem per-folio data (see folio_attach_private()).
+ * Used for swp_entry_t if folio_test_swapcache().
+ * @_mapcount: Do not access this member directly. Use folio_mapcount() to
+ * find out how many times this folio is mapped by userspace.
+ * @_refcount: Do not access this member directly. Use folio_ref_count()
+ * to find how many references there are to this folio.
+ * @memcg_data: Memory Control Group data.
+ *
+ * A folio is a physically, virtually and logically contiguous set
+ * of bytes. It is a power-of-two in size, and it is aligned to that
+ * same power-of-two. It is at least as large as %PAGE_SIZE. If it is
+ * in the page cache, it is at a file offset which is a multiple of that
+ * power-of-two. It may be mapped into userspace at an address which is
+ * at an arbitrary page offset, but its kernel virtual address is aligned
+ * to its size.
+ */
+struct folio {
+ /* private: don't document the anon union */
+ union {
+ struct {
+ /* public: */
+ unsigned long flags;
+ struct list_head lru;
+ struct address_space *mapping;
+ pgoff_t index;
+ void *private;
+ atomic_t _mapcount;
+ atomic_t _refcount;
+#ifdef CONFIG_MEMCG
+ unsigned long memcg_data;
+#endif
+ /* private: the union with struct page is transitional */
+ };
+ struct page page;
+ };
+};
+
+static_assert(sizeof(struct page) == sizeof(struct folio));
+#define FOLIO_MATCH(pg, fl) \
+ static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl))
+FOLIO_MATCH(flags, flags);
+FOLIO_MATCH(lru, lru);
+FOLIO_MATCH(compound_head, lru);
+FOLIO_MATCH(index, index);
+FOLIO_MATCH(private, private);
+FOLIO_MATCH(_mapcount, _mapcount);
+FOLIO_MATCH(_refcount, _refcount);
+#ifdef CONFIG_MEMCG
+FOLIO_MATCH(memcg_data, memcg_data);
+#endif
+#undef FOLIO_MATCH
+
+static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
+{
+ struct page *tail = &folio->page + 1;
+ return &tail->compound_mapcount;
+}
+
static inline atomic_t *compound_mapcount_ptr(struct page *page)
{
return &page[1].compound_mapcount;
@@ -257,6 +324,12 @@ static inline atomic_t *compound_pincount_ptr(struct page *page)
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
+/*
+ * page_private can be used on tail pages. However, PagePrivate is only
+ * checked by the VM on the head page. So page_private on the tail pages
+ * should be used for data that's ancillary to the head page (eg attaching
+ * buffer heads to tail pages after attaching buffer heads to the head page)
+ */
#define page_private(page) ((page)->private)
static inline void set_page_private(struct page *page, unsigned long private)
@@ -264,6 +337,11 @@ static inline void set_page_private(struct page *page, unsigned long private)
page->private = private;
}
+static inline void *folio_get_private(struct folio *folio)
+{
+ return folio->private;
+}
+
struct page_frag_cache {
void * va;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
@@ -572,6 +650,9 @@ struct mm_struct {
bool tlb_flush_batched;
#endif
struct uprobes_state uprobes_state;
+#ifdef CONFIG_PREEMPT_RT
+ struct rcu_head delayed_drop;
+#endif
#ifdef CONFIG_HUGETLB_PAGE
atomic_long_t hugetlb_usage;
#endif
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0c0c9a0fdf57..52eae8c45b8d 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -15,7 +15,7 @@
#include <linux/mmc/card.h>
#include <linux/mmc/pm.h>
#include <linux/dma-direction.h>
-#include <linux/keyslot-manager.h>
+#include <linux/blk-crypto-profile.h>
struct mmc_ios {
unsigned int clock; /* clock rate */
@@ -492,7 +492,7 @@ struct mmc_host {
/* Inline encryption support */
#ifdef CONFIG_MMC_CRYPTO
- struct blk_keyslot_manager ksm;
+ struct blk_crypto_profile crypto_profile;
#endif
/* Host Software Queue support */
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 1935d4c72d10..d7285f8148a3 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -22,6 +22,13 @@ void dump_mm(const struct mm_struct *mm);
BUG(); \
} \
} while (0)
+#define VM_BUG_ON_FOLIO(cond, folio) \
+ do { \
+ if (unlikely(cond)) { \
+ dump_page(&folio->page, "VM_BUG_ON_FOLIO(" __stringify(cond)")");\
+ BUG(); \
+ } \
+ } while (0)
#define VM_BUG_ON_VMA(cond, vma) \
do { \
if (unlikely(cond)) { \
@@ -47,6 +54,17 @@ void dump_mm(const struct mm_struct *mm);
} \
unlikely(__ret_warn_once); \
})
+#define VM_WARN_ON_ONCE_FOLIO(cond, folio) ({ \
+ static bool __section(".data.once") __warned; \
+ int __ret_warn_once = !!(cond); \
+ \
+ if (unlikely(__ret_warn_once && !__warned)) { \
+ dump_page(&folio->page, "VM_WARN_ON_ONCE_FOLIO(" __stringify(cond)")");\
+ __warned = true; \
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn_once); \
+})
#define VM_WARN_ON(cond) (void)WARN_ON(cond)
#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
@@ -55,11 +73,13 @@ void dump_mm(const struct mm_struct *mm);
#else
#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
+#define VM_BUG_ON_FOLIO(cond, folio) VM_BUG_ON(cond)
#define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond)
#define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5d6a4158a9a6..12c4177f7703 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -22,6 +22,7 @@
* Overload PG_private_2 to give us PG_fscache - this is used to indicate that
* a page is currently backed by a local disk cache
*/
+#define folio_test_fscache(folio) folio_test_private_2(folio)
#define PageFsCache(page) PagePrivate2((page))
#define SetPageFsCache(page) SetPagePrivate2((page))
#define ClearPageFsCache(page) ClearPagePrivate2((page))
@@ -29,60 +30,80 @@
#define TestClearPageFsCache(page) TestClearPagePrivate2((page))
/**
- * set_page_fscache - Set PG_fscache on a page and take a ref
- * @page: The page.
+ * folio_start_fscache - Start an fscache write on a folio.
+ * @folio: The folio.
*
- * Set the PG_fscache (PG_private_2) flag on a page and take the reference
- * needed for the VM to handle its lifetime correctly. This sets the flag and
- * takes the reference unconditionally, so care must be taken not to set the
- * flag again if it's already set.
+ * Call this function before writing a folio to a local cache. Starting a
+ * second write before the first one finishes is not allowed.
*/
-static inline void set_page_fscache(struct page *page)
+static inline void folio_start_fscache(struct folio *folio)
{
- set_page_private_2(page);
+ VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);
+ folio_get(folio);
+ folio_set_private_2(folio);
}
/**
- * end_page_fscache - Clear PG_fscache and release any waiters
- * @page: The page
- *
- * Clear the PG_fscache (PG_private_2) bit on a page and wake up any sleepers
- * waiting for this. The page ref held for PG_private_2 being set is released.
+ * folio_end_fscache - End an fscache write on a folio.
+ * @folio: The folio.
*
- * This is, for example, used when a netfs page is being written to a local
- * disk cache, thereby allowing writes to the cache for the same page to be
- * serialised.
+ * Call this function after the folio has been written to the local cache.
+ * This will wake any sleepers waiting on this folio.
*/
-static inline void end_page_fscache(struct page *page)
+static inline void folio_end_fscache(struct folio *folio)
{
- end_page_private_2(page);
+ folio_end_private_2(folio);
}
/**
- * wait_on_page_fscache - Wait for PG_fscache to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_fscache - Wait for an fscache write on this folio to end.
+ * @folio: The folio.
*
- * Wait for PG_fscache (aka PG_private_2) to be cleared on a page.
+ * If this folio is currently being written to a local cache, wait for
+ * the write to finish. Another write may start after this one finishes,
+ * unless the caller holds the folio lock.
*/
-static inline void wait_on_page_fscache(struct page *page)
+static inline void folio_wait_fscache(struct folio *folio)
{
- wait_on_page_private_2(page);
+ folio_wait_private_2(folio);
}
/**
- * wait_on_page_fscache_killable - Wait for PG_fscache to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_fscache_killable - Wait for an fscache write on this folio to end.
+ * @folio: The folio.
*
- * Wait for PG_fscache (aka PG_private_2) to be cleared on a page or until a
- * fatal signal is received by the calling task.
+ * If this folio is currently being written to a local cache, wait
+ * for the write to finish or for a fatal signal to be received.
+ * Another write may start after this one finishes, unless the caller
+ * holds the folio lock.
*
* Return:
* - 0 if successful.
* - -EINTR if a fatal signal was encountered.
*/
+static inline int folio_wait_fscache_killable(struct folio *folio)
+{
+ return folio_wait_private_2_killable(folio);
+}
+
+static inline void set_page_fscache(struct page *page)
+{
+ folio_start_fscache(page_folio(page));
+}
+
+static inline void end_page_fscache(struct page *page)
+{
+ folio_end_private_2(page_folio(page));
+}
+
+static inline void wait_on_page_fscache(struct page *page)
+{
+ folio_wait_private_2(page_folio(page));
+}
+
static inline int wait_on_page_fscache_killable(struct page *page)
{
- return wait_on_page_private_2_killable(page);
+ return folio_wait_private_2_killable(page_folio(page));
}
enum netfs_read_source {
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 2a38f2b477a5..cb909edb76c4 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -7,6 +7,7 @@
#define _NVME_FC_DRIVER_H 1
#include <linux/scatterlist.h>
+#include <linux/blk-mq.h>
/*
@@ -497,6 +498,8 @@ struct nvme_fc_port_template {
int (*xmt_ls_rsp)(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *rport,
struct nvmefc_ls_rsp *ls_rsp);
+ void (*map_queues)(struct nvme_fc_local_port *localport,
+ struct blk_mq_queue_map *map);
u32 max_hw_queues;
u16 max_sgl_segments;
@@ -779,6 +782,10 @@ struct nvmet_fc_target_port {
* LS received.
* Entrypoint is Mandatory.
*
+ * @map_queues: This functions lets the driver expose the queue mapping
+ * to the block layer.
+ * Entrypoint is Optional.
+ *
* @fcp_op: Called to perform a data transfer or transmit a response.
* The nvmefc_tgt_fcp_req structure is the same LLDD-supplied
* exchange structure specified in the nvmet_fc_rcv_fcp_req() call
diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
index 3ec8e50efa16..4dd7e6fe92fb 100644
--- a/include/linux/nvme-rdma.h
+++ b/include/linux/nvme-rdma.h
@@ -6,6 +6,8 @@
#ifndef _LINUX_NVME_RDMA_H
#define _LINUX_NVME_RDMA_H
+#define NVME_RDMA_MAX_QUEUE_SIZE 128
+
enum nvme_rdma_cm_fmt {
NVME_RDMA_CM_FMT_1_0 = 0x0,
};
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b7c4c4130b65..855dd9b3e84b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -27,8 +27,20 @@
#define NVME_NSID_ALL 0xffffffff
enum nvme_subsys_type {
- NVME_NQN_DISC = 1, /* Discovery type target subsystem */
- NVME_NQN_NVME = 2, /* NVME type target subsystem */
+ /* Referral to another discovery type target subsystem */
+ NVME_NQN_DISC = 1,
+
+ /* NVME type target subsystem */
+ NVME_NQN_NVME = 2,
+
+ /* Current discovery type target subsystem */
+ NVME_NQN_CURR = 3,
+};
+
+enum nvme_ctrl_type {
+ NVME_CTRL_IO = 1, /* I/O controller */
+ NVME_CTRL_DISC = 2, /* Discovery controller */
+ NVME_CTRL_ADMIN = 3, /* Administrative controller */
};
/* Address Family codes for Discovery Log Page entry ADRFAM field */
@@ -244,7 +256,9 @@ struct nvme_id_ctrl {
__le32 rtd3e;
__le32 oaes;
__le32 ctratt;
- __u8 rsvd100[28];
+ __u8 rsvd100[11];
+ __u8 cntrltype;
+ __u8 fguid[16];
__le16 crdt1;
__le16 crdt2;
__le16 crdt3;
@@ -312,6 +326,7 @@ struct nvme_id_ctrl {
};
enum {
+ NVME_CTRL_CMIC_MULTI_PORT = 1 << 0,
NVME_CTRL_CMIC_MULTI_CTRL = 1 << 1,
NVME_CTRL_CMIC_ANA = 1 << 3,
NVME_CTRL_ONCS_COMPARE = 1 << 0,
@@ -1303,6 +1318,12 @@ struct nvmf_common_command {
#define MAX_DISC_LOGS 255
+/* Discovery log page entry flags (EFLAGS): */
+enum {
+ NVME_DISC_EFLAGS_EPCSD = (1 << 1),
+ NVME_DISC_EFLAGS_DUPRETINFO = (1 << 0),
+};
+
/* Discovery log page entry */
struct nvmf_disc_rsp_page_entry {
__u8 trtype;
@@ -1312,7 +1333,8 @@ struct nvmf_disc_rsp_page_entry {
__le16 portid;
__le16 cntlid;
__le16 asqsz;
- __u8 resv8[22];
+ __le16 eflags;
+ __u8 resv10[20];
char trsvcid[NVMF_TRSVCID_SIZE];
__u8 resv64[192];
char subnqn[NVMF_NQN_FIELD_LEN];
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index cf6a65b94d40..cf48983d3c86 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -65,7 +65,6 @@ extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
extern int early_init_dt_scan_chosen_stdout(void);
extern void early_init_fdt_scan_reserved_mem(void);
extern void early_init_fdt_reserve_self(void);
-extern void __init early_init_dt_scan_chosen_arch(unsigned long node);
extern void early_init_dt_add_memory_arch(u64 base, u64 size);
extern u64 dt_mem_next_cell(int s, const __be32 **cellp);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a558d67ee86f..981341a3c3c4 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -143,6 +143,8 @@ enum pageflags {
#endif
__NR_PAGEFLAGS,
+ PG_readahead = PG_reclaim,
+
/* Filesystems */
PG_checked = PG_owner_priv_1,
@@ -171,6 +173,15 @@ enum pageflags {
/* Compound pages. Stored in first tail page's flags */
PG_double_map = PG_workingset,
+#ifdef CONFIG_MEMORY_FAILURE
+ /*
+ * Compound pages. Stored in first tail page's flags.
+ * Indicates that at least one subpage is hwpoisoned in the
+ * THP.
+ */
+ PG_has_hwpoisoned = PG_mappedtodisk,
+#endif
+
/* non-lru isolated movable page */
PG_isolated = PG_reclaim,
@@ -193,6 +204,34 @@ static inline unsigned long _compound_head(const struct page *page)
#define compound_head(page) ((typeof(page))_compound_head(page))
+/**
+ * page_folio - Converts from page to folio.
+ * @p: The page.
+ *
+ * Every page is part of a folio. This function cannot be called on a
+ * NULL pointer.
+ *
+ * Context: No reference, nor lock is required on @page. If the caller
+ * does not hold a reference, this call may race with a folio split, so
+ * it should re-check the folio still contains this page after gaining
+ * a reference on the folio.
+ * Return: The folio which contains this page.
+ */
+#define page_folio(p) (_Generic((p), \
+ const struct page *: (const struct folio *)_compound_head(p), \
+ struct page *: (struct folio *)_compound_head(p)))
+
+/**
+ * folio_page - Return a page from a folio.
+ * @folio: The folio.
+ * @n: The page number to return.
+ *
+ * @n is relative to the start of the folio. This function does not
+ * check that the page number lies within @folio; the caller is presumed
+ * to have a reference to the page.
+ */
+#define folio_page(folio, n) nth_page(&(folio)->page, n)
+
static __always_inline int PageTail(struct page *page)
{
return READ_ONCE(page->compound_head) & 1;
@@ -217,6 +256,15 @@ static inline void page_init_poison(struct page *page, size_t size)
}
#endif
+static unsigned long *folio_flags(struct folio *folio, unsigned n)
+{
+ struct page *page = &folio->page;
+
+ VM_BUG_ON_PGFLAGS(PageTail(page), page);
+ VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page);
+ return &page[n].flags;
+}
+
/*
* Page flags policies wrt compound pages
*
@@ -261,36 +309,64 @@ static inline void page_init_poison(struct page *page, size_t size)
VM_BUG_ON_PGFLAGS(!PageHead(page), page); \
PF_POISONED_CHECK(&page[1]); })
+/* Which page is the flag stored in */
+#define FOLIO_PF_ANY 0
+#define FOLIO_PF_HEAD 0
+#define FOLIO_PF_ONLY_HEAD 0
+#define FOLIO_PF_NO_TAIL 0
+#define FOLIO_PF_NO_COMPOUND 0
+#define FOLIO_PF_SECOND 1
+
/*
* Macros to create function definitions for page flags
*/
#define TESTPAGEFLAG(uname, lname, policy) \
+static __always_inline bool folio_test_##lname(struct folio *folio) \
+{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline int Page##uname(struct page *page) \
- { return test_bit(PG_##lname, &policy(page, 0)->flags); }
+{ return test_bit(PG_##lname, &policy(page, 0)->flags); }
#define SETPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void folio_set_##lname(struct folio *folio) \
+{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void SetPage##uname(struct page *page) \
- { set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ set_bit(PG_##lname, &policy(page, 1)->flags); }
#define CLEARPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void folio_clear_##lname(struct folio *folio) \
+{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void ClearPage##uname(struct page *page) \
- { clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define __SETPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void __folio_set_##lname(struct folio *folio) \
+{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void __SetPage##uname(struct page *page) \
- { __set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ __set_bit(PG_##lname, &policy(page, 1)->flags); }
#define __CLEARPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void __folio_clear_##lname(struct folio *folio) \
+{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void __ClearPage##uname(struct page *page) \
- { __clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTSETFLAG(uname, lname, policy) \
+static __always_inline \
+bool folio_test_set_##lname(struct folio *folio) \
+{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline int TestSetPage##uname(struct page *page) \
- { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTCLEARFLAG(uname, lname, policy) \
+static __always_inline \
+bool folio_test_clear_##lname(struct folio *folio) \
+{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline int TestClearPage##uname(struct page *page) \
- { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define PAGEFLAG(uname, lname, policy) \
TESTPAGEFLAG(uname, lname, policy) \
@@ -306,29 +382,37 @@ static __always_inline int TestClearPage##uname(struct page *page) \
TESTSETFLAG(uname, lname, policy) \
TESTCLEARFLAG(uname, lname, policy)
-#define TESTPAGEFLAG_FALSE(uname) \
+#define TESTPAGEFLAG_FALSE(uname, lname) \
+static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \
static inline int Page##uname(const struct page *page) { return 0; }
-#define SETPAGEFLAG_NOOP(uname) \
+#define SETPAGEFLAG_NOOP(uname, lname) \
+static inline void folio_set_##lname(struct folio *folio) { } \
static inline void SetPage##uname(struct page *page) { }
-#define CLEARPAGEFLAG_NOOP(uname) \
+#define CLEARPAGEFLAG_NOOP(uname, lname) \
+static inline void folio_clear_##lname(struct folio *folio) { } \
static inline void ClearPage##uname(struct page *page) { }
-#define __CLEARPAGEFLAG_NOOP(uname) \
+#define __CLEARPAGEFLAG_NOOP(uname, lname) \
+static inline void __folio_clear_##lname(struct folio *folio) { } \
static inline void __ClearPage##uname(struct page *page) { }
-#define TESTSETFLAG_FALSE(uname) \
+#define TESTSETFLAG_FALSE(uname, lname) \
+static inline bool folio_test_set_##lname(struct folio *folio) \
+{ return 0; } \
static inline int TestSetPage##uname(struct page *page) { return 0; }
-#define TESTCLEARFLAG_FALSE(uname) \
+#define TESTCLEARFLAG_FALSE(uname, lname) \
+static inline bool folio_test_clear_##lname(struct folio *folio) \
+{ return 0; } \
static inline int TestClearPage##uname(struct page *page) { return 0; }
-#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \
- SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
+#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \
+ SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname)
-#define TESTSCFLAG_FALSE(uname) \
- TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
+#define TESTSCFLAG_FALSE(uname, lname) \
+ TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname)
__PAGEFLAG(Locked, locked, PF_NO_TAIL)
PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
@@ -384,8 +468,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL)
/* PG_readahead is only used for reads; PG_reclaim is only for writes */
PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL)
TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL)
-PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND)
- TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND)
+PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND)
+ TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND)
#ifdef CONFIG_HIGHMEM
/*
@@ -394,22 +478,25 @@ PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND)
*/
#define PageHighMem(__p) is_highmem_idx(page_zonenum(__p))
#else
-PAGEFLAG_FALSE(HighMem)
+PAGEFLAG_FALSE(HighMem, highmem)
#endif
#ifdef CONFIG_SWAP
-static __always_inline int PageSwapCache(struct page *page)
+static __always_inline bool folio_test_swapcache(struct folio *folio)
{
-#ifdef CONFIG_THP_SWAP
- page = compound_head(page);
-#endif
- return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags);
+ return folio_test_swapbacked(folio) &&
+ test_bit(PG_swapcache, folio_flags(folio, 0));
+}
+static __always_inline bool PageSwapCache(struct page *page)
+{
+ return folio_test_swapcache(page_folio(page));
}
+
SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
#else
-PAGEFLAG_FALSE(SwapCache)
+PAGEFLAG_FALSE(SwapCache, swapcache)
#endif
PAGEFLAG(Unevictable, unevictable, PF_HEAD)
@@ -421,14 +508,14 @@ PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
__CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL)
#else
-PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
- TESTSCFLAG_FALSE(Mlocked)
+PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked)
+ TESTSCFLAG_FALSE(Mlocked, mlocked)
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND)
#else
-PAGEFLAG_FALSE(Uncached)
+PAGEFLAG_FALSE(Uncached, uncached)
#endif
#ifdef CONFIG_MEMORY_FAILURE
@@ -437,7 +524,7 @@ TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
extern bool take_page_off_buddy(struct page *page);
#else
-PAGEFLAG_FALSE(HWPoison)
+PAGEFLAG_FALSE(HWPoison, hwpoison)
#define __PG_HWPOISON 0
#endif
@@ -451,7 +538,7 @@ PAGEFLAG(Idle, idle, PF_ANY)
#ifdef CONFIG_KASAN_HW_TAGS
PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD)
#else
-PAGEFLAG_FALSE(SkipKASanPoison)
+PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison)
#endif
/*
@@ -489,10 +576,14 @@ static __always_inline int PageMappingFlags(struct page *page)
return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0;
}
-static __always_inline int PageAnon(struct page *page)
+static __always_inline bool folio_test_anon(struct folio *folio)
+{
+ return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0;
+}
+
+static __always_inline bool PageAnon(struct page *page)
{
- page = compound_head(page);
- return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
+ return folio_test_anon(page_folio(page));
}
static __always_inline int __PageMovable(struct page *page)
@@ -508,30 +599,32 @@ static __always_inline int __PageMovable(struct page *page)
* is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any
* anon_vma, but to that page's node of the stable tree.
*/
-static __always_inline int PageKsm(struct page *page)
+static __always_inline bool folio_test_ksm(struct folio *folio)
{
- page = compound_head(page);
- return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
+ return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) ==
PAGE_MAPPING_KSM;
}
+
+static __always_inline bool PageKsm(struct page *page)
+{
+ return folio_test_ksm(page_folio(page));
+}
#else
-TESTPAGEFLAG_FALSE(Ksm)
+TESTPAGEFLAG_FALSE(Ksm, ksm)
#endif
u64 stable_page_flags(struct page *page);
-static inline int PageUptodate(struct page *page)
+static inline bool folio_test_uptodate(struct folio *folio)
{
- int ret;
- page = compound_head(page);
- ret = test_bit(PG_uptodate, &(page)->flags);
+ bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));
/*
- * Must ensure that the data we read out of the page is loaded
- * _after_ we've loaded page->flags to check for PageUptodate.
- * We can skip the barrier if the page is not uptodate, because
+ * Must ensure that the data we read out of the folio is loaded
+ * _after_ we've loaded folio->flags to check the uptodate bit.
+ * We can skip the barrier if the folio is not uptodate, because
* we wouldn't be reading anything from it.
*
- * See SetPageUptodate() for the other side of the story.
+ * See folio_mark_uptodate() for the other side of the story.
*/
if (ret)
smp_rmb();
@@ -539,47 +632,71 @@ static inline int PageUptodate(struct page *page)
return ret;
}
-static __always_inline void __SetPageUptodate(struct page *page)
+static inline int PageUptodate(struct page *page)
+{
+ return folio_test_uptodate(page_folio(page));
+}
+
+static __always_inline void __folio_mark_uptodate(struct folio *folio)
{
- VM_BUG_ON_PAGE(PageTail(page), page);
smp_wmb();
- __set_bit(PG_uptodate, &page->flags);
+ __set_bit(PG_uptodate, folio_flags(folio, 0));
}
-static __always_inline void SetPageUptodate(struct page *page)
+static __always_inline void folio_mark_uptodate(struct folio *folio)
{
- VM_BUG_ON_PAGE(PageTail(page), page);
/*
* Memory barrier must be issued before setting the PG_uptodate bit,
- * so that all previous stores issued in order to bring the page
- * uptodate are actually visible before PageUptodate becomes true.
+ * so that all previous stores issued in order to bring the folio
+ * uptodate are actually visible before folio_test_uptodate becomes true.
*/
smp_wmb();
- set_bit(PG_uptodate, &page->flags);
+ set_bit(PG_uptodate, folio_flags(folio, 0));
+}
+
+static __always_inline void __SetPageUptodate(struct page *page)
+{
+ __folio_mark_uptodate((struct folio *)page);
+}
+
+static __always_inline void SetPageUptodate(struct page *page)
+{
+ folio_mark_uptodate((struct folio *)page);
}
CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL)
-int test_clear_page_writeback(struct page *page);
-int __test_set_page_writeback(struct page *page, bool keep_write);
+bool __folio_start_writeback(struct folio *folio, bool keep_write);
+bool set_page_writeback(struct page *page);
-#define test_set_page_writeback(page) \
- __test_set_page_writeback(page, false)
-#define test_set_page_writeback_keepwrite(page) \
- __test_set_page_writeback(page, true)
+#define folio_start_writeback(folio) \
+ __folio_start_writeback(folio, false)
+#define folio_start_writeback_keepwrite(folio) \
+ __folio_start_writeback(folio, true)
-static inline void set_page_writeback(struct page *page)
+static inline void set_page_writeback_keepwrite(struct page *page)
{
- test_set_page_writeback(page);
+ folio_start_writeback_keepwrite(page_folio(page));
}
-static inline void set_page_writeback_keepwrite(struct page *page)
+static inline bool test_set_page_writeback(struct page *page)
{
- test_set_page_writeback_keepwrite(page);
+ return set_page_writeback(page);
}
__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
+/* Whether there are one or multiple pages in a folio */
+static inline bool folio_test_single(struct folio *folio)
+{
+ return !folio_test_head(folio);
+}
+
+static inline bool folio_test_multi(struct folio *folio)
+{
+ return folio_test_head(folio);
+}
+
static __always_inline void set_compound_head(struct page *page, struct page *head)
{
WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
@@ -603,12 +720,15 @@ static inline void ClearPageCompound(struct page *page)
#ifdef CONFIG_HUGETLB_PAGE
int PageHuge(struct page *page);
int PageHeadHuge(struct page *page);
+static inline bool folio_test_hugetlb(struct folio *folio)
+{
+ return PageHeadHuge(&folio->page);
+}
#else
-TESTPAGEFLAG_FALSE(Huge)
-TESTPAGEFLAG_FALSE(HeadHuge)
+TESTPAGEFLAG_FALSE(Huge, hugetlb)
+TESTPAGEFLAG_FALSE(HeadHuge, headhuge)
#endif
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* PageHuge() only returns true for hugetlbfs pages, but not for
@@ -624,6 +744,11 @@ static inline int PageTransHuge(struct page *page)
return PageHead(page);
}
+static inline bool folio_test_transhuge(struct folio *folio)
+{
+ return folio_test_head(folio);
+}
+
/*
* PageTransCompound returns true for both transparent huge pages
* and hugetlbfs pages, so it should only be called when it's known
@@ -660,12 +785,26 @@ static inline int PageTransTail(struct page *page)
PAGEFLAG(DoubleMap, double_map, PF_SECOND)
TESTSCFLAG(DoubleMap, double_map, PF_SECOND)
#else
-TESTPAGEFLAG_FALSE(TransHuge)
-TESTPAGEFLAG_FALSE(TransCompound)
-TESTPAGEFLAG_FALSE(TransCompoundMap)
-TESTPAGEFLAG_FALSE(TransTail)
-PAGEFLAG_FALSE(DoubleMap)
- TESTSCFLAG_FALSE(DoubleMap)
+TESTPAGEFLAG_FALSE(TransHuge, transhuge)
+TESTPAGEFLAG_FALSE(TransCompound, transcompound)
+TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap)
+TESTPAGEFLAG_FALSE(TransTail, transtail)
+PAGEFLAG_FALSE(DoubleMap, double_map)
+ TESTSCFLAG_FALSE(DoubleMap, double_map)
+#endif
+
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+/*
+ * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the
+ * compound page.
+ *
+ * This flag is set by hwpoison handler. Cleared by THP split or free page.
+ */
+PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
+ TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
+#else
+PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
+ TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
#endif
/*
@@ -849,6 +988,11 @@ static inline int page_has_private(struct page *page)
return !!(page->flags & PAGE_FLAGS_PRIVATE);
}
+static inline bool folio_has_private(struct folio *folio)
+{
+ return page_has_private(&folio->page);
+}
+
#undef PF_ANY
#undef PF_HEAD
#undef PF_ONLY_HEAD
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index d8a6aecf99cb..83abf95e9fa7 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -8,46 +8,16 @@
#ifdef CONFIG_PAGE_IDLE_FLAG
-#ifdef CONFIG_64BIT
-static inline bool page_is_young(struct page *page)
-{
- return PageYoung(page);
-}
-
-static inline void set_page_young(struct page *page)
-{
- SetPageYoung(page);
-}
-
-static inline bool test_and_clear_page_young(struct page *page)
-{
- return TestClearPageYoung(page);
-}
-
-static inline bool page_is_idle(struct page *page)
-{
- return PageIdle(page);
-}
-
-static inline void set_page_idle(struct page *page)
-{
- SetPageIdle(page);
-}
-
-static inline void clear_page_idle(struct page *page)
-{
- ClearPageIdle(page);
-}
-#else /* !CONFIG_64BIT */
+#ifndef CONFIG_64BIT
/*
* If there is not enough space to store Idle and Young bits in page flags, use
* page ext flags instead.
*/
extern struct page_ext_operations page_idle_ops;
-static inline bool page_is_young(struct page *page)
+static inline bool folio_test_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return false;
@@ -55,9 +25,9 @@ static inline bool page_is_young(struct page *page)
return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
-static inline void set_page_young(struct page *page)
+static inline void folio_set_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return;
@@ -65,9 +35,9 @@ static inline void set_page_young(struct page *page)
set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
-static inline bool test_and_clear_page_young(struct page *page)
+static inline bool folio_test_clear_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return false;
@@ -75,9 +45,9 @@ static inline bool test_and_clear_page_young(struct page *page)
return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
-static inline bool page_is_idle(struct page *page)
+static inline bool folio_test_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return false;
@@ -85,9 +55,9 @@ static inline bool page_is_idle(struct page *page)
return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
-static inline void set_page_idle(struct page *page)
+static inline void folio_set_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return;
@@ -95,46 +65,75 @@ static inline void set_page_idle(struct page *page)
set_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
-static inline void clear_page_idle(struct page *page)
+static inline void folio_clear_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return;
clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
-#endif /* CONFIG_64BIT */
+#endif /* !CONFIG_64BIT */
#else /* !CONFIG_PAGE_IDLE_FLAG */
-static inline bool page_is_young(struct page *page)
+static inline bool folio_test_young(struct folio *folio)
{
return false;
}
-static inline void set_page_young(struct page *page)
+static inline void folio_set_young(struct folio *folio)
{
}
-static inline bool test_and_clear_page_young(struct page *page)
+static inline bool folio_test_clear_young(struct folio *folio)
{
return false;
}
-static inline bool page_is_idle(struct page *page)
+static inline bool folio_test_idle(struct folio *folio)
{
return false;
}
-static inline void set_page_idle(struct page *page)
+static inline void folio_set_idle(struct folio *folio)
{
}
-static inline void clear_page_idle(struct page *page)
+static inline void folio_clear_idle(struct folio *folio)
{
}
#endif /* CONFIG_PAGE_IDLE_FLAG */
+static inline bool page_is_young(struct page *page)
+{
+ return folio_test_young(page_folio(page));
+}
+
+static inline void set_page_young(struct page *page)
+{
+ folio_set_young(page_folio(page));
+}
+
+static inline bool test_and_clear_page_young(struct page *page)
+{
+ return folio_test_clear_young(page_folio(page));
+}
+
+static inline bool page_is_idle(struct page *page)
+{
+ return folio_test_idle(page_folio(page));
+}
+
+static inline void set_page_idle(struct page *page)
+{
+ folio_set_idle(page_folio(page));
+}
+
+static inline void clear_page_idle(struct page *page)
+{
+ folio_clear_idle(page_folio(page));
+}
#endif /* _LINUX_MM_PAGE_IDLE_H */
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 719bfe5108c5..43c638c51c1f 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -12,7 +12,7 @@ extern void __reset_page_owner(struct page *page, unsigned int order);
extern void __set_page_owner(struct page *page,
unsigned int order, gfp_t gfp_mask);
extern void __split_page_owner(struct page *page, unsigned int nr);
-extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
+extern void __folio_copy_owner(struct folio *newfolio, struct folio *old);
extern void __set_page_owner_migrate_reason(struct page *page, int reason);
extern void __dump_page_owner(const struct page *page);
extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -36,10 +36,10 @@ static inline void split_page_owner(struct page *page, unsigned int nr)
if (static_branch_unlikely(&page_owner_inited))
__split_page_owner(page, nr);
}
-static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
+static inline void folio_copy_owner(struct folio *newfolio, struct folio *old)
{
if (static_branch_unlikely(&page_owner_inited))
- __copy_page_owner(oldpage, newpage);
+ __folio_copy_owner(newfolio, old);
}
static inline void set_page_owner_migrate_reason(struct page *page, int reason)
{
@@ -63,7 +63,7 @@ static inline void split_page_owner(struct page *page,
unsigned int order)
{
}
-static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
+static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio)
{
}
static inline void set_page_owner_migrate_reason(struct page *page, int reason)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 7ad46f45df39..2e677e6ad09f 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -67,9 +67,31 @@ static inline int page_ref_count(const struct page *page)
return atomic_read(&page->_refcount);
}
+/**
+ * folio_ref_count - The reference count on this folio.
+ * @folio: The folio.
+ *
+ * The refcount is usually incremented by calls to folio_get() and
+ * decremented by calls to folio_put(). Some typical users of the
+ * folio refcount:
+ *
+ * - Each reference from a page table
+ * - The page cache
+ * - Filesystem private data
+ * - The LRU list
+ * - Pipes
+ * - Direct IO which references this page in the process address space
+ *
+ * Return: The number of references to this folio.
+ */
+static inline int folio_ref_count(const struct folio *folio)
+{
+ return page_ref_count(&folio->page);
+}
+
static inline int page_count(const struct page *page)
{
- return atomic_read(&compound_head(page)->_refcount);
+ return folio_ref_count(page_folio(page));
}
static inline void set_page_count(struct page *page, int v)
@@ -79,6 +101,11 @@ static inline void set_page_count(struct page *page, int v)
__page_ref_set(page, v);
}
+static inline void folio_set_count(struct folio *folio, int v)
+{
+ set_page_count(&folio->page, v);
+}
+
/*
* Setup the page count before being freed into the page allocator for
* the first time (boot or memory hotplug)
@@ -95,6 +122,11 @@ static inline void page_ref_add(struct page *page, int nr)
__page_ref_mod(page, nr);
}
+static inline void folio_ref_add(struct folio *folio, int nr)
+{
+ page_ref_add(&folio->page, nr);
+}
+
static inline void page_ref_sub(struct page *page, int nr)
{
atomic_sub(nr, &page->_refcount);
@@ -102,6 +134,11 @@ static inline void page_ref_sub(struct page *page, int nr)
__page_ref_mod(page, -nr);
}
+static inline void folio_ref_sub(struct folio *folio, int nr)
+{
+ page_ref_sub(&folio->page, nr);
+}
+
static inline int page_ref_sub_return(struct page *page, int nr)
{
int ret = atomic_sub_return(nr, &page->_refcount);
@@ -111,6 +148,11 @@ static inline int page_ref_sub_return(struct page *page, int nr)
return ret;
}
+static inline int folio_ref_sub_return(struct folio *folio, int nr)
+{
+ return page_ref_sub_return(&folio->page, nr);
+}
+
static inline void page_ref_inc(struct page *page)
{
atomic_inc(&page->_refcount);
@@ -118,6 +160,11 @@ static inline void page_ref_inc(struct page *page)
__page_ref_mod(page, 1);
}
+static inline void folio_ref_inc(struct folio *folio)
+{
+ page_ref_inc(&folio->page);
+}
+
static inline void page_ref_dec(struct page *page)
{
atomic_dec(&page->_refcount);
@@ -125,6 +172,11 @@ static inline void page_ref_dec(struct page *page)
__page_ref_mod(page, -1);
}
+static inline void folio_ref_dec(struct folio *folio)
+{
+ page_ref_dec(&folio->page);
+}
+
static inline int page_ref_sub_and_test(struct page *page, int nr)
{
int ret = atomic_sub_and_test(nr, &page->_refcount);
@@ -134,6 +186,11 @@ static inline int page_ref_sub_and_test(struct page *page, int nr)
return ret;
}
+static inline int folio_ref_sub_and_test(struct folio *folio, int nr)
+{
+ return page_ref_sub_and_test(&folio->page, nr);
+}
+
static inline int page_ref_inc_return(struct page *page)
{
int ret = atomic_inc_return(&page->_refcount);
@@ -143,6 +200,11 @@ static inline int page_ref_inc_return(struct page *page)
return ret;
}
+static inline int folio_ref_inc_return(struct folio *folio)
+{
+ return page_ref_inc_return(&folio->page);
+}
+
static inline int page_ref_dec_and_test(struct page *page)
{
int ret = atomic_dec_and_test(&page->_refcount);
@@ -152,6 +214,11 @@ static inline int page_ref_dec_and_test(struct page *page)
return ret;
}
+static inline int folio_ref_dec_and_test(struct folio *folio)
+{
+ return page_ref_dec_and_test(&folio->page);
+}
+
static inline int page_ref_dec_return(struct page *page)
{
int ret = atomic_dec_return(&page->_refcount);
@@ -161,15 +228,91 @@ static inline int page_ref_dec_return(struct page *page)
return ret;
}
-static inline int page_ref_add_unless(struct page *page, int nr, int u)
+static inline int folio_ref_dec_return(struct folio *folio)
+{
+ return page_ref_dec_return(&folio->page);
+}
+
+static inline bool page_ref_add_unless(struct page *page, int nr, int u)
{
- int ret = atomic_add_unless(&page->_refcount, nr, u);
+ bool ret = atomic_add_unless(&page->_refcount, nr, u);
if (page_ref_tracepoint_active(page_ref_mod_unless))
__page_ref_mod_unless(page, nr, ret);
return ret;
}
+static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u)
+{
+ return page_ref_add_unless(&folio->page, nr, u);
+}
+
+/**
+ * folio_try_get - Attempt to increase the refcount on a folio.
+ * @folio: The folio.
+ *
+ * If you do not already have a reference to a folio, you can attempt to
+ * get one using this function. It may fail if, for example, the folio
+ * has been freed since you found a pointer to it, or it is frozen for
+ * the purposes of splitting or migration.
+ *
+ * Return: True if the reference count was successfully incremented.
+ */
+static inline bool folio_try_get(struct folio *folio)
+{
+ return folio_ref_add_unless(folio, 1, 0);
+}
+
+static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
+{
+#ifdef CONFIG_TINY_RCU
+ /*
+ * The caller guarantees the folio will not be freed from interrupt
+ * context, so (on !SMP) we only need preemption to be disabled
+ * and TINY_RCU does that for us.
+ */
+# ifdef CONFIG_PREEMPT_COUNT
+ VM_BUG_ON(!in_atomic() && !irqs_disabled());
+# endif
+ VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
+ folio_ref_add(folio, count);
+#else
+ if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
+ /* Either the folio has been freed, or will be freed. */
+ return false;
+ }
+#endif
+ return true;
+}
+
+/**
+ * folio_try_get_rcu - Attempt to increase the refcount on a folio.
+ * @folio: The folio.
+ *
+ * This is a version of folio_try_get() optimised for non-SMP kernels.
+ * If you are still holding the rcu_read_lock() after looking up the
+ * page and know that the page cannot have its refcount decreased to
+ * zero in interrupt context, you can use this instead of folio_try_get().
+ *
+ * Example users include get_user_pages_fast() (as pages are not unmapped
+ * from interrupt context) and the page cache lookups (as pages are not
+ * truncated from interrupt context). We also know that pages are not
+ * frozen in interrupt context for the purposes of splitting or migration.
+ *
+ * You can also use this function if you're holding a lock that prevents
+ * pages being frozen & removed; eg the i_pages lock for the page cache
+ * or the mmap_sem or page table lock for page tables. In this case,
+ * it will always succeed, and you could have used a plain folio_get(),
+ * but it's sometimes more convenient to have a common function called
+ * from both locked and RCU-protected contexts.
+ *
+ * Return: True if the reference count was successfully incremented.
+ */
+static inline bool folio_try_get_rcu(struct folio *folio)
+{
+ return folio_ref_try_add_rcu(folio, 1);
+}
+
static inline int page_ref_freeze(struct page *page, int count)
{
int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
@@ -179,6 +322,11 @@ static inline int page_ref_freeze(struct page *page, int count)
return ret;
}
+static inline int folio_ref_freeze(struct folio *folio, int count)
+{
+ return page_ref_freeze(&folio->page, count);
+}
+
static inline void page_ref_unfreeze(struct page *page, int count)
{
VM_BUG_ON_PAGE(page_count(page) != 0, page);
@@ -189,4 +337,8 @@ static inline void page_ref_unfreeze(struct page *page, int count)
__page_ref_unfreeze(page, count);
}
+static inline void folio_ref_unfreeze(struct folio *folio, int count)
+{
+ page_ref_unfreeze(&folio->page, count);
+}
#endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 62db6b0176b9..013cdc90f5fd 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -162,149 +162,119 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
void release_pages(struct page **pages, int nr);
-/*
- * For file cache pages, return the address_space, otherwise return NULL
+struct address_space *page_mapping(struct page *);
+struct address_space *folio_mapping(struct folio *);
+struct address_space *swapcache_mapping(struct folio *);
+
+/**
+ * folio_file_mapping - Find the mapping this folio belongs to.
+ * @folio: The folio.
+ *
+ * For folios which are in the page cache, return the mapping that this
+ * page belongs to. Folios in the swap cache return the mapping of the
+ * swap file or swap device where the data is stored. This is different
+ * from the mapping returned by folio_mapping(). The only reason to
+ * use it is if, like NFS, you return 0 from ->activate_swapfile.
+ *
+ * Do not call this for folios which aren't in the page cache or swap cache.
*/
-static inline struct address_space *page_mapping_file(struct page *page)
+static inline struct address_space *folio_file_mapping(struct folio *folio)
{
- if (unlikely(PageSwapCache(page)))
- return NULL;
- return page_mapping(page);
+ if (unlikely(folio_test_swapcache(folio)))
+ return swapcache_mapping(folio);
+
+ return folio->mapping;
+}
+
+static inline struct address_space *page_file_mapping(struct page *page)
+{
+ return folio_file_mapping(page_folio(page));
}
/*
- * speculatively take a reference to a page.
- * If the page is free (_refcount == 0), then _refcount is untouched, and 0
- * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned.
- *
- * This function must be called inside the same rcu_read_lock() section as has
- * been used to lookup the page in the pagecache radix-tree (or page table):
- * this allows allocators to use a synchronize_rcu() to stabilize _refcount.
- *
- * Unless an RCU grace period has passed, the count of all pages coming out
- * of the allocator must be considered unstable. page_count may return higher
- * than expected, and put_page must be able to do the right thing when the
- * page has been finished with, no matter what it is subsequently allocated
- * for (because put_page is what is used here to drop an invalid speculative
- * reference).
- *
- * This is the interesting part of the lockless pagecache (and lockless
- * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
- * has the following pattern:
- * 1. find page in radix tree
- * 2. conditionally increment refcount
- * 3. check the page is still in pagecache (if no, goto 1)
- *
- * Remove-side that cares about stability of _refcount (eg. reclaim) has the
- * following (with the i_pages lock held):
- * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
- * B. remove page from pagecache
- * C. free the page
- *
- * There are 2 critical interleavings that matter:
- * - 2 runs before A: in this case, A sees elevated refcount and bails out
- * - A runs before 2: in this case, 2 sees zero refcount and retries;
- * subsequently, B will complete and 1 will find no page, causing the
- * lookup to return NULL.
- *
- * It is possible that between 1 and 2, the page is removed then the exact same
- * page is inserted into the same position in pagecache. That's OK: the
- * old find_get_page using a lock could equally have run before or after
- * such a re-insertion, depending on order that locks are granted.
- *
- * Lookups racing against pagecache insertion isn't a big problem: either 1
- * will find the page or it will not. Likewise, the old find_get_page could run
- * either before the insertion or afterwards, depending on timing.
+ * For file cache pages, return the address_space, otherwise return NULL
*/
-static inline int __page_cache_add_speculative(struct page *page, int count)
+static inline struct address_space *page_mapping_file(struct page *page)
{
-#ifdef CONFIG_TINY_RCU
-# ifdef CONFIG_PREEMPT_COUNT
- VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
- /*
- * Preempt must be disabled here - we rely on rcu_read_lock doing
- * this for us.
- *
- * Pagecache won't be truncated from interrupt context, so if we have
- * found a page in the radix tree here, we have pinned its refcount by
- * disabling preempt, and hence no need for the "speculative get" that
- * SMP requires.
- */
- VM_BUG_ON_PAGE(page_count(page) == 0, page);
- page_ref_add(page, count);
+ struct folio *folio = page_folio(page);
-#else
- if (unlikely(!page_ref_add_unless(page, count, 0))) {
- /*
- * Either the page has been freed, or will be freed.
- * In either case, retry here and the caller should
- * do the right thing (see comments above).
- */
- return 0;
- }
-#endif
- VM_BUG_ON_PAGE(PageTail(page), page);
-
- return 1;
+ if (unlikely(folio_test_swapcache(folio)))
+ return NULL;
+ return folio_mapping(folio);
}
-static inline int page_cache_get_speculative(struct page *page)
+static inline bool page_cache_add_speculative(struct page *page, int count)
{
- return __page_cache_add_speculative(page, 1);
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ return folio_ref_try_add_rcu((struct folio *)page, count);
}
-static inline int page_cache_add_speculative(struct page *page, int count)
+static inline bool page_cache_get_speculative(struct page *page)
{
- return __page_cache_add_speculative(page, count);
+ return page_cache_add_speculative(page, 1);
}
/**
- * attach_page_private - Attach private data to a page.
- * @page: Page to attach data to.
- * @data: Data to attach to page.
+ * folio_attach_private - Attach private data to a folio.
+ * @folio: Folio to attach data to.
+ * @data: Data to attach to folio.
*
- * Attaching private data to a page increments the page's reference count.
- * The data must be detached before the page will be freed.
+ * Attaching private data to a folio increments the page's reference count.
+ * The data must be detached before the folio will be freed.
*/
-static inline void attach_page_private(struct page *page, void *data)
+static inline void folio_attach_private(struct folio *folio, void *data)
{
- get_page(page);
- set_page_private(page, (unsigned long)data);
- SetPagePrivate(page);
+ folio_get(folio);
+ folio->private = data;
+ folio_set_private(folio);
}
/**
- * detach_page_private - Detach private data from a page.
- * @page: Page to detach data from.
+ * folio_detach_private - Detach private data from a folio.
+ * @folio: Folio to detach data from.
*
- * Removes the data that was previously attached to the page and decrements
+ * Removes the data that was previously attached to the folio and decrements
* the refcount on the page.
*
- * Return: Data that was attached to the page.
+ * Return: Data that was attached to the folio.
*/
-static inline void *detach_page_private(struct page *page)
+static inline void *folio_detach_private(struct folio *folio)
{
- void *data = (void *)page_private(page);
+ void *data = folio_get_private(folio);
- if (!PagePrivate(page))
+ if (!folio_test_private(folio))
return NULL;
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
+ folio_clear_private(folio);
+ folio->private = NULL;
+ folio_put(folio);
return data;
}
+static inline void attach_page_private(struct page *page, void *data)
+{
+ folio_attach_private(page_folio(page), data);
+}
+
+static inline void *detach_page_private(struct page *page)
+{
+ return folio_detach_private(page_folio(page));
+}
+
#ifdef CONFIG_NUMA
-extern struct page *__page_cache_alloc(gfp_t gfp);
+struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
#else
-static inline struct page *__page_cache_alloc(gfp_t gfp)
+static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
{
- return alloc_pages(gfp, 0);
+ return folio_alloc(gfp, order);
}
#endif
+static inline struct page *__page_cache_alloc(gfp_t gfp)
+{
+ return &filemap_alloc_folio(gfp, 0)->page;
+}
+
static inline struct page *page_cache_alloc(struct address_space *x)
{
return __page_cache_alloc(mapping_gfp_mask(x));
@@ -331,9 +301,28 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
#define FGP_FOR_MMAP 0x00000040
#define FGP_HEAD 0x00000080
#define FGP_ENTRY 0x00000100
+#define FGP_STABLE 0x00000200
-struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
- int fgp_flags, gfp_t cache_gfp_mask);
+struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
+ int fgp_flags, gfp_t gfp);
+struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
+ int fgp_flags, gfp_t gfp);
+
+/**
+ * filemap_get_folio - Find and get a folio.
+ * @mapping: The address_space to search.
+ * @index: The page index.
+ *
+ * Looks up the page cache entry at @mapping & @index. If a folio is
+ * present, it is returned with an increased refcount.
+ *
+ * Otherwise, %NULL is returned.
+ */
+static inline struct folio *filemap_get_folio(struct address_space *mapping,
+ pgoff_t index)
+{
+ return __filemap_get_folio(mapping, index, 0, 0);
+}
/**
* find_get_page - find and get a page reference
@@ -377,25 +366,6 @@ static inline struct page *find_lock_page(struct address_space *mapping,
}
/**
- * find_lock_head - Locate, pin and lock a pagecache page.
- * @mapping: The address_space to search.
- * @index: The page index.
- *
- * Looks up the page cache entry at @mapping & @index. If there is a
- * page cache page, its head page is returned locked and with an increased
- * refcount.
- *
- * Context: May sleep.
- * Return: A struct page which is !PageTail, or %NULL if there is no page
- * in the cache for this index.
- */
-static inline struct page *find_lock_head(struct address_space *mapping,
- pgoff_t index)
-{
- return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0);
-}
-
-/**
* find_or_create_page - locate or add a pagecache page
* @mapping: the page's address_space
* @index: the page's index into the mapping
@@ -452,6 +422,73 @@ static inline bool thp_contains(struct page *head, pgoff_t index)
return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL));
}
+#define swapcache_index(folio) __page_file_index(&(folio)->page)
+
+/**
+ * folio_index - File index of a folio.
+ * @folio: The folio.
+ *
+ * For a folio which is either in the page cache or the swap cache,
+ * return its index within the address_space it belongs to. If you know
+ * the page is definitely in the page cache, you can look at the folio's
+ * index directly.
+ *
+ * Return: The index (offset in units of pages) of a folio in its file.
+ */
+static inline pgoff_t folio_index(struct folio *folio)
+{
+ if (unlikely(folio_test_swapcache(folio)))
+ return swapcache_index(folio);
+ return folio->index;
+}
+
+/**
+ * folio_next_index - Get the index of the next folio.
+ * @folio: The current folio.
+ *
+ * Return: The index of the folio which follows this folio in the file.
+ */
+static inline pgoff_t folio_next_index(struct folio *folio)
+{
+ return folio->index + folio_nr_pages(folio);
+}
+
+/**
+ * folio_file_page - The page for a particular index.
+ * @folio: The folio which contains this index.
+ * @index: The index we want to look up.
+ *
+ * Sometimes after looking up a folio in the page cache, we need to
+ * obtain the specific page for an index (eg a page fault).
+ *
+ * Return: The page containing the file data for this index.
+ */
+static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
+{
+ /* HugeTLBfs indexes the page cache in units of hpage_size */
+ if (folio_test_hugetlb(folio))
+ return &folio->page;
+ return folio_page(folio, index & (folio_nr_pages(folio) - 1));
+}
+
+/**
+ * folio_contains - Does this folio contain this index?
+ * @folio: The folio.
+ * @index: The page index within the file.
+ *
+ * Context: The caller should have the page locked in order to prevent
+ * (eg) shmem from moving the page between the page cache and swap cache
+ * and changing its index in the middle of the operation.
+ * Return: true or false.
+ */
+static inline bool folio_contains(struct folio *folio, pgoff_t index)
+{
+ /* HugeTLBfs indexes the page cache in units of hpage_size */
+ if (folio_test_hugetlb(folio))
+ return folio->index == index;
+ return index - folio_index(folio) < folio_nr_pages(folio);
+}
+
/*
* Given the page we found in the page cache, return the page corresponding
* to this index in the file
@@ -560,6 +597,27 @@ static inline loff_t page_file_offset(struct page *page)
return ((loff_t)page_index(page)) << PAGE_SHIFT;
}
+/**
+ * folio_pos - Returns the byte position of this folio in its file.
+ * @folio: The folio.
+ */
+static inline loff_t folio_pos(struct folio *folio)
+{
+ return page_offset(&folio->page);
+}
+
+/**
+ * folio_file_pos - Returns the byte position of this folio in its file.
+ * @folio: The folio.
+ *
+ * This differs from folio_pos() for folios which belong to a swap file.
+ * NFS is the only filesystem today which needs to use folio_file_pos().
+ */
+static inline loff_t folio_file_pos(struct folio *folio)
+{
+ return page_file_offset(&folio->page);
+}
+
extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
unsigned long address);
@@ -575,13 +633,13 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
}
struct wait_page_key {
- struct page *page;
+ struct folio *folio;
int bit_nr;
int page_match;
};
struct wait_page_queue {
- struct page *page;
+ struct folio *folio;
int bit_nr;
wait_queue_entry_t wait;
};
@@ -589,7 +647,7 @@ struct wait_page_queue {
static inline bool wake_page_match(struct wait_page_queue *wait_page,
struct wait_page_key *key)
{
- if (wait_page->page != key->page)
+ if (wait_page->folio != key->folio)
return false;
key->page_match = 1;
@@ -599,20 +657,31 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
return true;
}
-extern void __lock_page(struct page *page);
-extern int __lock_page_killable(struct page *page);
-extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
-extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
+void __folio_lock(struct folio *folio);
+int __folio_lock_killable(struct folio *folio);
+bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
unsigned int flags);
-extern void unlock_page(struct page *page);
+void unlock_page(struct page *page);
+void folio_unlock(struct folio *folio);
+
+static inline bool folio_trylock(struct folio *folio)
+{
+ return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
+}
/*
* Return true if the page was successfully locked
*/
static inline int trylock_page(struct page *page)
{
- page = compound_head(page);
- return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
+ return folio_trylock(page_folio(page));
+}
+
+static inline void folio_lock(struct folio *folio)
+{
+ might_sleep();
+ if (!folio_trylock(folio))
+ __folio_lock(folio);
}
/*
@@ -620,38 +689,30 @@ static inline int trylock_page(struct page *page)
*/
static inline void lock_page(struct page *page)
{
+ struct folio *folio;
might_sleep();
- if (!trylock_page(page))
- __lock_page(page);
+
+ folio = page_folio(page);
+ if (!folio_trylock(folio))
+ __folio_lock(folio);
}
-/*
- * lock_page_killable is like lock_page but can be interrupted by fatal
- * signals. It returns 0 if it locked the page and -EINTR if it was
- * killed while waiting.
- */
-static inline int lock_page_killable(struct page *page)
+static inline int folio_lock_killable(struct folio *folio)
{
might_sleep();
- if (!trylock_page(page))
- return __lock_page_killable(page);
+ if (!folio_trylock(folio))
+ return __folio_lock_killable(folio);
return 0;
}
/*
- * lock_page_async - Lock the page, unless this would block. If the page
- * is already locked, then queue a callback when the page becomes unlocked.
- * This callback can then retry the operation.
- *
- * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
- * was already locked and the callback defined in 'wait' was queued.
+ * lock_page_killable is like lock_page but can be interrupted by fatal
+ * signals. It returns 0 if it locked the page and -EINTR if it was
+ * killed while waiting.
*/
-static inline int lock_page_async(struct page *page,
- struct wait_page_queue *wait)
+static inline int lock_page_killable(struct page *page)
{
- if (!trylock_page(page))
- return __lock_page_async(page, wait);
- return 0;
+ return folio_lock_killable(page_folio(page));
}
/*
@@ -659,78 +720,108 @@ static inline int lock_page_async(struct page *page,
* caller indicated that it can handle a retry.
*
* Return value and mmap_lock implications depend on flags; see
- * __lock_page_or_retry().
+ * __folio_lock_or_retry().
*/
-static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
+static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
{
+ struct folio *folio;
might_sleep();
- return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
+
+ folio = page_folio(page);
+ return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags);
}
/*
- * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc.,
+ * This is exported only for folio_wait_locked/folio_wait_writeback, etc.,
* and should not be used directly.
*/
-extern void wait_on_page_bit(struct page *page, int bit_nr);
-extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
+void folio_wait_bit(struct folio *folio, int bit_nr);
+int folio_wait_bit_killable(struct folio *folio, int bit_nr);
/*
- * Wait for a page to be unlocked.
+ * Wait for a folio to be unlocked.
*
- * This must be called with the caller "holding" the page,
- * ie with increased "page->count" so that the page won't
+ * This must be called with the caller "holding" the folio,
+ * ie with increased "page->count" so that the folio won't
* go away during the wait..
*/
+static inline void folio_wait_locked(struct folio *folio)
+{
+ if (folio_test_locked(folio))
+ folio_wait_bit(folio, PG_locked);
+}
+
+static inline int folio_wait_locked_killable(struct folio *folio)
+{
+ if (!folio_test_locked(folio))
+ return 0;
+ return folio_wait_bit_killable(folio, PG_locked);
+}
+
static inline void wait_on_page_locked(struct page *page)
{
- if (PageLocked(page))
- wait_on_page_bit(compound_head(page), PG_locked);
+ folio_wait_locked(page_folio(page));
}
static inline int wait_on_page_locked_killable(struct page *page)
{
- if (!PageLocked(page))
- return 0;
- return wait_on_page_bit_killable(compound_head(page), PG_locked);
+ return folio_wait_locked_killable(page_folio(page));
}
int put_and_wait_on_page_locked(struct page *page, int state);
void wait_on_page_writeback(struct page *page);
-int wait_on_page_writeback_killable(struct page *page);
-extern void end_page_writeback(struct page *page);
+void folio_wait_writeback(struct folio *folio);
+int folio_wait_writeback_killable(struct folio *folio);
+void end_page_writeback(struct page *page);
+void folio_end_writeback(struct folio *folio);
void wait_for_stable_page(struct page *page);
+void folio_wait_stable(struct folio *folio);
+void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
+static inline void __set_page_dirty(struct page *page,
+ struct address_space *mapping, int warn)
+{
+ __folio_mark_dirty(page_folio(page), mapping, warn);
+}
+void folio_account_cleaned(struct folio *folio, struct address_space *mapping,
+ struct bdi_writeback *wb);
+static inline void account_page_cleaned(struct page *page,
+ struct address_space *mapping, struct bdi_writeback *wb)
+{
+ return folio_account_cleaned(page_folio(page), mapping, wb);
+}
+void __folio_cancel_dirty(struct folio *folio);
+static inline void folio_cancel_dirty(struct folio *folio)
+{
+ /* Avoid atomic ops, locking, etc. when not actually needed. */
+ if (folio_test_dirty(folio))
+ __folio_cancel_dirty(folio);
+}
+static inline void cancel_dirty_page(struct page *page)
+{
+ folio_cancel_dirty(page_folio(page));
+}
+bool folio_clear_dirty_for_io(struct folio *folio);
+bool clear_page_dirty_for_io(struct page *page);
+int __must_check folio_write_one(struct folio *folio);
+static inline int __must_check write_one_page(struct page *page)
+{
+ return folio_write_one(page_folio(page));
+}
-void __set_page_dirty(struct page *, struct address_space *, int warn);
int __set_page_dirty_nobuffers(struct page *page);
int __set_page_dirty_no_writeback(struct page *page);
void page_endio(struct page *page, bool is_write, int err);
-/**
- * set_page_private_2 - Set PG_private_2 on a page and take a ref
- * @page: The page.
- *
- * Set the PG_private_2 flag on a page and take the reference needed for the VM
- * to handle its lifetime correctly. This sets the flag and takes the
- * reference unconditionally, so care must be taken not to set the flag again
- * if it's already set.
- */
-static inline void set_page_private_2(struct page *page)
-{
- page = compound_head(page);
- get_page(page);
- SetPagePrivate2(page);
-}
-
-void end_page_private_2(struct page *page);
-void wait_on_page_private_2(struct page *page);
-int wait_on_page_private_2_killable(struct page *page);
+void folio_end_private_2(struct folio *folio);
+void folio_wait_private_2(struct folio *folio);
+int folio_wait_private_2_killable(struct folio *folio);
/*
* Add an arbitrary waiter to a page's wait queue
*/
-extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
+void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter);
/*
* Fault everything in given userspace address range in.
@@ -790,9 +881,11 @@ static inline int fault_in_pages_readable(const char __user *uaddr, size_t size)
}
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
- pgoff_t index, gfp_t gfp_mask);
+ pgoff_t index, gfp_t gfp);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
- pgoff_t index, gfp_t gfp_mask);
+ pgoff_t index, gfp_t gfp);
+int filemap_add_folio(struct address_space *mapping, struct folio *folio,
+ pgoff_t index, gfp_t gfp);
extern void delete_from_page_cache(struct page *page);
extern void __delete_from_page_cache(struct page *page, void *shadow);
void replace_page_cache_page(struct page *old, struct page *new);
@@ -817,6 +910,10 @@ static inline int add_to_page_cache(struct page *page,
return error;
}
+/* Must be non-static for BPF error injection */
+int __filemap_add_folio(struct address_space *mapping, struct folio *folio,
+ pgoff_t index, gfp_t gfp, void **shadowp);
+
/**
* struct readahead_control - Describes a readahead request.
*
@@ -906,33 +1003,57 @@ void page_cache_async_readahead(struct address_space *mapping,
page_cache_async_ra(&ractl, page, req_count);
}
+static inline struct folio *__readahead_folio(struct readahead_control *ractl)
+{
+ struct folio *folio;
+
+ BUG_ON(ractl->_batch_count > ractl->_nr_pages);
+ ractl->_nr_pages -= ractl->_batch_count;
+ ractl->_index += ractl->_batch_count;
+
+ if (!ractl->_nr_pages) {
+ ractl->_batch_count = 0;
+ return NULL;
+ }
+
+ folio = xa_load(&ractl->mapping->i_pages, ractl->_index);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ ractl->_batch_count = folio_nr_pages(folio);
+
+ return folio;
+}
+
/**
* readahead_page - Get the next page to read.
- * @rac: The current readahead request.
+ * @ractl: The current readahead request.
*
* Context: The page is locked and has an elevated refcount. The caller
* should decreases the refcount once the page has been submitted for I/O
* and unlock the page once all I/O to that page has completed.
* Return: A pointer to the next page, or %NULL if we are done.
*/
-static inline struct page *readahead_page(struct readahead_control *rac)
+static inline struct page *readahead_page(struct readahead_control *ractl)
{
- struct page *page;
-
- BUG_ON(rac->_batch_count > rac->_nr_pages);
- rac->_nr_pages -= rac->_batch_count;
- rac->_index += rac->_batch_count;
+ struct folio *folio = __readahead_folio(ractl);
- if (!rac->_nr_pages) {
- rac->_batch_count = 0;
- return NULL;
- }
+ return &folio->page;
+}
- page = xa_load(&rac->mapping->i_pages, rac->_index);
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- rac->_batch_count = thp_nr_pages(page);
+/**
+ * readahead_folio - Get the next folio to read.
+ * @ractl: The current readahead request.
+ *
+ * Context: The folio is locked. The caller should unlock the folio once
+ * all I/O to that folio has completed.
+ * Return: A pointer to the next folio, or %NULL if we are done.
+ */
+static inline struct folio *readahead_folio(struct readahead_control *ractl)
+{
+ struct folio *folio = __readahead_folio(ractl);
- return page;
+ if (folio)
+ folio_put(folio);
+ return folio;
}
static inline unsigned int __readahead_batch(struct readahead_control *rac,
@@ -1040,6 +1161,34 @@ static inline unsigned long dir_pages(struct inode *inode)
}
/**
+ * folio_mkwrite_check_truncate - check if folio was truncated
+ * @folio: the folio to check
+ * @inode: the inode to check the folio against
+ *
+ * Return: the number of bytes in the folio up to EOF,
+ * or -EFAULT if the folio was truncated.
+ */
+static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
+ struct inode *inode)
+{
+ loff_t size = i_size_read(inode);
+ pgoff_t index = size >> PAGE_SHIFT;
+ size_t offset = offset_in_folio(folio, size);
+
+ if (!folio->mapping)
+ return -EFAULT;
+
+ /* folio is wholly inside EOF */
+ if (folio_next_index(folio) - 1 < index)
+ return folio_size(folio);
+ /* folio is wholly past EOF */
+ if (folio->index > index || !offset)
+ return -EFAULT;
+ /* folio is partially inside EOF */
+ return offset;
+}
+
+/**
* page_mkwrite_check_truncate - check if page was truncated
* @page: the page to check
* @inode: the inode to check the page against
@@ -1068,19 +1217,25 @@ static inline int page_mkwrite_check_truncate(struct page *page,
}
/**
- * i_blocks_per_page - How many blocks fit in this page.
+ * i_blocks_per_folio - How many blocks fit in this folio.
* @inode: The inode which contains the blocks.
- * @page: The page (head page if the page is a THP).
+ * @folio: The folio.
*
- * If the block size is larger than the size of this page, return zero.
+ * If the block size is larger than the size of this folio, return zero.
*
- * Context: The caller should hold a refcount on the page to prevent it
+ * Context: The caller should hold a refcount on the folio to prevent it
* from being split.
- * Return: The number of filesystem blocks covered by this page.
+ * Return: The number of filesystem blocks covered by this folio.
*/
static inline
+unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio)
+{
+ return folio_size(folio) >> inode->i_blkbits;
+}
+
+static inline
unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
{
- return thp_size(page) >> inode->i_blkbits;
+ return i_blocks_per_folio(inode, page_folio(page));
}
#endif /* _LINUX_PAGEMAP_H */
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index d2558121d48c..6f7949b2fd8d 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -3,6 +3,7 @@
#define _LINUX_PART_STAT_H
#include <linux/genhd.h>
+#include <asm/local.h>
struct disk_stats {
u64 nsecs[NR_STAT_GROUPS];
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index ae16a9856305..b31d3f3312ce 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -267,6 +267,28 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
}
/**
+ * percpu_ref_tryget_live_rcu - same as percpu_ref_tryget_live() but the
+ * caller is responsible for taking RCU.
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline bool percpu_ref_tryget_live_rcu(struct percpu_ref *ref)
+{
+ unsigned long __percpu *percpu_count;
+ bool ret = false;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (likely(__ref_is_percpu(ref, &percpu_count))) {
+ this_cpu_inc(*percpu_count);
+ ret = true;
+ } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) {
+ ret = atomic_long_inc_not_zero(&ref->data->count);
+ }
+ return ret;
+}
+
+/**
* percpu_ref_tryget_live - try to increment a live percpu refcount
* @ref: percpu_ref to try-get
*
@@ -283,20 +305,11 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
*/
static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
{
- unsigned long __percpu *percpu_count;
bool ret = false;
rcu_read_lock();
-
- if (__ref_is_percpu(ref, &percpu_count)) {
- this_cpu_inc(*percpu_count);
- ret = true;
- } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) {
- ret = atomic_long_inc_not_zero(&ref->data->count);
- }
-
+ ret = percpu_ref_tryget_live_rcu(ref);
rcu_read_unlock();
-
return ret;
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9b60bb89d86a..22212f2a8219 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1400,6 +1400,7 @@ perf_event_addr_filters(struct perf_event *event)
}
extern void perf_event_addr_filters_sync(struct perf_event *event);
+extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_sample_data *data,
diff --git a/include/linux/platform_data/usb-omap1.h b/include/linux/platform_data/usb-omap1.h
index 43b5ce139c37..878e572a78bf 100644
--- a/include/linux/platform_data/usb-omap1.h
+++ b/include/linux/platform_data/usb-omap1.h
@@ -48,6 +48,8 @@ struct omap_usb_config {
u32 (*usb2_init)(unsigned nwires, unsigned alt_pingroup);
int (*ocpi_enable)(void);
+
+ void (*lb_reset)(void);
};
#endif /* __LINUX_USB_OMAP1_H */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 4d244e295e85..031898b38d06 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -122,9 +122,10 @@
* The preempt_count offset after spin_lock()
*/
#if !defined(CONFIG_PREEMPT_RT)
-#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
+#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
#else
-#define PREEMPT_LOCK_OFFSET 0
+/* Locks on RT do not disable preemption */
+#define PREEMPT_LOCK_OFFSET 0
#endif
/*
diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index c0475d1c9885..81cad9e1e412 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -61,7 +61,6 @@ enum qcom_scm_ice_cipher {
#define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE)
#define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC)
-#if IS_ENABLED(CONFIG_QCOM_SCM)
extern bool qcom_scm_is_available(void);
extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus);
@@ -115,74 +114,4 @@ extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
extern int qcom_scm_lmh_profile_change(u32 profile_id);
extern bool qcom_scm_lmh_dcvsh_available(void);
-#else
-
-#include <linux/errno.h>
-
-static inline bool qcom_scm_is_available(void) { return false; }
-
-static inline int qcom_scm_set_cold_boot_addr(void *entry,
- const cpumask_t *cpus) { return -ENODEV; }
-static inline int qcom_scm_set_warm_boot_addr(void *entry,
- const cpumask_t *cpus) { return -ENODEV; }
-static inline void qcom_scm_cpu_power_down(u32 flags) {}
-static inline u32 qcom_scm_set_remote_state(u32 state,u32 id)
- { return -ENODEV; }
-
-static inline int qcom_scm_pas_init_image(u32 peripheral, const void *metadata,
- size_t size) { return -ENODEV; }
-static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr,
- phys_addr_t size) { return -ENODEV; }
-static inline int qcom_scm_pas_auth_and_reset(u32 peripheral)
- { return -ENODEV; }
-static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; }
-static inline bool qcom_scm_pas_supported(u32 peripheral) { return false; }
-
-static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val)
- { return -ENODEV; }
-static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val)
- { return -ENODEV; }
-
-static inline bool qcom_scm_restore_sec_cfg_available(void) { return false; }
-static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare)
- { return -ENODEV; }
-static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size)
- { return -ENODEV; }
-static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare)
- { return -ENODEV; }
-extern inline int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size,
- u32 cp_nonpixel_start,
- u32 cp_nonpixel_size)
- { return -ENODEV; }
-static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
- unsigned int *src, const struct qcom_scm_vmperm *newvm,
- unsigned int dest_cnt) { return -ENODEV; }
-
-static inline bool qcom_scm_ocmem_lock_available(void) { return false; }
-static inline int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset,
- u32 size, u32 mode) { return -ENODEV; }
-static inline int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id,
- u32 offset, u32 size) { return -ENODEV; }
-
-static inline bool qcom_scm_ice_available(void) { return false; }
-static inline int qcom_scm_ice_invalidate_key(u32 index) { return -ENODEV; }
-static inline int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size,
- enum qcom_scm_ice_cipher cipher,
- u32 data_unit_size) { return -ENODEV; }
-
-static inline bool qcom_scm_hdcp_available(void) { return false; }
-static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
- u32 *resp) { return -ENODEV; }
-
-static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
- { return -ENODEV; }
-
-static inline int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
- u64 limit_node, u32 node_id, u64 version)
- { return -ENODEV; }
-
-static inline int qcom_scm_lmh_profile_change(u32 profile_id) { return -ENODEV; }
-
-static inline bool qcom_scm_lmh_dcvsh_available(void) { return -ENODEV; }
-#endif
#endif
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c976cc6de257..e704b1a4c06c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -235,7 +235,7 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
*
* returns the number of cleaned PTEs.
*/
-int page_mkclean(struct page *);
+int folio_mkclean(struct folio *);
/*
* called in munlock()/munmap() path to check for other vmas holding
@@ -295,12 +295,14 @@ static inline void try_to_unmap(struct page *page, enum ttu_flags flags)
{
}
-static inline int page_mkclean(struct page *page)
+static inline int folio_mkclean(struct folio *folio)
{
return 0;
}
-
-
#endif /* CONFIG_MMU */
+static inline int page_mkclean(struct page *page)
+{
+ return folio_mkclean(page_folio(page));
+}
#endif /* _LINUX_RMAP_H */
diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 7ce9a51ae5c0..2c0ad417ce3c 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -30,31 +30,16 @@ do { \
#ifdef CONFIG_DEBUG_SPINLOCK
extern void do_raw_read_lock(rwlock_t *lock) __acquires(lock);
-#define do_raw_read_lock_flags(lock, flags) do_raw_read_lock(lock)
extern int do_raw_read_trylock(rwlock_t *lock);
extern void do_raw_read_unlock(rwlock_t *lock) __releases(lock);
extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock);
-#define do_raw_write_lock_flags(lock, flags) do_raw_write_lock(lock)
extern int do_raw_write_trylock(rwlock_t *lock);
extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock);
#else
-
-#ifndef arch_read_lock_flags
-# define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#endif
-
-#ifndef arch_write_lock_flags
-# define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-#endif
-
# define do_raw_read_lock(rwlock) do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0)
-# define do_raw_read_lock_flags(lock, flags) \
- do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
# define do_raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock)
# define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
# define do_raw_write_lock(rwlock) do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0)
-# define do_raw_write_lock_flags(lock, flags) \
- do {__acquire(lock); arch_write_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
# define do_raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock)
# define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
#endif
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index abfb53ab11be..f1db6f17c4fb 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -157,8 +157,7 @@ static inline unsigned long __raw_read_lock_irqsave(rwlock_t *lock)
local_irq_save(flags);
preempt_disable();
rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
- LOCK_CONTENDED_FLAGS(lock, do_raw_read_trylock, do_raw_read_lock,
- do_raw_read_lock_flags, &flags);
+ LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock);
return flags;
}
@@ -184,8 +183,7 @@ static inline unsigned long __raw_write_lock_irqsave(rwlock_t *lock)
local_irq_save(flags);
preempt_disable();
rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- LOCK_CONTENDED_FLAGS(lock, do_raw_write_trylock, do_raw_write_lock,
- do_raw_write_lock_flags, &flags);
+ LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
return flags;
}
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 2713e689ad66..4a6ff274335a 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -427,6 +427,19 @@ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth);
int __sbitmap_queue_get(struct sbitmap_queue *sbq);
/**
+ * __sbitmap_queue_get_batch() - Try to allocate a batch of free bits
+ * @sbq: Bitmap queue to allocate from.
+ * @nr_tags: number of tags requested
+ * @offset: offset to add to returned bits
+ *
+ * Return: Mask of allocated tags, 0 if none are found. Each tag allocated is
+ * a bit in the mask returned, and the caller must add @offset to the value to
+ * get the absolute tag value.
+ */
+unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
+ unsigned int *offset);
+
+/**
* __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
* sbitmap_queue, limiting the depth used from each word, with preemption
* already disabled.
@@ -515,6 +528,17 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
unsigned int cpu);
+/**
+ * sbitmap_queue_clear_batch() - Free a batch of allocated bits
+ * &struct sbitmap_queue.
+ * @sbq: Bitmap to free from.
+ * @offset: offset for each tag in array
+ * @tags: array of tags
+ * @nr_tags: number of tags in array
+ */
+void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset,
+ int *tags, int nr_tags);
+
static inline int sbq_index_inc(int index)
{
return (index + 1) & (SBQ_WAIT_QUEUES - 1);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c1a927ddec64..6f6f8f340a0f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -503,6 +503,8 @@ struct sched_statistics {
u64 block_start;
u64 block_max;
+ s64 sum_block_runtime;
+
u64 exec_max;
u64 slice_max;
@@ -522,7 +524,7 @@ struct sched_statistics {
u64 nr_wakeups_passive;
u64 nr_wakeups_idle;
#endif
-};
+} ____cacheline_aligned;
struct sched_entity {
/* For load-balancing: */
@@ -538,8 +540,6 @@ struct sched_entity {
u64 nr_migrations;
- struct sched_statistics statistics;
-
#ifdef CONFIG_FAIR_GROUP_SCHED
int depth;
struct sched_entity *parent;
@@ -750,10 +750,6 @@ struct task_struct {
#ifdef CONFIG_SMP
int on_cpu;
struct __call_single_node wake_entry;
-#ifdef CONFIG_THREAD_INFO_IN_TASK
- /* Current CPU: */
- unsigned int cpu;
-#endif
unsigned int wakee_flips;
unsigned long wakee_flip_decay_ts;
struct task_struct *last_wakee;
@@ -775,10 +771,10 @@ struct task_struct {
int normal_prio;
unsigned int rt_priority;
- const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
struct sched_dl_entity dl;
+ const struct sched_class *sched_class;
#ifdef CONFIG_SCHED_CORE
struct rb_node core_node;
@@ -803,6 +799,8 @@ struct task_struct {
struct uclamp_se uclamp[UCLAMP_CNT];
#endif
+ struct sched_statistics stats;
+
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* List of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
@@ -1160,10 +1158,8 @@ struct task_struct {
/* Stacked block device info: */
struct bio_list *bio_list;
-#ifdef CONFIG_BLOCK
/* Stack plugging: */
struct blk_plug *plug;
-#endif
/* VM state: */
struct reclaim_state *reclaim_state;
@@ -1886,10 +1882,7 @@ extern struct thread_info init_thread_info;
extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];
#ifdef CONFIG_THREAD_INFO_IN_TASK
-static inline struct thread_info *task_thread_info(struct task_struct *task)
-{
- return &task->thread_info;
-}
+# define task_thread_info(task) (&(task)->thread_info)
#elif !defined(__HAVE_THREAD_FUNCTIONS)
# define task_thread_info(task) ((struct thread_info *)(task)->stack)
#endif
@@ -2039,7 +2032,7 @@ static inline int _cond_resched(void) { return 0; }
#endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
#define cond_resched() ({ \
- ___might_sleep(__FILE__, __LINE__, 0); \
+ __might_resched(__FILE__, __LINE__, 0); \
_cond_resched(); \
})
@@ -2047,19 +2040,38 @@ extern int __cond_resched_lock(spinlock_t *lock);
extern int __cond_resched_rwlock_read(rwlock_t *lock);
extern int __cond_resched_rwlock_write(rwlock_t *lock);
-#define cond_resched_lock(lock) ({ \
- ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
- __cond_resched_lock(lock); \
+#define MIGHT_RESCHED_RCU_SHIFT 8
+#define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
+
+#ifndef CONFIG_PREEMPT_RT
+/*
+ * Non RT kernels have an elevated preempt count due to the held lock,
+ * but are not allowed to be inside a RCU read side critical section
+ */
+# define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET
+#else
+/*
+ * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
+ * cond_resched*lock() has to take that into account because it checks for
+ * preempt_count() and rcu_preempt_depth().
+ */
+# define PREEMPT_LOCK_RESCHED_OFFSETS \
+ (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
+#endif
+
+#define cond_resched_lock(lock) ({ \
+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
+ __cond_resched_lock(lock); \
})
-#define cond_resched_rwlock_read(lock) ({ \
- __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
- __cond_resched_rwlock_read(lock); \
+#define cond_resched_rwlock_read(lock) ({ \
+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
+ __cond_resched_rwlock_read(lock); \
})
-#define cond_resched_rwlock_write(lock) ({ \
- __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
- __cond_resched_rwlock_write(lock); \
+#define cond_resched_rwlock_write(lock) ({ \
+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
+ __cond_resched_rwlock_write(lock); \
})
static inline void cond_resched_rcu(void)
@@ -2114,11 +2126,7 @@ static __always_inline bool need_resched(void)
static inline unsigned int task_cpu(const struct task_struct *p)
{
-#ifdef CONFIG_THREAD_INFO_IN_TASK
- return READ_ONCE(p->cpu);
-#else
return READ_ONCE(task_thread_info(p)->cpu);
-#endif
}
extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
@@ -2137,6 +2145,7 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
#endif /* CONFIG_SMP */
extern bool sched_task_on_rq(struct task_struct *p);
+extern unsigned long get_wchan(struct task_struct *p);
/*
* In order to reduce various lock holder preemption latencies provide an
diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index 22873d276be6..d73d314d59c6 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -11,7 +11,11 @@ enum cpu_idle_type {
CPU_MAX_IDLE_TYPES
};
+#ifdef CONFIG_SMP
extern void wake_up_if_idle(int cpu);
+#else
+static inline void wake_up_if_idle(int cpu) { }
+#endif
/*
* Idle thread specific functions to determine the need_resched
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 5561486fddef..aca874d33fe6 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,35 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is
+ * by far the least expensive way to do that.
+ */
+static inline void __mmdrop_delayed(struct rcu_head *rhp)
+{
+ struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
+
+ __mmdrop(mm);
+}
+
+/*
+ * Invoked from finish_task_switch(). Delegates the heavy lifting on RT
+ * kernels via RCU.
+ */
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+ /* Provides a full memory barrier. See mmdrop() */
+ if (atomic_dec_and_test(&mm->mm_count))
+ call_rcu(&mm->delayed_drop, __mmdrop_delayed);
+}
+#else
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+ mmdrop(mm);
+}
+#endif
+
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
* @mm: The address space to pin.
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ef02be869cf2..ba88a6987400 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -54,7 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
extern void init_idle(struct task_struct *idle, int cpu);
extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
-extern void sched_post_fork(struct task_struct *p);
+extern void sched_post_fork(struct task_struct *p,
+ struct kernel_clone_args *kargs);
extern void sched_dead(struct task_struct *p);
void __noreturn do_task_dead(void);
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index 2413427e439c..d10150587d81 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -25,7 +25,11 @@ static inline void *task_stack_page(const struct task_struct *task)
static inline unsigned long *end_of_stack(const struct task_struct *task)
{
+#ifdef CONFIG_STACK_GROWSUP
+ return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1;
+#else
return task->stack;
+#endif
}
#elif !defined(__HAVE_THREAD_FUNCTIONS)
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 8f0f778b7c91..c07bfa2d80f2 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -42,6 +42,13 @@ static inline int cpu_smt_flags(void)
}
#endif
+#ifdef CONFIG_SCHED_CLUSTER
+static inline int cpu_cluster_flags(void)
+{
+ return SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
#ifdef CONFIG_SCHED_MC
static inline int cpu_core_flags(void)
{
@@ -98,7 +105,7 @@ struct sched_domain {
/* idle_balance() stats */
u64 max_newidle_lb_cost;
- unsigned long next_decay_max_lb_cost;
+ unsigned long last_decay_max_lb_cost;
u64 avg_scan_cost; /* select_idle_sibling */
diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
index 21c3771e6a56..988528b5da43 100644
--- a/include/linux/secretmem.h
+++ b/include/linux/secretmem.h
@@ -23,7 +23,7 @@ static inline bool page_is_secretmem(struct page *page)
mapping = (struct address_space *)
((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
- if (mapping != page->mapping)
+ if (!mapping || mapping != page->mapping)
return false;
return mapping->a_ops == &secretmem_aops;
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3f96a6374e4f..7d34105e20c6 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -464,6 +464,12 @@ int __save_altstack(stack_t __user *, unsigned long);
unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \
} while (0);
+#ifdef CONFIG_DYNAMIC_SIGFRAME
+bool sigaltstack_size_valid(size_t ss_size);
+#else
+static inline bool sigaltstack_size_valid(size_t size) { return true; }
+#endif /* !CONFIG_DYNAMIC_SIGFRAME */
+
#ifdef CONFIG_PROC_FS
struct seq_file;
extern void render_sigset_t(struct seq_file *, const char *, sigset_t *);
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 14ab0c0bc924..1ce9a9eb223b 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -128,6 +128,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags);
+bool sk_msg_is_readable(struct sock *sk);
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 8371bca13729..6b0b686f6f90 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -531,6 +531,9 @@ struct spi_controller {
/* I/O mutex */
struct mutex io_mutex;
+ /* Used to avoid adding the same CS twice */
+ struct mutex add_lock;
+
/* lock and mutex for SPI bus locking */
spinlock_t bus_lock_spinlock;
struct mutex bus_lock_mutex;
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 45310ea1b1d7..f0447062eecd 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -177,7 +177,6 @@ do { \
#ifdef CONFIG_DEBUG_SPINLOCK
extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
-#define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock)
extern int do_raw_spin_trylock(raw_spinlock_t *lock);
extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock);
#else
@@ -188,18 +187,6 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
mmiowb_spin_lock();
}
-#ifndef arch_spin_lock_flags
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#endif
-
-static inline void
-do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock)
-{
- __acquire(lock);
- arch_spin_lock_flags(&lock->raw_lock, *flags);
- mmiowb_spin_lock();
-}
-
static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
{
int ret = arch_spin_trylock(&(lock)->raw_lock);
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 6b8e1a0b137b..51fa0dab68c4 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -108,16 +108,7 @@ static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
local_irq_save(flags);
preempt_disable();
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- /*
- * On lockdep we dont want the hand-coded irq-enable of
- * do_raw_spin_lock_flags() code, because lockdep assumes
- * that interrupts are not re-enabled during lock-acquire:
- */
-#ifdef CONFIG_LOCKDEP
LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
-#else
- do_raw_spin_lock_flags(lock, &flags);
-#endif
return flags;
}
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 0ac9112c1bbe..16521074b6f7 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -62,7 +62,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
#define arch_spin_is_locked(lock) ((void)(lock), 0)
/* for sched/core.c and kernel_lock.c: */
# define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0)
-# define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0)
# define arch_spin_unlock(lock) do { barrier(); (void)(lock); } while (0)
# define arch_spin_trylock(lock) ({ barrier(); (void)(lock); 1; })
#endif /* DEBUG_SPINLOCK */
diff --git a/include/linux/stddef.h b/include/linux/stddef.h
index 998a4ba28eba..ca507bd5f808 100644
--- a/include/linux/stddef.h
+++ b/include/linux/stddef.h
@@ -20,7 +20,7 @@ enum {
#endif
/**
- * sizeof_field(TYPE, MEMBER)
+ * sizeof_field() - Report the size of a struct field in bytes
*
* @TYPE: The structure containing the field of interest
* @MEMBER: The field to return the size of
@@ -28,7 +28,7 @@ enum {
#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
/**
- * offsetofend(TYPE, MEMBER)
+ * offsetofend() - Report the offset of a struct field within the struct
*
* @TYPE: The type of the structure
* @MEMBER: The member within the structure to get the end offset of
@@ -36,4 +36,65 @@ enum {
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+/**
+ * struct_group() - Wrap a set of declarations in a mirrored struct
+ *
+ * @NAME: The identifier name of the mirrored sub-struct
+ * @MEMBERS: The member declarations for the mirrored structs
+ *
+ * Used to create an anonymous union of two structs with identical
+ * layout and size: one anonymous and one named. The former can be
+ * used normally without sub-struct naming, and the latter can be
+ * used to reason about the start, end, and size of the group of
+ * struct members.
+ */
+#define struct_group(NAME, MEMBERS...) \
+ __struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS)
+
+/**
+ * struct_group_attr() - Create a struct_group() with trailing attributes
+ *
+ * @NAME: The identifier name of the mirrored sub-struct
+ * @ATTRS: Any struct attributes to apply
+ * @MEMBERS: The member declarations for the mirrored structs
+ *
+ * Used to create an anonymous union of two structs with identical
+ * layout and size: one anonymous and one named. The former can be
+ * used normally without sub-struct naming, and the latter can be
+ * used to reason about the start, end, and size of the group of
+ * struct members. Includes structure attributes argument.
+ */
+#define struct_group_attr(NAME, ATTRS, MEMBERS...) \
+ __struct_group(/* no tag */, NAME, ATTRS, MEMBERS)
+
+/**
+ * struct_group_tagged() - Create a struct_group with a reusable tag
+ *
+ * @TAG: The tag name for the named sub-struct
+ * @NAME: The identifier name of the mirrored sub-struct
+ * @MEMBERS: The member declarations for the mirrored structs
+ *
+ * Used to create an anonymous union of two structs with identical
+ * layout and size: one anonymous and one named. The former can be
+ * used normally without sub-struct naming, and the latter can be
+ * used to reason about the start, end, and size of the group of
+ * struct members. Includes struct tag argument for the named copy,
+ * so the specified layout can be reused later.
+ */
+#define struct_group_tagged(TAG, NAME, MEMBERS...) \
+ __struct_group(TAG, NAME, /* no attrs */, MEMBERS)
+
+/**
+ * DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
+ *
+ * @TYPE: The type of each flexible array element
+ * @NAME: The name of the flexible array member
+ *
+ * In order to have a flexible array member in a union or alone in a
+ * struct, it needs to be wrapped in an anonymous struct with at least 1
+ * named member, but that member can be empty.
+ */
+#define DECLARE_FLEX_ARRAY(TYPE, NAME) \
+ __DECLARE_FLEX_ARRAY(TYPE, NAME)
+
#endif
diff --git a/include/linux/string.h b/include/linux/string.h
index 5e96d656be7a..5a36608144a9 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -249,15 +249,6 @@ static inline const char *kbasename(const char *path)
return tail ? tail + 1 : path;
}
-#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline))
-#define __RENAME(x) __asm__(#x)
-
-void fortify_panic(const char *name) __noreturn __cold;
-void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter");
-void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter");
-void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter");
-void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
-
#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
#include <linux/fortify-string.h>
#endif
@@ -281,6 +272,41 @@ static inline void memcpy_and_pad(void *dest, size_t dest_len,
}
/**
+ * memset_after - Set a value after a struct member to the end of a struct
+ *
+ * @obj: Address of target struct instance
+ * @v: Byte value to repeatedly write
+ * @member: after which struct member to start writing bytes
+ *
+ * This is good for clearing padding following the given member.
+ */
+#define memset_after(obj, v, member) \
+({ \
+ u8 *__ptr = (u8 *)(obj); \
+ typeof(v) __val = (v); \
+ memset(__ptr + offsetofend(typeof(*(obj)), member), __val, \
+ sizeof(*(obj)) - offsetofend(typeof(*(obj)), member)); \
+})
+
+/**
+ * memset_startat - Set a value starting at a member to the end of a struct
+ *
+ * @obj: Address of target struct instance
+ * @v: Byte value to repeatedly write
+ * @member: struct member to start writing at
+ *
+ * Note that if there is padding between the prior member and the target
+ * member, memset_after() should be used to clear the prior padding.
+ */
+#define memset_startat(obj, v, member) \
+({ \
+ u8 *__ptr = (u8 *)(obj); \
+ typeof(v) __val = (v); \
+ memset(__ptr + offsetof(typeof(*(obj)), member), __val, \
+ sizeof(*(obj)) - offsetof(typeof(*(obj)), member)); \
+})
+
+/**
* str_has_prefix - Test if a string has a given prefix
* @str: The string to test
* @prefix: The string to see if @str starts with
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ba52f3a3478e..cdf0957a88a4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -320,11 +320,17 @@ struct vma_swap_readahead {
#endif
};
+static inline swp_entry_t folio_swap_entry(struct folio *folio)
+{
+ swp_entry_t entry = { .val = page_private(&folio->page) };
+ return entry;
+}
+
/* linux/mm/workingset.c */
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
-void workingset_refault(struct page *page, void *shadow);
-void workingset_activation(struct page *page);
+void workingset_refault(struct folio *folio, void *shadow);
+void workingset_activation(struct folio *folio);
/* Only track the nodes of mappings with shadow entries */
void workingset_update_node(struct xa_node *node);
@@ -344,9 +350,11 @@ extern unsigned long nr_free_buffer_pages(void);
/* linux/mm/swap.c */
extern void lru_note_cost(struct lruvec *lruvec, bool file,
unsigned int nr_pages);
-extern void lru_note_cost_page(struct page *);
+extern void lru_note_cost_folio(struct folio *);
+extern void folio_add_lru(struct folio *);
extern void lru_cache_add(struct page *);
-extern void mark_page_accessed(struct page *);
+void mark_page_accessed(struct page *);
+void folio_mark_accessed(struct folio *);
extern atomic_t lru_disable_count;
@@ -365,7 +373,6 @@ extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_cpu_zone(struct zone *zone);
extern void lru_add_drain_all(void);
-extern void rotate_reclaimable_page(struct page *page);
extern void deactivate_file_page(struct page *page);
extern void deactivate_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 252243c7783d..528a478dbda8 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -58,6 +58,7 @@ struct mq_attr;
struct compat_stat;
struct old_timeval32;
struct robust_list_head;
+struct futex_waitv;
struct getcpu_cache;
struct old_linux_dirent;
struct perf_event_attr;
@@ -610,7 +611,7 @@ asmlinkage long sys_waitid(int which, pid_t pid,
asmlinkage long sys_set_tid_address(int __user *tidptr);
asmlinkage long sys_unshare(unsigned long unshare_flags);
-/* kernel/futex.c */
+/* kernel/futex/syscalls.c */
asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
const struct __kernel_timespec __user *utime,
u32 __user *uaddr2, u32 val3);
@@ -623,6 +624,10 @@ asmlinkage long sys_get_robust_list(int pid,
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
size_t len);
+asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
+ unsigned int nr_futexes, unsigned int flags,
+ struct __kernel_timespec __user *timeout, clockid_t clockid);
+
/* kernel/hrtimer.c */
asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
struct __kernel_timespec __user *rmtp);
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 96305a64a5a7..c635c2e014e3 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -3,7 +3,7 @@
#define _LINUX_T10_PI_H
#include <linux/types.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
/*
* A T10 PI-capable target device can be formatted with different
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 0999f6317978..ad0c4e041030 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -203,7 +203,7 @@ static inline void copy_overflow(int size, unsigned long count)
static __always_inline __must_check bool
check_copy_size(const void *addr, size_t bytes, bool is_source)
{
- int sz = __compiletime_object_size(addr);
+ int sz = __builtin_object_size(addr, 0);
if (unlikely(sz >= 0 && sz < bytes)) {
if (!__builtin_constant_p(bytes))
copy_overflow(sz, bytes);
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 7634cd737061..0b3704ad13c8 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -186,6 +186,9 @@ static inline int cpu_to_mem(int cpu)
#ifndef topology_die_id
#define topology_die_id(cpu) ((void)(cpu), -1)
#endif
+#ifndef topology_cluster_id
+#define topology_cluster_id(cpu) ((void)(cpu), -1)
+#endif
#ifndef topology_core_id
#define topology_core_id(cpu) ((void)(cpu), 0)
#endif
@@ -195,6 +198,9 @@ static inline int cpu_to_mem(int cpu)
#ifndef topology_core_cpumask
#define topology_core_cpumask(cpu) cpumask_of(cpu)
#endif
+#ifndef topology_cluster_cpumask
+#define topology_cluster_cpumask(cpu) cpumask_of(cpu)
+#endif
#ifndef topology_die_cpumask
#define topology_die_cpumask(cpu) cpumask_of(cpu)
#endif
@@ -206,6 +212,13 @@ static inline const struct cpumask *cpu_smt_mask(int cpu)
}
#endif
+#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask)
+static inline const struct cpumask *cpu_cluster_mask(int cpu)
+{
+ return topology_cluster_cpumask(cpu);
+}
+#endif
+
static inline const struct cpumask *cpu_cpu_mask(int cpu)
{
return cpumask_of_node(cpu_to_node(cpu));
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index aa11fe323c56..12d827734686 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -269,6 +269,7 @@ enum tpm2_cc_attrs {
#define TPM_VID_INTEL 0x8086
#define TPM_VID_WINBOND 0x1050
#define TPM_VID_STM 0x104A
+#define TPM_VID_ATML 0x1114
enum tpm_chip_flags {
TPM_CHIP_FLAG_TPM2 = BIT(1),
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index a9f9c5714e65..fe95f0922526 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -16,23 +16,8 @@
* When function tracing occurs, the following steps are made:
* If arch does not support a ftrace feature:
* call internal function (uses INTERNAL bits) which calls...
- * If callback is registered to the "global" list, the list
- * function is called and recursion checks the GLOBAL bits.
- * then this function calls...
* The function callback, which can use the FTRACE bits to
* check for recursion.
- *
- * Now if the arch does not support a feature, and it calls
- * the global list function which calls the ftrace callback
- * all three of these steps will do a recursion protection.
- * There's no reason to do one if the previous caller already
- * did. The recursion that we are protecting against will
- * go through the same steps again.
- *
- * To prevent the multiple recursion checks, if a recursion
- * bit is set that is higher than the MAX bit of the current
- * check, then we know that the check was made by the previous
- * caller, and we can skip the current check.
*/
enum {
/* Function recursion bits */
@@ -40,12 +25,14 @@ enum {
TRACE_FTRACE_NMI_BIT,
TRACE_FTRACE_IRQ_BIT,
TRACE_FTRACE_SIRQ_BIT,
+ TRACE_FTRACE_TRANSITION_BIT,
- /* INTERNAL_BITs must be greater than FTRACE_BITs */
+ /* Internal use recursion bits */
TRACE_INTERNAL_BIT,
TRACE_INTERNAL_NMI_BIT,
TRACE_INTERNAL_IRQ_BIT,
TRACE_INTERNAL_SIRQ_BIT,
+ TRACE_INTERNAL_TRANSITION_BIT,
TRACE_BRANCH_BIT,
/*
@@ -86,12 +73,6 @@ enum {
*/
TRACE_GRAPH_NOTRACE_BIT,
- /*
- * When transitioning between context, the preempt_count() may
- * not be correct. Allow for a single recursion to cover this case.
- */
- TRACE_TRANSITION_BIT,
-
/* Used to prevent recursion recording from recursing. */
TRACE_RECORD_RECURSION_BIT,
};
@@ -113,12 +94,10 @@ enum {
#define TRACE_CONTEXT_BITS 4
#define TRACE_FTRACE_START TRACE_FTRACE_BIT
-#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
#define TRACE_LIST_START TRACE_INTERNAL_BIT
-#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
-#define TRACE_CONTEXT_MASK TRACE_LIST_MAX
+#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
/*
* Used for setting context
@@ -132,6 +111,7 @@ enum {
TRACE_CTX_IRQ,
TRACE_CTX_SOFTIRQ,
TRACE_CTX_NORMAL,
+ TRACE_CTX_TRANSITION,
};
static __always_inline int trace_get_context_bit(void)
@@ -160,45 +140,34 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
#endif
static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip,
- int start, int max)
+ int start)
{
unsigned int val = READ_ONCE(current->trace_recursion);
int bit;
- /* A previous recursion check was made */
- if ((val & TRACE_CONTEXT_MASK) > max)
- return 0;
-
bit = trace_get_context_bit() + start;
if (unlikely(val & (1 << bit))) {
/*
* It could be that preempt_count has not been updated during
* a switch between contexts. Allow for a single recursion.
*/
- bit = TRACE_TRANSITION_BIT;
+ bit = TRACE_CTX_TRANSITION + start;
if (val & (1 << bit)) {
do_ftrace_record_recursion(ip, pip);
return -1;
}
- } else {
- /* Normal check passed, clear the transition to allow it again */
- val &= ~(1 << TRACE_TRANSITION_BIT);
}
val |= 1 << bit;
current->trace_recursion = val;
barrier();
- return bit + 1;
+ return bit;
}
static __always_inline void trace_clear_recursion(int bit)
{
- if (!bit)
- return;
-
barrier();
- bit--;
trace_recursion_clear(bit);
}
@@ -214,7 +183,7 @@ static __always_inline void trace_clear_recursion(int bit)
static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
unsigned long parent_ip)
{
- return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+ return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START);
}
/**
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index eb70cabe6e7f..33a4240e6a6f 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -127,6 +127,8 @@ static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type t
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type);
+void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type);
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
static inline void set_rlimit_ucount_max(struct user_namespace *ns,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d6a6cf53b127..bfe38869498d 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -415,6 +415,78 @@ static inline void drain_zonestat(struct zone *zone,
struct per_cpu_zonestat *pzstats) { }
#endif /* CONFIG_SMP */
+static inline void __zone_stat_mod_folio(struct folio *folio,
+ enum zone_stat_item item, long nr)
+{
+ __mod_zone_page_state(folio_zone(folio), item, nr);
+}
+
+static inline void __zone_stat_add_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ __mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio));
+}
+
+static inline void __zone_stat_sub_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ __mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void zone_stat_mod_folio(struct folio *folio,
+ enum zone_stat_item item, long nr)
+{
+ mod_zone_page_state(folio_zone(folio), item, nr);
+}
+
+static inline void zone_stat_add_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio));
+}
+
+static inline void zone_stat_sub_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void __node_stat_mod_folio(struct folio *folio,
+ enum node_stat_item item, long nr)
+{
+ __mod_node_page_state(folio_pgdat(folio), item, nr);
+}
+
+static inline void __node_stat_add_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ __mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio));
+}
+
+static inline void __node_stat_sub_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ __mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void node_stat_mod_folio(struct folio *folio,
+ enum node_stat_item item, long nr)
+{
+ mod_node_page_state(folio_pgdat(folio), item, nr);
+}
+
+static inline void node_stat_add_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio));
+}
+
+static inline void node_stat_sub_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
+}
+
static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
int migratetype)
{
@@ -525,12 +597,6 @@ static inline void mod_lruvec_page_state(struct page *page,
#endif /* CONFIG_MEMCG */
-static inline void inc_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- mod_lruvec_state(lruvec, idx, 1);
-}
-
static inline void __inc_lruvec_page_state(struct page *page,
enum node_stat_item idx)
{
@@ -543,6 +609,24 @@ static inline void __dec_lruvec_page_state(struct page *page,
__mod_lruvec_page_state(page, idx, -1);
}
+static inline void __lruvec_stat_mod_folio(struct folio *folio,
+ enum node_stat_item idx, int val)
+{
+ __mod_lruvec_page_state(&folio->page, idx, val);
+}
+
+static inline void __lruvec_stat_add_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio));
+}
+
+static inline void __lruvec_stat_sub_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
+}
+
static inline void inc_lruvec_page_state(struct page *page,
enum node_stat_item idx)
{
@@ -555,4 +639,21 @@ static inline void dec_lruvec_page_state(struct page *page,
mod_lruvec_page_state(page, idx, -1);
}
+static inline void lruvec_stat_mod_folio(struct folio *folio,
+ enum node_stat_item idx, int val)
+{
+ mod_lruvec_page_state(&folio->page, idx, val);
+}
+
+static inline void lruvec_stat_add_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio));
+}
+
+static inline void lruvec_stat_sub_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
+}
#endif /* _LINUX_VMSTAT_H */
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 93dab0e9580f..2d0df57c9902 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -1160,6 +1160,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
(wait)->flags = 0; \
} while (0)
-bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg);
+typedef int (*task_call_f)(struct task_struct *p, void *arg);
+extern int task_call_func(struct task_struct *p, task_call_f func, void *arg);
#endif /* _LINUX_WAIT_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2ebef6b1a3d6..74d3c1efd9bb 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -399,9 +399,8 @@ extern struct workqueue_struct *system_freezable_power_efficient_wq;
* RETURNS:
* Pointer to the allocated workqueue on success, %NULL on failure.
*/
-struct workqueue_struct *alloc_workqueue(const char *fmt,
- unsigned int flags,
- int max_active, ...);
+__printf(1, 4) struct workqueue_struct *
+alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...);
/**
* alloc_ordered_workqueue - allocate an ordered workqueue
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d1f65adf6a26..3bfd487d1dd2 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -11,7 +11,6 @@
#include <linux/flex_proportions.h>
#include <linux/backing-dev-defs.h>
#include <linux/blk_types.h>
-#include <linux/blk-cgroup.h>
struct bio;
@@ -109,15 +108,12 @@ static inline int wbc_to_write_flags(struct writeback_control *wbc)
return flags;
}
-static inline struct cgroup_subsys_state *
-wbc_blkcg_css(struct writeback_control *wbc)
-{
#ifdef CONFIG_CGROUP_WRITEBACK
- if (wbc->wb)
- return wbc->wb->blkcg_css;
-#endif
- return blkcg_root_css;
-}
+#define wbc_blkcg_css(wbc) \
+ ((wbc)->wb ? (wbc)->wb->blkcg_css : blkcg_root_css)
+#else
+#define wbc_blkcg_css(wbc) (blkcg_root_css)
+#endif /* CONFIG_CGROUP_WRITEBACK */
/*
* A wb_domain represents a domain that wb's (bdi_writeback's) belong to
@@ -393,7 +389,14 @@ void writeback_set_ratelimit(void);
void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end);
-void account_page_redirty(struct page *page);
+bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
+void folio_account_redirty(struct folio *folio);
+static inline void account_page_redirty(struct page *page)
+{
+ folio_account_redirty(page_folio(page));
+}
+bool folio_redirty_for_writepage(struct writeback_control *, struct folio *);
+bool redirty_page_for_writepage(struct writeback_control *, struct page *);
void sb_mark_inode_writeback(struct inode *inode);
void sb_clear_inode_writeback(struct inode *inode);
diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 29db736af86d..bb763085479a 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -28,12 +28,10 @@
#ifndef CONFIG_PREEMPT_RT
#define WW_MUTEX_BASE mutex
#define ww_mutex_base_init(l,n,k) __mutex_init(l,n,k)
-#define ww_mutex_base_trylock(l) mutex_trylock(l)
#define ww_mutex_base_is_locked(b) mutex_is_locked((b))
#else
#define WW_MUTEX_BASE rt_mutex
#define ww_mutex_base_init(l,n,k) __rt_mutex_init(l,n,k)
-#define ww_mutex_base_trylock(l) rt_mutex_trylock(l)
#define ww_mutex_base_is_locked(b) rt_mutex_base_is_locked(&(b)->rtmutex)
#endif
@@ -339,17 +337,8 @@ ww_mutex_lock_slow_interruptible(struct ww_mutex *lock,
extern void ww_mutex_unlock(struct ww_mutex *lock);
-/**
- * ww_mutex_trylock - tries to acquire the w/w mutex without acquire context
- * @lock: mutex to lock
- *
- * Trylocks a mutex without acquire context, so no deadlock detection is
- * possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise.
- */
-static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock)
-{
- return ww_mutex_base_trylock(&lock->base);
-}
+extern int __must_check ww_mutex_trylock(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ctx);
/***
* ww_mutex_destroy - mark a w/w mutex unusable
diff --git a/include/linux/xz.h b/include/linux/xz.h
index 9884c8440188..7285ca5d56e9 100644
--- a/include/linux/xz.h
+++ b/include/linux/xz.h
@@ -234,6 +234,112 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s);
XZ_EXTERN void xz_dec_end(struct xz_dec *s);
/*
+ * Decompressor for MicroLZMA, an LZMA variant with a very minimal header.
+ * See xz_dec_microlzma_alloc() below for details.
+ *
+ * These functions aren't used or available in preboot code and thus aren't
+ * marked with XZ_EXTERN. This avoids warnings about static functions that
+ * are never defined.
+ */
+/**
+ * struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state
+ */
+struct xz_dec_microlzma;
+
+/**
+ * xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder
+ * @mode XZ_SINGLE or XZ_PREALLOC
+ * @dict_size LZMA dictionary size. This must be at least 4 KiB and
+ * at most 3 GiB.
+ *
+ * In contrast to xz_dec_init(), this function only allocates the memory
+ * and remembers the dictionary size. xz_dec_microlzma_reset() must be used
+ * before calling xz_dec_microlzma_run().
+ *
+ * The amount of allocated memory is a little less than 30 KiB with XZ_SINGLE.
+ * With XZ_PREALLOC also a dictionary buffer of dict_size bytes is allocated.
+ *
+ * On success, xz_dec_microlzma_alloc() returns a pointer to
+ * struct xz_dec_microlzma. If memory allocation fails or
+ * dict_size is invalid, NULL is returned.
+ *
+ * The compressed format supported by this decoder is a raw LZMA stream
+ * whose first byte (always 0x00) has been replaced with bitwise-negation
+ * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is
+ * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00.
+ * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream
+ * marker must not be used. The unused values are reserved for future use.
+ * This MicroLZMA header format was created for use in EROFS but may be used
+ * by others too.
+ */
+extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
+ uint32_t dict_size);
+
+/**
+ * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state
+ * @s Decoder state allocated using xz_dec_microlzma_alloc()
+ * @comp_size Compressed size of the input stream
+ * @uncomp_size Uncompressed size of the input stream. A value smaller
+ * than the real uncompressed size of the input stream can
+ * be specified if uncomp_size_is_exact is set to false.
+ * uncomp_size can never be set to a value larger than the
+ * expected real uncompressed size because it would eventually
+ * result in XZ_DATA_ERROR.
+ * @uncomp_size_is_exact This is an int instead of bool to avoid
+ * requiring stdbool.h. This should normally be set to true.
+ * When this is set to false, error detection is weaker.
+ */
+extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s,
+ uint32_t comp_size, uint32_t uncomp_size,
+ int uncomp_size_is_exact);
+
+/**
+ * xz_dec_microlzma_run() - Run the MicroLZMA decoder
+ * @s Decoder state initialized using xz_dec_microlzma_reset()
+ * @b: Input and output buffers
+ *
+ * This works similarly to xz_dec_run() with a few important differences.
+ * Only the differences are documented here.
+ *
+ * The only possible return values are XZ_OK, XZ_STREAM_END, and
+ * XZ_DATA_ERROR. This function cannot return XZ_BUF_ERROR: if no progress
+ * is possible due to lack of input data or output space, this function will
+ * keep returning XZ_OK. Thus, the calling code must be written so that it
+ * will eventually provide input and output space matching (or exceeding)
+ * comp_size and uncomp_size arguments given to xz_dec_microlzma_reset().
+ * If the caller cannot do this (for example, if the input file is truncated
+ * or otherwise corrupt), the caller must detect this error by itself to
+ * avoid an infinite loop.
+ *
+ * If the compressed data seems to be corrupt, XZ_DATA_ERROR is returned.
+ * This can happen also when incorrect dictionary, uncompressed, or
+ * compressed sizes have been specified.
+ *
+ * With XZ_PREALLOC only: As an extra feature, b->out may be NULL to skip over
+ * uncompressed data. This way the caller doesn't need to provide a temporary
+ * output buffer for the bytes that will be ignored.
+ *
+ * With XZ_SINGLE only: In contrast to xz_dec_run(), the return value XZ_OK
+ * is also possible and thus XZ_SINGLE is actually a limited multi-call mode.
+ * After XZ_OK the bytes decoded so far may be read from the output buffer.
+ * It is possible to continue decoding but the variables b->out and b->out_pos
+ * MUST NOT be changed by the caller. Increasing the value of b->out_size is
+ * allowed to make more output space available; one doesn't need to provide
+ * space for the whole uncompressed data on the first call. The input buffer
+ * may be changed normally like with XZ_PREALLOC. This way input data can be
+ * provided from non-contiguous memory.
+ */
+extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s,
+ struct xz_buf *b);
+
+/**
+ * xz_dec_microlzma_end() - Free the memory allocated for the decoder state
+ * @s: Decoder state allocated using xz_dec_microlzma_alloc().
+ * If s is NULL, this function does nothing.
+ */
+extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s);
+
+/*
* Standalone build (userspace build or in-kernel build for boot time use)
* needs a CRC32 implementation. For normal in-kernel use, kernel's own
* CRC32 module is used instead, and users of this module don't need to
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 62dd8422e0dc..27336fc70467 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5376,7 +5376,6 @@ static inline void wiphy_unlock(struct wiphy *wiphy)
* netdev and may otherwise be used by driver read-only, will be update
* by cfg80211 on change_interface
* @mgmt_registrations: list of registrations for management frames
- * @mgmt_registrations_lock: lock for the list
* @mgmt_registrations_need_update: mgmt registrations were updated,
* need to propagate the update to the driver
* @mtx: mutex used to lock data in this struct, may be used by drivers
@@ -5423,7 +5422,6 @@ struct wireless_dev {
u32 identifier;
struct list_head mgmt_registrations;
- spinlock_t mgmt_registrations_lock;
u8 mgmt_registrations_need_update:1;
struct mutex mtx;
diff --git a/include/net/mctp.h b/include/net/mctp.h
index a824d47c3c6d..ffd2c23bd76d 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -54,7 +54,7 @@ struct mctp_sock {
struct sock sk;
/* bind() params */
- int bind_net;
+ unsigned int bind_net;
mctp_eid_t bind_addr;
__u8 bind_type;
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 6026bbefbffd..3214848402ec 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -69,6 +69,10 @@ struct mptcp_out_options {
struct {
u64 sndr_key;
u64 rcvr_key;
+ u64 data_seq;
+ u32 subflow_seq;
+ u16 data_len;
+ __sum16 csum;
};
struct {
struct mptcp_addr_info addr;
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 0fd8a4159662..ceadf8ba25a4 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -17,7 +17,6 @@ struct inet_frags_ctl;
struct nft_ct_frag6_pernet {
struct ctl_table_header *nf_frag_frags_hdr;
struct fqdir *fqdir;
- unsigned int users;
};
#endif /* _NF_DEFRAG_IPV6_H */
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 148f5d8ee5ab..a16171c5fd9e 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1202,7 +1202,7 @@ struct nft_object *nft_obj_lookup(const struct net *net,
void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq,
- int event, int family, int report, gfp_t gfp);
+ int event, u16 flags, int family, int report, gfp_t gfp);
/**
* struct nft_object_type - stateful object type
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 986a2a9cfdfa..b593f95e9991 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -27,5 +27,11 @@ struct netns_nf {
#if IS_ENABLED(CONFIG_DECNET)
struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ unsigned int defrag_ipv4_users;
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ unsigned int defrag_ipv6_users;
+#endif
};
#endif
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 2eb6d7c2c931..f37c7a558d6d 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -384,11 +384,11 @@ sctp_vtag_verify(const struct sctp_chunk *chunk,
* Verification Tag value does not match the receiver's own
* tag value, the receiver shall silently discard the packet...
*/
- if (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag)
- return 1;
+ if (ntohl(chunk->sctp_hdr->vtag) != asoc->c.my_vtag)
+ return 0;
chunk->transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port;
- return 0;
+ return 1;
}
/* Check VTAG of the packet matches the sender's own tag and the T bit is
diff --git a/include/net/sock.h b/include/net/sock.h
index ae929e21a376..463f390d90b3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -307,6 +307,7 @@ struct bpf_local_storage;
* @sk_priority: %SO_PRIORITY setting
* @sk_type: socket type (%SOCK_STREAM, etc)
* @sk_protocol: which protocol this socket belongs in this network family
+ * @sk_peer_lock: lock protecting @sk_peer_pid and @sk_peer_cred
* @sk_peer_pid: &struct pid for this socket's peer
* @sk_peer_cred: %SO_PEERCRED setting
* @sk_rcvlowat: %SO_RCVLOWAT setting
@@ -1207,7 +1208,7 @@ struct proto {
#endif
bool (*stream_memory_free)(const struct sock *sk, int wake);
- bool (*stream_memory_read)(const struct sock *sk);
+ bool (*sock_is_readable)(struct sock *sk);
/* Memory pressure */
void (*enter_memory_pressure)(struct sock *sk);
void (*leave_memory_pressure)(struct sock *sk);
@@ -2819,4 +2820,10 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs);
int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len);
+static inline bool sk_is_readable(struct sock *sk)
+{
+ if (sk->sk_prot->sock_is_readable)
+ return sk->sk_prot->sock_is_readable(sk);
+ return false;
+}
#endif /* _SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3166dc15d7d6..60c384569e9c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1576,6 +1576,7 @@ struct tcp_md5sig_key {
u8 keylen;
u8 family; /* AF_INET or AF_INET6 */
u8 prefixlen;
+ u8 flags;
union tcp_md5_addr addr;
int l3index; /* set if key added with L3 scope */
u8 key[TCP_MD5SIG_MAXKEYLEN];
@@ -1621,10 +1622,10 @@ struct tcp_md5sig_pool {
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk, const struct sk_buff *skb);
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index,
+ int family, u8 prefixlen, int l3index, u8 flags,
const u8 *newkey, u8 newkeylen, gfp_t gfp);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index);
+ int family, u8 prefixlen, int l3index, u8 flags);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
diff --git a/include/net/tls.h b/include/net/tls.h
index be4b3e1cac46..1fffb206f09f 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -358,6 +358,7 @@ int tls_sk_query(struct sock *sk, int optname, char __user *optval,
int __user *optlen);
int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
unsigned int optlen);
+void tls_err_abort(struct sock *sk, int err);
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
@@ -375,7 +376,7 @@ void tls_sw_release_resources_rx(struct sock *sk);
void tls_sw_free_ctx_rx(struct tls_context *tls_ctx);
int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
-bool tls_sw_stream_read(const struct sock *sk);
+bool tls_sw_sock_is_readable(struct sock *sk);
ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
@@ -466,12 +467,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
#endif
}
-static inline void tls_err_abort(struct sock *sk, int err)
-{
- sk->sk_err = err;
- sk_error_report(sk);
-}
-
static inline bool tls_bigint_increment(unsigned char *seq, int len)
{
int i;
@@ -512,7 +507,7 @@ static inline void tls_advance_record_sn(struct sock *sk,
struct cipher_context *ctx)
{
if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size))
- tls_err_abort(sk, EBADMSG);
+ tls_err_abort(sk, -EBADMSG);
if (prot->version != TLS_1_3_VERSION &&
prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305)
diff --git a/include/net/udp.h b/include/net/udp.h
index 360df454356c..909ecf447e0f 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -494,8 +494,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
* CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial
* packets in udp_gro_complete_segment. As does UDP GSO, verified by
* udp_send_skb. But when those packets are looped in dev_loopback_xmit
- * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this
- * specific case, where PARTIAL is both correct and required.
+ * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY.
+ * Reset in this specific case, where PARTIAL is both correct and
+ * required.
*/
if (skb->pkt_type == PACKET_LOOPBACK)
skb->ip_summed = CHECKSUM_PARTIAL;
diff --git a/include/scsi/sas.h b/include/scsi/sas.h
index 4726c1bbec65..64154c1fed02 100644
--- a/include/scsi/sas.h
+++ b/include/scsi/sas.h
@@ -323,8 +323,10 @@ struct ssp_response_iu {
__be32 sense_data_len;
__be32 response_data_len;
- u8 resp_data[0];
- u8 sense_data[];
+ union {
+ DECLARE_FLEX_ARRAY(u8, resp_data);
+ DECLARE_FLEX_ARRAY(u8, sense_data);
+ };
} __attribute__ ((packed));
struct ssp_command_iu {
@@ -554,8 +556,10 @@ struct ssp_response_iu {
__be32 sense_data_len;
__be32 response_data_len;
- u8 resp_data[0];
- u8 sense_data[];
+ union {
+ DECLARE_FLEX_ARRAY(u8, resp_data);
+ DECLARE_FLEX_ARRAY(u8, sense_data);
+ };
} __attribute__ ((packed));
struct ssp_command_iu {
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index eaf04c9a1dfc..31078063afac 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -396,4 +396,7 @@ static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd)
extern void scsi_build_sense(struct scsi_cmnd *scmd, int desc,
u8 key, u8 asc, u8 ascq);
+struct request *scsi_alloc_request(struct request_queue *q,
+ unsigned int op, blk_mq_req_flags_t flags);
+
#endif /* _SCSI_SCSI_CMND_H */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index b97e142a7ca9..430b73bd02ac 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -5,7 +5,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <scsi/scsi.h>
#include <linux/atomic.h>
#include <linux/sbitmap.h>
diff --git a/include/soc/arc/timers.h b/include/soc/arc/timers.h
index 7ecde3b159c8..ae99d3e855f1 100644
--- a/include/soc/arc/timers.h
+++ b/include/soc/arc/timers.h
@@ -17,8 +17,8 @@
#define ARC_REG_TIMER1_CNT 0x100 /* timer 1 count */
/* CTRL reg bits */
-#define TIMER_CTRL_IE (1 << 0) /* Interrupt when Count reaches limit */
-#define TIMER_CTRL_NH (1 << 1) /* Count only when CPU NOT halted */
+#define ARC_TIMER_CTRL_IE (1 << 0) /* Interrupt when Count reaches limit */
+#define ARC_TIMER_CTRL_NH (1 << 1) /* Count only when CPU NOT halted */
#define ARC_TIMERN_MAX 0xFFFFFFFF
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 06706a9fd5b1..d7055b41982d 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -89,15 +89,6 @@
/* Source PGIDs, one per physical port */
#define PGID_SRC 80
-#define IFH_TAG_TYPE_C 0
-#define IFH_TAG_TYPE_S 1
-
-#define IFH_REW_OP_NOOP 0x0
-#define IFH_REW_OP_DSCP 0x1
-#define IFH_REW_OP_ONE_STEP_PTP 0x2
-#define IFH_REW_OP_TWO_STEP_PTP 0x3
-#define IFH_REW_OP_ORIGIN_PTP 0x5
-
#define OCELOT_NUM_TC 8
#define OCELOT_SPEED_2500 0
@@ -603,10 +594,10 @@ struct ocelot_port {
/* The VLAN ID that will be transmitted as untagged, on egress */
struct ocelot_vlan native_vlan;
+ unsigned int ptp_skbs_in_flight;
u8 ptp_cmd;
struct sk_buff_head tx_skbs;
u8 ts_id;
- spinlock_t ts_id_lock;
phy_interface_t phy_mode;
@@ -680,6 +671,9 @@ struct ocelot {
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_info;
struct hwtstamp_config hwtstamp_config;
+ unsigned int ptp_skbs_in_flight;
+ /* Protects the 2-step TX timestamp ID logic */
+ spinlock_t ts_id_lock;
/* Protects the PTP interface state */
struct mutex ptp_lock;
/* Protects the PTP clock */
@@ -692,15 +686,6 @@ struct ocelot_policer {
u32 burst; /* bytes */
};
-struct ocelot_skb_cb {
- struct sk_buff *clone;
- u8 ptp_cmd;
- u8 ts_id;
-};
-
-#define OCELOT_SKB_CB(skb) \
- ((struct ocelot_skb_cb *)((skb)->cb))
-
#define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
#define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi))
#define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri))
@@ -752,8 +737,6 @@ u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target,
void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target,
u32 val, u32 reg, u32 offset);
-#if IS_ENABLED(CONFIG_MSCC_OCELOT_SWITCH_LIB)
-
/* Packet I/O */
bool ocelot_can_inject(struct ocelot *ocelot, int grp);
void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
@@ -761,36 +744,6 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb);
void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp);
-u32 ocelot_ptp_rew_op(struct sk_buff *skb);
-#else
-
-static inline bool ocelot_can_inject(struct ocelot *ocelot, int grp)
-{
- return false;
-}
-
-static inline void ocelot_port_inject_frame(struct ocelot *ocelot, int port,
- int grp, u32 rew_op,
- struct sk_buff *skb)
-{
-}
-
-static inline int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp,
- struct sk_buff **skb)
-{
- return -EIO;
-}
-
-static inline void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp)
-{
-}
-
-static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb)
-{
- return 0;
-}
-#endif
-
/* Hardware initialization */
int ocelot_regfields_init(struct ocelot *ocelot,
const struct reg_field *const regfields);
diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h
index ded497d72bdb..f085884b1fa2 100644
--- a/include/soc/mscc/ocelot_ptp.h
+++ b/include/soc/mscc/ocelot_ptp.h
@@ -13,6 +13,9 @@
#include <linux/ptp_clock_kernel.h>
#include <soc/mscc/ocelot.h>
+#define OCELOT_MAX_PTP_ID 63
+#define OCELOT_PTP_FIFO_SIZE 128
+
#define PTP_PIN_CFG_RSZ 0x20
#define PTP_PIN_TOD_SEC_MSB_RSZ PTP_PIN_CFG_RSZ
#define PTP_PIN_TOD_SEC_LSB_RSZ PTP_PIN_CFG_RSZ
diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h
index 25fd525aaf92..4869ebbd438d 100644
--- a/include/soc/mscc/ocelot_vcap.h
+++ b/include/soc/mscc/ocelot_vcap.h
@@ -694,7 +694,7 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot,
int ocelot_vcap_filter_del(struct ocelot *ocelot,
struct ocelot_vcap_filter *rule);
struct ocelot_vcap_filter *
-ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id,
- bool tc_offload);
+ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block,
+ unsigned long cookie, bool tc_offload);
#endif /* _OCELOT_VCAP_H_ */
diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h
index 01570dbda503..0e45963bb767 100644
--- a/include/sound/hda_codec.h
+++ b/include/sound/hda_codec.h
@@ -224,6 +224,7 @@ struct hda_codec {
#endif
/* misc flags */
+ unsigned int configured:1; /* codec was configured */
unsigned int in_freeing:1; /* being released */
unsigned int registered:1; /* codec was registered */
unsigned int display_power_control:1; /* needs display power */
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index cc5ab96a7471..a95daa4d4caa 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -114,7 +114,7 @@ TRACE_EVENT(block_rq_requeue,
*/
TRACE_EVENT(block_rq_complete,
- TP_PROTO(struct request *rq, int error, unsigned int nr_bytes),
+ TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes),
TP_ARGS(rq, error, nr_bytes),
@@ -122,7 +122,7 @@ TRACE_EVENT(block_rq_complete,
__field( dev_t, dev )
__field( sector_t, sector )
__field( unsigned int, nr_sector )
- __field( int, error )
+ __field( int , error )
__array( char, rwbs, RWBS_LEN )
__dynamic_array( char, cmd, 1 )
),
@@ -131,7 +131,7 @@ TRACE_EVENT(block_rq_complete,
__entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
__entry->sector = blk_rq_pos(rq);
__entry->nr_sector = nr_bytes >> 9;
- __entry->error = error;
+ __entry->error = blk_status_to_errno(error);
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 695bfdbfdcad..920b6a303d60 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -178,7 +178,7 @@ TRACE_EVENT(cachefiles_unlink,
),
TP_fast_assign(
- __entry->obj = obj->fscache.debug_id;
+ __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX;
__entry->de = de;
__entry->why = why;
),
@@ -205,7 +205,7 @@ TRACE_EVENT(cachefiles_rename,
),
TP_fast_assign(
- __entry->obj = obj->fscache.debug_id;
+ __entry->obj = obj ? obj->fscache.debug_id : UINT_MAX;
__entry->de = de;
__entry->to = to;
__entry->why = why;
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index db4f2cec8360..16ae7b666810 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -24,7 +24,7 @@ struct erofs_map_blocks;
#define show_mflags(flags) __print_flags(flags, "", \
{ EROFS_MAP_MAPPED, "M" }, \
{ EROFS_MAP_META, "I" }, \
- { EROFS_MAP_ZIPPED, "Z" })
+ { EROFS_MAP_ENCODED, "E" })
TRACE_EVENT(erofs_lookup,
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 0dd30de00e5b..7346f0164cf4 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -6,6 +6,7 @@
#define _TRACE_IO_URING_H
#include <linux/tracepoint.h>
+#include <uapi/linux/io_uring.h>
struct io_wq_work;
@@ -497,6 +498,66 @@ TRACE_EVENT(io_uring_task_run,
(unsigned long long) __entry->user_data)
);
+/*
+ * io_uring_req_failed - called when an sqe is errored dring submission
+ *
+ * @sqe: pointer to the io_uring_sqe that failed
+ * @error: error it failed with
+ *
+ * Allows easier diagnosing of malformed requests in production systems.
+ */
+TRACE_EVENT(io_uring_req_failed,
+
+ TP_PROTO(const struct io_uring_sqe *sqe, int error),
+
+ TP_ARGS(sqe, error),
+
+ TP_STRUCT__entry (
+ __field( u8, opcode )
+ __field( u8, flags )
+ __field( u8, ioprio )
+ __field( u64, off )
+ __field( u64, addr )
+ __field( u32, len )
+ __field( u32, op_flags )
+ __field( u64, user_data )
+ __field( u16, buf_index )
+ __field( u16, personality )
+ __field( u32, file_index )
+ __field( u64, pad1 )
+ __field( u64, pad2 )
+ __field( int, error )
+ ),
+
+ TP_fast_assign(
+ __entry->opcode = sqe->opcode;
+ __entry->flags = sqe->flags;
+ __entry->ioprio = sqe->ioprio;
+ __entry->off = sqe->off;
+ __entry->addr = sqe->addr;
+ __entry->len = sqe->len;
+ __entry->op_flags = sqe->rw_flags;
+ __entry->user_data = sqe->user_data;
+ __entry->buf_index = sqe->buf_index;
+ __entry->personality = sqe->personality;
+ __entry->file_index = sqe->file_index;
+ __entry->pad1 = sqe->__pad2[0];
+ __entry->pad2 = sqe->__pad2[1];
+ __entry->error = error;
+ ),
+
+ TP_printk("op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, "
+ "len=%u, rw_flags=0x%x, user_data=0x%llx, buf_index=%d, "
+ "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
+ __entry->opcode, __entry->flags, __entry->ioprio,
+ (unsigned long long)__entry->off,
+ (unsigned long long) __entry->addr, __entry->len,
+ __entry->op_flags, (unsigned long long) __entry->user_data,
+ __entry->buf_index, __entry->personality, __entry->file_index,
+ (unsigned long long) __entry->pad1,
+ (unsigned long long) __entry->pad2, __entry->error)
+);
+
#endif /* _TRACE_IO_URING_H */
/* This part must be outside protection */
diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h
index 491098a0d8ed..bf7533f171ff 100644
--- a/include/trace/events/kyber.h
+++ b/include/trace/events/kyber.h
@@ -13,11 +13,11 @@
TRACE_EVENT(kyber_latency,
- TP_PROTO(struct request_queue *q, const char *domain, const char *type,
+ TP_PROTO(dev_t dev, const char *domain, const char *type,
unsigned int percentile, unsigned int numerator,
unsigned int denominator, unsigned int samples),
- TP_ARGS(q, domain, type, percentile, numerator, denominator, samples),
+ TP_ARGS(dev, domain, type, percentile, numerator, denominator, samples),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -30,7 +30,7 @@ TRACE_EVENT(kyber_latency,
),
TP_fast_assign(
- __entry->dev = disk_devt(q->disk);
+ __entry->dev = dev;
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
strlcpy(__entry->type, type, sizeof(__entry->type));
__entry->percentile = percentile;
@@ -47,10 +47,9 @@ TRACE_EVENT(kyber_latency,
TRACE_EVENT(kyber_adjust,
- TP_PROTO(struct request_queue *q, const char *domain,
- unsigned int depth),
+ TP_PROTO(dev_t dev, const char *domain, unsigned int depth),
- TP_ARGS(q, domain, depth),
+ TP_ARGS(dev, domain, depth),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -59,7 +58,7 @@ TRACE_EVENT(kyber_adjust,
),
TP_fast_assign(
- __entry->dev = disk_devt(q->disk);
+ __entry->dev = dev;
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
__entry->depth = depth;
),
@@ -71,9 +70,9 @@ TRACE_EVENT(kyber_adjust,
TRACE_EVENT(kyber_throttled,
- TP_PROTO(struct request_queue *q, const char *domain),
+ TP_PROTO(dev_t dev, const char *domain),
- TP_ARGS(q, domain),
+ TP_ARGS(dev, domain),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -81,7 +80,7 @@ TRACE_EVENT(kyber_throttled,
),
TP_fast_assign(
- __entry->dev = disk_devt(q->disk);
+ __entry->dev = dev;
strlcpy(__entry->domain, domain, sizeof(__entry->domain));
),
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
index 1d28431e85bd..171524d3526d 100644
--- a/include/trace/events/pagemap.h
+++ b/include/trace/events/pagemap.h
@@ -16,38 +16,38 @@
#define PAGEMAP_MAPPEDDISK 0x0020u
#define PAGEMAP_BUFFERS 0x0040u
-#define trace_pagemap_flags(page) ( \
- (PageAnon(page) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \
- (page_mapped(page) ? PAGEMAP_MAPPED : 0) | \
- (PageSwapCache(page) ? PAGEMAP_SWAPCACHE : 0) | \
- (PageSwapBacked(page) ? PAGEMAP_SWAPBACKED : 0) | \
- (PageMappedToDisk(page) ? PAGEMAP_MAPPEDDISK : 0) | \
- (page_has_private(page) ? PAGEMAP_BUFFERS : 0) \
+#define trace_pagemap_flags(folio) ( \
+ (folio_test_anon(folio) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \
+ (folio_mapped(folio) ? PAGEMAP_MAPPED : 0) | \
+ (folio_test_swapcache(folio) ? PAGEMAP_SWAPCACHE : 0) | \
+ (folio_test_swapbacked(folio) ? PAGEMAP_SWAPBACKED : 0) | \
+ (folio_test_mappedtodisk(folio) ? PAGEMAP_MAPPEDDISK : 0) | \
+ (folio_test_private(folio) ? PAGEMAP_BUFFERS : 0) \
)
TRACE_EVENT(mm_lru_insertion,
- TP_PROTO(struct page *page),
+ TP_PROTO(struct folio *folio),
- TP_ARGS(page),
+ TP_ARGS(folio),
TP_STRUCT__entry(
- __field(struct page *, page )
+ __field(struct folio *, folio )
__field(unsigned long, pfn )
__field(enum lru_list, lru )
__field(unsigned long, flags )
),
TP_fast_assign(
- __entry->page = page;
- __entry->pfn = page_to_pfn(page);
- __entry->lru = page_lru(page);
- __entry->flags = trace_pagemap_flags(page);
+ __entry->folio = folio;
+ __entry->pfn = folio_pfn(folio);
+ __entry->lru = folio_lru_list(folio);
+ __entry->flags = trace_pagemap_flags(folio);
),
/* Flag format is based on page-types.c formatting for pagemap */
- TP_printk("page=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s",
- __entry->page,
+ TP_printk("folio=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s",
+ __entry->folio,
__entry->pfn,
__entry->lru,
__entry->flags & PAGEMAP_MAPPED ? "M" : " ",
@@ -60,23 +60,21 @@ TRACE_EVENT(mm_lru_insertion,
TRACE_EVENT(mm_lru_activate,
- TP_PROTO(struct page *page),
+ TP_PROTO(struct folio *folio),
- TP_ARGS(page),
+ TP_ARGS(folio),
TP_STRUCT__entry(
- __field(struct page *, page )
+ __field(struct folio *, folio )
__field(unsigned long, pfn )
),
TP_fast_assign(
- __entry->page = page;
- __entry->pfn = page_to_pfn(page);
+ __entry->folio = folio;
+ __entry->pfn = folio_pfn(folio);
),
- /* Flag format is based on page-types.c formatting for pagemap */
- TP_printk("page=%p pfn=0x%lx", __entry->page, __entry->pfn)
-
+ TP_printk("folio=%p pfn=0x%lx", __entry->folio, __entry->pfn)
);
#endif /* _TRACE_PAGEMAP_H */
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 840d1ba84cf5..7dccb66474f7 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -52,11 +52,11 @@ WB_WORK_REASON
struct wb_writeback_work;
-DECLARE_EVENT_CLASS(writeback_page_template,
+DECLARE_EVENT_CLASS(writeback_folio_template,
- TP_PROTO(struct page *page, struct address_space *mapping),
+ TP_PROTO(struct folio *folio, struct address_space *mapping),
- TP_ARGS(page, mapping),
+ TP_ARGS(folio, mapping),
TP_STRUCT__entry (
__array(char, name, 32)
@@ -69,7 +69,7 @@ DECLARE_EVENT_CLASS(writeback_page_template,
bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
NULL), 32);
__entry->ino = mapping ? mapping->host->i_ino : 0;
- __entry->index = page->index;
+ __entry->index = folio->index;
),
TP_printk("bdi %s: ino=%lu index=%lu",
@@ -79,18 +79,18 @@ DECLARE_EVENT_CLASS(writeback_page_template,
)
);
-DEFINE_EVENT(writeback_page_template, writeback_dirty_page,
+DEFINE_EVENT(writeback_folio_template, writeback_dirty_folio,
- TP_PROTO(struct page *page, struct address_space *mapping),
+ TP_PROTO(struct folio *folio, struct address_space *mapping),
- TP_ARGS(page, mapping)
+ TP_ARGS(folio, mapping)
);
-DEFINE_EVENT(writeback_page_template, wait_on_page_writeback,
+DEFINE_EVENT(writeback_folio_template, folio_wait_writeback,
- TP_PROTO(struct page *page, struct address_space *mapping),
+ TP_PROTO(struct folio *folio, struct address_space *mapping),
- TP_ARGS(page, mapping)
+ TP_ARGS(folio, mapping)
);
DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
@@ -236,9 +236,9 @@ TRACE_EVENT(inode_switch_wbs,
TRACE_EVENT(track_foreign_dirty,
- TP_PROTO(struct page *page, struct bdi_writeback *wb),
+ TP_PROTO(struct folio *folio, struct bdi_writeback *wb),
- TP_ARGS(page, wb),
+ TP_ARGS(folio, wb),
TP_STRUCT__entry(
__array(char, name, 32)
@@ -250,7 +250,7 @@ TRACE_EVENT(track_foreign_dirty,
),
TP_fast_assign(
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = folio_mapping(folio);
struct inode *inode = mapping ? mapping->host : NULL;
strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
@@ -258,7 +258,7 @@ TRACE_EVENT(track_foreign_dirty,
__entry->ino = inode ? inode->i_ino : 0;
__entry->memcg_id = wb->memcg_css->id;
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
- __entry->page_cgroup_ino = cgroup_ino(page_memcg(page)->css.cgroup);
+ __entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup);
),
TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu",
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 9dc0bf0c5a6e..ecd0f5bdfc1d 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -181,6 +181,10 @@ struct f_owner_ex {
blocking */
#define LOCK_UN 8 /* remove lock */
+/*
+ * LOCK_MAND support has been removed from the kernel. We leave the symbols
+ * here to not break legacy builds, but these should not be used in new code.
+ */
#define LOCK_MAND 32 /* This is a mandatory flock ... */
#define LOCK_READ 64 /* which allows concurrent read operations */
#define LOCK_WRITE 128 /* which allows concurrent write operations */
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 1c5fb86d455a..4557a8b6086f 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -880,8 +880,11 @@ __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
#define __NR_process_mrelease 448
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
+#define __NR_futex_waitv 449
+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+
#undef __NR_syscalls
-#define __NR_syscalls 449
+#define __NR_syscalls 450
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/drm/mga_drm.h b/include/uapi/drm/mga_drm.h
index 8c4337548ab5..bb31567e66c0 100644
--- a/include/uapi/drm/mga_drm.h
+++ b/include/uapi/drm/mga_drm.h
@@ -279,20 +279,22 @@ typedef struct drm_mga_init {
unsigned long sarea_priv_offset;
- int chipset;
- int sgram;
+ __struct_group(/* no tag */, always32bit, /* no attrs */,
+ int chipset;
+ int sgram;
- unsigned int maccess;
+ unsigned int maccess;
- unsigned int fb_cpp;
- unsigned int front_offset, front_pitch;
- unsigned int back_offset, back_pitch;
+ unsigned int fb_cpp;
+ unsigned int front_offset, front_pitch;
+ unsigned int back_offset, back_pitch;
- unsigned int depth_cpp;
- unsigned int depth_offset, depth_pitch;
+ unsigned int depth_cpp;
+ unsigned int depth_offset, depth_pitch;
- unsigned int texture_offset[MGA_NR_TEX_HEAPS];
- unsigned int texture_size[MGA_NR_TEX_HEAPS];
+ unsigned int texture_offset[MGA_NR_TEX_HEAPS];
+ unsigned int texture_size[MGA_NR_TEX_HEAPS];
+ );
unsigned long fb_offset;
unsigned long mmio_offset;
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
deleted file mode 100644
index cf7399f03b71..000000000000
--- a/include/uapi/linux/bcache.h
+++ /dev/null
@@ -1,445 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _LINUX_BCACHE_H
-#define _LINUX_BCACHE_H
-
-/*
- * Bcache on disk data structures
- */
-
-#include <linux/types.h>
-
-#define BITMASK(name, type, field, offset, size) \
-static inline __u64 name(const type *k) \
-{ return (k->field >> offset) & ~(~0ULL << size); } \
- \
-static inline void SET_##name(type *k, __u64 v) \
-{ \
- k->field &= ~(~(~0ULL << size) << offset); \
- k->field |= (v & ~(~0ULL << size)) << offset; \
-}
-
-/* Btree keys - all units are in sectors */
-
-struct bkey {
- __u64 high;
- __u64 low;
- __u64 ptr[];
-};
-
-#define KEY_FIELD(name, field, offset, size) \
- BITMASK(name, struct bkey, field, offset, size)
-
-#define PTR_FIELD(name, offset, size) \
-static inline __u64 name(const struct bkey *k, unsigned int i) \
-{ return (k->ptr[i] >> offset) & ~(~0ULL << size); } \
- \
-static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v) \
-{ \
- k->ptr[i] &= ~(~(~0ULL << size) << offset); \
- k->ptr[i] |= (v & ~(~0ULL << size)) << offset; \
-}
-
-#define KEY_SIZE_BITS 16
-#define KEY_MAX_U64S 8
-
-KEY_FIELD(KEY_PTRS, high, 60, 3)
-KEY_FIELD(HEADER_SIZE, high, 58, 2)
-KEY_FIELD(KEY_CSUM, high, 56, 2)
-KEY_FIELD(KEY_PINNED, high, 55, 1)
-KEY_FIELD(KEY_DIRTY, high, 36, 1)
-
-KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS)
-KEY_FIELD(KEY_INODE, high, 0, 20)
-
-/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */
-
-static inline __u64 KEY_OFFSET(const struct bkey *k)
-{
- return k->low;
-}
-
-static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v)
-{
- k->low = v;
-}
-
-/*
- * The high bit being set is a relic from when we used it to do binary
- * searches - it told you where a key started. It's not used anymore,
- * and can probably be safely dropped.
- */
-#define KEY(inode, offset, size) \
-((struct bkey) { \
- .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode), \
- .low = (offset) \
-})
-
-#define ZERO_KEY KEY(0, 0, 0)
-
-#define MAX_KEY_INODE (~(~0 << 20))
-#define MAX_KEY_OFFSET (~0ULL >> 1)
-#define MAX_KEY KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0)
-
-#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k))
-#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0)
-
-#define PTR_DEV_BITS 12
-
-PTR_FIELD(PTR_DEV, 51, PTR_DEV_BITS)
-PTR_FIELD(PTR_OFFSET, 8, 43)
-PTR_FIELD(PTR_GEN, 0, 8)
-
-#define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1)
-
-#define MAKE_PTR(gen, offset, dev) \
- ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen)
-
-/* Bkey utility code */
-
-static inline unsigned long bkey_u64s(const struct bkey *k)
-{
- return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k);
-}
-
-static inline unsigned long bkey_bytes(const struct bkey *k)
-{
- return bkey_u64s(k) * sizeof(__u64);
-}
-
-#define bkey_copy(_dest, _src) memcpy(_dest, _src, bkey_bytes(_src))
-
-static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
-{
- SET_KEY_INODE(dest, KEY_INODE(src));
- SET_KEY_OFFSET(dest, KEY_OFFSET(src));
-}
-
-static inline struct bkey *bkey_next(const struct bkey *k)
-{
- __u64 *d = (void *) k;
-
- return (struct bkey *) (d + bkey_u64s(k));
-}
-
-static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
-{
- __u64 *d = (void *) k;
-
- return (struct bkey *) (d + nr_keys);
-}
-/* Enough for a key with 6 pointers */
-#define BKEY_PAD 8
-
-#define BKEY_PADDED(key) \
- union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; }
-
-/* Superblock */
-
-/* Version 0: Cache device
- * Version 1: Backing device
- * Version 2: Seed pointer into btree node checksum
- * Version 3: Cache device with new UUID format
- * Version 4: Backing device with data offset
- */
-#define BCACHE_SB_VERSION_CDEV 0
-#define BCACHE_SB_VERSION_BDEV 1
-#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
-#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
-#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES 5
-#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES 6
-#define BCACHE_SB_MAX_VERSION 6
-
-#define SB_SECTOR 8
-#define SB_OFFSET (SB_SECTOR << SECTOR_SHIFT)
-#define SB_SIZE 4096
-#define SB_LABEL_SIZE 32
-#define SB_JOURNAL_BUCKETS 256U
-/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
-#define MAX_CACHES_PER_SET 8
-
-#define BDEV_DATA_START_DEFAULT 16 /* sectors */
-
-struct cache_sb_disk {
- __le64 csum;
- __le64 offset; /* sector where this sb was written */
- __le64 version;
-
- __u8 magic[16];
-
- __u8 uuid[16];
- union {
- __u8 set_uuid[16];
- __le64 set_magic;
- };
- __u8 label[SB_LABEL_SIZE];
-
- __le64 flags;
- __le64 seq;
-
- __le64 feature_compat;
- __le64 feature_incompat;
- __le64 feature_ro_compat;
-
- __le64 pad[5];
-
- union {
- struct {
- /* Cache devices */
- __le64 nbuckets; /* device size */
-
- __le16 block_size; /* sectors */
- __le16 bucket_size; /* sectors */
-
- __le16 nr_in_set;
- __le16 nr_this_dev;
- };
- struct {
- /* Backing devices */
- __le64 data_offset;
-
- /*
- * block_size from the cache device section is still used by
- * backing devices, so don't add anything here until we fix
- * things to not need it for backing devices anymore
- */
- };
- };
-
- __le32 last_mount; /* time overflow in y2106 */
-
- __le16 first_bucket;
- union {
- __le16 njournal_buckets;
- __le16 keys;
- };
- __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */
- __le16 obso_bucket_size_hi; /* obsoleted */
-};
-
-/*
- * This is for in-memory bcache super block.
- * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member
- * size, ordering and even whole struct size may be different
- * from cache_sb_disk.
- */
-struct cache_sb {
- __u64 offset; /* sector where this sb was written */
- __u64 version;
-
- __u8 magic[16];
-
- __u8 uuid[16];
- union {
- __u8 set_uuid[16];
- __u64 set_magic;
- };
- __u8 label[SB_LABEL_SIZE];
-
- __u64 flags;
- __u64 seq;
-
- __u64 feature_compat;
- __u64 feature_incompat;
- __u64 feature_ro_compat;
-
- union {
- struct {
- /* Cache devices */
- __u64 nbuckets; /* device size */
-
- __u16 block_size; /* sectors */
- __u16 nr_in_set;
- __u16 nr_this_dev;
- __u32 bucket_size; /* sectors */
- };
- struct {
- /* Backing devices */
- __u64 data_offset;
-
- /*
- * block_size from the cache device section is still used by
- * backing devices, so don't add anything here until we fix
- * things to not need it for backing devices anymore
- */
- };
- };
-
- __u32 last_mount; /* time overflow in y2106 */
-
- __u16 first_bucket;
- union {
- __u16 njournal_buckets;
- __u16 keys;
- };
- __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */
-};
-
-static inline _Bool SB_IS_BDEV(const struct cache_sb *sb)
-{
- return sb->version == BCACHE_SB_VERSION_BDEV
- || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET
- || sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES;
-}
-
-BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
-BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
-BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
-#define CACHE_REPLACEMENT_LRU 0U
-#define CACHE_REPLACEMENT_FIFO 1U
-#define CACHE_REPLACEMENT_RANDOM 2U
-
-BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
-#define CACHE_MODE_WRITETHROUGH 0U
-#define CACHE_MODE_WRITEBACK 1U
-#define CACHE_MODE_WRITEAROUND 2U
-#define CACHE_MODE_NONE 3U
-BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
-#define BDEV_STATE_NONE 0U
-#define BDEV_STATE_CLEAN 1U
-#define BDEV_STATE_DIRTY 2U
-#define BDEV_STATE_STALE 3U
-
-/*
- * Magic numbers
- *
- * The various other data structures have their own magic numbers, which are
- * xored with the first part of the cache set's UUID
- */
-
-#define JSET_MAGIC 0x245235c1a3625032ULL
-#define PSET_MAGIC 0x6750e15f87337f91ULL
-#define BSET_MAGIC 0x90135c78b99e07f5ULL
-
-static inline __u64 jset_magic(struct cache_sb *sb)
-{
- return sb->set_magic ^ JSET_MAGIC;
-}
-
-static inline __u64 pset_magic(struct cache_sb *sb)
-{
- return sb->set_magic ^ PSET_MAGIC;
-}
-
-static inline __u64 bset_magic(struct cache_sb *sb)
-{
- return sb->set_magic ^ BSET_MAGIC;
-}
-
-/*
- * Journal
- *
- * On disk format for a journal entry:
- * seq is monotonically increasing; every journal entry has its own unique
- * sequence number.
- *
- * last_seq is the oldest journal entry that still has keys the btree hasn't
- * flushed to disk yet.
- *
- * version is for on disk format changes.
- */
-
-#define BCACHE_JSET_VERSION_UUIDv1 1
-#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */
-#define BCACHE_JSET_VERSION 1
-
-struct jset {
- __u64 csum;
- __u64 magic;
- __u64 seq;
- __u32 version;
- __u32 keys;
-
- __u64 last_seq;
-
- BKEY_PADDED(uuid_bucket);
- BKEY_PADDED(btree_root);
- __u16 btree_level;
- __u16 pad[3];
-
- __u64 prio_bucket[MAX_CACHES_PER_SET];
-
- union {
- struct bkey start[0];
- __u64 d[0];
- };
-};
-
-/* Bucket prios/gens */
-
-struct prio_set {
- __u64 csum;
- __u64 magic;
- __u64 seq;
- __u32 version;
- __u32 pad;
-
- __u64 next_bucket;
-
- struct bucket_disk {
- __u16 prio;
- __u8 gen;
- } __attribute((packed)) data[];
-};
-
-/* UUIDS - per backing device/flash only volume metadata */
-
-struct uuid_entry {
- union {
- struct {
- __u8 uuid[16];
- __u8 label[32];
- __u32 first_reg; /* time overflow in y2106 */
- __u32 last_reg;
- __u32 invalidated;
-
- __u32 flags;
- /* Size of flash only volumes */
- __u64 sectors;
- };
-
- __u8 pad[128];
- };
-};
-
-BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1);
-
-/* Btree nodes */
-
-/* Version 1: Seed pointer into btree node checksum
- */
-#define BCACHE_BSET_CSUM 1
-#define BCACHE_BSET_VERSION 1
-
-/*
- * Btree nodes
- *
- * On disk a btree node is a list/log of these; within each set the keys are
- * sorted
- */
-struct bset {
- __u64 csum;
- __u64 magic;
- __u64 seq;
- __u32 version;
- __u32 keys;
-
- union {
- struct bkey start[0];
- __u64 d[0];
- };
-};
-
-/* OBSOLETE */
-
-/* UUIDS - per backing device/flash only volume metadata */
-
-struct uuid_entry_v0 {
- __u8 uuid[16];
- __u8 label[32];
- __u32 first_reg;
- __u32 last_reg;
- __u32 invalidated;
- __u32 pad;
-};
-
-#endif /* _LINUX_BCACHE_H */
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index d7d3cfead056..738619994e26 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -771,10 +771,16 @@ struct btrfs_ioctl_received_subvol_args {
*/
#define BTRFS_SEND_FLAG_OMIT_END_CMD 0x4
+/*
+ * Read the protocol version in the structure
+ */
+#define BTRFS_SEND_FLAG_VERSION 0x8
+
#define BTRFS_SEND_FLAG_MASK \
(BTRFS_SEND_FLAG_NO_FILE_DATA | \
BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
- BTRFS_SEND_FLAG_OMIT_END_CMD)
+ BTRFS_SEND_FLAG_OMIT_END_CMD | \
+ BTRFS_SEND_FLAG_VERSION)
struct btrfs_ioctl_send_args {
__s64 send_fd; /* in */
@@ -782,7 +788,8 @@ struct btrfs_ioctl_send_args {
__u64 __user *clone_sources; /* in */
__u64 parent_root; /* in */
__u64 flags; /* in */
- __u64 reserved[4]; /* in */
+ __u32 version; /* in */
+ __u8 reserved[28]; /* in */
};
/*
diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h
index 6c34f6e2f1f7..804ff8d98f71 100644
--- a/include/uapi/linux/cdrom.h
+++ b/include/uapi/linux/cdrom.h
@@ -147,6 +147,8 @@
#define CDROM_NEXT_WRITABLE 0x5394 /* get next writable block */
#define CDROM_LAST_WRITTEN 0x5395 /* get last block written on disc */
+#define CDROM_TIMED_MEDIA_CHANGE 0x5396 /* get the timestamp of the last media change */
+
/*******************************************************
* CDROM IOCTL structures
*******************************************************/
@@ -295,6 +297,23 @@ struct cdrom_generic_command
};
};
+/* This struct is used by CDROM_TIMED_MEDIA_CHANGE */
+struct cdrom_timed_media_change_info {
+ __s64 last_media_change; /* Timestamp of the last detected media
+ * change in ms. May be set by caller,
+ * updated upon successful return of
+ * ioctl.
+ */
+ __u64 media_flags; /* Flags returned by ioctl to indicate
+ * media status.
+ */
+};
+#define MEDIA_CHANGED_FLAG 0x1 /* Last detected media change was more
+ * recent than last_media_change set by
+ * caller.
+ */
+/* other bits of media_flags available for future use */
+
/*
* A CD-ROM physical sector size is 2048, 2052, 2056, 2324, 2332, 2336,
* 2340, or 2352 bytes long.
diff --git a/include/uapi/linux/dlm_device.h b/include/uapi/linux/dlm_device.h
index f880d2831160..e83954c69fff 100644
--- a/include/uapi/linux/dlm_device.h
+++ b/include/uapi/linux/dlm_device.h
@@ -45,13 +45,13 @@ struct dlm_lock_params {
void __user *bastaddr;
struct dlm_lksb __user *lksb;
char lvb[DLM_USER_LVB_LEN];
- char name[0];
+ char name[];
};
struct dlm_lspace_params {
__u32 flags;
__u32 minor;
- char name[0];
+ char name[];
};
struct dlm_purge_params {
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index 235e5b2facaa..71a5df8d2689 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -44,6 +44,31 @@
FUTEX_PRIVATE_FLAG)
/*
+ * Flags to specify the bit length of the futex word for futex2 syscalls.
+ * Currently, only 32 is supported.
+ */
+#define FUTEX_32 2
+
+/*
+ * Max numbers of elements in a futex_waitv array
+ */
+#define FUTEX_WAITV_MAX 128
+
+/**
+ * struct futex_waitv - A waiter for vectorized wait
+ * @val: Expected value at uaddr
+ * @uaddr: User address to wait on
+ * @flags: Flags for this waiter
+ * @__reserved: Reserved member to preserve data alignment. Should be 0.
+ */
+struct futex_waitv {
+ __u64 val;
+ __u64 uaddr;
+ __u32 flags;
+ __u32 __reserved;
+};
+
+/*
* Support for robust futexes: the kernel cleans up held futexes at
* thread exit time.
*/
diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
index 6135d92e0d47..daf82a230c0e 100644
--- a/include/uapi/linux/hyperv.h
+++ b/include/uapi/linux/hyperv.h
@@ -26,7 +26,7 @@
#ifndef _UAPI_HYPERV_H
#define _UAPI_HYPERV_H
-#include <linux/uuid.h>
+#include <linux/types.h>
/*
* Framework version for util services.
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index b270a07b285e..c45b5e9a9387 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -158,6 +158,7 @@ enum {
#define IORING_TIMEOUT_BOOTTIME (1U << 2)
#define IORING_TIMEOUT_REALTIME (1U << 3)
#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
+#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
/*
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 52b54d13f385..6acd4ccafbf7 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -10,6 +10,7 @@
#define __UAPI_MCTP_H
#include <linux/types.h>
+#include <linux/socket.h>
typedef __u8 mctp_eid_t;
@@ -18,11 +19,13 @@ struct mctp_addr {
};
struct sockaddr_mctp {
- unsigned short int smctp_family;
- int smctp_network;
+ __kernel_sa_family_t smctp_family;
+ __u16 __smctp_pad0;
+ unsigned int smctp_network;
struct mctp_addr smctp_addr;
__u8 smctp_type;
__u8 smctp_tag;
+ __u8 __smctp_pad1;
};
#define MCTP_NET_ANY 0x0
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index f92880a15645..bd8860eeb291 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1141,6 +1141,21 @@ enum perf_event_type {
*/
PERF_RECORD_TEXT_POKE = 20,
+ /*
+ * Data written to the AUX area by hardware due to aux_output, may need
+ * to be matched to the event by an architecture-specific hardware ID.
+ * This records the hardware ID, but requires sample_id to provide the
+ * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT
+ * records from multiple events.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 hw_id;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
+
PERF_RECORD_MAX, /* non-ABI */
};
@@ -1210,14 +1225,16 @@ union perf_mem_data_src {
mem_remote:1, /* remote */
mem_snoopx:2, /* snoop mode, ext */
mem_blk:3, /* access blocked */
- mem_rsvd:21;
+ mem_hops:3, /* hop level */
+ mem_rsvd:18;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd:21,
+ __u64 mem_rsvd:18,
+ mem_hops:3, /* hop level */
mem_blk:3, /* access blocked */
mem_snoopx:2, /* snoop mode, ext */
mem_remote:1, /* remote */
@@ -1241,7 +1258,13 @@ union perf_mem_data_src {
#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
#define PERF_MEM_OP_SHIFT 0
-/* memory hierarchy (memory level, hit or miss) */
+/*
+ * PERF_MEM_LVL_* namespace being depricated to some extent in the
+ * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
+ * Supporting this namespace inorder to not break defined ABIs.
+ *
+ * memory hierarchy (memory level, hit or miss)
+ */
#define PERF_MEM_LVL_NA 0x01 /* not available */
#define PERF_MEM_LVL_HIT 0x02 /* hit level */
#define PERF_MEM_LVL_MISS 0x04 /* miss level */
@@ -1307,6 +1330,11 @@ union perf_mem_data_src {
#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
#define PERF_MEM_BLK_SHIFT 40
+/* hop level */
+#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
+/* 2-7 available */
+#define PERF_MEM_HOPS_SHIFT 43
+
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index ee8220f8dcf5..3021ea25a284 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -4,3 +4,40 @@
#ifndef __always_inline
#define __always_inline inline
#endif
+
+/**
+ * __struct_group() - Create a mirrored named and anonyomous struct
+ *
+ * @TAG: The tag name for the named sub-struct (usually empty)
+ * @NAME: The identifier name of the mirrored sub-struct
+ * @ATTRS: Any struct attributes (usually empty)
+ * @MEMBERS: The member declarations for the mirrored structs
+ *
+ * Used to create an anonymous union of two structs with identical layout
+ * and size: one anonymous and one named. The former's members can be used
+ * normally without sub-struct naming, and the latter can be used to
+ * reason about the start, end, and size of the group of struct members.
+ * The named struct can also be explicitly tagged for layer reuse, as well
+ * as both having struct attributes appended.
+ */
+#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
+ union { \
+ struct { MEMBERS } ATTRS; \
+ struct TAG { MEMBERS } ATTRS NAME; \
+ }
+
+/**
+ * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
+ *
+ * @TYPE: The type of each flexible array element
+ * @NAME: The name of the flexible array member
+ *
+ * In order to have a flexible array member in a union or alone in a
+ * struct, it needs to be wrapped in an anonymous struct with at least 1
+ * named member, but that member can be empty.
+ */
+#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \
+ struct { \
+ struct { } __empty_ ## NAME; \
+ TYPE NAME[]; \
+ }
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index b96c1ea7166d..eda0426ec4c2 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -213,13 +213,13 @@ enum {
XFRM_MSG_GETSPDINFO,
#define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO
+ XFRM_MSG_MAPPING,
+#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING
+
XFRM_MSG_SETDEFAULT,
#define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT
XFRM_MSG_GETDEFAULT,
#define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT
-
- XFRM_MSG_MAPPING,
-#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING
__XFRM_MSG_MAX
};
#define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
@@ -514,9 +514,12 @@ struct xfrm_user_offload {
#define XFRM_OFFLOAD_INBOUND 2
struct xfrm_userpolicy_default {
-#define XFRM_USERPOLICY_DIRMASK_MAX (sizeof(__u8) * 8)
- __u8 dirmask;
- __u8 action;
+#define XFRM_USERPOLICY_UNSPEC 0
+#define XFRM_USERPOLICY_BLOCK 1
+#define XFRM_USERPOLICY_ACCEPT 2
+ __u8 in;
+ __u8 fwd;
+ __u8 out;
};
#ifndef __KERNEL__
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 7cc2a0f3f2f5..d13bb8c1b450 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -917,7 +917,6 @@ struct hl_wait_cs_in {
#define HL_WAIT_CS_STATUS_BUSY 1
#define HL_WAIT_CS_STATUS_TIMEDOUT 2
#define HL_WAIT_CS_STATUS_ABORTED 3
-#define HL_WAIT_CS_STATUS_INTERRUPTED 4
#define HL_WAIT_CS_STATUS_FLAG_GONE 0x1
#define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD 0x2
@@ -1286,7 +1285,8 @@ struct hl_debug_args {
* EIO - The CS was aborted (usually because the device was reset)
* ENODEV - The device wants to do hard-reset (so user need to close FD)
*
- * The driver also returns a custom define inside the IOCTL which can be:
+ * The driver also returns a custom define in case the IOCTL call returned 0.
+ * The define can be one of the following:
*
* HL_WAIT_CS_STATUS_COMPLETED - The CS has been completed successfully (0)
* HL_WAIT_CS_STATUS_BUSY - The CS is still executing (0)
@@ -1294,8 +1294,6 @@ struct hl_debug_args {
* (ETIMEDOUT)
* HL_WAIT_CS_STATUS_ABORTED - The CS was aborted, usually because the
* device was reset (EIO)
- * HL_WAIT_CS_STATUS_INTERRUPTED - Waiting for the CS was interrupted (EINTR)
- *
*/
#define HL_IOCTL_WAIT_CS \
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index e283c2220aba..7f44d54bb0ab 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -141,8 +141,8 @@ struct rxe_dma_info {
__u32 sge_offset;
__u32 reserved;
union {
- __u8 inline_data[0];
- struct rxe_sge sge[0];
+ __DECLARE_FLEX_ARRAY(__u8, inline_data);
+ __DECLARE_FLEX_ARRAY(struct rxe_sge, sge);
};
};
diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h
index da61398b1f8f..053949287ce8 100644
--- a/include/uapi/sound/asoc.h
+++ b/include/uapi/sound/asoc.h
@@ -240,8 +240,8 @@ struct snd_soc_tplg_vendor_array {
struct snd_soc_tplg_private {
__le32 size; /* in bytes of private data */
union {
- char data[0];
- struct snd_soc_tplg_vendor_array array[0];
+ __DECLARE_FLEX_ARRAY(char, data);
+ __DECLARE_FLEX_ARRAY(struct snd_soc_tplg_vendor_array, array);
};
} __attribute__((packed));
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index db28e79b77ee..a3584a357f35 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -52,12 +52,12 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
#if defined(CONFIG_XEN_PV)
int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
- unsigned int domid, bool no_translate, struct page **pages);
+ unsigned int domid, bool no_translate);
#else
static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
xen_pfn_t *pfn, int nr, int *err_ptr,
pgprot_t prot, unsigned int domid,
- bool no_translate, struct page **pages)
+ bool no_translate)
{
BUG();
return 0;
@@ -134,7 +134,7 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
*/
BUG_ON(err_ptr == NULL);
return xen_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid,
- false, pages);
+ false);
}
/*
@@ -146,7 +146,6 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
* @err_ptr: Returns per-MFN error status.
* @prot: page protection mask
* @domid: Domain owning the pages
- * @pages: Array of pages if this domain has an auto-translated physmap
*
* @mfn and @err_ptr may point to the same buffer, the MFNs will be
* overwritten by the error codes after they are mapped.
@@ -157,14 +156,13 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
static inline int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr, xen_pfn_t *mfn,
int nr, int *err_ptr,
- pgprot_t prot, unsigned int domid,
- struct page **pages)
+ pgprot_t prot, unsigned int domid)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return -EOPNOTSUPP;
return xen_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid,
- true, pages);
+ true);
}
/* xen_remap_domain_gfn_range() - map a range of foreign frames
@@ -188,8 +186,7 @@ static inline int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
if (xen_feature(XENFEAT_auto_translated_physmap))
return -EOPNOTSUPP;
- return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false,
- pages);
+ return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false);
}
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,