aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/acpi/platform/aclinuxex.h9
-rw-r--r--include/asm-generic/hugetlb.h2
-rw-r--r--include/asm-generic/vmlinux.lds.h18
-rw-r--r--include/linux/acpi.h3
-rw-r--r--include/linux/alloc_tag.h2
-rw-r--r--include/linux/arm_ffa.h3
-rw-r--r--include/linux/bio-integrity.h152
-rw-r--r--include/linux/bio.h156
-rw-r--r--include/linux/blk-integrity.h1
-rw-r--r--include/linux/blk-mq.h127
-rw-r--r--include/linux/blk_types.h1
-rw-r--r--include/linux/blkdev.h31
-rw-r--r--include/linux/bpf.h8
-rw-r--r--include/linux/buffer_head.h2
-rw-r--r--include/linux/cacheinfo.h2
-rw-r--r--include/linux/cgroup-defs.h5
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/linux/clockchips.h2
-rw-r--r--include/linux/compiler.h6
-rw-r--r--include/linux/cpu.h1
-rw-r--r--include/linux/cpu_cooling.h1
-rw-r--r--include/linux/cpu_rmap.h2
-rw-r--r--include/linux/cpumask.h56
-rw-r--r--include/linux/cpumask_types.h66
-rw-r--r--include/linux/crc32.h3
-rw-r--r--include/linux/damon.h17
-rw-r--r--include/linux/dma-fence-chain.h4
-rw-r--r--include/linux/fault-inject.h11
-rw-r--r--include/linux/gfp.h8
-rw-r--r--include/linux/hid_bpf.h5
-rw-r--r--include/linux/highmem-internal.h17
-rw-r--r--include/linux/highmem.h8
-rw-r--r--include/linux/huge_mm.h53
-rw-r--r--include/linux/hugetlb.h30
-rw-r--r--include/linux/init.h18
-rw-r--r--include/linux/interrupt.h4
-rw-r--r--include/linux/ioport.h44
-rw-r--r--include/linux/irq.h43
-rw-r--r--include/linux/irqchip/arm-gic-v4.h8
-rw-r--r--include/linux/irqchip/irq-partition-percpu.h2
-rw-r--r--include/linux/irqdomain.h136
-rw-r--r--include/linux/jbd2.h10
-rw-r--r--include/linux/jhash.h6
-rw-r--r--include/linux/kernel_stat.h1
-rw-r--r--include/linux/kmsan.h76
-rw-r--r--include/linux/kmsan_types.h2
-rw-r--r--include/linux/kvm_host.h53
-rw-r--r--include/linux/list_lru.h2
-rw-r--r--include/linux/memblock.h2
-rw-r--r--include/linux/memcontrol.h363
-rw-r--r--include/linux/memfd.h5
-rw-r--r--include/linux/memory-tiers.h2
-rw-r--r--include/linux/memory_hotplug.h4
-rw-r--r--include/linux/migrate.h10
-rw-r--r--include/linux/migrate_mode.h6
-rw-r--r--include/linux/min_heap.h188
-rw-r--r--include/linux/mm.h81
-rw-r--r--include/linux/mm_types.h33
-rw-r--r--include/linux/mmzone.h2
-rw-r--r--include/linux/msi.h54
-rw-r--r--include/linux/mtd/cfi.h32
-rw-r--r--include/linux/node.h1
-rw-r--r--include/linux/nvme-fc-driver.h2
-rw-r--r--include/linux/page-flags.h82
-rw-r--r--include/linux/page_counter.h4
-rw-r--r--include/linux/pagemap.h46
-rw-r--r--include/linux/panic.h8
-rw-r--r--include/linux/pci-epc.h15
-rw-r--r--include/linux/pci-epf.h10
-rw-r--r--include/linux/pci.h10
-rw-r--r--include/linux/percpu-defs.h6
-rw-r--r--include/linux/percpu.h1
-rw-r--r--include/linux/pgalloc_tag.h7
-rw-r--r--include/linux/pgtable.h40
-rw-r--r--include/linux/pm_domain.h2
-rw-r--r--include/linux/poison.h6
-rw-r--r--include/linux/power_supply.h19
-rw-r--r--include/linux/profile.h1
-rw-r--r--include/linux/psp-sev.h4
-rw-r--r--include/linux/rcupdate.h1
-rw-r--r--include/linux/rmap.h45
-rw-r--r--include/linux/sbitmap.h5
-rw-r--r--include/linux/sched.h16
-rw-r--r--include/linux/seq_file.h1
-rw-r--r--include/linux/shmem_fs.h9
-rw-r--r--include/linux/skbuff.h8
-rw-r--r--include/linux/skmsg.h5
-rw-r--r--include/linux/slab.h12
-rw-r--r--include/linux/soc/apple/rtkit.h4
-rw-r--r--include/linux/srcu.h14
-rw-r--r--include/linux/stop_machine.h2
-rw-r--r--include/linux/swap.h14
-rw-r--r--include/linux/swapops.h4
-rw-r--r--include/linux/switchtec.h2
-rw-r--r--include/linux/torture.h2
-rw-r--r--include/linux/tracepoint.h1
-rw-r--r--include/linux/vmstat.h4
-rw-r--r--include/linux/workqueue.h2
-rw-r--r--include/linux/zswap.h10
-rw-r--r--include/ras/ras_event.h4
-rw-r--r--include/trace/events/firewire.h541
-rw-r--r--include/trace/events/firewire_ohci.h101
-rw-r--r--include/trace/events/kmem.h4
-rw-r--r--include/trace/events/migrate.h3
-rw-r--r--include/uapi/linux/fs.h158
-rw-r--r--include/uapi/linux/kvm.h27
-rw-r--r--include/uapi/linux/landlock.h66
-rw-r--r--include/uapi/linux/psp-sev.h27
-rw-r--r--include/uapi/linux/sev-guest.h3
109 files changed, 2355 insertions, 1004 deletions
diff --git a/include/acpi/platform/aclinuxex.h b/include/acpi/platform/aclinuxex.h
index 62cac266a1c8..eeff40295b4b 100644
--- a/include/acpi/platform/aclinuxex.h
+++ b/include/acpi/platform/aclinuxex.h
@@ -46,6 +46,9 @@ acpi_status acpi_os_terminate(void);
* Interrupts are off during resume, just like they are for boot.
* However, boot has (system_state != SYSTEM_RUNNING)
* to quiet __might_sleep() in kmalloc() and resume does not.
+ *
+ * These specialized allocators have to be macros for their allocations to be
+ * accounted separately (to have separate alloc_tag).
*/
#define acpi_os_allocate(_size) \
kmalloc(_size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL)
@@ -53,14 +56,14 @@ acpi_status acpi_os_terminate(void);
#define acpi_os_allocate_zeroed(_size) \
kzalloc(_size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL)
+#define acpi_os_acquire_object(_cache) \
+ kmem_cache_zalloc(_cache, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL)
+
static inline void acpi_os_free(void *memory)
{
kfree(memory);
}
-#define acpi_os_acquire_object(_cache) \
- kmem_cache_zalloc(_cache, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL)
-
static inline acpi_thread_id acpi_os_get_thread_id(void)
{
return (acpi_thread_id) (unsigned long)current;
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 6dcf4d576970..594d5905f615 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -144,7 +144,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
#endif
#ifndef __HAVE_ARCH_HUGE_PTEP_GET
-static inline pte_t huge_ptep_get(pte_t *ptep)
+static inline pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
return ptep_get(ptep);
}
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 35245e9225a5..677315e51e54 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -141,14 +141,6 @@
* often happens at runtime)
*/
-#if defined(CONFIG_MEMORY_HOTPLUG)
-#define MEM_KEEP(sec) *(.mem##sec)
-#define MEM_DISCARD(sec)
-#else
-#define MEM_KEEP(sec)
-#define MEM_DISCARD(sec) *(.mem##sec)
-#endif
-
#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
#define KEEP_PATCHABLE KEEP(*(__patchable_function_entries))
#define PATCHABLE_DISCARDS
@@ -357,7 +349,6 @@
*(.data..decrypted) \
*(.ref.data) \
*(.data..shared_aligned) /* percpu related */ \
- MEM_KEEP(init.data*) \
*(.data.unlikely) \
__start_once = .; \
*(.data.once) \
@@ -542,7 +533,6 @@
/* __*init sections */ \
__init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \
*(.ref.rodata) \
- MEM_KEEP(init.rodata) \
} \
\
/* Built-in module parameters. */ \
@@ -593,8 +583,7 @@
*(.text.unknown .text.unknown.*) \
NOINSTR_TEXT \
*(.ref.text) \
- *(.text.asan.* .text.tsan.*) \
- MEM_KEEP(init.text*) \
+ *(.text.asan.* .text.tsan.*)
/* sched.text is aling to function alignment to secure we have same
@@ -701,7 +690,6 @@
#define INIT_DATA \
KEEP(*(SORT(___kentry+*))) \
*(.init.data .init.data.*) \
- MEM_DISCARD(init.data*) \
KERNEL_CTORS() \
MCOUNT_REC() \
*(.init.rodata .init.rodata.*) \
@@ -709,7 +697,6 @@
TRACE_SYSCALLS() \
KPROBE_BLACKLIST() \
ERROR_INJECT_WHITELIST() \
- MEM_DISCARD(init.rodata) \
CLK_OF_TABLES() \
RESERVEDMEM_OF_TABLES() \
TIMER_OF_TABLES() \
@@ -727,8 +714,7 @@
#define INIT_TEXT \
*(.init.text .init.text.*) \
- *(.text.startup) \
- MEM_DISCARD(init.text*)
+ *(.text.startup)
#define EXIT_DATA \
*(.exit.data .exit.data.*) \
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e93059f71c71..f0b95c76c707 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -274,6 +274,9 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id)
return phys_id == PHYS_CPUID_INVALID;
}
+
+int __init acpi_get_madt_revision(void);
+
/* Validate the processor object's proc_id */
bool acpi_duplicate_processor_id(int proc_id);
/* Processor _CTS control */
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index abd24016a900..8c61ccd161ba 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -122,7 +122,7 @@ static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag
"alloc_tag was not cleared (got tag for %s:%u)\n",
ref->ct->filename, ref->ct->lineno);
- WARN_ONCE(!tag, "current->alloc_tag not set");
+ WARN_ONCE(!tag, "current->alloc_tag not set\n");
}
static inline void alloc_tag_sub_check(union codetag_ref *ref)
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index c82d56768101..c6d18f50f671 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -212,6 +212,9 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; }
extern const struct bus_type ffa_bus_type;
+/* The FF-A 1.0 partition structure lacks the uuid[4] */
+#define FFA_1_0_PARTITON_INFO_SZ (8)
+
/* FFA transport related */
struct ffa_partition_info {
u16 id;
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
new file mode 100644
index 000000000000..dd831c269e99
--- /dev/null
+++ b/include/linux/bio-integrity.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BIO_INTEGRITY_H
+#define _LINUX_BIO_INTEGRITY_H
+
+#include <linux/bio.h>
+
+enum bip_flags {
+ BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */
+ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */
+ BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */
+ BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */
+ BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */
+ BIP_COPY_USER = 1 << 5, /* Kernel bounce buffer in use */
+};
+
+struct bio_integrity_payload {
+ struct bio *bip_bio; /* parent bio */
+
+ struct bvec_iter bip_iter;
+
+ unsigned short bip_vcnt; /* # of integrity bio_vecs */
+ unsigned short bip_max_vcnt; /* integrity bio_vec slots */
+ unsigned short bip_flags; /* control flags */
+
+ struct bvec_iter bio_iter; /* for rewinding parent bio */
+
+ struct work_struct bip_work; /* I/O completion */
+
+ struct bio_vec *bip_vec;
+ struct bio_vec bip_inline_vecs[];/* embedded bvec array */
+};
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+
+#define bip_for_each_vec(bvl, bip, iter) \
+ for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter)
+
+#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \
+ for_each_bio(_bio) \
+ bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
+
+static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
+{
+ if (bio->bi_opf & REQ_INTEGRITY)
+ return bio->bi_integrity;
+
+ return NULL;
+}
+
+static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
+{
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+
+ if (bip)
+ return bip->bip_flags & flag;
+
+ return false;
+}
+
+static inline sector_t bip_get_seed(struct bio_integrity_payload *bip)
+{
+ return bip->bip_iter.bi_sector;
+}
+
+static inline void bip_set_seed(struct bio_integrity_payload *bip,
+ sector_t seed)
+{
+ bip->bip_iter.bi_sector = seed;
+}
+
+struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
+ unsigned int nr);
+int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len,
+ unsigned int offset);
+int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
+void bio_integrity_unmap_user(struct bio *bio);
+bool bio_integrity_prep(struct bio *bio);
+void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
+void bio_integrity_trim(struct bio *bio);
+int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask);
+int bioset_integrity_create(struct bio_set *bs, int pool_size);
+void bioset_integrity_free(struct bio_set *bs);
+void bio_integrity_init(void);
+
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+
+static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
+{
+ return NULL;
+}
+
+static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
+{
+ return 0;
+}
+
+static inline void bioset_integrity_free(struct bio_set *bs)
+{
+}
+
+static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
+ ssize_t len, u32 seed)
+{
+ return -EINVAL;
+}
+
+static inline void bio_integrity_unmap_user(struct bio *bio)
+{
+}
+
+static inline bool bio_integrity_prep(struct bio *bio)
+{
+ return true;
+}
+
+static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
+ gfp_t gfp_mask)
+{
+ return 0;
+}
+
+static inline void bio_integrity_advance(struct bio *bio,
+ unsigned int bytes_done)
+{
+}
+
+static inline void bio_integrity_trim(struct bio *bio)
+{
+}
+
+static inline void bio_integrity_init(void)
+{
+}
+
+static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
+{
+ return false;
+}
+
+static inline struct bio_integrity_payload *
+bio_integrity_alloc(struct bio *bio, gfp_t gfp, unsigned int nr)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ return 0;
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+#endif /* _LINUX_BIO_INTEGRITY_H */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 818e93612947..a46e2047bea4 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -321,69 +321,6 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
#define bio_for_each_folio_all(fi, bio) \
for (bio_first_folio(&fi, bio, 0); fi.folio; bio_next_folio(&fi, bio))
-enum bip_flags {
- BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */
- BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */
- BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */
- BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */
- BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */
- BIP_INTEGRITY_USER = 1 << 5, /* Integrity payload is user address */
- BIP_COPY_USER = 1 << 6, /* Kernel bounce buffer in use */
-};
-
-/*
- * bio integrity payload
- */
-struct bio_integrity_payload {
- struct bio *bip_bio; /* parent bio */
-
- struct bvec_iter bip_iter;
-
- unsigned short bip_vcnt; /* # of integrity bio_vecs */
- unsigned short bip_max_vcnt; /* integrity bio_vec slots */
- unsigned short bip_flags; /* control flags */
-
- struct bvec_iter bio_iter; /* for rewinding parent bio */
-
- struct work_struct bip_work; /* I/O completion */
-
- struct bio_vec *bip_vec;
- struct bio_vec bip_inline_vecs[];/* embedded bvec array */
-};
-
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-
-static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
-{
- if (bio->bi_opf & REQ_INTEGRITY)
- return bio->bi_integrity;
-
- return NULL;
-}
-
-static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
-{
- struct bio_integrity_payload *bip = bio_integrity(bio);
-
- if (bip)
- return bip->bip_flags & flag;
-
- return false;
-}
-
-static inline sector_t bip_get_seed(struct bio_integrity_payload *bip)
-{
- return bip->bip_iter.bi_sector;
-}
-
-static inline void bip_set_seed(struct bio_integrity_payload *bip,
- sector_t seed)
-{
- bip->bip_iter.bi_sector = seed;
-}
-
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
void bio_trim(struct bio *bio, sector_t offset, sector_t size);
extern struct bio *bio_split(struct bio *bio, int sectors,
gfp_t gfp, struct bio_set *bs);
@@ -721,99 +658,6 @@ static inline bool bioset_initialized(struct bio_set *bs)
return bs->bio_slab != NULL;
}
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-
-#define bip_for_each_vec(bvl, bip, iter) \
- for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter)
-
-#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \
- for_each_bio(_bio) \
- bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
-
-int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
-void bio_integrity_unmap_free_user(struct bio *bio);
-extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
-extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
-extern bool bio_integrity_prep(struct bio *);
-extern void bio_integrity_advance(struct bio *, unsigned int);
-extern void bio_integrity_trim(struct bio *);
-extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
-extern int bioset_integrity_create(struct bio_set *, int);
-extern void bioset_integrity_free(struct bio_set *);
-extern void bio_integrity_init(void);
-
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-
-static inline void *bio_integrity(struct bio *bio)
-{
- return NULL;
-}
-
-static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
-{
- return 0;
-}
-
-static inline void bioset_integrity_free (struct bio_set *bs)
-{
- return;
-}
-
-static inline bool bio_integrity_prep(struct bio *bio)
-{
- return true;
-}
-
-static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
- gfp_t gfp_mask)
-{
- return 0;
-}
-
-static inline void bio_integrity_advance(struct bio *bio,
- unsigned int bytes_done)
-{
- return;
-}
-
-static inline void bio_integrity_trim(struct bio *bio)
-{
- return;
-}
-
-static inline void bio_integrity_init(void)
-{
- return;
-}
-
-static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
-{
- return false;
-}
-
-static inline void *bio_integrity_alloc(struct bio * bio, gfp_t gfp,
- unsigned int nr)
-{
- return ERR_PTR(-EINVAL);
-}
-
-static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int offset)
-{
- return 0;
-}
-
-static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
- ssize_t len, u32 seed)
-{
- return -EINVAL;
-}
-static inline void bio_integrity_unmap_free_user(struct bio *bio)
-{
-}
-
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
/*
* Mark a bio as polled. Note that for async polled IO, the caller must
* expect -EWOULDBLOCK if we cannot allocate a request (or other resources).
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 804f856ed3e5..de98049b7ded 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -3,6 +3,7 @@
#define _LINUX_BLK_INTEGRITY_H
#include <linux/blk-mq.h>
+#include <linux/bio-integrity.h>
struct request;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 89ba6b16fe8b..8d304b1d16b1 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -27,38 +27,61 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t);
* request flags */
typedef __u32 __bitwise req_flags_t;
-/* drive already may have started this one */
-#define RQF_STARTED ((__force req_flags_t)(1 << 1))
-/* request for flush sequence */
-#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4))
-/* merge of different types, fail separately */
-#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5))
-/* don't call prep for this one */
-#define RQF_DONTPREP ((__force req_flags_t)(1 << 7))
-/* use hctx->sched_tags */
-#define RQF_SCHED_TAGS ((__force req_flags_t)(1 << 8))
-/* use an I/O scheduler for this request */
-#define RQF_USE_SCHED ((__force req_flags_t)(1 << 9))
-/* vaguely specified driver internal error. Ignored by the block layer */
-#define RQF_FAILED ((__force req_flags_t)(1 << 10))
-/* don't warn about errors */
-#define RQF_QUIET ((__force req_flags_t)(1 << 11))
-/* account into disk and partition IO statistics */
-#define RQF_IO_STAT ((__force req_flags_t)(1 << 13))
-/* runtime pm request */
-#define RQF_PM ((__force req_flags_t)(1 << 15))
-/* on IO scheduler merge hash */
-#define RQF_HASHED ((__force req_flags_t)(1 << 16))
-/* track IO completion time */
-#define RQF_STATS ((__force req_flags_t)(1 << 17))
-/* Look at ->special_vec for the actual data payload instead of the
- bio chain. */
-#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
-/* The request completion needs to be signaled to zone write pluging. */
-#define RQF_ZONE_WRITE_PLUGGING ((__force req_flags_t)(1 << 20))
-/* ->timeout has been called, don't expire again */
-#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
-#define RQF_RESV ((__force req_flags_t)(1 << 23))
+/* Keep rqf_name[] in sync with the definitions below */
+enum {
+ /* drive already may have started this one */
+ __RQF_STARTED,
+ /* request for flush sequence */
+ __RQF_FLUSH_SEQ,
+ /* merge of different types, fail separately */
+ __RQF_MIXED_MERGE,
+ /* don't call prep for this one */
+ __RQF_DONTPREP,
+ /* use hctx->sched_tags */
+ __RQF_SCHED_TAGS,
+ /* use an I/O scheduler for this request */
+ __RQF_USE_SCHED,
+ /* vaguely specified driver internal error. Ignored by block layer */
+ __RQF_FAILED,
+ /* don't warn about errors */
+ __RQF_QUIET,
+ /* account into disk and partition IO statistics */
+ __RQF_IO_STAT,
+ /* runtime pm request */
+ __RQF_PM,
+ /* on IO scheduler merge hash */
+ __RQF_HASHED,
+ /* track IO completion time */
+ __RQF_STATS,
+ /* Look at ->special_vec for the actual data payload instead of the
+ bio chain. */
+ __RQF_SPECIAL_PAYLOAD,
+ /* request completion needs to be signaled to zone write plugging. */
+ __RQF_ZONE_WRITE_PLUGGING,
+ /* ->timeout has been called, don't expire again */
+ __RQF_TIMED_OUT,
+ __RQF_RESV,
+ __RQF_BITS
+};
+
+#define RQF_STARTED ((__force req_flags_t)(1 << __RQF_STARTED))
+#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << __RQF_FLUSH_SEQ))
+#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << __RQF_MIXED_MERGE))
+#define RQF_DONTPREP ((__force req_flags_t)(1 << __RQF_DONTPREP))
+#define RQF_SCHED_TAGS ((__force req_flags_t)(1 << __RQF_SCHED_TAGS))
+#define RQF_USE_SCHED ((__force req_flags_t)(1 << __RQF_USE_SCHED))
+#define RQF_FAILED ((__force req_flags_t)(1 << __RQF_FAILED))
+#define RQF_QUIET ((__force req_flags_t)(1 << __RQF_QUIET))
+#define RQF_IO_STAT ((__force req_flags_t)(1 << __RQF_IO_STAT))
+#define RQF_PM ((__force req_flags_t)(1 << __RQF_PM))
+#define RQF_HASHED ((__force req_flags_t)(1 << __RQF_HASHED))
+#define RQF_STATS ((__force req_flags_t)(1 << __RQF_STATS))
+#define RQF_SPECIAL_PAYLOAD \
+ ((__force req_flags_t)(1 << __RQF_SPECIAL_PAYLOAD))
+#define RQF_ZONE_WRITE_PLUGGING \
+ ((__force req_flags_t)(1 << __RQF_ZONE_WRITE_PLUGGING))
+#define RQF_TIMED_OUT ((__force req_flags_t)(1 << __RQF_TIMED_OUT))
+#define RQF_RESV ((__force req_flags_t)(1 << __RQF_RESV))
/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
@@ -278,8 +301,12 @@ enum blk_eh_timer_return {
BLK_EH_RESET_TIMER,
};
-#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
-#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
+/* Keep alloc_policy_name[] in sync with the definitions below */
+enum {
+ BLK_TAG_ALLOC_FIFO, /* allocate starting from 0 */
+ BLK_TAG_ALLOC_RR, /* allocate starting from last allocated tag */
+ BLK_TAG_ALLOC_MAX
+};
/**
* struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
@@ -644,6 +671,7 @@ struct blk_mq_ops {
#endif
};
+/* Keep hctx_flag_name[] in sync with the definitions below */
enum {
BLK_MQ_F_SHOULD_MERGE = 1 << 0,
BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1,
@@ -653,27 +681,17 @@ enum {
*/
BLK_MQ_F_STACKING = 1 << 2,
BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3,
- BLK_MQ_F_BLOCKING = 1 << 5,
+ BLK_MQ_F_BLOCKING = 1 << 4,
/* Do not allow an I/O scheduler to be configured. */
- BLK_MQ_F_NO_SCHED = 1 << 6,
+ BLK_MQ_F_NO_SCHED = 1 << 5,
+
/*
* Select 'none' during queue registration in case of a single hwq
* or shared hwqs instead of 'mq-deadline'.
*/
- BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 7,
- BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
+ BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 6,
+ BLK_MQ_F_ALLOC_POLICY_START_BIT = 7,
BLK_MQ_F_ALLOC_POLICY_BITS = 1,
-
- BLK_MQ_S_STOPPED = 0,
- BLK_MQ_S_TAG_ACTIVE = 1,
- BLK_MQ_S_SCHED_RESTART = 2,
-
- /* hw queue is inactive after all its CPUs become offline */
- BLK_MQ_S_INACTIVE = 3,
-
- BLK_MQ_MAX_DEPTH = 10240,
-
- BLK_MQ_CPU_WORK_BATCH = 8,
};
#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
@@ -682,8 +700,19 @@ enum {
((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
+#define BLK_MQ_MAX_DEPTH (10240)
#define BLK_MQ_NO_HCTX_IDX (-1U)
+enum {
+ /* Keep hctx_state_name[] in sync with the definitions below */
+ BLK_MQ_S_STOPPED,
+ BLK_MQ_S_TAG_ACTIVE,
+ BLK_MQ_S_SCHED_RESTART,
+ /* hw queue is inactive after all its CPUs become offline */
+ BLK_MQ_S_INACTIVE,
+ BLK_MQ_S_MAX
+};
+
struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
struct queue_limits *lim, void *queuedata,
struct lock_class_key *lkclass);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 632edd71f8c6..36ed96133217 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -354,6 +354,7 @@ enum req_op {
REQ_OP_LAST = (__force blk_opf_t)36,
};
+/* Keep cmd_flag_name[] in sync with the definitions below */
enum req_flag_bits {
__REQ_FAILFAST_DEV = /* no driver retries of device errors */
REQ_OP_BITS,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b8196e219ac2..e85ec73a07d5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -588,27 +588,28 @@ struct request_queue {
};
/* Keep blk_queue_flag_name[] in sync with the definitions below */
-#define QUEUE_FLAG_STOPPED 0 /* queue is stopped */
-#define QUEUE_FLAG_DYING 1 /* queue being torn down */
-#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */
-#define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */
-#define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */
-#define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */
-#define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */
-#define QUEUE_FLAG_STATS 20 /* track IO start and completion times */
-#define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */
-#define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */
-#define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */
-#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */
-#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
+enum {
+ QUEUE_FLAG_DYING, /* queue being torn down */
+ QUEUE_FLAG_NOMERGES, /* disable merge attempts */
+ QUEUE_FLAG_SAME_COMP, /* complete on same CPU-group */
+ QUEUE_FLAG_FAIL_IO, /* fake timeout */
+ QUEUE_FLAG_NOXMERGES, /* No extended merges */
+ QUEUE_FLAG_SAME_FORCE, /* force complete on same CPU */
+ QUEUE_FLAG_INIT_DONE, /* queue is initialized */
+ QUEUE_FLAG_STATS, /* track IO start and completion times */
+ QUEUE_FLAG_REGISTERED, /* queue has been registered to a disk */
+ QUEUE_FLAG_QUIESCED, /* queue has been quiesced */
+ QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */
+ QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */
+ QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */
+ QUEUE_FLAG_MAX
+};
#define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP)
void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
-#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4f1d4a97b9d1..3b94ec161e8c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -275,7 +275,7 @@ struct bpf_map {
u32 btf_value_type_id;
u32 btf_vmlinux_value_type_id;
struct btf *btf;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
struct obj_cgroup *objcg;
#endif
char name[BPF_OBJ_NAME_LEN];
@@ -2253,7 +2253,7 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
unsigned long nr_pages, struct page **page_array);
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node);
void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags);
@@ -2262,6 +2262,10 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
size_t align, gfp_t flags);
#else
+/*
+ * These specialized allocators have to be macros for their allocations to be
+ * accounted separately (to have separate alloc_tag).
+ */
#define bpf_map_kmalloc_node(_map, _size, _flags, _node) \
kmalloc_node(_size, _flags, _node)
#define bpf_map_kzalloc(_map, _size, _flags) \
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index e022e40b099e..14acf1bbe0ce 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -53,7 +53,7 @@ typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
* filesystem and block layers. Nowadays the basic I/O unit
* is the bio, and buffer_heads are used for extracting block
* mappings (via a get_block_t call), for tracking state within
- * a page (via a page_mapping) and for wrapping bio submission
+ * a folio (via a folio_mapping) and for wrapping bio submission
* for backward compatibility reasons (e.g. submit_bh).
*/
struct buffer_head {
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 3dde175f4108..108060612bb8 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -4,7 +4,7 @@
#include <linux/bitops.h>
#include <linux/cpuhplock.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
#include <linux/smp.h>
struct device_node;
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index b36690ca0d3f..ae04035b6cbe 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -539,9 +539,6 @@ struct cgroup {
/* used to store eBPF programs */
struct cgroup_bpf bpf;
- /* If there is block congestion on this cgroup. */
- atomic_t congestion_count;
-
/* Used to store internal freezer state */
struct cgroup_freezer_state freezer;
@@ -681,9 +678,7 @@ struct cftype {
__poll_t (*poll)(struct kernfs_open_file *of,
struct poll_table_struct *pt);
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lock_class_key lockdep_key;
-#endif
};
/*
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2150ca60394b..c60ba0ab1462 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -10,7 +10,6 @@
*/
#include <linux/sched.h>
-#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/rculist.h>
#include <linux/cgroupstats.h>
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 9aac31d856f3..b0df28ddd394 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -12,7 +12,7 @@
#ifdef CONFIG_GENERIC_CLOCKEVENTS
# include <linux/clocksource.h>
-# include <linux/cpumask.h>
+# include <linux/cpumask_types.h>
# include <linux/ktime.h>
# include <linux/notifier.h>
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 68a24a3a6979..2594553bb30b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -208,10 +208,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
*/
#define data_race(expr) \
({ \
- __unqual_scalar_typeof(({ expr; })) __v = ({ \
- __kcsan_disable_current(); \
- expr; \
- }); \
+ __kcsan_disable_current(); \
+ __auto_type __v = (expr); \
__kcsan_enable_current(); \
__v; \
})
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index a8926d0a28cd..bdcec1732445 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -16,7 +16,6 @@
#include <linux/node.h>
#include <linux/compiler.h>
-#include <linux/cpumask.h>
#include <linux/cpuhotplug.h>
#include <linux/cpuhplock.h>
#include <linux/cpu_smt.h>
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index a3bdc8a98f2c..2c774fb3c091 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -15,7 +15,6 @@
#include <linux/of.h>
#include <linux/thermal.h>
-#include <linux/cpumask.h>
struct cpufreq_policy;
diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h
index cae324d10965..20b5729903d7 100644
--- a/include/linux/cpu_rmap.h
+++ b/include/linux/cpu_rmap.h
@@ -7,7 +7,7 @@
* Copyright 2011 Solarflare Communications Inc.
*/
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/kref.h>
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 954d4adc8f81..099e8b32dd68 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -9,25 +9,13 @@
*/
#include <linux/cleanup.h>
#include <linux/kernel.h>
-#include <linux/threads.h>
#include <linux/bitmap.h>
+#include <linux/cpumask_types.h>
#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/gfp_types.h>
#include <linux/numa.h>
-/* Don't assign or return these: may not be this big! */
-typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
-
-/**
- * cpumask_bits - get the bits in a cpumask
- * @maskp: the struct cpumask *
- *
- * You should only assume nr_cpu_ids bits of this mask are valid. This is
- * a macro so it's const-correct.
- */
-#define cpumask_bits(maskp) ((maskp)->bits)
-
/**
* cpumask_pr_args - printf args to output a cpumask
* @maskp: cpumask to be printed
@@ -925,48 +913,7 @@ static inline unsigned int cpumask_size(void)
return bitmap_size(large_cpumask_bits);
}
-/*
- * cpumask_var_t: struct cpumask for stack usage.
- *
- * Oh, the wicked games we play! In order to make kernel coding a
- * little more difficult, we typedef cpumask_var_t to an array or a
- * pointer: doing &mask on an array is a noop, so it still works.
- *
- * i.e.
- * cpumask_var_t tmpmask;
- * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
- * return -ENOMEM;
- *
- * ... use 'tmpmask' like a normal struct cpumask * ...
- *
- * free_cpumask_var(tmpmask);
- *
- *
- * However, one notable exception is there. alloc_cpumask_var() allocates
- * only nr_cpumask_bits bits (in the other hand, real cpumask_t always has
- * NR_CPUS bits). Therefore you don't have to dereference cpumask_var_t.
- *
- * cpumask_var_t tmpmask;
- * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
- * return -ENOMEM;
- *
- * var = *tmpmask;
- *
- * This code makes NR_CPUS length memcopy and brings to a memory corruption.
- * cpumask_copy() provide safe copy functionality.
- *
- * Note that there is another evil here: If you define a cpumask_var_t
- * as a percpu variable then the way to obtain the address of the cpumask
- * structure differently influences what this_cpu_* operation needs to be
- * used. Please use this_cpu_cpumask_var_t in those cases. The direct use
- * of this_cpu_ptr() or this_cpu_read() will lead to failures when the
- * other type of cpumask_var_t implementation is configured.
- *
- * Please also note that __cpumask_var_read_mostly can be used to declare
- * a cpumask_var_t variable itself (not its content) as read mostly.
- */
#ifdef CONFIG_CPUMASK_OFFSTACK
-typedef struct cpumask *cpumask_var_t;
#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x)
#define __cpumask_var_read_mostly __read_mostly
@@ -1013,7 +960,6 @@ static inline bool cpumask_available(cpumask_var_t mask)
}
#else
-typedef struct cpumask cpumask_var_t[1];
#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x)
#define __cpumask_var_read_mostly
diff --git a/include/linux/cpumask_types.h b/include/linux/cpumask_types.h
new file mode 100644
index 000000000000..461ed1b6bcdb
--- /dev/null
+++ b/include/linux/cpumask_types.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_CPUMASK_TYPES_H
+#define __LINUX_CPUMASK_TYPES_H
+
+#include <linux/bitops.h>
+#include <linux/threads.h>
+
+/* Don't assign or return these: may not be this big! */
+typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
+
+/**
+ * cpumask_bits - get the bits in a cpumask
+ * @maskp: the struct cpumask *
+ *
+ * You should only assume nr_cpu_ids bits of this mask are valid. This is
+ * a macro so it's const-correct.
+ */
+#define cpumask_bits(maskp) ((maskp)->bits)
+
+/*
+ * cpumask_var_t: struct cpumask for stack usage.
+ *
+ * Oh, the wicked games we play! In order to make kernel coding a
+ * little more difficult, we typedef cpumask_var_t to an array or a
+ * pointer: doing &mask on an array is a noop, so it still works.
+ *
+ * i.e.
+ * cpumask_var_t tmpmask;
+ * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ * return -ENOMEM;
+ *
+ * ... use 'tmpmask' like a normal struct cpumask * ...
+ *
+ * free_cpumask_var(tmpmask);
+ *
+ *
+ * However, one notable exception is there. alloc_cpumask_var() allocates
+ * only nr_cpumask_bits bits (in the other hand, real cpumask_t always has
+ * NR_CPUS bits). Therefore you don't have to dereference cpumask_var_t.
+ *
+ * cpumask_var_t tmpmask;
+ * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ * return -ENOMEM;
+ *
+ * var = *tmpmask;
+ *
+ * This code makes NR_CPUS length memcopy and brings to a memory corruption.
+ * cpumask_copy() provide safe copy functionality.
+ *
+ * Note that there is another evil here: If you define a cpumask_var_t
+ * as a percpu variable then the way to obtain the address of the cpumask
+ * structure differently influences what this_cpu_* operation needs to be
+ * used. Please use this_cpu_cpumask_var_t in those cases. The direct use
+ * of this_cpu_ptr() or this_cpu_read() will lead to failures when the
+ * other type of cpumask_var_t implementation is configured.
+ *
+ * Please also note that __cpumask_var_read_mostly can be used to declare
+ * a cpumask_var_t variable itself (not its content) as read mostly.
+ */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+typedef struct cpumask *cpumask_var_t;
+#else
+typedef struct cpumask cpumask_var_t[1];
+#endif /* CONFIG_CPUMASK_OFFSTACK */
+
+#endif /* __LINUX_CPUMASK_TYPES_H */
diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 9e8a032c1788..87f788c0d607 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -9,7 +9,9 @@
#include <linux/bitrev.h>
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len);
+u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len);
u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len);
+u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len);
/**
* crc32_le_combine - Combine two crc32 check values into one. For two
@@ -37,6 +39,7 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
}
u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len);
+u32 __pure __crc32c_le_base(u32 crc, unsigned char const *p, size_t len);
/**
* __crc32c_le_combine - Combine two crc32c check values into one. For two
diff --git a/include/linux/damon.h b/include/linux/damon.h
index f7da65e1ac04..27c546bfc6d4 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -105,6 +105,8 @@ struct damon_target {
* @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
* @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists.
* @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists.
+ * @DAMOS_MIGRATE_HOT: Migrate the regions prioritizing warmer regions.
+ * @DAMOS_MIGRATE_COLD: Migrate the regions prioritizing colder regions.
* @DAMOS_STAT: Do nothing but count the stat.
* @NR_DAMOS_ACTIONS: Total number of DAMOS actions
*
@@ -122,6 +124,8 @@ enum damos_action {
DAMOS_NOHUGEPAGE,
DAMOS_LRU_PRIO,
DAMOS_LRU_DEPRIO,
+ DAMOS_MIGRATE_HOT,
+ DAMOS_MIGRATE_COLD,
DAMOS_STAT, /* Do nothing but only record the stat */
NR_DAMOS_ACTIONS,
};
@@ -374,6 +378,7 @@ struct damos_access_pattern {
* @apply_interval_us: The time between applying the @action.
* @quota: Control the aggressiveness of this scheme.
* @wmarks: Watermarks for automated (in)activation of this scheme.
+ * @target_nid: Destination node if @action is "migrate_{hot,cold}".
* @filters: Additional set of &struct damos_filter for &action.
* @stat: Statistics of this scheme.
* @list: List head for siblings.
@@ -389,6 +394,10 @@ struct damos_access_pattern {
* monitoring context are inactive, DAMON stops monitoring either, and just
* repeatedly checks the watermarks.
*
+ * @target_nid is used to set the migration target node for migrate_hot or
+ * migrate_cold actions, which means it's only meaningful when @action is either
+ * "migrate_hot" or "migrate_cold".
+ *
* Before applying the &action to a memory region, &struct damon_operations
* implementation could check pages of the region and skip &action to respect
* &filters
@@ -410,6 +419,9 @@ struct damos {
/* public: */
struct damos_quota quota;
struct damos_watermarks wmarks;
+ union {
+ int target_nid;
+ };
struct list_head filters;
struct damos_stat stat;
struct list_head list;
@@ -726,9 +738,11 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
enum damos_action action,
unsigned long apply_interval_us,
struct damos_quota *quota,
- struct damos_watermarks *wmarks);
+ struct damos_watermarks *wmarks,
+ int target_nid);
void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
void damon_destroy_scheme(struct damos *s);
+int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src);
struct damon_target *damon_new_target(void);
void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
@@ -742,6 +756,7 @@ void damon_destroy_ctx(struct damon_ctx *ctx);
int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs);
void damon_set_schemes(struct damon_ctx *ctx,
struct damos **schemes, ssize_t nr_schemes);
+int damon_commit_ctx(struct damon_ctx *old_ctx, struct damon_ctx *new_ctx);
int damon_nr_running_ctxs(void);
bool damon_is_registered_ops(enum damon_ops_id id);
int damon_register_ops(struct damon_operations *ops);
diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
index ad9e2506c2f4..68c3c1e41014 100644
--- a/include/linux/dma-fence-chain.h
+++ b/include/linux/dma-fence-chain.h
@@ -85,6 +85,10 @@ dma_fence_chain_contained(struct dma_fence *fence)
* dma_fence_chain_alloc
*
* Returns a new struct dma_fence_chain object or NULL on failure.
+ *
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
*/
#define dma_fence_chain_alloc() \
((struct dma_fence_chain *)kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL))
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 6d5edef09d45..354413950d34 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -91,22 +91,19 @@ static inline void fault_config_init(struct fault_config *config,
struct kmem_cache;
-bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
-
#ifdef CONFIG_FAIL_PAGE_ALLOC
-bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
+bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
#else
-static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
+static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
return false;
}
#endif /* CONFIG_FAIL_PAGE_ALLOC */
-int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
#ifdef CONFIG_FAILSLAB
-extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
+int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
#else
-static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
+static inline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
return false;
}
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 7f9691d375f0..f53f76e0b17e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -303,6 +303,8 @@ struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order);
struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order,
struct mempolicy *mpol, pgoff_t ilx, int nid);
struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order);
+struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order,
+ struct mempolicy *mpol, pgoff_t ilx, int nid);
struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned long addr, bool hugepage);
#else
@@ -319,6 +321,11 @@ static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order)
{
return __folio_alloc_node(gfp, order, numa_node_id());
}
+static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order,
+ struct mempolicy *mpol, pgoff_t ilx, int nid)
+{
+ return folio_alloc_noprof(gfp, order);
+}
#define vma_alloc_folio_noprof(gfp, order, vma, addr, hugepage) \
folio_alloc_noprof(gfp, order)
#endif
@@ -326,6 +333,7 @@ static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order)
#define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
#define alloc_pages_mpol(...) alloc_hooks(alloc_pages_mpol_noprof(__VA_ARGS__))
#define folio_alloc(...) alloc_hooks(folio_alloc_noprof(__VA_ARGS__))
+#define folio_alloc_mpol(...) alloc_hooks(folio_alloc_mpol_noprof(__VA_ARGS__))
#define vma_alloc_folio(...) alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__))
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 9ca96fc90449..d4d063cf63b5 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -228,6 +228,11 @@ static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; }
static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {}
static inline void hid_bpf_destroy_device(struct hid_device *hid) {}
static inline int hid_bpf_device_init(struct hid_device *hid) { return 0; }
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
+ */
#define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size) \
((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL))
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index a3028e400a9c..dd100e849f5e 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -131,22 +131,17 @@ static inline void __kunmap_atomic(const void *addr)
preempt_enable();
}
-unsigned int __nr_free_highpages(void);
-extern atomic_long_t _totalhigh_pages;
+unsigned long __nr_free_highpages(void);
+unsigned long __totalhigh_pages(void);
-static inline unsigned int nr_free_highpages(void)
+static inline unsigned long nr_free_highpages(void)
{
return __nr_free_highpages();
}
static inline unsigned long totalhigh_pages(void)
{
- return (unsigned long)atomic_long_read(&_totalhigh_pages);
-}
-
-static inline void totalhigh_pages_add(long count)
-{
- atomic_long_add(count, &_totalhigh_pages);
+ return __totalhigh_pages();
}
static inline bool is_kmap_addr(const void *x)
@@ -239,8 +234,8 @@ static inline void __kunmap_atomic(const void *addr)
preempt_enable();
}
-static inline unsigned int nr_free_highpages(void) { return 0; }
-static inline unsigned long totalhigh_pages(void) { return 0UL; }
+static inline unsigned long nr_free_highpages(void) { return 0; }
+static inline unsigned long totalhigh_pages(void) { return 0; }
static inline bool is_kmap_addr(const void *x)
{
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 00341b56d291..930a591b9b61 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -179,7 +179,7 @@ static inline void *kmap_local_folio(struct folio *folio, size_t offset);
static inline void *kmap_atomic(struct page *page);
/* Highmem related interfaces for management code */
-static inline unsigned int nr_free_highpages(void);
+static inline unsigned long nr_free_highpages(void);
static inline unsigned long totalhigh_pages(void);
#ifndef ARCH_HAS_FLUSH_ANON_PAGE
@@ -352,6 +352,9 @@ static inline int copy_mc_user_highpage(struct page *to, struct page *from,
kunmap_local(vto);
kunmap_local(vfrom);
+ if (ret)
+ memory_failure_queue(page_to_pfn(from), 0);
+
return ret;
}
@@ -368,6 +371,9 @@ static inline int copy_mc_highpage(struct page *to, struct page *from)
kunmap_local(vto);
kunmap_local(vfrom);
+ if (ret)
+ memory_failure_queue(page_to_pfn(from), 0);
+
return ret;
}
#else
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 2aa986a5cd1b..cff002be83eb 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,6 +6,7 @@
#include <linux/mm_types.h>
#include <linux/fs.h> /* only for vma_is_dax() */
+#include <linux/kobject.h>
vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -63,6 +64,7 @@ ssize_t single_hugepage_flag_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf,
enum transparent_hugepage_flag flag);
extern struct kobj_attribute shmem_enabled_attr;
+extern struct kobj_attribute thpsize_shmem_enabled_attr;
/*
* Mask of all large folio orders supported for anonymous THP; all orders up to
@@ -126,18 +128,6 @@ static inline bool hugepage_global_always(void)
(1<<TRANSPARENT_HUGEPAGE_FLAG);
}
-static inline bool hugepage_flags_enabled(void)
-{
- /*
- * We cover both the anon and the file-backed case here; we must return
- * true if globally enabled, even when all anon sizes are set to never.
- * So we don't need to look at huge_anon_orders_inherit.
- */
- return hugepage_global_enabled() ||
- huge_anon_orders_always ||
- huge_anon_orders_madvise;
-}
-
static inline int highest_order(unsigned long orders)
{
return fls_long(orders) - 1;
@@ -265,12 +255,26 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders);
}
+struct thpsize {
+ struct kobject kobj;
+ struct list_head node;
+ int order;
+};
+
+#define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj)
+
enum mthp_stat_item {
MTHP_STAT_ANON_FAULT_ALLOC,
MTHP_STAT_ANON_FAULT_FALLBACK,
MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
MTHP_STAT_SWPOUT,
MTHP_STAT_SWPOUT_FALLBACK,
+ MTHP_STAT_SHMEM_ALLOC,
+ MTHP_STAT_SHMEM_FALLBACK,
+ MTHP_STAT_SHMEM_FALLBACK_CHARGE,
+ MTHP_STAT_SPLIT,
+ MTHP_STAT_SPLIT_FAILED,
+ MTHP_STAT_SPLIT_DEFERRED,
__MTHP_STAT_COUNT
};
@@ -415,6 +419,11 @@ static inline bool thp_migration_supported(void)
return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}
+void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmd, bool freeze, struct folio *folio);
+bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmdp, struct folio *folio);
+
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline bool folio_test_pmd_mappable(struct folio *folio)
@@ -477,6 +486,16 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long address, bool freeze, struct folio *folio) {}
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
unsigned long address, bool freeze, struct folio *folio) {}
+static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmd,
+ bool freeze, struct folio *folio) {}
+
+static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp,
+ struct folio *folio)
+{
+ return false;
+}
#define split_huge_pud(__vma, __pmd, __address) \
do { } while (0)
@@ -550,6 +569,16 @@ static inline bool thp_migration_supported(void)
{
return false;
}
+
+static inline int highest_order(unsigned long orders)
+{
+ return 0;
+}
+
+static inline int next_order(unsigned long *orders, int prev)
+{
+ return 0;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int split_folio_to_list_to_order(struct folio *folio,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 2b3c3a404769..c9bf68c239a0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -20,12 +20,6 @@ struct user_struct;
struct mmu_gather;
struct node;
-#ifndef CONFIG_ARCH_HAS_HUGEPD
-typedef struct { unsigned long pd; } hugepd_t;
-#define is_hugepd(hugepd) (0)
-#define __hugepd(x) ((hugepd_t) { (x) })
-#endif
-
void free_huge_folio(struct folio *folio);
#ifdef CONFIG_HUGETLB_PAGE
@@ -616,47 +610,35 @@ static __always_inline \
bool folio_test_hugetlb_##flname(struct folio *folio) \
{ void *private = &folio->private; \
return test_bit(HPG_##flname, private); \
- } \
-static inline int HPage##uname(struct page *page) \
- { return test_bit(HPG_##flname, &(page->private)); }
+ }
#define SETHPAGEFLAG(uname, flname) \
static __always_inline \
void folio_set_hugetlb_##flname(struct folio *folio) \
{ void *private = &folio->private; \
set_bit(HPG_##flname, private); \
- } \
-static inline void SetHPage##uname(struct page *page) \
- { set_bit(HPG_##flname, &(page->private)); }
+ }
#define CLEARHPAGEFLAG(uname, flname) \
static __always_inline \
void folio_clear_hugetlb_##flname(struct folio *folio) \
{ void *private = &folio->private; \
clear_bit(HPG_##flname, private); \
- } \
-static inline void ClearHPage##uname(struct page *page) \
- { clear_bit(HPG_##flname, &(page->private)); }
+ }
#else
#define TESTHPAGEFLAG(uname, flname) \
static inline bool \
folio_test_hugetlb_##flname(struct folio *folio) \
- { return 0; } \
-static inline int HPage##uname(struct page *page) \
{ return 0; }
#define SETHPAGEFLAG(uname, flname) \
static inline void \
folio_set_hugetlb_##flname(struct folio *folio) \
- { } \
-static inline void SetHPage##uname(struct page *page) \
{ }
#define CLEARHPAGEFLAG(uname, flname) \
static inline void \
folio_clear_hugetlb_##flname(struct folio *folio) \
- { } \
-static inline void ClearHPage##uname(struct page *page) \
{ }
#endif
@@ -681,6 +663,7 @@ HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
/* Defines one hugetlb page size */
struct hstate {
struct mutex resize_lock;
+ struct lock_class_key resize_key;
int next_nid_to_alloc;
int next_nid_to_free;
unsigned int order;
@@ -698,11 +681,6 @@ struct hstate {
unsigned int nr_huge_pages_node[MAX_NUMNODES];
unsigned int free_huge_pages_node[MAX_NUMNODES];
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
-#ifdef CONFIG_CGROUP_HUGETLB
- /* cgroup control files */
- struct cftype cgroup_files_dfl[8];
- struct cftype cgroup_files_legacy[10];
-#endif
char name[HSTATE_NAME_LEN];
};
diff --git a/include/linux/init.h b/include/linux/init.h
index 58cef4c2e59a..ee1309473bc6 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -84,11 +84,15 @@
#define __exit __section(".exit.text") __exitused __cold notrace
-/* Used for MEMORY_HOTPLUG */
-#define __meminit __section(".meminit.text") __cold notrace \
- __latent_entropy
-#define __meminitdata __section(".meminit.data")
-#define __meminitconst __section(".meminit.rodata")
+#ifdef CONFIG_MEMORY_HOTPLUG
+#define __meminit
+#define __meminitdata
+#define __meminitconst
+#else
+#define __meminit __init
+#define __meminitdata __initdata
+#define __meminitconst __initconst
+#endif
/* For assembly routines */
#define __HEAD .section ".head.text","ax"
@@ -99,10 +103,6 @@
#define __INITRODATA .section ".init.rodata","a",%progbits
#define __FINITDATA .previous
-#define __MEMINIT .section ".meminit.text", "ax"
-#define __MEMINITDATA .section ".meminit.data", "aw"
-#define __MEMINITRODATA .section ".meminit.rodata", "a"
-
/* silence warnings when references are OK */
#define __REF .section ".ref.text", "ax"
#define __REFDATA .section ".ref.data", "aw"
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 3a36e64119c8..3f30c88e0b4c 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -6,13 +6,13 @@
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/cleanup.h>
-#include <linux/cpumask.h>
#include <linux/irqreturn.h>
#include <linux/irqnr.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/hrtimer.h>
#include <linux/kref.h>
+#include <linux/cpumask_types.h>
#include <linux/workqueue.h>
#include <linux/jump_label.h>
@@ -169,7 +169,7 @@ static inline int __must_check
request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
const char *name, void *dev)
{
- return request_threaded_irq(irq, handler, NULL, flags, name, dev);
+ return request_threaded_irq(irq, handler, NULL, flags | IRQF_COND_ONESHOT, name, dev);
}
extern int __must_check
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index db7fe25f3370..6e9fb667a1c5 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -188,6 +188,42 @@ enum {
#define DEFINE_RES_DMA(_dma) \
DEFINE_RES_DMA_NAMED((_dma), NULL)
+/**
+ * typedef resource_alignf - Resource alignment callback
+ * @data: Private data used by the callback
+ * @res: Resource candidate range (an empty resource space)
+ * @size: The minimum size of the empty space
+ * @align: Alignment from the constraints
+ *
+ * Callback allows calculating resource placement and alignment beyond min,
+ * max, and align fields in the struct resource_constraint.
+ *
+ * Return: Start address for the resource.
+ */
+typedef resource_size_t (*resource_alignf)(void *data,
+ const struct resource *res,
+ resource_size_t size,
+ resource_size_t align);
+
+/**
+ * struct resource_constraint - constraints to be met while searching empty
+ * resource space
+ * @min: The minimum address for the memory range
+ * @max: The maximum address for the memory range
+ * @align: Alignment for the start address of the empty space
+ * @alignf: Additional alignment constraints callback
+ * @alignf_data: Data provided for @alignf callback
+ *
+ * Contains the range and alignment constraints that have to be met during
+ * find_resource_space(). @alignf can be NULL indicating no alignment beyond
+ * @align is necessary.
+ */
+struct resource_constraint {
+ resource_size_t min, max, align;
+ resource_alignf alignf;
+ void *alignf_data;
+};
+
/* PC/ISA/whatever - the normal PC address spaces: IO and memory */
extern struct resource ioport_resource;
extern struct resource iomem_resource;
@@ -207,10 +243,7 @@ extern void arch_remove_reservations(struct resource *avail);
extern int allocate_resource(struct resource *root, struct resource *new,
resource_size_t size, resource_size_t min,
resource_size_t max, resource_size_t align,
- resource_size_t (*alignf)(void *,
- const struct resource *,
- resource_size_t,
- resource_size_t),
+ resource_alignf alignf,
void *alignf_data);
struct resource *lookup_resource(struct resource *root, resource_size_t start);
int adjust_resource(struct resource *res, resource_size_t start,
@@ -264,6 +297,9 @@ static inline bool resource_union(const struct resource *r1, const struct resour
return true;
}
+int find_resource_space(struct resource *root, struct resource *new,
+ resource_size_t size, struct resource_constraint *constraint);
+
/* Convenience shorthand with allocation */
#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0)
#define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED)
diff --git a/include/linux/irq.h b/include/linux/irq.h
index a217e1029c1d..1f5dbf1f92c9 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1106,6 +1106,7 @@ enum irq_gc_flags {
* @irq_flags_to_set: IRQ* flags to set on irq setup
* @irq_flags_to_clear: IRQ* flags to clear on irq setup
* @gc_flags: Generic chip specific setup flags
+ * @exit: Function called on each chip when they are destroyed.
* @gc: Array of pointers to generic interrupt chips
*/
struct irq_domain_chip_generic {
@@ -1114,9 +1115,37 @@ struct irq_domain_chip_generic {
unsigned int irq_flags_to_clear;
unsigned int irq_flags_to_set;
enum irq_gc_flags gc_flags;
+ void (*exit)(struct irq_chip_generic *gc);
struct irq_chip_generic *gc[];
};
+/**
+ * struct irq_domain_chip_generic_info - Generic chip information structure
+ * @name: Name of the generic interrupt chip
+ * @handler: Interrupt handler used by the generic interrupt chip
+ * @irqs_per_chip: Number of interrupts each chip handles (max 32)
+ * @num_ct: Number of irq_chip_type instances associated with each
+ * chip
+ * @irq_flags_to_clear: IRQ_* bits to clear in the mapping function
+ * @irq_flags_to_set: IRQ_* bits to set in the mapping function
+ * @gc_flags: Generic chip specific setup flags
+ * @init: Function called on each chip when they are created.
+ * Allow to do some additional chip initialisation.
+ * @exit: Function called on each chip when they are destroyed.
+ * Allow to do some chip cleanup operation.
+ */
+struct irq_domain_chip_generic_info {
+ const char *name;
+ irq_flow_handler_t handler;
+ unsigned int irqs_per_chip;
+ unsigned int num_ct;
+ unsigned int irq_flags_to_clear;
+ unsigned int irq_flags_to_set;
+ enum irq_gc_flags gc_flags;
+ int (*init)(struct irq_chip_generic *gc);
+ void (*exit)(struct irq_chip_generic *gc);
+};
+
/* Generic chip callback functions */
void irq_gc_noop(struct irq_data *d);
void irq_gc_mask_disable_reg(struct irq_data *d);
@@ -1153,6 +1182,20 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc,
struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq);
+#ifdef CONFIG_GENERIC_IRQ_CHIP
+int irq_domain_alloc_generic_chips(struct irq_domain *d,
+ const struct irq_domain_chip_generic_info *info);
+void irq_domain_remove_generic_chips(struct irq_domain *d);
+#else
+static inline int
+irq_domain_alloc_generic_chips(struct irq_domain *d,
+ const struct irq_domain_chip_generic_info *info)
+{
+ return -EINVAL;
+}
+static inline void irq_domain_remove_generic_chips(struct irq_domain *d) { }
+#endif /* CONFIG_GENERIC_IRQ_CHIP */
+
int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
int num_ct, const char *name,
irq_flow_handler_t handler,
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index 2c63375bbd43..ecabed6d3307 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -25,6 +25,14 @@ struct its_vm {
irq_hw_number_t db_lpi_base;
unsigned long *db_bitmap;
int nr_db_lpis;
+ /*
+ * Ensures mutual exclusion between updates to vlpi_count[]
+ * and map/unmap when using the ITSList mechanism.
+ *
+ * The lock order for any sequence involving the ITSList is
+ * vmapp_lock -> vpe_lock ->vmovp_lock.
+ */
+ raw_spinlock_t vmapp_lock;
u32 vlpi_count[GICv4_ITS_LIST_MAX];
};
diff --git a/include/linux/irqchip/irq-partition-percpu.h b/include/linux/irqchip/irq-partition-percpu.h
index 2f6ae7551748..b35ee22c278f 100644
--- a/include/linux/irqchip/irq-partition-percpu.h
+++ b/include/linux/irqchip/irq-partition-percpu.h
@@ -8,7 +8,7 @@
#define __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H
#include <linux/fwnode.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
#include <linux/irqdomain.h>
struct partition_affinity {
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 21ecf582a0fe..de6105f68fec 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -74,11 +74,24 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
* struct irq_domain_ops - Methods for irq_domain objects
* @match: Match an interrupt controller device node to a host, returns
* 1 on a match
+ * @select: Match an interrupt controller fw specification. It is more generic
+ * than @match as it receives a complete struct irq_fwspec. Therefore,
+ * @select is preferred if provided. Returns 1 on a match.
* @map: Create or update a mapping between a virtual irq number and a hw
* irq number. This is called only once for a given mapping.
* @unmap: Dispose of such a mapping
* @xlate: Given a device tree node and interrupt specifier, decode
* the hardware irq number and linux irq type value.
+ * @alloc: Allocate @nr_irqs interrupts starting from @virq.
+ * @free: Free @nr_irqs interrupts starting from @virq.
+ * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only
+ * reserve the vector. If unset, assign the vector (called from
+ * request_irq()).
+ * @deactivate: Disarm one interrupt (@irqd).
+ * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and
+ * linux irq type value (@out_type). This is a generalised @xlate
+ * (over struct irq_fwspec) and is preferred if provided.
+ * @debug_show: For domains to show specific data for an interrupt in debugfs.
*
* Functions below are provided by the driver and called whenever a new mapping
* is created or an old mapping is disposed. The driver can then proceed to
@@ -131,6 +144,9 @@ struct irq_domain_chip_generic;
* Optional elements:
* @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy
* to swap it for the of_node via the irq_domain_get_of_node accessor
+ * @bus_token: @fwnode's device_node might be used for several irq domains. But
+ * in connection with @bus_token, the pair shall be unique in a
+ * system.
* @gc: Pointer to a list of generic chips. There is a helper function for
* setting up one or more generic chips for interrupt controllers
* drivers using the generic chip library which uses this pointer.
@@ -141,9 +157,12 @@ struct irq_domain_chip_generic;
* purposes related to the irq domain.
* @parent: Pointer to parent irq_domain to support hierarchy irq_domains
* @msi_parent_ops: Pointer to MSI parent domain methods for per device domain init
+ * @exit: Function called when the domain is destroyed
*
* Revmap data, used internally by the irq domain code:
- * @revmap_size: Size of the linear map table @revmap[]
+ * @hwirq_max: Top limit for the HW irq number. Especially to avoid
+ * conflicts/failures with reserved HW irqs. Can be ~0.
+ * @revmap_size: Size of the linear map table @revmap
* @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
* @revmap: Linear table of irq_data pointers
*/
@@ -169,6 +188,7 @@ struct irq_domain {
#ifdef CONFIG_GENERIC_MSI_IRQ
const struct msi_parent_ops *msi_parent_ops;
#endif
+ void (*exit)(struct irq_domain *d);
/* reverse map data. The linear map gets appended to the irq_domain */
irq_hw_number_t hwirq_max;
@@ -182,7 +202,7 @@ enum {
/* Irq domain is hierarchical */
IRQ_DOMAIN_FLAG_HIERARCHY = (1 << 0),
- /* Irq domain name was allocated in __irq_domain_add() */
+ /* Irq domain name was allocated internally */
IRQ_DOMAIN_NAME_ALLOCATED = (1 << 1),
/* Irq domain is an IPI domain with virq per cpu */
@@ -208,6 +228,9 @@ enum {
/* Irq domain is a MSI device domain */
IRQ_DOMAIN_FLAG_MSI_DEVICE = (1 << 9),
+ /* Irq domain must destroy generic chips when removed */
+ IRQ_DOMAIN_FLAG_DESTROY_GC = (1 << 10),
+
/*
* Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
* for implementation specific purposes and ignored by the
@@ -257,10 +280,51 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa)
}
void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
- irq_hw_number_t hwirq_max, int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data);
+
+struct irq_domain_chip_generic_info;
+
+/**
+ * struct irq_domain_info - Domain information structure
+ * @fwnode: firmware node for the interrupt controller
+ * @domain_flags: Additional flags to add to the domain flags
+ * @size: Size of linear map; 0 for radix mapping only
+ * @hwirq_max: Maximum number of interrupts supported by controller
+ * @direct_max: Maximum value of direct maps;
+ * Use ~0 for no limit; 0 for no direct mapping
+ * @bus_token: Domain bus token
+ * @ops: Domain operation callbacks
+ * @host_data: Controller private data pointer
+ * @dgc_info: Geneneric chip information structure pointer used to
+ * create generic chips for the domain if not NULL.
+ * @init: Function called when the domain is created.
+ * Allow to do some additional domain initialisation.
+ * @exit: Function called when the domain is destroyed.
+ * Allow to do some additional cleanup operation.
+ */
+struct irq_domain_info {
+ struct fwnode_handle *fwnode;
+ unsigned int domain_flags;
+ unsigned int size;
+ irq_hw_number_t hwirq_max;
+ int direct_max;
+ enum irq_domain_bus_token bus_token;
+ const struct irq_domain_ops *ops;
+ void *host_data;
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ /**
+ * @parent: Pointer to the parent irq domain used in a hierarchy domain
+ */
+ struct irq_domain *parent;
+#endif
+ struct irq_domain_chip_generic_info *dgc_info;
+ int (*init)(struct irq_domain *d);
+ void (*exit)(struct irq_domain *d);
+};
+
+struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info);
+struct irq_domain *devm_irq_domain_instantiate(struct device *dev,
+ const struct irq_domain_info *info);
+
struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode,
unsigned int size,
unsigned int first_irq,
@@ -293,7 +357,7 @@ static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
extern const struct fwnode_operations irqchip_fwnode_ops;
-static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode)
+static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode)
{
return fwnode && fwnode->ops == &irqchip_fwnode_ops;
}
@@ -350,7 +414,17 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no
const struct irq_domain_ops *ops,
void *host_data)
{
- return __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data);
+ struct irq_domain_info info = {
+ .fwnode = of_node_to_fwnode(of_node),
+ .size = size,
+ .hwirq_max = size,
+ .ops = ops,
+ .host_data = host_data,
+ };
+ struct irq_domain *d;
+
+ d = irq_domain_instantiate(&info);
+ return IS_ERR(d) ? NULL : d;
}
#ifdef CONFIG_IRQ_DOMAIN_NOMAP
@@ -359,7 +433,17 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod
const struct irq_domain_ops *ops,
void *host_data)
{
- return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data);
+ struct irq_domain_info info = {
+ .fwnode = of_node_to_fwnode(of_node),
+ .hwirq_max = max_irq,
+ .direct_max = max_irq,
+ .ops = ops,
+ .host_data = host_data,
+ };
+ struct irq_domain *d;
+
+ d = irq_domain_instantiate(&info);
+ return IS_ERR(d) ? NULL : d;
}
extern unsigned int irq_create_direct_mapping(struct irq_domain *host);
@@ -369,7 +453,16 @@ static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node
const struct irq_domain_ops *ops,
void *host_data)
{
- return __irq_domain_add(of_node_to_fwnode(of_node), 0, ~0, 0, ops, host_data);
+ struct irq_domain_info info = {
+ .fwnode = of_node_to_fwnode(of_node),
+ .hwirq_max = ~0U,
+ .ops = ops,
+ .host_data = host_data,
+ };
+ struct irq_domain *d;
+
+ d = irq_domain_instantiate(&info);
+ return IS_ERR(d) ? NULL : d;
}
static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode,
@@ -377,14 +470,33 @@ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *
const struct irq_domain_ops *ops,
void *host_data)
{
- return __irq_domain_add(fwnode, size, size, 0, ops, host_data);
+ struct irq_domain_info info = {
+ .fwnode = fwnode,
+ .size = size,
+ .hwirq_max = size,
+ .ops = ops,
+ .host_data = host_data,
+ };
+ struct irq_domain *d;
+
+ d = irq_domain_instantiate(&info);
+ return IS_ERR(d) ? NULL : d;
}
static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode,
const struct irq_domain_ops *ops,
void *host_data)
{
- return __irq_domain_add(fwnode, 0, ~0, 0, ops, host_data);
+ struct irq_domain_info info = {
+ .fwnode = fwnode,
+ .hwirq_max = ~0,
+ .ops = ops,
+ .host_data = host_data,
+ };
+ struct irq_domain *d;
+
+ d = irq_domain_instantiate(&info);
+ return IS_ERR(d) ? NULL : d;
}
extern void irq_domain_remove(struct irq_domain *host);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index b900c642210c..5157d92b6f23 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1595,6 +1595,11 @@ void jbd2_journal_put_journal_head(struct journal_head *jh);
*/
extern struct kmem_cache *jbd2_handle_cache;
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
+ */
#define jbd2_alloc_handle(_gfp_flags) \
((handle_t *)kmem_cache_zalloc(jbd2_handle_cache, _gfp_flags))
@@ -1609,6 +1614,11 @@ static inline void jbd2_free_handle(handle_t *handle)
*/
extern struct kmem_cache *jbd2_inode_cache;
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
+ */
#define jbd2_alloc_inode(_gfp_flags) \
((struct jbd2_inode *)kmem_cache_alloc(jbd2_inode_cache, _gfp_flags))
diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index ab7f8c152b89..fa26a2dd3b52 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -31,7 +31,7 @@
/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
#define jhash_mask(n) (jhash_size(n)-1)
-/* __jhash_mix -- mix 3 32-bit values reversibly. */
+/* __jhash_mix - mix 3 32-bit values reversibly. */
#define __jhash_mix(a, b, c) \
{ \
a -= c; a ^= rol32(c, 4); c += b; \
@@ -60,7 +60,7 @@
/* jhash - hash an arbitrary key
* @k: sequence of bytes as key
* @length: the length of the key
- * @initval: the previous hash, or an arbitray value
+ * @initval: the previous hash, or an arbitrary value
*
* The generic version, hashes an arbitrary sequence of bytes.
* No alignment or length assumptions are made about the input key.
@@ -110,7 +110,7 @@ static inline u32 jhash(const void *key, u32 length, u32 initval)
/* jhash2 - hash an array of u32's
* @k: the key which must be an array of u32's
* @length: the number of u32's in the key
- * @initval: the previous hash, or an arbitray value
+ * @initval: the previous hash, or an arbitrary value
*
* Returns the hash value of the key.
*/
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 9c042c6384bb..b97ce2df376f 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -5,7 +5,6 @@
#include <linux/smp.h>
#include <linux/threads.h>
#include <linux/percpu.h>
-#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/vtime.h>
diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h
index e0c23a32cdf0..2b1432cc16d5 100644
--- a/include/linux/kmsan.h
+++ b/include/linux/kmsan.h
@@ -230,6 +230,67 @@ void kmsan_handle_urb(const struct urb *urb, bool is_out);
*/
void kmsan_unpoison_entry_regs(const struct pt_regs *regs);
+/**
+ * kmsan_get_metadata() - Return a pointer to KMSAN shadow or origins.
+ * @addr: kernel address.
+ * @is_origin: whether to return origins or shadow.
+ *
+ * Return NULL if metadata cannot be found.
+ */
+void *kmsan_get_metadata(void *addr, bool is_origin);
+
+/**
+ * kmsan_enable_current(): Enable KMSAN for the current task.
+ *
+ * Each kmsan_enable_current() current call must be preceded by a
+ * kmsan_disable_current() call. These call pairs may be nested.
+ */
+void kmsan_enable_current(void);
+
+/**
+ * kmsan_disable_current(): Disable KMSAN for the current task.
+ *
+ * Each kmsan_disable_current() current call must be followed by a
+ * kmsan_enable_current() call. These call pairs may be nested.
+ */
+void kmsan_disable_current(void);
+
+/**
+ * memset_no_sanitize_memory(): Fill memory without KMSAN instrumentation.
+ * @s: address of kernel memory to fill.
+ * @c: constant byte to fill the memory with.
+ * @n: number of bytes to fill.
+ *
+ * This is like memset(), but without KMSAN instrumentation.
+ */
+static inline void *memset_no_sanitize_memory(void *s, int c, size_t n)
+{
+ return __memset(s, c, n);
+}
+
+extern bool kmsan_enabled;
+extern int panic_on_kmsan;
+
+/*
+ * KMSAN performs a lot of consistency checks that are currently enabled by
+ * default. BUG_ON is normally discouraged in the kernel, unless used for
+ * debugging, but KMSAN itself is a debugging tool, so it makes little sense to
+ * recover if something goes wrong.
+ */
+#define KMSAN_WARN_ON(cond) \
+ ({ \
+ const bool __cond = WARN_ON(cond); \
+ if (unlikely(__cond)) { \
+ WRITE_ONCE(kmsan_enabled, false); \
+ if (panic_on_kmsan) { \
+ /* Can't call panic() here because */ \
+ /* of uaccess checks. */ \
+ BUG(); \
+ } \
+ } \
+ __cond; \
+ })
+
#else
static inline void kmsan_init_shadow(void)
@@ -329,6 +390,21 @@ static inline void kmsan_unpoison_entry_regs(const struct pt_regs *regs)
{
}
+static inline void kmsan_enable_current(void)
+{
+}
+
+static inline void kmsan_disable_current(void)
+{
+}
+
+static inline void *memset_no_sanitize_memory(void *s, int c, size_t n)
+{
+ return memset(s, c, n);
+}
+
+#define KMSAN_WARN_ON WARN_ON
+
#endif
#endif /* _LINUX_KMSAN_H */
diff --git a/include/linux/kmsan_types.h b/include/linux/kmsan_types.h
index 929287981afe..dfc59918b3c0 100644
--- a/include/linux/kmsan_types.h
+++ b/include/linux/kmsan_types.h
@@ -31,7 +31,7 @@ struct kmsan_context_state {
struct kmsan_ctx {
struct kmsan_context_state cstate;
int kmsan_in_runtime;
- bool allow_reporting;
+ unsigned int depth;
};
#endif /* _LINUX_KMSAN_TYPES_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 692c01e41a18..689e8be873a7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -378,8 +378,10 @@ struct kvm_vcpu {
bool dy_eligible;
} spin_loop;
#endif
+ bool wants_to_run;
bool preempted;
bool ready;
+ bool scheduled_out;
struct kvm_vcpu_arch arch;
struct kvm_vcpu_stat stat;
char stats_id[KVM_STATS_NAME_SIZE];
@@ -1494,8 +1496,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg);
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
-void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id);
@@ -1955,8 +1955,6 @@ struct _kvm_stats_desc {
HALT_POLL_HIST_COUNT), \
STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking)
-extern struct dentry *kvm_debugfs_dir;
-
ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
const struct _kvm_stats_desc *desc,
void *stats, size_t size_stats,
@@ -2096,6 +2094,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
unsigned flags);
+int kvm_init_irq_routing(struct kvm *kvm);
int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
@@ -2105,6 +2104,11 @@ void kvm_free_irq_routing(struct kvm *kvm);
static inline void kvm_free_irq_routing(struct kvm *kvm) {}
+static inline int kvm_init_irq_routing(struct kvm *kvm)
+{
+ return 0;
+}
+
#endif
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
@@ -2441,4 +2445,45 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
}
#endif /* CONFIG_KVM_PRIVATE_MEM */
+#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
+int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
+bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
+#endif
+
+/**
+ * kvm_gmem_populate() - Populate/prepare a GPA range with guest data
+ *
+ * @kvm: KVM instance
+ * @gfn: starting GFN to be populated
+ * @src: userspace-provided buffer containing data to copy into GFN range
+ * (passed to @post_populate, and incremented on each iteration
+ * if not NULL)
+ * @npages: number of pages to copy from userspace-buffer
+ * @post_populate: callback to issue for each gmem page that backs the GPA
+ * range
+ * @opaque: opaque data to pass to @post_populate callback
+ *
+ * This is primarily intended for cases where a gmem-backed GPA range needs
+ * to be initialized with userspace-provided data prior to being mapped into
+ * the guest as a private page. This should be called with the slots->lock
+ * held so that caller-enforced invariants regarding the expected memory
+ * attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES.
+ *
+ * Returns the number of pages that were populated.
+ */
+typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
+ void __user *src, int order, void *opaque);
+
+long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
+ kvm_gmem_populate_cb post_populate, void *opaque);
+
+#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
+#endif
+
+#ifdef CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY
+long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
+ struct kvm_pre_fault_memory *range);
+#endif
+
#endif
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 792b67ceb631..5099a8ccd5f4 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -50,7 +50,7 @@ struct list_lru_node {
struct list_lru {
struct list_lru_node *node;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
struct list_head list;
int shrinker_id;
bool memcg_aware;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 45cac33334c8..fc4d75c6cec3 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -316,8 +316,6 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
for (; i != U64_MAX; \
__next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
-int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask);
-
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
/**
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 030d34e9d117..7e2eb091049a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -69,18 +69,6 @@ struct mem_cgroup_id {
refcount_t ref;
};
-/*
- * Per memcg event counter is incremented at every pagein/pageout. With THP,
- * it will be incremented by the number of pages. This counter is used
- * to trigger some periodic events. This is straightforward and better
- * than using jiffies etc. to handle periodic memcg event.
- */
-enum mem_cgroup_events_target {
- MEM_CGROUP_TARGET_THRESH,
- MEM_CGROUP_TARGET_SOFTLIMIT,
- MEM_CGROUP_NTARGETS,
-};
-
struct memcg_vmstats_percpu;
struct memcg_vmstats;
struct lruvec_stats_percpu;
@@ -96,23 +84,33 @@ struct mem_cgroup_reclaim_iter {
* per-node information in memory controller.
*/
struct mem_cgroup_per_node {
- struct lruvec lruvec;
+ /* Keep the read-only fields at the start */
+ struct mem_cgroup *memcg; /* Back pointer, we cannot */
+ /* use container_of */
struct lruvec_stats_percpu __percpu *lruvec_stats_percpu;
struct lruvec_stats *lruvec_stats;
-
- unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
-
- struct mem_cgroup_reclaim_iter iter;
-
struct shrinker_info __rcu *shrinker_info;
+#ifdef CONFIG_MEMCG_V1
+ /*
+ * Memcg-v1 only stuff in middle as buffer between read mostly fields
+ * and update often fields to avoid false sharing. If v1 stuff is
+ * not present, an explicit padding is needed.
+ */
+
struct rb_node tree_node; /* RB tree node */
unsigned long usage_in_excess;/* Set to the value by which */
/* the soft limit is exceeded*/
bool on_tree;
- struct mem_cgroup *memcg; /* Back pointer, we cannot */
- /* use container_of */
+#else
+ CACHELINE_PADDING(_pad1_);
+#endif
+
+ /* Fields which get updated often at the end. */
+ struct lruvec lruvec;
+ unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
+ struct mem_cgroup_reclaim_iter iter;
};
struct mem_cgroup_threshold {
@@ -194,14 +192,10 @@ struct mem_cgroup {
struct page_counter memsw; /* v1 only */
};
- /* Legacy consumer-oriented counters */
- struct page_counter kmem; /* v1 only */
- struct page_counter tcpmem; /* v1 only */
-
/* Range enforcement for interrupt charges */
struct work_struct high_work;
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
+#ifdef CONFIG_ZSWAP
unsigned long zswap_max;
/*
@@ -211,8 +205,6 @@ struct mem_cgroup {
bool zswap_writeback;
#endif
- unsigned long soft_limit;
-
/* vmpressure notifications */
struct vmpressure vmpressure;
@@ -221,13 +213,7 @@ struct mem_cgroup {
*/
bool oom_group;
- /* protected by memcg_oom_lock */
- bool oom_lock;
- int under_oom;
-
- int swappiness;
- /* OOM-Killer disable */
- int oom_kill_disable;
+ int swappiness;
/* memory.events and memory.events.local */
struct cgroup_file events_file;
@@ -236,29 +222,6 @@ struct mem_cgroup {
/* handle for "memory.swap.events" */
struct cgroup_file swap_events_file;
- /* protect arrays of thresholds */
- struct mutex thresholds_lock;
-
- /* thresholds for memory usage. RCU-protected */
- struct mem_cgroup_thresholds thresholds;
-
- /* thresholds for mem+swap usage. RCU-protected */
- struct mem_cgroup_thresholds memsw_thresholds;
-
- /* For oom notifier event fd */
- struct list_head oom_notify;
-
- /*
- * Should we move charges of a task when a task is moved into this
- * mem_cgroup ? And what type of charges should we move ?
- */
- unsigned long move_charge_at_immigrate;
- /* taken only while moving_account > 0 */
- spinlock_t move_lock;
- unsigned long move_lock_flags;
-
- CACHELINE_PADDING(_pad1_);
-
/* memory.stat */
struct memcg_vmstats *vmstats;
@@ -273,11 +236,6 @@ struct mem_cgroup {
*/
unsigned long socket_pressure;
- /* Legacy tcp memory accounting */
- bool tcpmem_active;
- int tcpmem_pressure;
-
-#ifdef CONFIG_MEMCG_KMEM
int kmemcg_id;
/*
* memcg->objcg is wiped out as a part of the objcg repaprenting
@@ -288,15 +246,6 @@ struct mem_cgroup {
struct obj_cgroup *orig_objcg;
/* list of inherited objcgs, protected by objcg_lock */
struct list_head objcg_list;
-#endif
-
- CACHELINE_PADDING(_pad2_);
-
- /*
- * set > 0 if pages under this cgroup are moving to other cgroup.
- */
- atomic_t moving_account;
- struct task_struct *move_lock_task;
struct memcg_vmstats_percpu __percpu *vmstats_percpu;
@@ -306,10 +255,6 @@ struct mem_cgroup {
struct memcg_cgwb_frn cgwb_frn[MEMCG_CGWB_FRN_CNT];
#endif
- /* List of events which userspace want to receive */
- struct list_head event_list;
- spinlock_t event_list_lock;
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct deferred_split deferred_split_queue;
#endif
@@ -319,6 +264,56 @@ struct mem_cgroup {
struct lru_gen_mm_list mm_list;
#endif
+#ifdef CONFIG_MEMCG_V1
+ /* Legacy consumer-oriented counters */
+ struct page_counter kmem; /* v1 only */
+ struct page_counter tcpmem; /* v1 only */
+
+ unsigned long soft_limit;
+
+ /* protected by memcg_oom_lock */
+ bool oom_lock;
+ int under_oom;
+
+ /* OOM-Killer disable */
+ int oom_kill_disable;
+
+ /* protect arrays of thresholds */
+ struct mutex thresholds_lock;
+
+ /* thresholds for memory usage. RCU-protected */
+ struct mem_cgroup_thresholds thresholds;
+
+ /* thresholds for mem+swap usage. RCU-protected */
+ struct mem_cgroup_thresholds memsw_thresholds;
+
+ /* For oom notifier event fd */
+ struct list_head oom_notify;
+
+ /*
+ * Should we move charges of a task when a task is moved into this
+ * mem_cgroup ? And what type of charges should we move ?
+ */
+ unsigned long move_charge_at_immigrate;
+ /* taken only while moving_account > 0 */
+ spinlock_t move_lock;
+ unsigned long move_lock_flags;
+
+ /* Legacy tcp memory accounting */
+ bool tcpmem_active;
+ int tcpmem_pressure;
+
+ /*
+ * set > 0 if pages under this cgroup are moving to other cgroup.
+ */
+ atomic_t moving_account;
+ struct task_struct *move_lock_task;
+
+ /* List of events which userspace want to receive */
+ struct list_head event_list;
+ spinlock_t event_list_lock;
+#endif /* CONFIG_MEMCG_V1 */
+
struct mem_cgroup_per_node *nodeinfo[];
};
@@ -443,11 +438,6 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio)
return __folio_memcg(folio);
}
-static inline struct mem_cgroup *page_memcg(struct page *page)
-{
- return folio_memcg(page_folio(page));
-}
-
/**
* folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio.
* @folio: Pointer to the folio.
@@ -540,7 +530,6 @@ retry:
return memcg;
}
-#ifdef CONFIG_MEMCG_KMEM
/*
* folio_memcg_kmem - Check if the folio has the memcg_kmem flag set.
* @folio: Pointer to the folio.
@@ -556,15 +545,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
return folio->memcg_data & MEMCG_DATA_KMEM;
}
-
-#else
-static inline bool folio_memcg_kmem(struct folio *folio)
-{
- return false;
-}
-
-#endif
-
static inline bool PageMemcgKmem(struct page *page)
{
return folio_memcg_kmem(page_folio(page));
@@ -949,51 +929,13 @@ void mem_cgroup_print_oom_context(struct mem_cgroup *memcg,
void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg);
-static inline void mem_cgroup_enter_user_fault(void)
-{
- WARN_ON(current->in_user_fault);
- current->in_user_fault = 1;
-}
-
-static inline void mem_cgroup_exit_user_fault(void)
-{
- WARN_ON(!current->in_user_fault);
- current->in_user_fault = 0;
-}
-
-static inline bool task_in_memcg_oom(struct task_struct *p)
-{
- return p->memcg_in_oom;
-}
-
-bool mem_cgroup_oom_synchronize(bool wait);
struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
struct mem_cgroup *oom_domain);
void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
-void folio_memcg_lock(struct folio *folio);
-void folio_memcg_unlock(struct folio *folio);
-
void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx,
int val);
-/* try to stablize folio_memcg() for all the pages in a memcg */
-static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
-{
- rcu_read_lock();
-
- if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
- return true;
-
- rcu_read_unlock();
- return false;
-}
-
-static inline void mem_cgroup_unlock_pages(void)
-{
- rcu_read_unlock();
-}
-
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx, int val)
@@ -1014,7 +956,7 @@ static inline void mod_memcg_page_state(struct page *page,
return;
rcu_read_lock();
- memcg = page_memcg(page);
+ memcg = folio_memcg(page_folio(page));
if (memcg)
mod_memcg_state(memcg, idx, val);
rcu_read_unlock();
@@ -1120,10 +1062,6 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
void split_page_memcg(struct page *head, int old_order, int new_order);
-unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
- gfp_t gfp_mask,
- unsigned long *total_scanned);
-
#else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0
@@ -1133,11 +1071,6 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio)
return NULL;
}
-static inline struct mem_cgroup *page_memcg(struct page *page)
-{
- return NULL;
-}
-
static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
{
WARN_ON_ONCE(!rcu_read_lock_held());
@@ -1439,48 +1372,10 @@ mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
{
}
-static inline void folio_memcg_lock(struct folio *folio)
-{
-}
-
-static inline void folio_memcg_unlock(struct folio *folio)
-{
-}
-
-static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
-{
- /* to match folio_memcg_rcu() */
- rcu_read_lock();
- return true;
-}
-
-static inline void mem_cgroup_unlock_pages(void)
-{
- rcu_read_unlock();
-}
-
static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask)
{
}
-static inline void mem_cgroup_enter_user_fault(void)
-{
-}
-
-static inline void mem_cgroup_exit_user_fault(void)
-{
-}
-
-static inline bool task_in_memcg_oom(struct task_struct *p)
-{
- return false;
-}
-
-static inline bool mem_cgroup_oom_synchronize(bool wait)
-{
- return false;
-}
-
static inline struct mem_cgroup *mem_cgroup_get_oom_group(
struct task_struct *victim, struct mem_cgroup *oom_domain)
{
@@ -1574,14 +1469,6 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
static inline void split_page_memcg(struct page *head, int old_order, int new_order)
{
}
-
-static inline
-unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
- gfp_t gfp_mask,
- unsigned long *total_scanned)
-{
- return 0;
-}
#endif /* CONFIG_MEMCG */
/*
@@ -1589,7 +1476,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
* if MEMCG_DATA_OBJEXTS is set.
*/
struct slabobj_ext {
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
struct obj_cgroup *objcg;
#endif
#ifdef CONFIG_MEM_ALLOC_PROFILING
@@ -1636,7 +1523,7 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
spin_unlock_irqrestore(&lruvec->lru_lock, flags);
}
-/* Test requires a stable page->memcg binding, see page_memcg() */
+/* Test requires a stable folio->memcg binding, see folio_memcg() */
static inline bool folio_matches_lruvec(struct folio *folio,
struct lruvec *lruvec)
{
@@ -1734,8 +1621,10 @@ void mem_cgroup_sk_alloc(struct sock *sk);
void mem_cgroup_sk_free(struct sock *sk);
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
+#ifdef CONFIG_MEMCG_V1
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
return !!memcg->tcpmem_pressure;
+#endif /* CONFIG_MEMCG_V1 */
do {
if (time_before(jiffies, READ_ONCE(memcg->socket_pressure)))
return true;
@@ -1762,7 +1651,7 @@ static inline void set_shrinker_bit(struct mem_cgroup *memcg,
}
#endif
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
bool mem_cgroup_kmem_disabled(void);
int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
void __memcg_kmem_uncharge_page(struct page *page, int order);
@@ -1905,9 +1794,9 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
{
}
-#endif /* CONFIG_MEMCG_KMEM */
+#endif /* CONFIG_MEMCG */
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
+#if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP)
bool obj_cgroup_may_zswap(struct obj_cgroup *objcg);
void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size);
void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size);
@@ -1932,4 +1821,100 @@ static inline bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
}
#endif
+
+/* Cgroup v1-related declarations */
+
+#ifdef CONFIG_MEMCG_V1
+unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned);
+
+bool mem_cgroup_oom_synchronize(bool wait);
+
+static inline bool task_in_memcg_oom(struct task_struct *p)
+{
+ return p->memcg_in_oom;
+}
+
+void folio_memcg_lock(struct folio *folio);
+void folio_memcg_unlock(struct folio *folio);
+
+/* try to stablize folio_memcg() for all the pages in a memcg */
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
+{
+ rcu_read_lock();
+
+ if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
+ return true;
+
+ rcu_read_unlock();
+ return false;
+}
+
+static inline void mem_cgroup_unlock_pages(void)
+{
+ rcu_read_unlock();
+}
+
+static inline void mem_cgroup_enter_user_fault(void)
+{
+ WARN_ON(current->in_user_fault);
+ current->in_user_fault = 1;
+}
+
+static inline void mem_cgroup_exit_user_fault(void)
+{
+ WARN_ON(!current->in_user_fault);
+ current->in_user_fault = 0;
+}
+
+#else /* CONFIG_MEMCG_V1 */
+static inline
+unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
+ gfp_t gfp_mask,
+ unsigned long *total_scanned)
+{
+ return 0;
+}
+
+static inline void folio_memcg_lock(struct folio *folio)
+{
+}
+
+static inline void folio_memcg_unlock(struct folio *folio)
+{
+}
+
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
+{
+ /* to match folio_memcg_rcu() */
+ rcu_read_lock();
+ return true;
+}
+
+static inline void mem_cgroup_unlock_pages(void)
+{
+ rcu_read_unlock();
+}
+
+static inline bool task_in_memcg_oom(struct task_struct *p)
+{
+ return false;
+}
+
+static inline bool mem_cgroup_oom_synchronize(bool wait)
+{
+ return false;
+}
+
+static inline void mem_cgroup_enter_user_fault(void)
+{
+}
+
+static inline void mem_cgroup_exit_user_fault(void)
+{
+}
+
+#endif /* CONFIG_MEMCG_V1 */
+
#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/memfd.h b/include/linux/memfd.h
index e7abf6fa4c52..3f2cf339ceaf 100644
--- a/include/linux/memfd.h
+++ b/include/linux/memfd.h
@@ -6,11 +6,16 @@
#ifdef CONFIG_MEMFD_CREATE
extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg);
+struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx);
#else
static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a)
{
return -EINVAL;
}
+static inline struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
+{
+ return ERR_PTR(-EINVAL);
+}
#endif
#endif /* __LINUX_MEMFD_H */
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 0d70788558f4..0dc0cf2863e2 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -38,6 +38,7 @@ struct access_coordinate;
#ifdef CONFIG_NUMA
extern bool numa_demotion_enabled;
extern struct memory_dev_type *default_dram_type;
+extern nodemask_t default_dram_nodes;
struct memory_dev_type *alloc_memory_type(int adistance);
void put_memory_type(struct memory_dev_type *memtype);
void init_node_memory_type(int node, struct memory_dev_type *default_type);
@@ -76,6 +77,7 @@ static inline bool node_is_toptier(int node)
#define numa_demotion_enabled false
#define default_dram_type NULL
+#define default_dram_nodes NODE_MASK_NONE
/*
* CONFIG_NUMA implementation returns non NULL error.
*/
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 7a9ff464608d..ebe876930e78 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -175,8 +175,8 @@ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
extern int online_pages(unsigned long pfn, unsigned long nr_pages,
struct zone *zone, struct memory_group *group);
-extern void __offline_isolated_pages(unsigned long start_pfn,
- unsigned long end_pfn);
+extern unsigned long __offline_isolated_pages(unsigned long start_pfn,
+ unsigned long end_pfn);
typedef void (*online_page_callback_t)(struct page *page, unsigned int order);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 2ce13e8a309b..644be30b69c8 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -63,8 +63,6 @@ extern const char *migrate_reason_names[MR_TYPES];
#ifdef CONFIG_MIGRATION
void putback_movable_pages(struct list_head *l);
-int migrate_folio_extra(struct address_space *mapping, struct folio *dst,
- struct folio *src, enum migrate_mode mode, int extra_count);
int migrate_folio(struct address_space *mapping, struct folio *dst,
struct folio *src, enum migrate_mode mode);
int migrate_pages(struct list_head *l, new_folio_t new, free_folio_t free,
@@ -78,7 +76,6 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl)
__releases(ptl);
void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
-void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
int folio_migrate_mapping(struct address_space *mapping,
struct folio *newfolio, struct folio *folio, int extra_count);
@@ -142,9 +139,16 @@ const struct movable_operations *page_movable_ops(struct page *page)
}
#ifdef CONFIG_NUMA_BALANCING
+int migrate_misplaced_folio_prepare(struct folio *folio,
+ struct vm_area_struct *vma, int node);
int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma,
int node);
#else
+static inline int migrate_misplaced_folio_prepare(struct folio *folio,
+ struct vm_area_struct *vma, int node)
+{
+ return -EAGAIN; /* can't migrate now */
+}
static inline int migrate_misplaced_folio(struct folio *folio,
struct vm_area_struct *vma, int node)
{
diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
index f37cc03f9369..265c4328b36a 100644
--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -7,16 +7,11 @@
* on most operations but not ->writepage as the potential stall time
* is too significant
* MIGRATE_SYNC will block when migrating pages
- * MIGRATE_SYNC_NO_COPY will block when migrating pages but will not copy pages
- * with the CPU. Instead, page copy happens outside the migratepage()
- * callback and is likely using a DMA engine. See migrate_vma() and HMM
- * (mm/hmm.c) for users of this mode.
*/
enum migrate_mode {
MIGRATE_ASYNC,
MIGRATE_SYNC_LIGHT,
MIGRATE_SYNC,
- MIGRATE_SYNC_NO_COPY,
};
enum migrate_reason {
@@ -29,6 +24,7 @@ enum migrate_reason {
MR_CONTIG_RANGE,
MR_LONGTERM_PIN,
MR_DEMOTION,
+ MR_DAMON,
MR_TYPES
};
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index d52daf45861b..43a7b9dcf15e 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -7,46 +7,89 @@
#include <linux/types.h>
/**
- * struct min_heap - Data structure to hold a min-heap.
- * @data: Start of array holding the heap elements.
+ * Data structure to hold a min-heap.
* @nr: Number of elements currently in the heap.
* @size: Maximum number of elements that can be held in current storage.
+ * @data: Pointer to the start of array holding the heap elements.
+ * @preallocated: Start of the static preallocated array holding the heap elements.
*/
-struct min_heap {
- void *data;
- int nr;
- int size;
-};
+#define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \
+struct _name { \
+ int nr; \
+ int size; \
+ _type *data; \
+ _type preallocated[_nr]; \
+}
+
+#define DEFINE_MIN_HEAP(_type, _name) MIN_HEAP_PREALLOCATED(_type, _name, 0)
+
+typedef DEFINE_MIN_HEAP(char, min_heap_char) min_heap_char;
+
+#define __minheap_cast(_heap) (typeof((_heap)->data[0]) *)
+#define __minheap_obj_size(_heap) sizeof((_heap)->data[0])
/**
* struct min_heap_callbacks - Data/functions to customise the min_heap.
- * @elem_size: The nr of each element in bytes.
* @less: Partial order function for this heap.
* @swp: Swap elements function.
*/
struct min_heap_callbacks {
- int elem_size;
- bool (*less)(const void *lhs, const void *rhs);
- void (*swp)(void *lhs, void *rhs);
+ bool (*less)(const void *lhs, const void *rhs, void *args);
+ void (*swp)(void *lhs, void *rhs, void *args);
};
+/* Initialize a min-heap. */
+static __always_inline
+void __min_heap_init(min_heap_char *heap, void *data, int size)
+{
+ heap->nr = 0;
+ heap->size = size;
+ if (data)
+ heap->data = data;
+ else
+ heap->data = heap->preallocated;
+}
+
+#define min_heap_init(_heap, _data, _size) \
+ __min_heap_init((min_heap_char *)_heap, _data, _size)
+
+/* Get the minimum element from the heap. */
+static __always_inline
+void *__min_heap_peek(struct min_heap_char *heap)
+{
+ return heap->nr ? heap->data : NULL;
+}
+
+#define min_heap_peek(_heap) \
+ (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap))
+
+/* Check if the heap is full. */
+static __always_inline
+bool __min_heap_full(min_heap_char *heap)
+{
+ return heap->nr == heap->size;
+}
+
+#define min_heap_full(_heap) \
+ __min_heap_full((min_heap_char *)_heap)
+
/* Sift the element at pos down the heap. */
static __always_inline
-void min_heapify(struct min_heap *heap, int pos,
- const struct min_heap_callbacks *func)
+void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args)
{
void *left, *right;
void *data = heap->data;
- void *root = data + pos * func->elem_size;
+ void *root = data + pos * elem_size;
int i = pos, j;
/* Find the sift-down path all the way to the leaves. */
for (;;) {
if (i * 2 + 2 >= heap->nr)
break;
- left = data + (i * 2 + 1) * func->elem_size;
- right = data + (i * 2 + 2) * func->elem_size;
- i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2;
+ left = data + (i * 2 + 1) * elem_size;
+ right = data + (i * 2 + 2) * elem_size;
+ i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2;
}
/* Special case for the last leaf with no sibling. */
@@ -54,83 +97,140 @@ void min_heapify(struct min_heap *heap, int pos,
i = i * 2 + 1;
/* Backtrack to the correct location. */
- while (i != pos && func->less(root, data + i * func->elem_size))
+ while (i != pos && func->less(root, data + i * elem_size, args))
i = (i - 1) / 2;
/* Shift the element into its correct place. */
j = i;
while (i != pos) {
i = (i - 1) / 2;
- func->swp(data + i * func->elem_size, data + j * func->elem_size);
+ func->swp(data + i * elem_size, data + j * elem_size, args);
+ }
+}
+
+#define min_heap_sift_down(_heap, _pos, _func, _args) \
+ __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
+
+/* Sift up ith element from the heap, O(log2(nr)). */
+static __always_inline
+void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
+ const struct min_heap_callbacks *func, void *args)
+{
+ void *data = heap->data;
+ size_t parent;
+
+ while (idx) {
+ parent = (idx - 1) / 2;
+ if (func->less(data + parent * elem_size, data + idx * elem_size, args))
+ break;
+ func->swp(data + parent * elem_size, data + idx * elem_size, args);
+ idx = parent;
}
}
+#define min_heap_sift_up(_heap, _idx, _func, _args) \
+ __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
+
/* Floyd's approach to heapification that is O(nr). */
static __always_inline
-void min_heapify_all(struct min_heap *heap,
- const struct min_heap_callbacks *func)
+void __min_heapify_all(min_heap_char *heap, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args)
{
int i;
for (i = heap->nr / 2 - 1; i >= 0; i--)
- min_heapify(heap, i, func);
+ __min_heap_sift_down(heap, i, elem_size, func, args);
}
+#define min_heapify_all(_heap, _func, _args) \
+ __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
+
/* Remove minimum element from the heap, O(log2(nr)). */
static __always_inline
-void min_heap_pop(struct min_heap *heap,
- const struct min_heap_callbacks *func)
+bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args)
{
void *data = heap->data;
if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
- return;
+ return false;
/* Place last element at the root (position 0) and then sift down. */
heap->nr--;
- memcpy(data, data + (heap->nr * func->elem_size), func->elem_size);
- min_heapify(heap, 0, func);
+ memcpy(data, data + (heap->nr * elem_size), elem_size);
+ __min_heap_sift_down(heap, 0, elem_size, func, args);
+
+ return true;
}
+#define min_heap_pop(_heap, _func, _args) \
+ __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
+
/*
* Remove the minimum element and then push the given element. The
* implementation performs 1 sift (O(log2(nr))) and is therefore more
* efficient than a pop followed by a push that does 2.
*/
static __always_inline
-void min_heap_pop_push(struct min_heap *heap,
- const void *element,
- const struct min_heap_callbacks *func)
+void __min_heap_pop_push(min_heap_char *heap,
+ const void *element, size_t elem_size,
+ const struct min_heap_callbacks *func,
+ void *args)
{
- memcpy(heap->data, element, func->elem_size);
- min_heapify(heap, 0, func);
+ memcpy(heap->data, element, elem_size);
+ __min_heap_sift_down(heap, 0, elem_size, func, args);
}
+#define min_heap_pop_push(_heap, _element, _func, _args) \
+ __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
+
/* Push an element on to the heap, O(log2(nr)). */
static __always_inline
-void min_heap_push(struct min_heap *heap, const void *element,
- const struct min_heap_callbacks *func)
+bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args)
{
void *data = heap->data;
- void *child, *parent;
int pos;
if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap"))
- return;
+ return false;
/* Place at the end of data. */
pos = heap->nr;
- memcpy(data + (pos * func->elem_size), element, func->elem_size);
+ memcpy(data + (pos * elem_size), element, elem_size);
heap->nr++;
/* Sift child at pos up. */
- for (; pos > 0; pos = (pos - 1) / 2) {
- child = data + (pos * func->elem_size);
- parent = data + ((pos - 1) / 2) * func->elem_size;
- if (func->less(parent, child))
- break;
- func->swp(parent, child);
- }
+ __min_heap_sift_up(heap, elem_size, pos, func, args);
+
+ return true;
}
+#define min_heap_push(_heap, _element, _func, _args) \
+ __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
+
+/* Remove ith element from the heap, O(log2(nr)). */
+static __always_inline
+bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
+ const struct min_heap_callbacks *func, void *args)
+{
+ void *data = heap->data;
+
+ if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
+ return false;
+
+ /* Place last element at the root (position 0) and then sift down. */
+ heap->nr--;
+ if (idx == heap->nr)
+ return true;
+ func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args);
+ __min_heap_sift_up(heap, elem_size, idx, func, args);
+ __min_heap_sift_down(heap, idx, elem_size, func, args);
+
+ return true;
+}
+
+#define min_heap_del(_heap, _idx, _func, _args) \
+ __min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
+
#endif /* _LINUX_MIN_HEAP_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ab3d78116043..7d044e737dba 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1202,8 +1202,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
/*
* How many times the entire folio is mapped as a single unit (eg by a
* PMD or PUD entry). This is probably not what you want, except for
- * debugging purposes - it does not include PTE-mapped sub-pages; look
- * at folio_mapcount() or page_mapcount() instead.
+ * debugging purposes or implementation of other core folio_*() primitives.
*/
static inline int folio_entire_mapcount(const struct folio *folio)
{
@@ -1211,40 +1210,6 @@ static inline int folio_entire_mapcount(const struct folio *folio)
return atomic_read(&folio->_entire_mapcount) + 1;
}
-/*
- * The atomic page->_mapcount, starts from -1: so that transitions
- * both from it and to it can be tracked, using atomic_inc_and_test
- * and atomic_add_negative(-1).
- */
-static inline void page_mapcount_reset(struct page *page)
-{
- atomic_set(&(page)->_mapcount, -1);
-}
-
-/**
- * page_mapcount() - Number of times this precise page is mapped.
- * @page: The page.
- *
- * The number of times this page is mapped. If this page is part of
- * a large folio, it includes the number of times this page is mapped
- * as part of that folio.
- *
- * Will report 0 for pages which cannot be mapped into userspace, eg
- * slab, page tables and similar.
- */
-static inline int page_mapcount(struct page *page)
-{
- int mapcount = atomic_read(&page->_mapcount) + 1;
-
- /* Handle page_has_type() pages */
- if (mapcount < PAGE_MAPCOUNT_RESERVE + 1)
- mapcount = 0;
- if (unlikely(PageCompound(page)))
- mapcount += folio_entire_mapcount(page_folio(page));
-
- return mapcount;
-}
-
static inline int folio_large_mapcount(const struct folio *folio)
{
VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
@@ -1326,6 +1291,7 @@ void put_pages_list(struct list_head *pages);
void split_page(struct page *page, unsigned int order);
void folio_copy(struct folio *dst, struct folio *src);
+int folio_mc_copy(struct folio *dst, struct folio *src);
unsigned long nr_free_buffer_pages(void);
@@ -1612,17 +1578,19 @@ static inline void put_page(struct page *page)
* issue.
*
* Locking: the lockless algorithm described in folio_try_get_rcu()
- * provides safe operation for get_user_pages(), page_mkclean() and
+ * provides safe operation for get_user_pages(), folio_mkclean() and
* other calls that race to set up page table entries.
*/
#define GUP_PIN_COUNTING_BIAS (1U << 10)
void unpin_user_page(struct page *page);
+void unpin_folio(struct folio *folio);
void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
bool make_dirty);
void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
bool make_dirty);
void unpin_user_pages(struct page **pages, unsigned long npages);
+void unpin_folios(struct folio **folios, unsigned long nfolios);
static inline bool is_cow_mapping(vm_flags_t flags)
{
@@ -1953,8 +1921,8 @@ static inline struct folio *pfn_folio(unsigned long pfn)
*
* For more information, please see Documentation/core-api/pin_user_pages.rst.
*
- * Return: True, if it is likely that the page has been "dma-pinned".
- * False, if the page is definitely not dma-pinned.
+ * Return: True, if it is likely that the folio has been "dma-pinned".
+ * False, if the folio is definitely not dma-pinned.
*/
static inline bool folio_maybe_dma_pinned(struct folio *folio)
{
@@ -1973,11 +1941,6 @@ static inline bool folio_maybe_dma_pinned(struct folio *folio)
GUP_PIN_COUNTING_BIAS;
}
-static inline bool page_maybe_dma_pinned(struct page *page)
-{
- return folio_maybe_dma_pinned(page_folio(page));
-}
-
/*
* This should most likely only be called during fork() to see whether we
* should break the cow immediately for an anon page on the src mm.
@@ -2295,19 +2258,6 @@ static inline void *folio_address(const struct folio *folio)
return page_address(&folio->page);
}
-extern pgoff_t __page_file_index(struct page *page);
-
-/*
- * Return the pagecache index of the passed page. Regular pagecache pages
- * use ->index whereas swapcache pages use swp_offset(->private)
- */
-static inline pgoff_t page_index(struct page *page)
-{
- if (unlikely(PageSwapCache(page)))
- return __page_file_index(page);
- return page->index;
-}
-
/*
* Return true only if the page has been allocated with
* ALLOC_NO_WATERMARKS and the low watermark was not
@@ -2550,6 +2500,9 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
+long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end,
+ struct folio **folios, unsigned int max_folios,
+ pgoff_t *offset);
int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
@@ -4038,7 +3991,6 @@ extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared);
void num_poisoned_pages_inc(unsigned long pfn);
void num_poisoned_pages_sub(unsigned long pfn, long i);
-struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
#else
static inline void memory_failure_queue(unsigned long pfn, int flags)
{
@@ -4059,12 +4011,6 @@ static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
}
#endif
-#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_KSM)
-void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
- struct vm_area_struct *vma, struct list_head *to_kill,
- unsigned long ksm_addr);
-#endif
-
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
extern void memblk_nr_poison_inc(unsigned long pfn);
extern void memblk_nr_poison_sub(unsigned long pfn, long i);
@@ -4105,10 +4051,10 @@ enum mf_result {
enum mf_action_page_type {
MF_MSG_KERNEL,
MF_MSG_KERNEL_HIGH_ORDER,
- MF_MSG_SLAB,
MF_MSG_DIFFERENT_COMPOUND,
MF_MSG_HUGE,
MF_MSG_FREE_HUGE,
+ MF_MSG_GET_HWPOISON,
MF_MSG_UNMAP_FAILED,
MF_MSG_DIRTY_SWAPCACHE,
MF_MSG_CLEAN_SWAPCACHE,
@@ -4122,13 +4068,12 @@ enum mf_action_page_type {
MF_MSG_BUDDY,
MF_MSG_DAX,
MF_MSG_UNSPLIT_THP,
+ MF_MSG_ALREADY_POISONED,
MF_MSG_UNKNOWN,
};
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
-extern void clear_huge_page(struct page *page,
- unsigned long addr_hint,
- unsigned int pages_per_huge_page);
+void folio_zero_user(struct folio *folio, unsigned long addr_hint);
int copy_user_large_folio(struct folio *dst, struct folio *src,
unsigned long addr_hint,
struct vm_area_struct *vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a199c48bc462..485424979254 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -46,9 +46,7 @@ struct mem_cgroup;
* which is guaranteed to be aligned. If you use the same storage as
* page->mapping, you must restore it to NULL before freeing the page.
*
- * If your page will not be mapped to userspace, you can also use the four
- * bytes in the mapcount union, but you must call page_mapcount_reset()
- * before freeing it.
+ * The mapcount field must not be used for own purposes.
*
* If you want to use the refcount field, it must be used in such a way
* that other CPUs temporarily incrementing and then decrementing the
@@ -152,18 +150,31 @@ struct page {
union { /* This union is 4 bytes in size. */
/*
- * If the page can be mapped to userspace, encodes the number
- * of times this page is referenced by a page table.
+ * For head pages of typed folios, the value stored here
+ * allows for determining what this page is used for. The
+ * tail pages of typed folios will not store a type
+ * (page_type == _mapcount == -1).
+ *
+ * See page-flags.h for a list of page types which are currently
+ * stored here.
+ *
+ * Owners of typed folios may reuse the lower 16 bit of the
+ * head page page_type field after setting the page type,
+ * but must reset these 16 bit to -1 before clearing the
+ * page type.
*/
- atomic_t _mapcount;
+ unsigned int page_type;
/*
- * If the page is neither PageSlab nor mappable to userspace,
- * the value stored here may help determine what this page
- * is used for. See page-flags.h for a list of page types
- * which are currently stored here.
+ * For pages that are part of non-typed folios for which mappings
+ * are tracked via the RMAP, encodes the number of times this page
+ * is directly referenced by a page table.
+ *
+ * Note that the mapcount is always initialized to -1, so that
+ * transitions both from it and to it can be tracked, using
+ * atomic_inc_and_test() and atomic_add_negative(-1).
*/
- unsigned int page_type;
+ atomic_t _mapcount;
};
/* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1dc6248feb83..41458892bc8a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -220,6 +220,8 @@ enum node_stat_item {
PGDEMOTE_KSWAPD,
PGDEMOTE_DIRECT,
PGDEMOTE_KHUGEPAGED,
+ NR_MEMMAP, /* page metadata allocated through buddy allocator */
+ NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */
NR_VM_NODE_STAT_ITEMS
};
diff --git a/include/linux/msi.h b/include/linux/msi.h
index dc27cf3903d5..944979763825 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -19,13 +19,9 @@
*/
#include <linux/irqdomain_defs.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
#include <linux/msi_api.h>
-#include <linux/xarray.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
#include <linux/irq.h>
-#include <linux/bits.h>
#include <asm/msi.h>
@@ -81,7 +77,6 @@ extern int pci_msi_ignore_mask;
/* Helper functions */
struct msi_desc;
struct pci_dev;
-struct platform_msi_priv_data;
struct device_attribute;
struct irq_domain;
struct irq_affinity_desc;
@@ -228,22 +223,6 @@ struct msi_dev_domain {
struct irq_domain *domain;
};
-/**
- * msi_device_data - MSI per device data
- * @properties: MSI properties which are interesting to drivers
- * @platform_data: Platform-MSI specific data
- * @mutex: Mutex protecting the MSI descriptor store
- * @__domains: Internal data for per device MSI domains
- * @__iter_idx: Index to search the next entry for iterators
- */
-struct msi_device_data {
- unsigned long properties;
- struct platform_msi_priv_data *platform_data;
- struct mutex mutex;
- struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS];
- unsigned long __iter_idx;
-};
-
int msi_setup_device_data(struct device *dev);
void msi_lock_descs(struct device *dev);
@@ -556,6 +535,8 @@ enum {
MSI_FLAG_USE_DEV_FWNODE = (1 << 7),
/* Set parent->dev into domain->pm_dev on device domain creation */
MSI_FLAG_PARENT_PM_DEV = (1 << 8),
+ /* Support for parent mask/unmask */
+ MSI_FLAG_PCI_MSI_MASK_PARENT = (1 << 9),
/* Mask for the generic functionality */
MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0),
@@ -639,35 +620,6 @@ void msi_domain_free_irqs_all(struct device *dev, unsigned int domid);
struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain);
-struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
- struct msi_domain_info *info,
- struct irq_domain *parent);
-
-/* When an MSI domain is used as an intermediate domain */
-int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
- int nvec, msi_alloc_info_t *args);
-int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
- int virq, int nvec, msi_alloc_info_t *args);
-void msi_domain_depopulate_descs(struct device *dev, int virq, int nvec);
-
-struct irq_domain *
-__platform_msi_create_device_domain(struct device *dev,
- unsigned int nvec,
- bool is_tree,
- irq_write_msi_msg_t write_msi_msg,
- const struct irq_domain_ops *ops,
- void *host_data);
-
-#define platform_msi_create_device_domain(dev, nvec, write, ops, data) \
- __platform_msi_create_device_domain(dev, nvec, false, write, ops, data)
-#define platform_msi_create_device_tree_domain(dev, nvec, write, ops, data) \
- __platform_msi_create_device_domain(dev, nvec, true, write, ops, data)
-
-int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int virq,
- unsigned int nr_irqs);
-void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq,
- unsigned int nvec);
-void *platform_msi_get_host_data(struct irq_domain *domain);
/* Per device platform MSI */
int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec,
irq_write_msi_msg_t write_msi_msg);
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 947410faf9e2..35ca19ae21ae 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -308,32 +308,32 @@ static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr)
{
map_word val = map_read(map, addr);
- if (map_bankwidth_is_1(map)) {
+ if (map_bankwidth_is_1(map))
return val.x[0];
- } else if (map_bankwidth_is_2(map)) {
+ if (map_bankwidth_is_2(map))
return cfi16_to_cpu(map, val.x[0]);
- } else {
- /* No point in a 64-bit byteswap since that would just be
- swapping the responses from different chips, and we are
- only interested in one chip (a representative sample) */
- return cfi32_to_cpu(map, val.x[0]);
- }
+ /*
+ * No point in a 64-bit byteswap since that would just be
+ * swapping the responses from different chips, and we are
+ * only interested in one chip (a representative sample)
+ */
+ return cfi32_to_cpu(map, val.x[0]);
}
static inline uint16_t cfi_read_query16(struct map_info *map, uint32_t addr)
{
map_word val = map_read(map, addr);
- if (map_bankwidth_is_1(map)) {
+ if (map_bankwidth_is_1(map))
return val.x[0] & 0xff;
- } else if (map_bankwidth_is_2(map)) {
+ if (map_bankwidth_is_2(map))
return cfi16_to_cpu(map, val.x[0]);
- } else {
- /* No point in a 64-bit byteswap since that would just be
- swapping the responses from different chips, and we are
- only interested in one chip (a representative sample) */
- return cfi32_to_cpu(map, val.x[0]);
- }
+ /*
+ * No point in a 64-bit byteswap since that would just be
+ * swapping the responses from different chips, and we are
+ * only interested in one chip (a representative sample)
+ */
+ return cfi32_to_cpu(map, val.x[0]);
}
void cfi_udelay(int us);
diff --git a/include/linux/node.h b/include/linux/node.h
index dfc004e4bee7..9a881c2208b3 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -16,7 +16,6 @@
#define _LINUX_NODE_H_
#include <linux/device.h>
-#include <linux/cpumask.h>
#include <linux/list.h>
/**
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 89ea1ebd975a..9f6acadfe0c8 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -620,7 +620,7 @@ enum {
*
* Structure used between LLDD and nvmet-fc layer to represent the exchange
* context for a FC-NVME FCP I/O operation (e.g. a nvme sqe, the sqe-related
- * memory transfers, and its assocated cqe transfer).
+ * memory transfers, and its associated cqe transfer).
*
* The structure is allocated by the LLDD whenever a FCP CMD IU is received
* from the FC link. The address of the structure is passed to the nvmet-fc
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b9e914e1face..5769fe6e4950 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -30,16 +30,11 @@
* - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying
* to read/write these pages might end badly. Don't touch!
* - The zero page(s)
- * - Pages not added to the page allocator when onlining a section because
- * they were excluded via the online_page_callback() or because they are
- * PG_hwpoison.
* - Pages allocated in the context of kexec/kdump (loaded kernel image,
* control pages, vmcoreinfo)
* - MMIO/DMA pages. Some architectures don't allow to ioremap pages that are
* not marked PG_reserved (as they might be in use by somebody else who does
* not respect the caching strategy).
- * - Pages part of an offline section (struct pages of offline sections should
- * not be trusted as they will be initialized when first onlined).
* - MCA pages on ia64
* - Pages holding CPU notes for POWER Firmware Assisted Dump
* - Device memory (e.g. PMEM, DAX, HMM)
@@ -616,11 +611,6 @@ PAGEFLAG_FALSE(Uncached, uncached)
PAGEFLAG(HWPoison, hwpoison, PF_ANY)
TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
-#define MAGIC_HWPOISON 0x48575053U /* HWPS */
-extern void SetPageHWPoisonTakenOff(struct page *page);
-extern void ClearPageHWPoisonTakenOff(struct page *page);
-extern bool take_page_off_buddy(struct page *page);
-extern bool put_page_back_buddy(struct page *page);
#else
PAGEFLAG_FALSE(HWPoison, hwpoison)
#define __PG_HWPOISON 0
@@ -655,27 +645,28 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
#endif
/*
- * On an anonymous page mapped into a user virtual memory area,
- * page->mapping points to its anon_vma, not to a struct address_space;
+ * On an anonymous folio mapped into a user virtual memory area,
+ * folio->mapping points to its anon_vma, not to a struct address_space;
* with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h.
*
* On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled,
* the PAGE_MAPPING_MOVABLE bit may be set along with the PAGE_MAPPING_ANON
- * bit; and then page->mapping points, not to an anon_vma, but to a private
+ * bit; and then folio->mapping points, not to an anon_vma, but to a private
* structure which KSM associates with that merged page. See ksm.h.
*
* PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable
- * page and then page->mapping points to a struct movable_operations.
+ * page and then folio->mapping points to a struct movable_operations.
*
- * Please note that, confusingly, "page_mapping" refers to the inode
- * address_space which maps the page from disk; whereas "page_mapped"
- * refers to user virtual address space into which the page is mapped.
+ * Please note that, confusingly, "folio_mapping" refers to the inode
+ * address_space which maps the folio from disk; whereas "folio_mapped"
+ * refers to user virtual address space into which the folio is mapped.
*
* For slab pages, since slab reuses the bits in struct page to store its
- * internal states, the page->mapping does not exist as such, nor do these
- * flags below. So in order to avoid testing non-existent bits, please
- * make sure that PageSlab(page) actually evaluates to false before calling
- * the following functions (e.g., PageAnon). See mm/slab.h.
+ * internal states, the folio->mapping does not exist as such, nor do
+ * these flags below. So in order to avoid testing non-existent bits,
+ * please make sure that folio_test_slab(folio) actually evaluates to
+ * false before calling the following functions (e.g., folio_test_anon).
+ * See mm/slab.h.
*/
#define PAGE_MAPPING_ANON 0x1
#define PAGE_MAPPING_MOVABLE 0x2
@@ -945,22 +936,28 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
*/
enum pagetype {
- PG_buddy = 0x00000080,
- PG_offline = 0x00000100,
- PG_table = 0x00000200,
- PG_guard = 0x00000400,
- PG_hugetlb = 0x00000800,
- PG_slab = 0x00001000,
-
- PAGE_TYPE_BASE = 0xf0000000,
- /* Reserve 0x0000007f to catch underflows of _mapcount */
- PAGE_MAPCOUNT_RESERVE = -128,
+ PG_buddy = 0x40000000,
+ PG_offline = 0x20000000,
+ PG_table = 0x10000000,
+ PG_guard = 0x08000000,
+ PG_hugetlb = 0x04000000,
+ PG_slab = 0x02000000,
+ PG_zsmalloc = 0x01000000,
+
+ PAGE_TYPE_BASE = 0x80000000,
+
+ /*
+ * Reserve 0xffff0000 - 0xfffffffe to catch _mapcount underflows and
+ * allow owners that set a type to reuse the lower 16 bit for their own
+ * purposes.
+ */
+ PAGE_MAPCOUNT_RESERVE = ~0x0000ffff,
};
#define PageType(page, flag) \
- ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
+ ((READ_ONCE(page->page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
#define folio_test_type(folio, flag) \
- ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
+ ((READ_ONCE(folio->page.page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
static inline int page_type_has_type(unsigned int page_type)
{
@@ -969,7 +966,7 @@ static inline int page_type_has_type(unsigned int page_type)
static inline int page_has_type(const struct page *page)
{
- return page_type_has_type(page->page_type);
+ return page_type_has_type(READ_ONCE(page->page_type));
}
#define FOLIO_TYPE_OPS(lname, fname) \
@@ -1018,15 +1015,22 @@ PAGE_TYPE_OPS(Buddy, buddy, buddy)
* The content of these pages is effectively stale. Such pages should not
* be touched (read/write/dump/save) except by their owner.
*
+ * When a memory block gets onlined, all pages are initialized with a
+ * refcount of 1 and PageOffline(). generic_online_page() will
+ * take care of clearing PageOffline().
+ *
* If a driver wants to allow to offline unmovable PageOffline() pages without
* putting them back to the buddy, it can do so via the memory notifier by
* decrementing the reference count in MEM_GOING_OFFLINE and incrementing the
* reference count in MEM_CANCEL_OFFLINE. When offlining, the PageOffline()
- * pages (now with a reference count of zero) are treated like free pages,
- * allowing the containing memory block to get offlined. A driver that
+ * pages (now with a reference count of zero) are treated like free (unmanaged)
+ * pages, allowing the containing memory block to get offlined. A driver that
* relies on this feature is aware that re-onlining the memory block will
- * require to re-set the pages PageOffline() and not giving them to the
- * buddy via online_page_callback_t.
+ * require not giving them to the buddy via generic_online_page().
+ *
+ * Memory offlining code will not adjust the managed page count for any
+ * PageOffline() pages, treating them like they were never exposed to the
+ * buddy using generic_online_page().
*
* There are drivers that mark a page PageOffline() and expect there won't be
* any further access to page content. PFN walkers that read content of random
@@ -1070,6 +1074,8 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
FOLIO_TEST_FLAG_FALSE(hugetlb)
#endif
+PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
+
/**
* PageHuge - Determine if the page belongs to hugetlbfs
* @page: The page to test.
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 8cd858d912c4..904c52f97284 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -81,4 +81,8 @@ static inline void page_counter_reset_watermark(struct page_counter *counter)
counter->watermark = page_counter_read(counter);
}
+void page_counter_calculate_protection(struct page_counter *root,
+ struct page_counter *counter,
+ bool recursive_protection);
+
#endif /* _LINUX_PAGE_COUNTER_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a0a026d2d244..483a191bb4df 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -208,7 +208,8 @@ enum mapping_flags {
AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */
AS_STABLE_WRITES, /* must wait for writeback before modifying
folio contents */
- AS_UNMOVABLE, /* The mapping cannot be moved, ever */
+ AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping,
+ including to move the mapping */
};
/**
@@ -309,20 +310,20 @@ static inline void mapping_clear_stable_writes(struct address_space *mapping)
clear_bit(AS_STABLE_WRITES, &mapping->flags);
}
-static inline void mapping_set_unmovable(struct address_space *mapping)
+static inline void mapping_set_inaccessible(struct address_space *mapping)
{
/*
- * It's expected unmovable mappings are also unevictable. Compaction
+ * It's expected inaccessible mappings are also unevictable. Compaction
* migrate scanner (isolate_migratepages_block()) relies on this to
* reduce page locking.
*/
set_bit(AS_UNEVICTABLE, &mapping->flags);
- set_bit(AS_UNMOVABLE, &mapping->flags);
+ set_bit(AS_INACCESSIBLE, &mapping->flags);
}
-static inline bool mapping_unmovable(struct address_space *mapping)
+static inline bool mapping_inaccessible(struct address_space *mapping)
{
- return test_bit(AS_UNMOVABLE, &mapping->flags);
+ return test_bit(AS_INACCESSIBLE, &mapping->flags);
}
static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
@@ -433,7 +434,6 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
#endif
}
-struct address_space *page_mapping(struct page *);
struct address_space *folio_mapping(struct folio *);
struct address_space *swapcache_mapping(struct folio *);
@@ -799,7 +799,7 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
-#define swapcache_index(folio) __page_file_index(&(folio)->page)
+extern pgoff_t __folio_swap_cache_index(struct folio *folio);
/**
* folio_index - File index of a folio.
@@ -814,9 +814,9 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
*/
static inline pgoff_t folio_index(struct folio *folio)
{
- if (unlikely(folio_test_swapcache(folio)))
- return swapcache_index(folio);
- return folio->index;
+ if (unlikely(folio_test_swapcache(folio)))
+ return __folio_swap_cache_index(folio);
+ return folio->index;
}
/**
@@ -939,11 +939,6 @@ static inline loff_t page_offset(struct page *page)
return ((loff_t)page->index) << PAGE_SHIFT;
}
-static inline loff_t page_file_offset(struct page *page)
-{
- return ((loff_t)page_index(page)) << PAGE_SHIFT;
-}
-
/**
* folio_pos - Returns the byte position of this folio in its file.
* @folio: The folio.
@@ -953,18 +948,6 @@ static inline loff_t folio_pos(struct folio *folio)
return page_offset(&folio->page);
}
-/**
- * folio_file_pos - Returns the byte position of this folio in its file.
- * @folio: The folio.
- *
- * This differs from folio_pos() for folios which belong to a swap file.
- * NFS is the only filesystem today which needs to use folio_file_pos().
- */
-static inline loff_t folio_file_pos(struct folio *folio)
-{
- return page_file_offset(&folio->page);
-}
-
/*
* Get the offset in PAGE_SIZE (even for hugetlb folios).
*/
@@ -1318,8 +1301,7 @@ void page_cache_sync_readahead(struct address_space *mapping,
* @mapping: address_space which holds the pagecache and I/O vectors
* @ra: file_ra_state which holds the readahead state
* @file: Used by the filesystem for authentication.
- * @folio: The folio at @index which triggered the readahead call.
- * @index: Index of first page to be read.
+ * @folio: The folio which triggered the readahead call.
* @req_count: Total number of pages being read by the caller.
*
* page_cache_async_readahead() should be called when a page is used which
@@ -1330,9 +1312,9 @@ void page_cache_sync_readahead(struct address_space *mapping,
static inline
void page_cache_async_readahead(struct address_space *mapping,
struct file_ra_state *ra, struct file *file,
- struct folio *folio, pgoff_t index, unsigned long req_count)
+ struct folio *folio, unsigned long req_count)
{
- DEFINE_READAHEAD(ractl, file, ra, mapping, index);
+ DEFINE_READAHEAD(ractl, file, ra, mapping, folio->index);
page_cache_async_ra(&ractl, folio, req_count);
}
diff --git a/include/linux/panic.h b/include/linux/panic.h
index 6717b15e798c..3130e0b5116b 100644
--- a/include/linux/panic.h
+++ b/include/linux/panic.h
@@ -77,9 +77,10 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout)
#define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1)
struct taint_flag {
- char c_true; /* character printed when tainted */
- char c_false; /* character printed when not tainted */
- bool module; /* also show as a per-module taint flag */
+ char c_true; /* character printed when tainted */
+ char c_false; /* character printed when not tainted */
+ bool module; /* also show as a per-module taint flag */
+ const char *desc; /* verbose description of the set taint flag */
};
extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT];
@@ -90,6 +91,7 @@ enum lockdep_ok {
};
extern const char *print_tainted(void);
+extern const char *print_tainted_verbose(void);
extern void add_taint(unsigned flag, enum lockdep_ok);
extern int test_taint(unsigned flag);
extern unsigned long get_taint(void);
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index acc5f96161fe..85bdf2adb760 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -197,6 +197,8 @@ struct pci_epc_features {
#define to_pci_epc(device) container_of((device), struct pci_epc, dev)
+#ifdef CONFIG_PCI_ENDPOINT
+
#define pci_epc_create(dev, ops) \
__pci_epc_create((dev), (ops), THIS_MODULE)
#define devm_pci_epc_create(dev, ops) \
@@ -226,7 +228,8 @@ void pci_epc_linkup(struct pci_epc *epc);
void pci_epc_linkdown(struct pci_epc *epc);
void pci_epc_init_notify(struct pci_epc *epc);
void pci_epc_notify_pending_init(struct pci_epc *epc, struct pci_epf *epf);
-void pci_epc_bme_notify(struct pci_epc *epc);
+void pci_epc_deinit_notify(struct pci_epc *epc);
+void pci_epc_bus_master_enable_notify(struct pci_epc *epc);
void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf,
enum pci_epc_interface_type type);
int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
@@ -272,4 +275,14 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc,
phys_addr_t *phys_addr, size_t size);
void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr,
void __iomem *virt_addr, size_t size);
+
+#else
+static inline void pci_epc_init_notify(struct pci_epc *epc)
+{
+}
+
+static inline void pci_epc_deinit_notify(struct pci_epc *epc)
+{
+}
+#endif /* CONFIG_PCI_ENDPOINT */
#endif /* __LINUX_PCI_EPC_H */
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index adee6a1b35db..0639d4dc8986 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -70,16 +70,18 @@ struct pci_epf_ops {
/**
* struct pci_epc_event_ops - Callbacks for capturing the EPC events
- * @core_init: Callback for the EPC initialization complete event
+ * @epc_init: Callback for the EPC initialization complete event
+ * @epc_deinit: Callback for the EPC deinitialization event
* @link_up: Callback for the EPC link up event
* @link_down: Callback for the EPC link down event
- * @bme: Callback for the EPC BME (Bus Master Enable) event
+ * @bus_master_enable: Callback for the EPC Bus Master Enable event
*/
struct pci_epc_event_ops {
- int (*core_init)(struct pci_epf *epf);
+ int (*epc_init)(struct pci_epf *epf);
+ void (*epc_deinit)(struct pci_epf *epf);
int (*link_up)(struct pci_epf *epf);
int (*link_down)(struct pci_epf *epf);
- int (*bme)(struct pci_epf *epf);
+ int (*bus_master_enable)(struct pci_epf *epf);
};
/**
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cafc5ab1cbcb..9e36b6c1810e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -367,10 +367,11 @@ struct pci_dev {
this is D0-D3, D0 being fully
functional, and D3 being off. */
u8 pm_cap; /* PM capability offset */
- unsigned int imm_ready:1; /* Supports Immediate Readiness */
unsigned int pme_support:5; /* Bitmask of states from which PME#
can be generated */
unsigned int pme_poll:1; /* Poll device's PME status bit */
+ unsigned int pinned:1; /* Whether this dev is pinned */
+ unsigned int imm_ready:1; /* Supports Immediate Readiness */
unsigned int d1_support:1; /* Low power state D1 is supported */
unsigned int d2_support:1; /* Low power state D2 is supported */
unsigned int no_d1d2:1; /* D1 and D2 are forbidden */
@@ -1549,10 +1550,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
struct resource *res, resource_size_t size,
resource_size_t align, resource_size_t min,
unsigned long type_mask,
- resource_size_t (*alignf)(void *,
- const struct resource *,
- resource_size_t,
- resource_size_t),
+ resource_alignf alignf,
void *alignf_data);
@@ -2300,6 +2298,8 @@ int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name);
int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask,
const char *name);
void pcim_iounmap_regions(struct pci_dev *pdev, int mask);
+void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar,
+ unsigned long offset, unsigned long len);
extern int pci_pci_problems;
#define PCIPCI_FAIL 1 /* No PCI PCI DMA */
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index ec3573119923..8efce7414fad 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -475,6 +475,12 @@ do { \
raw_cpu_cmpxchg(pcp, oval, nval); \
})
+#define __this_cpu_try_cmpxchg(pcp, ovalp, nval) \
+({ \
+ __this_cpu_preempt_check("try_cmpxchg"); \
+ raw_cpu_try_cmpxchg(pcp, ovalp, nval); \
+})
+
#define __this_cpu_sub(pcp, val) __this_cpu_add(pcp, -(typeof(pcp))(val))
#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1)
#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1)
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 03053de557cf..4b2047b78b67 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -6,7 +6,6 @@
#include <linux/mmdebug.h>
#include <linux/preempt.h>
#include <linux/smp.h>
-#include <linux/cpumask.h>
#include <linux/pfn.h>
#include <linux/init.h>
#include <linux/cleanup.h>
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 9cacadbd61f8..18cd0c0c73d9 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -15,7 +15,7 @@ extern struct page_ext_operations page_alloc_tagging_ops;
static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext)
{
- return (void *)page_ext + page_alloc_tagging_ops.offset;
+ return (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops);
}
static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref)
@@ -71,6 +71,7 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
{
int i;
+ struct page_ext *first_page_ext;
struct page_ext *page_ext;
union codetag_ref *ref;
struct alloc_tag *tag;
@@ -78,7 +79,7 @@ static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
if (!mem_alloc_profiling_enabled())
return;
- page_ext = page_ext_get(page);
+ first_page_ext = page_ext = page_ext_get(page);
if (unlikely(!page_ext))
return;
@@ -94,7 +95,7 @@ static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
page_ext = page_ext_next(page_ext);
}
out:
- page_ext_put(page_ext);
+ page_ext_put(first_page_ext);
}
static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 18019f037bae..2a6a3cccfc36 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -729,13 +729,18 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
* fault. This function updates TLB only, do nothing with cache or others.
* It is the difference with function update_mmu_cache.
*/
-#ifndef __HAVE_ARCH_UPDATE_MMU_TLB
+#ifndef update_mmu_tlb_range
+static inline void update_mmu_tlb_range(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep, unsigned int nr)
+{
+}
+#endif
+
static inline void update_mmu_tlb(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
+ update_mmu_tlb_range(vma, address, ptep, 1);
}
-#define __HAVE_ARCH_UPDATE_MMU_TLB
-#endif
/*
* Some architectures may be able to avoid expensive synchronization
@@ -1084,6 +1089,15 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
})
#ifndef __HAVE_ARCH_DO_SWAP_PAGE
+static inline void arch_do_swap_page_nr(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ pte_t pte, pte_t oldpte,
+ int nr)
+{
+
+}
+#else
/*
* Some architectures support metadata associated with a page. When a
* page is being swapped out, this metadata must be saved so it can be
@@ -1092,12 +1106,17 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
* page as metadata for the page. arch_do_swap_page() can restore this
* metadata when a page is swapped back in.
*/
-static inline void arch_do_swap_page(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long addr,
- pte_t pte, pte_t oldpte)
-{
-
+static inline void arch_do_swap_page_nr(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ pte_t pte, pte_t oldpte,
+ int nr)
+{
+ for (int i = 0; i < nr; i++) {
+ arch_do_swap_page(vma->vm_mm, vma, addr + i * PAGE_SIZE,
+ pte_advance_pfn(pte, i),
+ pte_advance_pfn(oldpte, i));
+ }
}
#endif
@@ -1888,9 +1907,12 @@ typedef unsigned int pgtbl_mod_mask;
#ifndef pmd_leaf_size
#define pmd_leaf_size(x) PMD_SIZE
#endif
+#ifndef __pte_leaf_size
#ifndef pte_leaf_size
#define pte_leaf_size(x) PAGE_SIZE
#endif
+#define __pte_leaf_size(x,y) pte_leaf_size(y)
+#endif
/*
* We always define pmd_pfn for all archs as it's used in lots of generic
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 015751b64746..858c8e7851fb 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -16,7 +16,7 @@
#include <linux/of.h>
#include <linux/notifier.h>
#include <linux/spinlock.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
#include <linux/time64.h>
/*
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 9c1a035af97c..331a9a996fa8 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -49,12 +49,6 @@
/********** arch/$ARCH/mm/init.c **********/
#define POISON_FREE_INITMEM 0xcc
-/********** arch/ia64/hp/common/sba_iommu.c **********/
-/*
- * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a
- * value of "SBAIOMMU POISON\0" for spill-over poisoning.
- */
-
/********** fs/jbd/journal.c **********/
#define JBD_POISON_FREE 0x5b
#define JBD2_POISON_FREE 0x5c
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index c852cc882501..72dc7e45c90c 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -309,18 +309,11 @@ struct power_supply {
#endif
#ifdef CONFIG_LEDS_TRIGGERS
- struct led_trigger *charging_full_trig;
- char *charging_full_trig_name;
+ struct led_trigger *trig;
struct led_trigger *charging_trig;
- char *charging_trig_name;
struct led_trigger *full_trig;
- char *full_trig_name;
- struct led_trigger *online_trig;
- char *online_trig_name;
struct led_trigger *charging_blink_full_solid_trig;
- char *charging_blink_full_solid_trig_name;
struct led_trigger *charging_orange_full_green_trig;
- char *charging_orange_full_green_trig_name;
#endif
};
@@ -743,7 +736,7 @@ struct power_supply_battery_info {
int overvoltage_limit_uv;
int constant_charge_current_max_ua;
int constant_charge_voltage_max_uv;
- struct power_supply_maintenance_charge_table *maintenance_charge;
+ const struct power_supply_maintenance_charge_table *maintenance_charge;
int maintenance_charge_size;
int alert_low_temp_charge_current_ua;
int alert_low_temp_charge_voltage_uv;
@@ -762,9 +755,9 @@ struct power_supply_battery_info {
int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX];
struct power_supply_resistance_temp_table *resist_table;
int resist_table_size;
- struct power_supply_vbat_ri_table *vbat2ri_discharging;
+ const struct power_supply_vbat_ri_table *vbat2ri_discharging;
int vbat2ri_discharging_size;
- struct power_supply_vbat_ri_table *vbat2ri_charging;
+ const struct power_supply_vbat_ri_table *vbat2ri_charging;
int vbat2ri_charging_size;
int bti_resistance_ohm;
int bti_resistance_tolerance;
@@ -817,7 +810,7 @@ power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table
int table_len, int temp);
extern int power_supply_vbat2ri(struct power_supply_battery_info *info,
int vbat_uv, bool charging);
-extern struct power_supply_maintenance_charge_table *
+extern const struct power_supply_maintenance_charge_table *
power_supply_get_maintenance_charging_setting(struct power_supply_battery_info *info, int index);
extern bool power_supply_battery_bti_in_range(struct power_supply_battery_info *info,
int resistance);
@@ -831,7 +824,7 @@ extern int power_supply_set_battery_charged(struct power_supply *psy);
static inline bool
power_supply_supports_maintenance_charging(struct power_supply_battery_info *info)
{
- struct power_supply_maintenance_charge_table *mt;
+ const struct power_supply_maintenance_charge_table *mt;
mt = power_supply_get_maintenance_charging_setting(info, 0);
diff --git a/include/linux/profile.h b/include/linux/profile.h
index 04ae5ebcb637..2fb487f61d12 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -4,7 +4,6 @@
#include <linux/kernel.h>
#include <linux/init.h>
-#include <linux/cpumask.h>
#include <linux/cache.h>
#include <asm/errno.h>
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index 3705c2044fc0..903ddfea8585 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -658,6 +658,7 @@ struct sev_data_snp_launch_update {
* @id_auth_paddr: system physical address of ID block authentication structure
* @id_block_en: indicates whether ID block is present
* @auth_key_en: indicates whether author key is present in authentication structure
+ * @vcek_disabled: indicates whether use of VCEK is allowed for attestation reports
* @rsvd: reserved
* @host_data: host-supplied data for guest, not interpreted by firmware
*/
@@ -667,7 +668,8 @@ struct sev_data_snp_launch_finish {
u64 id_auth_paddr;
u8 id_block_en:1;
u8 auth_key_en:1;
- u64 rsvd:62;
+ u8 vcek_disabled:1;
+ u64 rsvd:61;
u8 host_data[32];
} __packed;
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index be450a3477be..13f6f00aecf9 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -29,7 +29,6 @@
#include <linux/lockdep.h>
#include <linux/cleanup.h>
#include <asm/processor.h>
-#include <linux/cpumask.h>
#include <linux/context_tracking_irq.h>
#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 7229b9baf20d..0978c64f49d8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -200,6 +200,9 @@ static inline void __folio_rmap_sanity_checks(struct folio *folio,
/* hugetlb folios are handled separately. */
VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
+ /* When (un)mapping zeropages, we should never touch ref+mapcount. */
+ VM_WARN_ON_FOLIO(is_zero_folio(folio), folio);
+
/*
* TODO: we get driver-allocated folios that have nothing to do with
* the rmap using vm_insert_page(); therefore, we cannot assume that
@@ -241,7 +244,7 @@ void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages,
void folio_add_anon_rmap_pmd(struct folio *, struct page *,
struct vm_area_struct *, unsigned long address, rmap_t flags);
void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
- unsigned long address);
+ unsigned long address, rmap_t flags);
void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
struct vm_area_struct *);
#define folio_add_file_rmap_pte(folio, page, vma) \
@@ -681,16 +684,6 @@ struct page_vma_mapped_walk {
unsigned int flags;
};
-#define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags) \
- struct page_vma_mapped_walk name = { \
- .pfn = page_to_pfn(_page), \
- .nr_pages = compound_nr(_page), \
- .pgoff = page_to_pgoff(_page), \
- .vma = _vma, \
- .address = _address, \
- .flags = _flags, \
- }
-
#define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags) \
struct page_vma_mapped_walk name = { \
.pfn = folio_pfn(_folio), \
@@ -710,6 +703,30 @@ static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
spin_unlock(pvmw->ptl);
}
+/**
+ * page_vma_mapped_walk_restart - Restart the page table walk.
+ * @pvmw: Pointer to struct page_vma_mapped_walk.
+ *
+ * It restarts the page table walk when changes occur in the page
+ * table, such as splitting a PMD. Ensures that the PTL held during
+ * the previous walk is released and resets the state to allow for
+ * a new walk starting at the current address stored in pvmw->address.
+ */
+static inline void
+page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw)
+{
+ WARN_ON_ONCE(!pvmw->pmd && !pvmw->pte);
+
+ if (likely(pvmw->ptl))
+ spin_unlock(pvmw->ptl);
+ else
+ WARN_ON_ONCE(1);
+
+ pvmw->ptl = NULL;
+ pvmw->pmd = NULL;
+ pvmw->pte = NULL;
+}
+
bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
/*
@@ -730,8 +747,6 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
-unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
-
/*
* rmap_walk_control: To control rmap traversing for specific needs
*
@@ -787,8 +802,4 @@ static inline int folio_mkclean(struct folio *folio)
}
#endif /* CONFIG_MMU */
-static inline int page_mkclean(struct page *page)
-{
- return folio_mkclean(page_folio(page));
-}
#endif /* _LINUX_RMAP_H */
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index d662cf136021..c09cdcc99471 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -36,6 +36,11 @@ struct sbitmap_word {
* @cleared: word holding cleared bits
*/
unsigned long cleared ____cacheline_aligned_in_smp;
+
+ /**
+ * @swap_lock: serializes simultaneous updates of ->word and ->cleared
+ */
+ spinlock_t swap_lock;
} ____cacheline_aligned_in_smp;
/**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e330ee0205c0..f8d150343d42 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -13,7 +13,7 @@
#include <asm/processor.h>
#include <linux/thread_info.h>
#include <linux/preempt.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
#include <linux/cache.h>
#include <linux/irqflags_types.h>
@@ -942,7 +942,7 @@ struct task_struct {
#ifndef TIF_RESTORE_SIGMASK
unsigned restore_sigmask:1;
#endif
-#ifdef CONFIG_MEMCG
+#ifdef CONFIG_MEMCG_V1
unsigned in_user_fault:1;
#endif
#ifdef CONFIG_LRU_GEN
@@ -1458,17 +1458,18 @@ struct task_struct {
unsigned int kcov_softirq;
#endif
-#ifdef CONFIG_MEMCG
+#ifdef CONFIG_MEMCG_V1
struct mem_cgroup *memcg_in_oom;
+#endif
+#ifdef CONFIG_MEMCG
/* Number of pages to reclaim on returning to userland: */
unsigned int memcg_nr_pages_over_high;
/* Used by memcontrol for targeted memcg charge: */
struct mem_cgroup *active_memcg;
-#endif
-#ifdef CONFIG_MEMCG_KMEM
+ /* Cache for current->cgroups->memcg->objcg lookups: */
struct obj_cgroup *objcg;
#endif
@@ -1617,7 +1618,7 @@ static inline char task_index_to_char(unsigned int state)
{
static const char state_char[] = "RSDTtXZPI";
- BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1);
+ BUILD_BUG_ON(TASK_REPORT_MAX * 2 != 1 << (sizeof(state_char) - 1));
return state_char[state];
}
@@ -1791,7 +1792,8 @@ static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpuma
}
static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
- if (!cpumask_test_cpu(0, new_mask))
+ /* Opencoded cpumask_test_cpu(0, new_mask) to avoid dependency on cpumask.h */
+ if ((*cpumask_bits(new_mask) & 1) == 0)
return -EINVAL;
return 0;
}
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 8bd4fda6e027..2fb266ea69fa 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -7,7 +7,6 @@
#include <linux/string_helpers.h>
#include <linux/bug.h>
#include <linux/mutex.h>
-#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/fs.h>
#include <linux/cred.h>
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 3fb18f7eb73e..1d06b1e5408a 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -113,12 +113,21 @@ int shmem_unuse(unsigned int type);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
struct mm_struct *mm, unsigned long vm_flags);
+unsigned long shmem_allowable_huge_orders(struct inode *inode,
+ struct vm_area_struct *vma, pgoff_t index,
+ bool global_huge);
#else
static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
struct mm_struct *mm, unsigned long vm_flags)
{
return false;
}
+static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
+ struct vm_area_struct *vma, pgoff_t index,
+ bool global_huge)
+{
+ return 0;
+}
#endif
#ifdef CONFIG_SHMEM
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9c29bdd5596d..29c3ea5b6e93 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3429,6 +3429,10 @@ static inline struct page *__dev_alloc_pages_noprof(gfp_t gfp_mask,
}
#define __dev_alloc_pages(...) alloc_hooks(__dev_alloc_pages_noprof(__VA_ARGS__))
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag).
+ */
#define dev_alloc_pages(_order) __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, _order)
/**
@@ -3445,6 +3449,10 @@ static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask)
}
#define __dev_alloc_page(...) alloc_hooks(__dev_alloc_page_noprof(__VA_ARGS__))
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag).
+ */
#define dev_alloc_page() dev_alloc_pages(0)
/**
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index c9efda9df285..d9b03e0746e7 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -414,6 +414,11 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg);
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
+ */
#define sk_psock_init_link() \
((struct sk_psock_link *)kzalloc(sizeof(struct sk_psock_link), \
GFP_ATOMIC | __GFP_NOWARN))
diff --git a/include/linux/slab.h b/include/linux/slab.h
index d99afce36098..eb2bf4629157 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -41,7 +41,7 @@ enum _slab_flag_bits {
#ifdef CONFIG_FAILSLAB
_SLAB_FAILSLAB,
#endif
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
_SLAB_ACCOUNT,
#endif
#ifdef CONFIG_KASAN_GENERIC
@@ -171,7 +171,7 @@ enum _slab_flag_bits {
# define SLAB_FAILSLAB __SLAB_FLAG_UNUSED
#endif
/* Account to memcg */
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT)
#else
# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED
@@ -407,7 +407,7 @@ enum kmalloc_cache_type {
#ifndef CONFIG_ZONE_DMA
KMALLOC_DMA = KMALLOC_NORMAL,
#endif
-#ifndef CONFIG_MEMCG_KMEM
+#ifndef CONFIG_MEMCG
KMALLOC_CGROUP = KMALLOC_NORMAL,
#endif
KMALLOC_RANDOM_START = KMALLOC_NORMAL,
@@ -420,7 +420,7 @@ enum kmalloc_cache_type {
#ifdef CONFIG_ZONE_DMA
KMALLOC_DMA,
#endif
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
KMALLOC_CGROUP,
#endif
NR_KMALLOC_TYPES
@@ -436,7 +436,7 @@ extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];
#define KMALLOC_NOT_NORMAL_BITS \
(__GFP_RECLAIMABLE | \
(IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \
- (IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0))
+ (IS_ENABLED(CONFIG_MEMCG) ? __GFP_ACCOUNT : 0))
extern unsigned long random_kmalloc_seed;
@@ -464,7 +464,7 @@ static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigne
*/
if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA))
return KMALLOC_DMA;
- if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || (flags & __GFP_RECLAIMABLE))
+ if (!IS_ENABLED(CONFIG_MEMCG) || (flags & __GFP_RECLAIMABLE))
return KMALLOC_RECLAIM;
else
return KMALLOC_CGROUP;
diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h
index 8c9ca857ccf6..c06d17599ae7 100644
--- a/include/linux/soc/apple/rtkit.h
+++ b/include/linux/soc/apple/rtkit.h
@@ -69,7 +69,7 @@ struct apple_rtkit;
* Initializes the internal state required to handle RTKit. This
* should usually be called within _probe.
*
- * @dev: Pointer to the device node this coprocessor is assocated with
+ * @dev: Pointer to the device node this coprocessor is associated with
* @cookie: opaque cookie passed to all functions defined in rtkit_ops
* @mbox_name: mailbox name used to communicate with the co-processor
* @mbox_idx: mailbox index to be used if mbox_name is NULL
@@ -83,7 +83,7 @@ struct apple_rtkit *devm_apple_rtkit_init(struct device *dev, void *cookie,
* Non-devm version of devm_apple_rtkit_init. Must be freed with
* apple_rtkit_free.
*
- * @dev: Pointer to the device node this coprocessor is assocated with
+ * @dev: Pointer to the device node this coprocessor is associated with
* @cookie: opaque cookie passed to all functions defined in rtkit_ops
* @mbox_name: mailbox name used to communicate with the co-processor
* @mbox_idx: mailbox index to be used if mbox_name is NULL
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 6f6cb5fc1242..835bbb2d1f88 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -378,6 +378,20 @@ static inline void smp_mb__after_srcu_read_unlock(void)
/* __srcu_read_unlock has smp_mb() internally so nothing to do here. */
}
+/**
+ * smp_mb__after_srcu_read_lock - ensure full ordering after srcu_read_lock
+ *
+ * Converts the preceding srcu_read_lock into a two-way memory barrier.
+ *
+ * Call this after srcu_read_lock, to guarantee that all memory operations
+ * that occur after smp_mb__after_srcu_read_lock will appear to happen after
+ * the preceding srcu_read_lock.
+ */
+static inline void smp_mb__after_srcu_read_lock(void)
+{
+ /* __srcu_read_lock has smp_mb() internally so nothing to do here. */
+}
+
DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct,
_T->idx = srcu_read_lock(_T->lock),
srcu_read_unlock(_T->lock, _T->idx),
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index ea7a74ea7389..3132262a404d 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -3,7 +3,7 @@
#define _LINUX_STOP_MACHINE
#include <linux/cpu.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
#include <linux/smp.h>
#include <linux/list.h>
diff --git a/include/linux/swap.h b/include/linux/swap.h
index e685e93ba354..ba7ea95d1c57 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -405,10 +405,13 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
#define MEMCG_RECLAIM_PROACTIVE (1 << 2)
+#define MIN_SWAPPINESS 0
+#define MAX_SWAPPINESS 200
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- unsigned int reclaim_options);
+ unsigned int reclaim_options,
+ int *swappiness);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
@@ -478,7 +481,7 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
-extern void swap_free(swp_entry_t);
+extern void swap_free_nr(swp_entry_t entry, int nr_pages);
extern void swapcache_free_entries(swp_entry_t *entries, int n);
extern void free_swap_and_cache_nr(swp_entry_t entry, int nr);
int swap_type_of(dev_t device, sector_t offset);
@@ -556,7 +559,7 @@ static inline int swapcache_prepare(swp_entry_t swp)
return 0;
}
-static inline void swap_free(swp_entry_t swp)
+static inline void swap_free_nr(swp_entry_t entry, int nr_pages)
{
}
@@ -604,6 +607,11 @@ static inline void free_swap_and_cache(swp_entry_t entry)
free_swap_and_cache_nr(entry, 1);
}
+static inline void swap_free(swp_entry_t entry)
+{
+ swap_free_nr(entry, 1);
+}
+
#ifdef CONFIG_MEMCG
static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
{
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index a5c560a2f8c2..cb468e418ea1 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -334,7 +334,7 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
unsigned long address);
-extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte);
+extern void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte);
#else /* CONFIG_MIGRATION */
static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
{
@@ -359,7 +359,7 @@ static inline int is_migration_entry(swp_entry_t swp)
static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
unsigned long address) { }
static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
- pte_t *pte) { }
+ unsigned long addr, pte_t *pte) { }
static inline int is_writable_migration_entry(swp_entry_t entry)
{
return 0;
diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h
index 8d8fac1626bd..cdb58d61c152 100644
--- a/include/linux/switchtec.h
+++ b/include/linux/switchtec.h
@@ -521,6 +521,6 @@ static inline struct switchtec_dev *to_stdev(struct device *dev)
return container_of(dev, struct switchtec_dev, dev);
}
-extern struct class *switchtec_class;
+extern const struct class switchtec_class;
#endif
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 1541454da03e..c2e979f82f8d 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -14,7 +14,7 @@
#include <linux/cache.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
#include <linux/seqlock.h>
#include <linux/lockdep.h>
#include <linux/completion.h>
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 689b6d71590e..6be396bb4297 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -16,7 +16,6 @@
#include <linux/srcu.h>
#include <linux/errno.h>
#include <linux/types.h>
-#include <linux/cpumask.h>
#include <linux/rcupdate.h>
#include <linux/tracepoint-defs.h>
#include <linux/static_call.h>
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 735eae6e272c..16b0cfa80502 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -624,4 +624,8 @@ static inline void lruvec_stat_sub_folio(struct folio *folio,
{
lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
}
+
+void __meminit mod_node_early_perpage_metadata(int nid, long delta);
+void __meminit store_early_perpage_metadata(void);
+
#endif /* _LINUX_VMSTAT_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index d9968bfc8eac..4eb8f9563136 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -12,7 +12,7 @@
#include <linux/lockdep.h>
#include <linux/threads.h>
#include <linux/atomic.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
#include <linux/rcupdate.h>
#include <linux/workqueue_types.h>
diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 2a85b941db97..6cecb4a4f68b 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -35,7 +35,8 @@ void zswap_swapoff(int type);
void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
void zswap_lruvec_state_init(struct lruvec *lruvec);
void zswap_folio_swapin(struct folio *folio);
-bool is_zswap_enabled(void);
+bool zswap_is_enabled(void);
+bool zswap_never_enabled(void);
#else
struct zswap_lruvec_state {};
@@ -60,11 +61,16 @@ static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {}
static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {}
static inline void zswap_folio_swapin(struct folio *folio) {}
-static inline bool is_zswap_enabled(void)
+static inline bool zswap_is_enabled(void)
{
return false;
}
+static inline bool zswap_never_enabled(void)
+{
+ return true;
+}
+
#endif
#endif /* _LINUX_ZSWAP_H */
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 7c47151d5c72..e5f7ee0864e7 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -356,10 +356,9 @@ TRACE_EVENT(aer_event,
#define MF_PAGE_TYPE \
EM ( MF_MSG_KERNEL, "reserved kernel page" ) \
EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \
- EM ( MF_MSG_SLAB, "kernel slab page" ) \
- EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \
EM ( MF_MSG_HUGE, "huge page" ) \
EM ( MF_MSG_FREE_HUGE, "free huge page" ) \
+ EM ( MF_MSG_GET_HWPOISON, "get hwpoison page" ) \
EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \
EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \
EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \
@@ -373,6 +372,7 @@ TRACE_EVENT(aer_event,
EM ( MF_MSG_BUDDY, "free buddy page" ) \
EM ( MF_MSG_DAX, "dax page" ) \
EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \
+ EM ( MF_MSG_ALREADY_POISONED, "already poisoned" ) \
EMe ( MF_MSG_UNKNOWN, "unknown page" )
/*
diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index 5ccc0d91b220..b108176deb22 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
// Copyright (c) 2024 Takashi Sakamoto
+#undef TRACE_SYSTEM
#define TRACE_SYSTEM firewire
#if !defined(_FIREWIRE_TRACE_EVENT_H) || defined(TRACE_HEADER_MULTI_READ)
@@ -11,7 +12,7 @@
#include <linux/firewire-constants.h>
-#include "../../../drivers/firewire/packet-header-definitions.h"
+// Some macros are defined in 'drivers/firewire/packet-header-definitions.h'.
// The content of TP_printk field is preprocessed, then put to the module binary.
#define ASYNC_HEADER_GET_DESTINATION(header) \
@@ -366,6 +367,544 @@ TRACE_EVENT(bus_reset_handle,
)
);
+// Some macros are defined in 'drivers/firewire/phy-packet-definitions.h'.
+
+// The content of TP_printk field is preprocessed, then put to the module binary.
+
+#define PHY_PACKET_SELF_ID_GET_PHY_ID(quads) \
+ ((((const u32 *)quads)[0] & SELF_ID_PHY_ID_MASK) >> SELF_ID_PHY_ID_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_LINK_ACTIVE(quads) \
+ ((((const u32 *)quads)[0] & SELF_ID_ZERO_LINK_ACTIVE_MASK) >> SELF_ID_ZERO_LINK_ACTIVE_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_GAP_COUNT(quads) \
+ ((((const u32 *)quads)[0] & SELF_ID_ZERO_GAP_COUNT_MASK) >> SELF_ID_ZERO_GAP_COUNT_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_SCODE(quads) \
+ ((((const u32 *)quads)[0] & SELF_ID_ZERO_SCODE_MASK) >> SELF_ID_ZERO_SCODE_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_CONTENDER(quads) \
+ ((((const u32 *)quads)[0] & SELF_ID_ZERO_CONTENDER_MASK) >> SELF_ID_ZERO_CONTENDER_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_POWER_CLASS(quads) \
+ ((((const u32 *)quads)[0] & SELF_ID_ZERO_POWER_CLASS_MASK) >> SELF_ID_ZERO_POWER_CLASS_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_INITIATED_RESET(quads) \
+ ((((const u32 *)quads)[0] & SELF_ID_ZERO_INITIATED_RESET_MASK) >> SELF_ID_ZERO_INITIATED_RESET_SHIFT)
+
+TRACE_EVENT(self_id_sequence,
+ TP_PROTO(unsigned int card_index, const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation),
+ TP_ARGS(card_index, self_id_sequence, quadlet_count, generation),
+ TP_STRUCT__entry(
+ __field(u8, card_index)
+ __field(u8, generation)
+ __dynamic_array(u8, port_status, self_id_sequence_get_port_capacity(quadlet_count))
+ __dynamic_array(u32, self_id_sequence, quadlet_count)
+ ),
+ TP_fast_assign(
+ __entry->card_index = card_index;
+ __entry->generation = generation;
+ {
+ u8 *port_status = __get_dynamic_array(port_status);
+ unsigned int port_index;
+
+ for (port_index = 0; port_index < __get_dynamic_array_len(port_status); ++port_index) {
+ port_status[port_index] =
+ self_id_sequence_get_port_status(self_id_sequence,
+ quadlet_count, port_index);
+ }
+ }
+ memcpy(__get_dynamic_array(self_id_sequence), self_id_sequence,
+ __get_dynamic_array_len(self_id_sequence));
+ ),
+ TP_printk(
+ "card_index=%u generation=%u phy_id=0x%02x link_active=%s gap_count=%u scode=%u contender=%s power_class=%u initiated_reset=%s port_status=%s self_id_sequence=%s",
+ __entry->card_index,
+ __entry->generation,
+ PHY_PACKET_SELF_ID_GET_PHY_ID(__get_dynamic_array(self_id_sequence)),
+ PHY_PACKET_SELF_ID_GET_LINK_ACTIVE(__get_dynamic_array(self_id_sequence)) ? "true" : "false",
+ PHY_PACKET_SELF_ID_GET_GAP_COUNT(__get_dynamic_array(self_id_sequence)),
+ PHY_PACKET_SELF_ID_GET_SCODE(__get_dynamic_array(self_id_sequence)),
+ PHY_PACKET_SELF_ID_GET_CONTENDER(__get_dynamic_array(self_id_sequence)) ? "true" : "false",
+ PHY_PACKET_SELF_ID_GET_POWER_CLASS(__get_dynamic_array(self_id_sequence)),
+ PHY_PACKET_SELF_ID_GET_INITIATED_RESET(__get_dynamic_array(self_id_sequence)) ? "true" : "false",
+ __print_array(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status), 1),
+ __print_array(__get_dynamic_array(self_id_sequence),
+ __get_dynamic_array_len(self_id_sequence) / QUADLET_SIZE, QUADLET_SIZE)
+ )
+);
+
+#undef PHY_PACKET_SELF_ID_GET_PHY_ID
+#undef PHY_PACKET_SELF_ID_GET_LINK_ACTIVE
+#undef PHY_PACKET_SELF_ID_GET_GAP_COUNT
+#undef PHY_PACKET_SELF_ID_GET_SCODE
+#undef PHY_PACKET_SELF_ID_GET_CONTENDER
+#undef PHY_PACKET_SELF_ID_GET_POWER_CLASS
+#undef PHY_PACKET_SELF_ID_GET_INITIATED_RESET
+
+TRACE_EVENT_CONDITION(isoc_outbound_allocate,
+ TP_PROTO(const struct fw_iso_context *ctx, unsigned int channel, unsigned int scode),
+ TP_ARGS(ctx, channel, scode),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ __field(u8, channel)
+ __field(u8, scode)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ __entry->channel = channel;
+ __entry->scode = scode;
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u channel=%u scode=%u",
+ __entry->context,
+ __entry->card_index,
+ __entry->channel,
+ __entry->scode
+ )
+);
+
+TRACE_EVENT_CONDITION(isoc_inbound_single_allocate,
+ TP_PROTO(const struct fw_iso_context *ctx, unsigned int channel, unsigned int header_size),
+ TP_ARGS(ctx, channel, header_size),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ __field(u8, channel)
+ __field(u8, header_size)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ __entry->channel = channel;
+ __entry->header_size = header_size;
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u channel=%u header_size=%u",
+ __entry->context,
+ __entry->card_index,
+ __entry->channel,
+ __entry->header_size
+ )
+);
+
+TRACE_EVENT_CONDITION(isoc_inbound_multiple_allocate,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u",
+ __entry->context,
+ __entry->card_index
+ )
+);
+
+DECLARE_EVENT_CLASS(isoc_destroy_template,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u",
+ __entry->context,
+ __entry->card_index
+ )
+)
+
+DEFINE_EVENT_CONDITION(isoc_destroy_template, isoc_outbound_destroy,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT)
+);
+
+DEFINE_EVENT_CONDITION(isoc_destroy_template, isoc_inbound_single_destroy,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+DEFINE_EVENT_CONDITION(isoc_destroy_template, isoc_inbound_multiple_destroy,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+);
+
+TRACE_EVENT(isoc_inbound_multiple_channels,
+ TP_PROTO(const struct fw_iso_context *ctx, u64 channels),
+ TP_ARGS(ctx, channels),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ __field(u64, channels)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ __entry->channels = channels;
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u channels=0x%016llx",
+ __entry->context,
+ __entry->card_index,
+ __entry->channels
+ )
+);
+
+TRACE_EVENT_CONDITION(isoc_outbound_start,
+ TP_PROTO(const struct fw_iso_context *ctx, int cycle_match),
+ TP_ARGS(ctx, cycle_match),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ __field(bool, cycle_match)
+ __field(u16, cycle)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ __entry->cycle_match = cycle_match < 0 ? false : true;
+ __entry->cycle = __entry->cycle_match ? (u16)cycle_match : 0;
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u cycle_match=%s cycle=0x%04x",
+ __entry->context,
+ __entry->card_index,
+ __entry->cycle_match ? "true" : "false",
+ __entry->cycle
+ )
+);
+
+DECLARE_EVENT_CLASS(isoc_inbound_start_template,
+ TP_PROTO(const struct fw_iso_context *ctx, int cycle_match, unsigned int sync, unsigned int tags),
+ TP_ARGS(ctx, cycle_match, sync, tags),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ __field(bool, cycle_match)
+ __field(u16, cycle)
+ __field(u8, sync)
+ __field(u8, tags)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ __entry->cycle_match = cycle_match < 0 ? false : true;
+ __entry->cycle = __entry->cycle_match ? (u16)cycle_match : 0;
+ __entry->sync = sync;
+ __entry->tags = tags;
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u cycle_match=%s cycle=0x%04x sync=%u tags=%s",
+ __entry->context,
+ __entry->card_index,
+ __entry->cycle_match ? "true" : "false",
+ __entry->cycle,
+ __entry->sync,
+ __print_flags(__entry->tags, "|",
+ { FW_ISO_CONTEXT_MATCH_TAG0, "0" },
+ { FW_ISO_CONTEXT_MATCH_TAG1, "1" },
+ { FW_ISO_CONTEXT_MATCH_TAG2, "2" },
+ { FW_ISO_CONTEXT_MATCH_TAG3, "3" }
+ )
+ )
+);
+
+DEFINE_EVENT_CONDITION(isoc_inbound_start_template, isoc_inbound_single_start,
+ TP_PROTO(const struct fw_iso_context *ctx, int cycle_match, unsigned int sync, unsigned int tags),
+ TP_ARGS(ctx, cycle_match, sync, tags),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+DEFINE_EVENT_CONDITION(isoc_inbound_start_template, isoc_inbound_multiple_start,
+ TP_PROTO(const struct fw_iso_context *ctx, int cycle_match, unsigned int sync, unsigned int tags),
+ TP_ARGS(ctx, cycle_match, sync, tags),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+);
+
+DECLARE_EVENT_CLASS(isoc_stop_template,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u",
+ __entry->context,
+ __entry->card_index
+ )
+)
+
+DEFINE_EVENT_CONDITION(isoc_stop_template, isoc_outbound_stop,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT)
+);
+
+DEFINE_EVENT_CONDITION(isoc_stop_template, isoc_inbound_single_stop,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+DEFINE_EVENT_CONDITION(isoc_stop_template, isoc_inbound_multiple_stop,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+);
+
+DECLARE_EVENT_CLASS(isoc_flush_template,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u",
+ __entry->context,
+ __entry->card_index
+ )
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_template, isoc_outbound_flush,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT)
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_template, isoc_inbound_single_flush,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_template, isoc_inbound_multiple_flush,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+);
+
+DECLARE_EVENT_CLASS(isoc_flush_completions_template,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u",
+ __entry->context,
+ __entry->card_index
+ )
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_completions_template, isoc_outbound_flush_completions,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT)
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_completions_template, isoc_inbound_single_flush_completions,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_completions_template, isoc_inbound_multiple_flush_completions,
+ TP_PROTO(const struct fw_iso_context *ctx),
+ TP_ARGS(ctx),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+);
+
+#define TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet) \
+ TP_STRUCT__entry( \
+ __field(u64, context) \
+ __field(u8, card_index) \
+ __field(u32, buffer_offset) \
+ __field(bool, interrupt) \
+ __field(bool, skip) \
+ __field(u8, sy) \
+ __field(u8, tag) \
+ __dynamic_array(u32, header, packet->header_length / QUADLET_SIZE) \
+ )
+
+#define TP_fast_assign_iso_packet(ctx, buffer_offset, packet) \
+ TP_fast_assign( \
+ __entry->context = (uintptr_t)ctx; \
+ __entry->card_index = ctx->card->index; \
+ __entry->buffer_offset = buffer_offset; \
+ __entry->interrupt = packet->interrupt; \
+ __entry->skip = packet->skip; \
+ __entry->sy = packet->sy; \
+ __entry->tag = packet->tag; \
+ memcpy(__get_dynamic_array(header), packet->header, \
+ __get_dynamic_array_len(header)); \
+ )
+
+TRACE_EVENT_CONDITION(isoc_outbound_queue,
+ TP_PROTO(const struct fw_iso_context *ctx, unsigned long buffer_offset, const struct fw_iso_packet *packet),
+ TP_ARGS(ctx, buffer_offset, packet),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT),
+ TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet),
+ TP_fast_assign_iso_packet(ctx, buffer_offset, packet),
+ TP_printk(
+ "context=0x%llx card_index=%u buffer_offset=0x%x interrupt=%s skip=%s sy=%d tag=%u header=%s",
+ __entry->context,
+ __entry->card_index,
+ __entry->buffer_offset,
+ __entry->interrupt ? "true" : "false",
+ __entry->skip ? "true" : "false",
+ __entry->sy,
+ __entry->tag,
+ __print_array(__get_dynamic_array(header),
+ __get_dynamic_array_len(header) / QUADLET_SIZE, QUADLET_SIZE)
+ )
+);
+
+TRACE_EVENT_CONDITION(isoc_inbound_single_queue,
+ TP_PROTO(const struct fw_iso_context *ctx, unsigned long buffer_offset, const struct fw_iso_packet *packet),
+ TP_ARGS(ctx, buffer_offset, packet),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE),
+ TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet),
+ TP_fast_assign_iso_packet(ctx, buffer_offset, packet),
+ TP_printk(
+ "context=0x%llx card_index=%u buffer_offset=0x%x interrupt=%s skip=%s",
+ __entry->context,
+ __entry->card_index,
+ __entry->buffer_offset,
+ __entry->interrupt ? "true" : "false",
+ __entry->skip ? "true" : "false"
+ )
+);
+
+TRACE_EVENT_CONDITION(isoc_inbound_multiple_queue,
+ TP_PROTO(const struct fw_iso_context *ctx, unsigned long buffer_offset, const struct fw_iso_packet *packet),
+ TP_ARGS(ctx, buffer_offset, packet),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL),
+ TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet),
+ TP_fast_assign_iso_packet(ctx, buffer_offset, packet),
+ TP_printk(
+ "context=0x%llx card_index=%u buffer_offset=0x%x interrupt=%s",
+ __entry->context,
+ __entry->card_index,
+ __entry->buffer_offset,
+ __entry->interrupt ? "true" : "false"
+ )
+);
+
+#undef TP_STRUCT__entry_iso_packet
+#undef TP_fast_assign_iso_packet
+
+#ifndef show_cause
+enum fw_iso_context_completions_cause {
+ FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH = 0,
+ FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ,
+ FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW,
+};
+#define show_cause(cause) \
+ __print_symbolic(cause, \
+ { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH, "FLUSH" }, \
+ { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ, "IRQ" }, \
+ { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW, "HEADER_OVERFLOW" } \
+ )
+#endif
+
+DECLARE_EVENT_CLASS(isoc_single_completions_template,
+ TP_PROTO(const struct fw_iso_context *ctx, u16 timestamp, enum fw_iso_context_completions_cause cause, const u32 *header, unsigned int header_length),
+ TP_ARGS(ctx, timestamp, cause, header, header_length),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ __field(u16, timestamp)
+ __field(u8, cause)
+ __dynamic_array(u32, header, header_length / QUADLET_SIZE)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ __entry->timestamp = timestamp;
+ __entry->cause = cause;
+ memcpy(__get_dynamic_array(header), header, __get_dynamic_array_len(header));
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u timestamp=0x%04x cause=%s header=%s",
+ __entry->context,
+ __entry->card_index,
+ __entry->timestamp,
+ show_cause(__entry->cause),
+ __print_array(__get_dynamic_array(header),
+ __get_dynamic_array_len(header) / QUADLET_SIZE, QUADLET_SIZE)
+ )
+)
+
+DEFINE_EVENT_CONDITION(isoc_single_completions_template, isoc_outbound_completions,
+ TP_PROTO(const struct fw_iso_context *ctx, u16 timestamp, enum fw_iso_context_completions_cause cause, const u32 *header, unsigned int header_length),
+ TP_ARGS(ctx, timestamp, cause, header, header_length),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT)
+);
+
+DEFINE_EVENT_CONDITION(isoc_single_completions_template, isoc_inbound_single_completions,
+ TP_PROTO(const struct fw_iso_context *ctx, u16 timestamp, enum fw_iso_context_completions_cause cause, const u32 *header, unsigned int header_length),
+ TP_ARGS(ctx, timestamp, cause, header, header_length),
+ TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+TRACE_EVENT(isoc_inbound_multiple_completions,
+ TP_PROTO(const struct fw_iso_context *ctx, unsigned int completed, enum fw_iso_context_completions_cause cause),
+ TP_ARGS(ctx, completed, cause),
+ TP_STRUCT__entry(
+ __field(u64, context)
+ __field(u8, card_index)
+ __field(u16, completed)
+ __field(u8, cause)
+ ),
+ TP_fast_assign(
+ __entry->context = (uintptr_t)ctx;
+ __entry->card_index = ctx->card->index;
+ __entry->completed = completed;
+ __entry->cause = cause;
+ ),
+ TP_printk(
+ "context=0x%llx card_index=%u completed=%u cause=%s",
+ __entry->context,
+ __entry->card_index,
+ __entry->completed,
+ show_cause(__entry->cause)
+ )
+);
+
#undef QUADLET_SIZE
#endif // _FIREWIRE_TRACE_EVENT_H
diff --git a/include/trace/events/firewire_ohci.h b/include/trace/events/firewire_ohci.h
new file mode 100644
index 000000000000..4f9a7f2577f3
--- /dev/null
+++ b/include/trace/events/firewire_ohci.h
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2024 Takashi Sakamoto
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM firewire_ohci
+
+#if !defined(_FIREWIRE_OHCI_TRACE_EVENT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _FIREWIRE_OHCI_TRACE_EVENT_H
+
+#include <linux/tracepoint.h>
+
+// Some macros and helper functions are defined in 'drivers/firewire/ohci.c'.
+
+TRACE_EVENT(irqs,
+ TP_PROTO(unsigned int card_index, u32 events),
+ TP_ARGS(card_index, events),
+ TP_STRUCT__entry(
+ __field(u8, card_index)
+ __field(u32, events)
+ ),
+ TP_fast_assign(
+ __entry->card_index = card_index;
+ __entry->events = events;
+ ),
+ TP_printk(
+ "card_index=%u events=%s",
+ __entry->card_index,
+ __print_flags(__entry->events, "|",
+ { OHCI1394_selfIDComplete, "selfIDComplete" },
+ { OHCI1394_RQPkt, "RQPkt" },
+ { OHCI1394_RSPkt, "RSPkt" },
+ { OHCI1394_reqTxComplete, "reqTxComplete" },
+ { OHCI1394_respTxComplete, "respTxComplete" },
+ { OHCI1394_isochRx, "isochRx" },
+ { OHCI1394_isochTx, "isochTx" },
+ { OHCI1394_postedWriteErr, "postedWriteErr" },
+ { OHCI1394_cycleTooLong, "cycleTooLong" },
+ { OHCI1394_cycle64Seconds, "cycle64Seconds" },
+ { OHCI1394_cycleInconsistent, "cycleInconsistent" },
+ { OHCI1394_regAccessFail, "regAccessFail" },
+ { OHCI1394_unrecoverableError, "unrecoverableError" },
+ { OHCI1394_busReset, "busReset" }
+ )
+ )
+);
+
+#define QUADLET_SIZE 4
+
+#define SELF_ID_COUNT_IS_ERROR(reg) \
+ (!!(((reg) & OHCI1394_SelfIDCount_selfIDError_MASK) >> OHCI1394_SelfIDCount_selfIDError_SHIFT))
+
+#define SELF_ID_COUNT_GET_GENERATION(reg) \
+ (((reg) & OHCI1394_SelfIDCount_selfIDGeneration_MASK) >> OHCI1394_SelfIDCount_selfIDGeneration_SHIFT)
+
+#define SELF_ID_RECEIVE_Q0_GET_GENERATION(quadlet) \
+ (((quadlet) & OHCI1394_SELF_ID_RECEIVE_Q0_GENERATION_MASK) >> OHCI1394_SELF_ID_RECEIVE_Q0_GENERATION_SHIFT)
+
+#define SELF_ID_RECEIVE_Q0_GET_TIMESTAMP(quadlet) \
+ (((quadlet) & OHCI1394_SELF_ID_RECEIVE_Q0_TIMESTAMP_MASK) >> OHCI1394_SELF_ID_RECEIVE_Q0_TIMESTAMP_SHIFT)
+
+TRACE_EVENT(self_id_complete,
+ TP_PROTO(unsigned int card_index, u32 reg, const __le32 *self_id_receive, bool has_be_header_quirk),
+ TP_ARGS(card_index, reg, self_id_receive, has_be_header_quirk),
+ TP_STRUCT__entry(
+ __field(u8, card_index)
+ __field(u32, reg)
+ __dynamic_array(u32, self_id_receive, ohci1394_self_id_count_get_size(reg))
+ ),
+ TP_fast_assign(
+ __entry->card_index = card_index;
+ __entry->reg = reg;
+ {
+ u32 *ptr = __get_dynamic_array(self_id_receive);
+ int i;
+
+ for (i = 0; i < __get_dynamic_array_len(self_id_receive) / QUADLET_SIZE; ++i)
+ ptr[i] = cond_le32_to_cpu(self_id_receive[i], has_be_header_quirk);
+ }
+ ),
+ TP_printk(
+ "card_index=%u is_error=%s generation_at_bus_reset=%u generation_at_completion=%u timestamp=0x%04x packet_data=%s",
+ __entry->card_index,
+ SELF_ID_COUNT_IS_ERROR(__entry->reg) ? "true" : "false",
+ SELF_ID_COUNT_GET_GENERATION(__entry->reg),
+ SELF_ID_RECEIVE_Q0_GET_GENERATION(((const u32 *)__get_dynamic_array(self_id_receive))[0]),
+ SELF_ID_RECEIVE_Q0_GET_TIMESTAMP(((const u32 *)__get_dynamic_array(self_id_receive))[0]),
+ __print_array(((const u32 *)__get_dynamic_array(self_id_receive)) + 1,
+ (__get_dynamic_array_len(self_id_receive) / QUADLET_SIZE) - 1, QUADLET_SIZE)
+ )
+);
+
+#undef SELF_ID_COUNT_IS_ERROR
+#undef SELF_ID_COUNT_GET_GENERATION
+#undef SELF_ID_RECEIVE_Q0_GET_GENERATION
+#undef SELF_ID_RECEIVE_Q0_GET_TIMESTAMP
+
+#undef QUADLET_SIZE
+
+#endif // _FIREWIRE_OHCI_TRACE_EVENT_H
+
+#include <trace/define_trace.h>
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 8a829e0f6e55..b37eb0a7060f 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -36,7 +36,7 @@ TRACE_EVENT(kmem_cache_alloc,
__entry->bytes_alloc = s->size;
__entry->gfp_flags = (__force unsigned long)gfp_flags;
__entry->node = node;
- __entry->accounted = IS_ENABLED(CONFIG_MEMCG_KMEM) ?
+ __entry->accounted = IS_ENABLED(CONFIG_MEMCG) ?
((gfp_flags & __GFP_ACCOUNT) ||
(s->flags & SLAB_ACCOUNT)) : false;
),
@@ -87,7 +87,7 @@ TRACE_EVENT(kmalloc,
__entry->bytes_alloc,
show_gfp_flags(__entry->gfp_flags),
__entry->node,
- (IS_ENABLED(CONFIG_MEMCG_KMEM) &&
+ (IS_ENABLED(CONFIG_MEMCG) &&
(__entry->gfp_flags & (__force unsigned long)__GFP_ACCOUNT)) ? "true" : "false")
);
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 0190ef725b43..cd01dd7b3640 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -22,7 +22,8 @@
EM( MR_NUMA_MISPLACED, "numa_misplaced") \
EM( MR_CONTIG_RANGE, "contig_range") \
EM( MR_LONGTERM_PIN, "longterm_pin") \
- EMe(MR_DEMOTION, "demotion")
+ EM( MR_DEMOTION, "demotion") \
+ EMe(MR_DAMON, "damon")
/*
* First define the enums in the above macros to be exported to userspace
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 191a7e88a8ab..753971770733 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -336,8 +336,10 @@ typedef int __bitwise __kernel_rwf_t;
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
+#define PROCFS_IOCTL_MAGIC 'f'
+
/* Pagemap ioctl */
-#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
+#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)
/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */
#define PAGE_IS_WPALLOWED (1 << 0)
@@ -396,4 +398,158 @@ struct pm_scan_arg {
__u64 return_mask;
};
+/* /proc/<pid>/maps ioctl */
+#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query)
+
+enum procmap_query_flags {
+ /*
+ * VMA permission flags.
+ *
+ * Can be used as part of procmap_query.query_flags field to look up
+ * only VMAs satisfying specified subset of permissions. E.g., specifying
+ * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs,
+ * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only
+ * return read/write VMAs, though both executable/non-executable and
+ * private/shared will be ignored.
+ *
+ * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags
+ * field to specify actual VMA permissions.
+ */
+ PROCMAP_QUERY_VMA_READABLE = 0x01,
+ PROCMAP_QUERY_VMA_WRITABLE = 0x02,
+ PROCMAP_QUERY_VMA_EXECUTABLE = 0x04,
+ PROCMAP_QUERY_VMA_SHARED = 0x08,
+ /*
+ * Query modifier flags.
+ *
+ * By default VMA that covers provided address is returned, or -ENOENT
+ * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest
+ * VMA with vma_start > addr will be returned if no covering VMA is
+ * found.
+ *
+ * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that
+ * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA
+ * to iterate all VMAs with file backing.
+ */
+ PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10,
+ PROCMAP_QUERY_FILE_BACKED_VMA = 0x20,
+};
+
+/*
+ * Input/output argument structured passed into ioctl() call. It can be used
+ * to query a set of VMAs (Virtual Memory Areas) of a process.
+ *
+ * Each field can be one of three kinds, marked in a short comment to the
+ * right of the field:
+ * - "in", input argument, user has to provide this value, kernel doesn't modify it;
+ * - "out", output argument, kernel sets this field with VMA data;
+ * - "in/out", input and output argument; user provides initial value (used
+ * to specify maximum allowable buffer size), and kernel sets it to actual
+ * amount of data written (or zero, if there is no data).
+ *
+ * If matching VMA is found (according to criterias specified by
+ * query_addr/query_flags, all the out fields are filled out, and ioctl()
+ * returns 0. If there is no matching VMA, -ENOENT will be returned.
+ * In case of any other error, negative error code other than -ENOENT is
+ * returned.
+ *
+ * Most of the data is similar to the one returned as text in /proc/<pid>/maps
+ * file, but procmap_query provides more querying flexibility. There are no
+ * consistency guarantees between subsequent ioctl() calls, but data returned
+ * for matched VMA is self-consistent.
+ */
+struct procmap_query {
+ /* Query struct size, for backwards/forward compatibility */
+ __u64 size;
+ /*
+ * Query flags, a combination of enum procmap_query_flags values.
+ * Defines query filtering and behavior, see enum procmap_query_flags.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_flags; /* in */
+ /*
+ * Query address. By default, VMA that covers this address will
+ * be looked up. PROCMAP_QUERY_* flags above modify this default
+ * behavior further.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_addr; /* in */
+ /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */
+ __u64 vma_start; /* out */
+ __u64 vma_end; /* out */
+ /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */
+ __u64 vma_flags; /* out */
+ /* VMA backing page size granularity. */
+ __u64 vma_page_size; /* out */
+ /*
+ * VMA file offset. If VMA has file backing, this specifies offset
+ * within the file that VMA's start address corresponds to.
+ * Is set to zero if VMA has no backing file.
+ */
+ __u64 vma_offset; /* out */
+ /* Backing file's inode number, or zero, if VMA has no backing file. */
+ __u64 inode; /* out */
+ /* Backing file's device major/minor number, or zero, if VMA has no backing file. */
+ __u32 dev_major; /* out */
+ __u32 dev_minor; /* out */
+ /*
+ * If set to non-zero value, signals the request to return VMA name
+ * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix
+ * appended, if file was unlinked from FS) for matched VMA. VMA name
+ * can also be some special name (e.g., "[heap]", "[stack]") or could
+ * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME).
+ *
+ * Kernel will set this field to zero, if VMA has no associated name.
+ * Otherwise kernel will return actual amount of bytes filled in
+ * user-supplied buffer (see vma_name_addr field below), including the
+ * terminating zero.
+ *
+ * If VMA name is longer that user-supplied maximum buffer size,
+ * -E2BIG error is returned.
+ *
+ * If this field is set to non-zero value, vma_name_addr should point
+ * to valid user space memory buffer of at least vma_name_size bytes.
+ * If set to zero, vma_name_addr should be set to zero as well
+ */
+ __u32 vma_name_size; /* in/out */
+ /*
+ * If set to non-zero value, signals the request to extract and return
+ * VMA's backing file's build ID, if the backing file is an ELF file
+ * and it contains embedded build ID.
+ *
+ * Kernel will set this field to zero, if VMA has no backing file,
+ * backing file is not an ELF file, or ELF file has no build ID
+ * embedded.
+ *
+ * Build ID is a binary value (not a string). Kernel will set
+ * build_id_size field to exact number of bytes used for build ID.
+ * If build ID is requested and present, but needs more bytes than
+ * user-supplied maximum buffer size (see build_id_addr field below),
+ * -E2BIG error will be returned.
+ *
+ * If this field is set to non-zero value, build_id_addr should point
+ * to valid user space memory buffer of at least build_id_size bytes.
+ * If set to zero, build_id_addr should be set to zero as well
+ */
+ __u32 build_id_size; /* in/out */
+ /*
+ * User-supplied address of a buffer of at least vma_name_size bytes
+ * for kernel to fill with matched VMA's name (see vma_name_size field
+ * description above for details).
+ *
+ * Should be set to zero if VMA name should not be returned.
+ */
+ __u64 vma_name_addr; /* in */
+ /*
+ * User-supplied address of a buffer of at least build_id_size bytes
+ * for kernel to fill with matched VMA's ELF build ID, if available
+ * (see build_id_size field description above for details).
+ *
+ * Should be set to zero if build ID should not be returned.
+ */
+ __u64 build_id_addr; /* in */
+};
+
#endif /* _UAPI_LINUX_FS_H */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d03842abae57..637efc055145 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -192,11 +192,24 @@ struct kvm_xen_exit {
/* Flags that describe what fields in emulation_failure hold valid data. */
#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+/*
+ * struct kvm_run can be modified by userspace at any time, so KVM must be
+ * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM()
+ * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when
+ * compiled into the kernel, ensuring that any use within KVM is obvious and
+ * gets extra scrutiny.
+ */
+#ifdef __KERNEL__
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe
+#else
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol
+#endif
+
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
__u8 request_interrupt_window;
- __u8 immediate_exit;
+ __u8 HINT_UNSAFE_IN_KVM(immediate_exit);
__u8 padding1[6];
/* out */
@@ -917,6 +930,9 @@ struct kvm_enable_cap {
#define KVM_CAP_MEMORY_ATTRIBUTES 233
#define KVM_CAP_GUEST_MEMFD 234
#define KVM_CAP_VM_TYPES 235
+#define KVM_CAP_PRE_FAULT_MEMORY 236
+#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
+#define KVM_CAP_X86_GUEST_MODE 238
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1548,4 +1564,13 @@ struct kvm_create_guest_memfd {
__u64 reserved[6];
};
+#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory)
+
+struct kvm_pre_fault_memory {
+ __u64 gpa;
+ __u64 size;
+ __u64 flags;
+ __u64 padding[5];
+};
+
#endif /* __LINUX_KVM_H */
diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index 68625e728f43..2c8dbc74b955 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -12,29 +12,36 @@
#include <linux/types.h>
/**
- * struct landlock_ruleset_attr - Ruleset definition
+ * struct landlock_ruleset_attr - Ruleset definition.
*
- * Argument of sys_landlock_create_ruleset(). This structure can grow in
- * future versions.
+ * Argument of sys_landlock_create_ruleset().
+ *
+ * This structure defines a set of *handled access rights*, a set of actions on
+ * different object types, which should be denied by default when the ruleset is
+ * enacted. Vice versa, access rights that are not specifically listed here are
+ * not going to be denied by this ruleset when it is enacted.
+ *
+ * For historical reasons, the %LANDLOCK_ACCESS_FS_REFER right is always denied
+ * by default, even when its bit is not set in @handled_access_fs. In order to
+ * add new rules with this access right, the bit must still be set explicitly
+ * (cf. `Filesystem flags`_).
+ *
+ * The explicit listing of *handled access rights* is required for backwards
+ * compatibility reasons. In most use cases, processes that use Landlock will
+ * *handle* a wide range or all access rights that they know about at build time
+ * (and that they have tested with a kernel that supported them all).
+ *
+ * This structure can grow in future Landlock versions.
*/
struct landlock_ruleset_attr {
/**
- * @handled_access_fs: Bitmask of actions (cf. `Filesystem flags`_)
- * that is handled by this ruleset and should then be forbidden if no
- * rule explicitly allow them: it is a deny-by-default list that should
- * contain as much Landlock access rights as possible. Indeed, all
- * Landlock filesystem access rights that are not part of
- * handled_access_fs are allowed. This is needed for backward
- * compatibility reasons. One exception is the
- * %LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly
- * handled, but must still be explicitly handled to add new rules with
- * this access right.
+ * @handled_access_fs: Bitmask of handled filesystem actions
+ * (cf. `Filesystem flags`_).
*/
__u64 handled_access_fs;
/**
- * @handled_access_net: Bitmask of actions (cf. `Network flags`_)
- * that is handled by this ruleset and should then be forbidden if no
- * rule explicitly allow them.
+ * @handled_access_net: Bitmask of handled network actions (cf. `Network
+ * flags`_).
*/
__u64 handled_access_net;
};
@@ -97,20 +104,21 @@ struct landlock_path_beneath_attr {
*/
struct landlock_net_port_attr {
/**
- * @allowed_access: Bitmask of allowed access network for a port
+ * @allowed_access: Bitmask of allowed network actions for a port
* (cf. `Network flags`_).
*/
__u64 allowed_access;
/**
* @port: Network port in host endianness.
*
- * It should be noted that port 0 passed to :manpage:`bind(2)` will
- * bind to an available port from a specific port range. This can be
- * configured thanks to the ``/proc/sys/net/ipv4/ip_local_port_range``
- * sysctl (also used for IPv6). A Landlock rule with port 0 and the
- * ``LANDLOCK_ACCESS_NET_BIND_TCP`` right means that requesting to bind
- * on port 0 is allowed and it will automatically translate to binding
- * on the related port range.
+ * It should be noted that port 0 passed to :manpage:`bind(2)` will bind
+ * to an available port from the ephemeral port range. This can be
+ * configured with the ``/proc/sys/net/ipv4/ip_local_port_range`` sysctl
+ * (also used for IPv6).
+ *
+ * A Landlock rule with port 0 and the ``LANDLOCK_ACCESS_NET_BIND_TCP``
+ * right means that requesting to bind on port 0 is allowed and it will
+ * automatically translate to binding on the related port range.
*/
__u64 port;
};
@@ -131,10 +139,10 @@ struct landlock_net_port_attr {
* The following access rights apply only to files:
*
* - %LANDLOCK_ACCESS_FS_EXECUTE: Execute a file.
- * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access. Note that
- * you might additionally need the %LANDLOCK_ACCESS_FS_TRUNCATE right in order
- * to overwrite files with :manpage:`open(2)` using ``O_TRUNC`` or
- * :manpage:`creat(2)`.
+ * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access. When
+ * opening files for writing, you will often additionally need the
+ * %LANDLOCK_ACCESS_FS_TRUNCATE right. In many cases, these system calls
+ * truncate existing files when overwriting them (e.g., :manpage:`creat(2)`).
* - %LANDLOCK_ACCESS_FS_READ_FILE: Open a file with read access.
* - %LANDLOCK_ACCESS_FS_TRUNCATE: Truncate a file with :manpage:`truncate(2)`,
* :manpage:`ftruncate(2)`, :manpage:`creat(2)`, or :manpage:`open(2)` with
@@ -256,7 +264,7 @@ struct landlock_net_port_attr {
* These flags enable to restrict a sandboxed process to a set of network
* actions. This is supported since the Landlock ABI version 4.
*
- * TCP sockets with allowed actions:
+ * The following access rights apply to TCP port numbers:
*
* - %LANDLOCK_ACCESS_NET_BIND_TCP: Bind a TCP socket to a local port.
* - %LANDLOCK_ACCESS_NET_CONNECT_TCP: Connect an active TCP socket to
diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h
index b7a2c2ee35b7..2289b7c76c59 100644
--- a/include/uapi/linux/psp-sev.h
+++ b/include/uapi/linux/psp-sev.h
@@ -31,6 +31,7 @@ enum {
SNP_PLATFORM_STATUS,
SNP_COMMIT,
SNP_SET_CONFIG,
+ SNP_VLEK_LOAD,
SEV_MAX,
};
@@ -215,6 +216,32 @@ struct sev_user_data_snp_config {
} __packed;
/**
+ * struct sev_data_snp_vlek_load - SNP_VLEK_LOAD structure
+ *
+ * @len: length of the command buffer read by the PSP
+ * @vlek_wrapped_version: version of wrapped VLEK hashstick (Must be 0h)
+ * @rsvd: reserved
+ * @vlek_wrapped_address: address of a wrapped VLEK hashstick
+ * (struct sev_user_data_snp_wrapped_vlek_hashstick)
+ */
+struct sev_user_data_snp_vlek_load {
+ __u32 len; /* In */
+ __u8 vlek_wrapped_version; /* In */
+ __u8 rsvd[3]; /* In */
+ __u64 vlek_wrapped_address; /* In */
+} __packed;
+
+/**
+ * struct sev_user_data_snp_vlek_wrapped_vlek_hashstick - Wrapped VLEK data
+ *
+ * @data: Opaque data provided by AMD KDS (as described in SEV-SNP Firmware ABI
+ * 1.54, SNP_VLEK_LOAD)
+ */
+struct sev_user_data_snp_wrapped_vlek_hashstick {
+ __u8 data[432]; /* In */
+} __packed;
+
+/**
* struct sev_issue_cmd - SEV ioctl parameters
*
* @cmd: SEV commands to execute
diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h
index 154a87a1eca9..fcdfea767fca 100644
--- a/include/uapi/linux/sev-guest.h
+++ b/include/uapi/linux/sev-guest.h
@@ -89,6 +89,9 @@ struct snp_ext_report_req {
#define SNP_GUEST_FW_ERR_MASK GENMASK_ULL(31, 0)
#define SNP_GUEST_VMM_ERR_SHIFT 32
#define SNP_GUEST_VMM_ERR(x) (((u64)x) << SNP_GUEST_VMM_ERR_SHIFT)
+#define SNP_GUEST_FW_ERR(x) ((x) & SNP_GUEST_FW_ERR_MASK)
+#define SNP_GUEST_ERR(vmm_err, fw_err) (SNP_GUEST_VMM_ERR(vmm_err) | \
+ SNP_GUEST_FW_ERR(fw_err))
#define SNP_GUEST_VMM_ERR_INVALID_LEN 1
#define SNP_GUEST_VMM_ERR_BUSY 2