aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/core-api/printk-formats.rst11
-rw-r--r--arch/alpha/include/asm/pgtable.h8
-rw-r--r--arch/arc/mm/init.c5
-rw-r--r--arch/arm/include/asm/pgtable-3level.h2
-rw-r--r--arch/arm/kernel/setup.c5
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/cacheflush.h6
-rw-r--r--arch/arm64/include/asm/kfence.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h8
-rw-r--r--arch/arm64/include/asm/set_memory.h17
-rw-r--r--arch/arm64/include/uapi/asm/unistd.h1
-rw-r--r--arch/arm64/kernel/machine_kexec.c1
-rw-r--r--arch/arm64/kernel/setup.c5
-rw-r--r--arch/arm64/kernel/stacktrace.c2
-rw-r--r--arch/arm64/mm/mmu.c7
-rw-r--r--arch/arm64/mm/pageattr.c13
-rw-r--r--arch/csky/kernel/setup.c5
-rw-r--r--arch/h8300/kernel/setup.c5
-rw-r--r--arch/hexagon/Kconfig1
-rw-r--r--arch/hexagon/kernel/vmlinux.lds.S9
-rw-r--r--arch/ia64/include/asm/pgtable.h4
-rw-r--r--arch/m68k/include/asm/motorola_pgtable.h2
-rw-r--r--arch/m68k/kernel/setup_mm.c5
-rw-r--r--arch/m68k/kernel/setup_no.c5
-rw-r--r--arch/mips/include/asm/pgtable-64.h8
-rw-r--r--arch/nds32/kernel/setup.c5
-rw-r--r--arch/nios2/kernel/setup.c5
-rw-r--r--arch/openrisc/kernel/setup.c5
-rw-r--r--arch/parisc/include/asm/pgtable.h4
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h11
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h2
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable-4k.h6
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h6
-rw-r--r--arch/powerpc/include/asm/tlb.h6
-rw-r--r--arch/powerpc/kernel/setup-common.c5
-rw-r--r--arch/powerpc/mm/book3s64/radix_hugetlbpage.c8
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c6
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c44
-rw-r--r--arch/powerpc/mm/pgtable_64.c4
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype2
-rw-r--r--arch/riscv/Kconfig4
-rw-r--r--arch/riscv/include/asm/pgtable-64.h4
-rw-r--r--arch/riscv/include/asm/unistd.h1
-rw-r--r--arch/riscv/kernel/setup.c5
-rw-r--r--arch/s390/kernel/setup.c5
-rw-r--r--arch/sh/include/asm/pgtable-3level.h4
-rw-r--r--arch/sh/kernel/setup.c5
-rw-r--r--arch/sparc/include/asm/pgtable_32.h6
-rw-r--r--arch/sparc/include/asm/pgtable_64.h10
-rw-r--r--arch/um/include/asm/pgtable-3level.h2
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/include/asm/pgtable.h8
-rw-r--r--arch/x86/kernel/dumpstack.c2
-rw-r--r--arch/x86/kernel/setup.c5
-rw-r--r--arch/x86/mm/init_64.c4
-rw-r--r--arch/x86/mm/pat/set_memory.c4
-rw-r--r--arch/x86/mm/pgtable.c2
-rw-r--r--include/asm-generic/pgtable-nop4d.h2
-rw-r--r--include/asm-generic/pgtable-nopmd.h2
-rw-r--r--include/asm-generic/pgtable-nopud.h2
-rw-r--r--include/linux/bootconfig.h4
-rw-r--r--include/linux/buildid.h8
-rw-r--r--include/linux/compaction.h4
-rw-r--r--include/linux/cpumask.h2
-rw-r--r--include/linux/crash_core.h12
-rw-r--r--include/linux/debugobjects.h2
-rw-r--r--include/linux/hmm.h2
-rw-r--r--include/linux/hugetlb.h6
-rw-r--r--include/linux/kallsyms.h21
-rw-r--r--include/linux/list_lru.h4
-rw-r--r--include/linux/lru_cache.h8
-rw-r--r--include/linux/mm.h3
-rw-r--r--include/linux/mmu_notifier.h8
-rw-r--r--include/linux/module.h9
-rw-r--r--include/linux/nodemask.h6
-rw-r--r--include/linux/percpu-defs.h2
-rw-r--r--include/linux/percpu-refcount.h2
-rw-r--r--include/linux/pgtable.h4
-rw-r--r--include/linux/scatterlist.h2
-rw-r--r--include/linux/secretmem.h54
-rw-r--r--include/linux/set_memory.h12
-rw-r--r--include/linux/shrinker.h2
-rw-r--r--include/linux/syscalls.h1
-rw-r--r--include/linux/vmalloc.h4
-rw-r--r--include/uapi/asm-generic/unistd.h7
-rw-r--r--include/uapi/linux/magic.h1
-rw-r--r--init/Kconfig1
-rw-r--r--init/main.c2
-rw-r--r--kernel/crash_core.c50
-rw-r--r--kernel/kallsyms.c104
-rw-r--r--kernel/module.c42
-rw-r--r--kernel/power/hibernate.c5
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--lib/Kconfig.debug17
-rw-r--r--lib/asn1_encoder.c2
-rw-r--r--lib/buildid.c74
-rw-r--r--lib/devres.c2
-rw-r--r--lib/dump_stack.c13
-rw-r--r--lib/dynamic_debug.c2
-rw-r--r--lib/fonts/font_pearl_8x8.c2
-rw-r--r--lib/kfifo.c2
-rw-r--r--lib/list_sort.c2
-rw-r--r--lib/nlattr.c4
-rw-r--r--lib/oid_registry.c2
-rw-r--r--lib/pldmfw/pldmfw.c2
-rw-r--r--lib/reed_solomon/test_rslib.c2
-rw-r--r--lib/refcount.c2
-rw-r--r--lib/rhashtable.c2
-rw-r--r--lib/sbitmap.c2
-rw-r--r--lib/scatterlist.c4
-rw-r--r--lib/seq_buf.c2
-rw-r--r--lib/sort.c2
-rw-r--r--lib/stackdepot.c2
-rw-r--r--lib/test_bitops.c2
-rw-r--r--lib/test_bpf.c2
-rw-r--r--lib/test_kasan.c2
-rw-r--r--lib/test_kmod.c6
-rw-r--r--lib/test_scanf.c2
-rw-r--r--lib/vsprintf.c10
-rw-r--r--mm/Kconfig4
-rw-r--r--mm/Makefile1
-rw-r--r--mm/gup.c12
-rw-r--r--mm/init-mm.c9
-rw-r--r--mm/internal.h3
-rw-r--r--mm/mlock.c3
-rw-r--r--mm/mmap.c5
-rw-r--r--mm/mremap.c108
-rw-r--r--mm/secretmem.c254
-rw-r--r--mm/slub.c79
-rwxr-xr-xscripts/checksyscalls.sh4
-rwxr-xr-xscripts/decode_stacktrace.sh89
-rw-r--r--tools/testing/selftests/vm/.gitignore1
-rw-r--r--tools/testing/selftests/vm/Makefile3
-rw-r--r--tools/testing/selftests/vm/memfd_secret.c296
-rw-r--r--tools/testing/selftests/vm/mremap_test.c118
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh17
137 files changed, 1466 insertions, 438 deletions
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 4346ae17a72c..d941717a191b 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -125,6 +125,17 @@ used when printing stack backtraces. The specifier takes into
consideration the effect of compiler optimisations which may occur
when tail-calls are used and marked with the noreturn GCC attribute.
+If the pointer is within a module, the module name and optionally build ID is
+printed after the symbol name with an extra ``b`` appended to the end of the
+specifier.
+
+::
+ %pS versatile_init+0x0/0x110 [module_name]
+ %pSb versatile_init+0x0/0x110 [module_name ed5019fdf5e53be37cb1ba7899292d7e143b259e]
+ %pSRb versatile_init+0x9/0x110 [module_name ed5019fdf5e53be37cb1ba7899292d7e143b259e]
+ (with __builtin_extract_return_addr() translation)
+ %pBb prev_fn_of_versatile_init+0x88/0x88 [module_name ed5019fdf5e53be37cb1ba7899292d7e143b259e]
+
Probed Pointers from BPF / tracing
----------------------------------
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index ff690846465e..02f0429f1068 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -236,8 +236,10 @@ pmd_page_vaddr(pmd_t pmd)
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> 32))
#define pud_page(pud) (pfn_to_page(pud_val(pud) >> 32))
-extern inline unsigned long pud_page_vaddr(pud_t pgd)
-{ return PAGE_OFFSET + ((pud_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); }
+extern inline pmd_t *pud_pgtable(pud_t pgd)
+{
+ return (pmd_t *)(PAGE_OFFSET + ((pud_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)));
+}
extern inline int pte_none(pte_t pte) { return !pte_val(pte); }
extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_VALID; }
@@ -287,7 +289,7 @@ extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; retu
/* Find an entry in the second-level page table.. */
extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address)
{
- pmd_t *ret = (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
+ pmd_t *ret = pud_pgtable(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1));
smp_rmb(); /* see above */
return ret;
}
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index abfeef7bf6f8..c083bf660cec 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -89,10 +89,7 @@ void __init setup_arch_memory(void)
{
unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
- init_mm.start_code = (unsigned long)_text;
- init_mm.end_code = (unsigned long)_etext;
- init_mm.end_data = (unsigned long)_edata;
- init_mm.brk = (unsigned long)_end;
+ setup_initial_init_mm(_text, _etext, _edata, _end);
/* first page of system - kernel .vector starts here */
min_low_pfn = virt_to_pfn(CONFIG_LINUX_RAM_BASE);
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index d4edab51a77c..eabe72ff7381 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -130,7 +130,7 @@
flush_pmd_entry(pudp); \
} while (0)
-static inline pmd_t *pud_page_vaddr(pud_t pud)
+static inline pmd_t *pud_pgtable(pud_t pud)
{
return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
}
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 7693a1f05ecc..f97eb2371672 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -1130,10 +1130,7 @@ void __init setup_arch(char **cmdline_p)
if (mdesc->reboot_mode != REBOOT_HARD)
reboot_mode = mdesc->reboot_mode;
- init_mm.start_code = (unsigned long) _text;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) _end;
+ setup_initial_init_mm(_text, _etext, _edata, _end);
/* populate cmd_line too for later use, preserving boot_command_line */
strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 26889dbfe904..64202010b700 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -3,7 +3,6 @@ generic-y += early_ioremap.h
generic-y += mcs_spinlock.h
generic-y += qrwlock.h
generic-y += qspinlock.h
-generic-y += set_memory.h
generic-y += user.h
generated-y += cpucaps.h
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 543c997eb3b7..5a228e203ef9 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -144,12 +144,6 @@ static __always_inline void icache_inval_all_pou(void)
dsb(ish);
}
-int set_memory_valid(unsigned long addr, int numpages, int enable);
-
-int set_direct_map_invalid_noflush(struct page *page);
-int set_direct_map_default_noflush(struct page *page);
-bool kernel_page_present(struct page *page);
-
#include <asm-generic/cacheflush.h>
#endif /* __ASM_CACHEFLUSH_H */
diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h
index d061176d57ea..aa855c6a0ae6 100644
--- a/arch/arm64/include/asm/kfence.h
+++ b/arch/arm64/include/asm/kfence.h
@@ -8,7 +8,7 @@
#ifndef __ASM_KFENCE_H
#define __ASM_KFENCE_H
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
static inline bool arch_kfence_init_pool(void) { return true; }
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 508c7ffad515..f09bf5c02891 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -649,9 +649,9 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
return __pud_to_phys(pud);
}
-static inline unsigned long pud_page_vaddr(pud_t pud)
+static inline pmd_t *pud_pgtable(pud_t pud)
{
- return (unsigned long)__va(pud_page_paddr(pud));
+ return (pmd_t *)__va(pud_page_paddr(pud));
}
/* Find an entry in the second-level page table. */
@@ -710,9 +710,9 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
return __p4d_to_phys(p4d);
}
-static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+static inline pud_t *p4d_pgtable(p4d_t p4d)
{
- return (unsigned long)__va(p4d_page_paddr(p4d));
+ return (pud_t *)__va(p4d_page_paddr(p4d));
}
/* Find an entry in the frst-level page table. */
diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h
new file mode 100644
index 000000000000..0f740b781187
--- /dev/null
+++ b/arch/arm64/include/asm/set_memory.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARM64_SET_MEMORY_H
+#define _ASM_ARM64_SET_MEMORY_H
+
+#include <asm-generic/set_memory.h>
+
+bool can_set_direct_map(void);
+#define can_set_direct_map can_set_direct_map
+
+int set_memory_valid(unsigned long addr, int numpages, int enable);
+
+int set_direct_map_invalid_noflush(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+bool kernel_page_present(struct page *page);
+
+#endif /* _ASM_ARM64_SET_MEMORY_H */
diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h
index f83a70e07df8..ce2ee8f1e361 100644
--- a/arch/arm64/include/uapi/asm/unistd.h
+++ b/arch/arm64/include/uapi/asm/unistd.h
@@ -20,5 +20,6 @@
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_TIME32_SYSCALLS
#define __ARCH_WANT_SYS_CLONE3
+#define __ARCH_WANT_MEMFD_SECRET
#include <asm-generic/unistd.h>
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 03ceabe4d912..213d56c14f60 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/page-flags.h>
+#include <linux/set_memory.h>
#include <linux/smp.h>
#include <asm/cacheflush.h>
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 880f40bae60e..be5f85b0a24d 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -293,10 +293,7 @@ u64 cpu_logical_map(unsigned int cpu)
void __init __no_sanitize_address setup_arch(char **cmdline_p)
{
- init_mm.start_code = (unsigned long) _stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) _end;
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
*cmdline_p = boot_command_line;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index b189de5ca6cb..b83c8d911930 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -153,7 +153,7 @@ NOKPROBE_SYMBOL(walk_stackframe);
static void dump_backtrace_entry(unsigned long where, const char *loglvl)
{
- printk("%s %pS\n", loglvl, (void *)where);
+ printk("%s %pSb\n", loglvl, (void *)where);
}
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 595fde9a47dd..d74586508448 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -22,6 +22,7 @@
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
+#include <linux/set_memory.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@@ -515,8 +516,7 @@ static void __init map_mem(pgd_t *pgdp)
*/
BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
- if (rodata_full || crash_mem_map || debug_pagealloc_enabled() ||
- IS_ENABLED(CONFIG_KFENCE))
+ if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE))
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
@@ -1489,8 +1489,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
* KFENCE requires linear map to be mapped at page granularity, so that
* it is possible to protect/unprotect single pages in the KFENCE pool.
*/
- if (rodata_full || debug_pagealloc_enabled() ||
- IS_ENABLED(CONFIG_KFENCE))
+ if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 92eccaf595c8..a3bacd79507a 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -19,6 +19,11 @@ struct page_change_data {
bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED);
+bool can_set_direct_map(void)
+{
+ return rodata_full || debug_pagealloc_enabled();
+}
+
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
struct page_change_data *cdata = data;
@@ -155,7 +160,7 @@ int set_direct_map_invalid_noflush(struct page *page)
.clear_mask = __pgprot(PTE_VALID),
};
- if (!debug_pagealloc_enabled() && !rodata_full)
+ if (!can_set_direct_map())
return 0;
return apply_to_page_range(&init_mm,
@@ -170,7 +175,7 @@ int set_direct_map_default_noflush(struct page *page)
.clear_mask = __pgprot(PTE_RDONLY),
};
- if (!debug_pagealloc_enabled() && !rodata_full)
+ if (!can_set_direct_map())
return 0;
return apply_to_page_range(&init_mm,
@@ -181,7 +186,7 @@ int set_direct_map_default_noflush(struct page *page)
#ifdef CONFIG_DEBUG_PAGEALLOC
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
- if (!debug_pagealloc_enabled() && !rodata_full)
+ if (!can_set_direct_map())
return;
set_memory_valid((unsigned long)page_address(page), numpages, enable);
@@ -206,7 +211,7 @@ bool kernel_page_present(struct page *page)
pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
- if (!debug_pagealloc_enabled() && !rodata_full)
+ if (!can_set_direct_map())
return true;
pgdp = pgd_offset_k(addr);
diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c
index e93bc6f74432..c64e7be2045b 100644
--- a/arch/csky/kernel/setup.c
+++ b/arch/csky/kernel/setup.c
@@ -78,10 +78,7 @@ void __init setup_arch(char **cmdline_p)
pr_info("Phys. mem: %ldMB\n",
(unsigned long) memblock_phys_mem_size()/1024/1024);
- init_mm.start_code = (unsigned long) _stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) _end;
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
parse_early_param();
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index c3590b2e9592..61091a76eb7e 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -95,10 +95,7 @@ void __init setup_arch(char **cmdline_p)
{
unflatten_and_copy_device_tree();
- init_mm.start_code = (unsigned long) _stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) 0;
+ setup_initial_init_mm(_stext, _etext, _edata, NULL);
pr_notice("\r\n\nuClinux " CPU "\n");
pr_notice("Flat model support (C) 1998,1999 Kenneth Albanowski, D. Jeff Dionne\n");
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 44a409967af1..e5a852080730 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -30,6 +30,7 @@ config HEXAGON
select MODULES_USE_ELF_RELA
select GENERIC_CPU_DEVICES
select SET_FS
+ select ARCH_WANT_LD_ORPHAN_WARN
help
Qualcomm Hexagon is a processor architecture designed for high
performance and low power across a wide variety of applications.
diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S
index 35b18e55eae8..57465bff1fe4 100644
--- a/arch/hexagon/kernel/vmlinux.lds.S
+++ b/arch/hexagon/kernel/vmlinux.lds.S
@@ -38,6 +38,8 @@ SECTIONS
.text : AT(ADDR(.text)) {
_text = .;
TEXT_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
@@ -59,14 +61,9 @@ SECTIONS
_end = .;
- /DISCARD/ : {
- EXIT_TEXT
- EXIT_DATA
- EXIT_CALL
- }
-
STABS_DEBUG
DWARF_DEBUG
ELF_DETAILS
+ DISCARDS
}
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 3f5dbbd8b9d8..9584b2c5f394 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -273,7 +273,7 @@ ia64_phys_addr_valid (unsigned long addr)
#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud)))
#define pud_present(pud) (pud_val(pud) != 0UL)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK))
+#define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & _PFN_MASK))
#define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET))
#if CONFIG_PGTABLE_LEVELS == 4
@@ -281,7 +281,7 @@ ia64_phys_addr_valid (unsigned long addr)
#define p4d_bad(p4d) (!ia64_phys_addr_valid(p4d_val(p4d)))
#define p4d_present(p4d) (p4d_val(p4d) != 0UL)
#define p4d_clear(p4dp) (p4d_val(*(p4dp)) = 0UL)
-#define p4d_page_vaddr(p4d) ((unsigned long) __va(p4d_val(p4d) & _PFN_MASK))
+#define p4d_pgtable(p4d) ((pud_t *) __va(p4d_val(p4d) & _PFN_MASK))
#define p4d_page(p4d) virt_to_page((p4d_val(p4d) + PAGE_OFFSET))
#endif
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index a2908164ee6f..022c3abc280d 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -131,7 +131,7 @@ static inline void pud_set(pud_t *pudp, pmd_t *pmdp)
#define __pte_page(pte) ((unsigned long)__va(pte_val(pte) & PAGE_MASK))
#define pmd_page_vaddr(pmd) ((unsigned long)__va(pmd_val(pmd) & _TABLE_MASK))
-#define pud_page_vaddr(pud) ((unsigned long)__va(pud_val(pud) & _TABLE_MASK))
+#define pud_pgtable(pud) ((pmd_t *)__va(pud_val(pud) & _TABLE_MASK))
#define pte_none(pte) (!pte_val(pte))
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 017bac3aab80..4b51bfd38e5f 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -258,10 +258,7 @@ void __init setup_arch(char **cmdline_p)
}
}
- init_mm.start_code = PAGE_OFFSET;
- init_mm.end_code = (unsigned long)_etext;
- init_mm.end_data = (unsigned long)_edata;
- init_mm.brk = (unsigned long)_end;
+ setup_initial_init_mm((void *)PAGE_OFFSET, _etext, _edata, _end);
#if defined(CONFIG_BOOTPARAM)
strncpy(m68k_command_line, CONFIG_BOOTPARAM_STRING, CL_SIZE);
diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
index d1b7988efc91..5e4104f07a44 100644
--- a/arch/m68k/kernel/setup_no.c
+++ b/arch/m68k/kernel/setup_no.c
@@ -87,10 +87,7 @@ void __init setup_arch(char **cmdline_p)
memory_start = PAGE_ALIGN(_ramstart);
memory_end = _ramend;
- init_mm.start_code = (unsigned long) &_stext;
- init_mm.end_code = (unsigned long) &_etext;
- init_mm.end_data = (unsigned long) &_edata;
- init_mm.brk = (unsigned long) 0;
+ setup_initial_init_mm(_stext, _etext, _edata, NULL);
config_BSP(&command_line[0], sizeof(command_line));
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 046465906c82..41921acdc9d8 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -209,9 +209,9 @@ static inline void p4d_clear(p4d_t *p4dp)
p4d_val(*p4dp) = (unsigned long)invalid_pud_table;
}
-static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+static inline pud_t *p4d_pgtable(p4d_t p4d)
{
- return p4d_val(p4d);
+ return (pud_t *)p4d_val(p4d);
}
#define p4d_phys(p4d) virt_to_phys((void *)p4d_val(p4d))
@@ -313,9 +313,9 @@ static inline void pud_clear(pud_t *pudp)
#endif
#ifndef __PAGETABLE_PMD_FOLDED
-static inline unsigned long pud_page_vaddr(pud_t pud)
+static inline pmd_t *pud_pgtable(pud_t pud)
{
- return pud_val(pud);
+ return (pmd_t *)pud_val(pud);
}
#define pud_phys(pud) virt_to_phys((void *)pud_val(pud))
#define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT))
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index af82e996f412..41725eaf8bac 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -294,10 +294,7 @@ void __init setup_arch(char **cmdline_p)
setup_cpuinfo();
- init_mm.start_code = (unsigned long)&_stext;
- init_mm.end_code = (unsigned long)&_etext;
- init_mm.end_data = (unsigned long)&_edata;
- init_mm.brk = (unsigned long)&_end;
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
/* setup bootmem allocator */
setup_memory();
diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c
index d2f21957e99c..cf8d687a2644 100644
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -156,10 +156,7 @@ void __init setup_arch(char **cmdline_p)
memory_start = memblock_start_of_DRAM();
memory_end = memblock_end_of_DRAM();
- init_mm.start_code = (unsigned long) _stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) _end;
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
init_task.thread.kregs = &fake_regs;
/* Keep a copy of command line */
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index c6f9e7b9f7cb..8ae2da6ac097 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -293,10 +293,7 @@ void __init setup_arch(char **cmdline_p)
#endif
/* process 1's initial memory region is the kernel code/data */
- init_mm.start_code = (unsigned long)_stext;
- init_mm.end_code = (unsigned long)_etext;
- init_mm.end_data = (unsigned long)_edata;
- init_mm.brk = (unsigned long)_end;
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start == initrd_end) {
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 7f33c29764cc..43937af127b1 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -322,8 +322,8 @@ static inline void pmd_clear(pmd_t *pmd) {
#if CONFIG_PGTABLE_LEVELS == 3
-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_address(pud)))
-#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
+#define pud_pgtable(pud) ((pmd_t *) __va(pud_address(pud)))
+#define pud_page(pud) virt_to_page((void *)pud_pgtable(pud))
/* For 64 bit we have three level tables */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 4d9941b2fe51..5d34a8646f08 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1051,8 +1051,15 @@ extern struct page *p4d_page(p4d_t p4d);
/* Pointers in the page table tree are physical addresses */
#define __pgtable_ptr_val(ptr) __pa(ptr)
-#define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS)
-#define p4d_page_vaddr(p4d) __va(p4d_val(p4d) & ~P4D_MASKED_BITS)
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+ return (pud_t *)__va(p4d_val(p4d) & ~P4D_MASKED_BITS);
+}
+
+static inline pmd_t *pud_pgtable(pud_t pud)
+{
+ return (pmd_t *)__va(pud_val(pud) & ~PUD_MASKED_BITS);
+}
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index a46fd37ad552..77797a2a82eb 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -64,6 +64,8 @@ extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long end, int psize);
+void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize);
extern void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
index fe2f4c9acd9e..10f5cf444d72 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
@@ -56,10 +56,14 @@
#define p4d_none(p4d) (!p4d_val(p4d))
#define p4d_bad(p4d) (p4d_val(p4d) == 0)
#define p4d_present(p4d) (p4d_val(p4d) != 0)
-#define p4d_page_vaddr(p4d) (p4d_val(p4d) & ~P4D_MASKED_BITS)
#ifndef __ASSEMBLY__
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+ return (pud_t *) (p4d_val(p4d) & ~P4D_MASKED_BITS);
+}
+
static inline void p4d_clear(p4d_t *p4dp)
{
*p4dp = __p4d(0);
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 53fbfdfac93d..d081704b13fb 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -162,7 +162,11 @@ static inline void pud_clear(pud_t *pudp)
#define pud_bad(pud) (!is_kernel_addr(pud_val(pud)) \
|| (pud_val(pud) & PUD_BAD_BITS))
#define pud_present(pud) (pud_val(pud) != 0)
-#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
+
+static inline pmd_t *pud_pgtable(pud_t pud)
+{
+ return (pmd_t *)(pud_val(pud) & ~PUD_MASKED_BITS);
+}
extern struct page *pud_page(pud_t pud);
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 160422a439aa..09a9ae5f3656 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -83,5 +83,11 @@ static inline int mm_is_thread_local(struct mm_struct *mm)
}
#endif
+#define arch_supports_page_table_move arch_supports_page_table_move
+static inline bool arch_supports_page_table_move(void)
+{
+ return radix_enabled();
+}
+
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_TLB_H */
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 26328fd2990c..aa9c2d01424a 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -926,10 +926,7 @@ void __init setup_arch(char **cmdline_p)
klp_init_thread_info(&init_task);
- init_mm.start_code = (unsigned long)_stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long)_end;
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
mm_iommu_init(&init_mm);
irqstack_early_init();
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index cb91071eef52..23d3e08911d3 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -32,7 +32,13 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long st
struct hstate *hstate = hstate_file(vma->vm_file);
psize = hstate_get_psize(hstate);
- radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
+ /*
+ * Flush PWC even if we get PUD_SIZE hugetlb invalidate to keep this simpler.
+ */
+ if (end - start >= PUD_SIZE)
+ radix__flush_tlb_pwc_range_psize(vma->vm_mm, start, end, psize);
+ else
+ radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
}
/*
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 2176a5f70746..e50ddf129c15 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -820,7 +820,7 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
continue;
}
- pmd_base = (pmd_t *)pud_page_vaddr(*pud);
+ pmd_base = pud_pgtable(*pud);
remove_pmd_table(pmd_base, addr, next);
free_pmd_table(pmd_base, pud);
}
@@ -854,7 +854,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
continue;
}
- pud_base = (pud_t *)p4d_page_vaddr(*p4d);
+ pud_base = p4d_pgtable(*p4d);
remove_pud_table(pud_base, addr, next);
free_pud_table(pud_base, p4d);
}
@@ -1105,7 +1105,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
pmd_t *pmd;
int i;
- pmd = (pmd_t *)pud_page_vaddr(*pud);
+ pmd = pud_pgtable(*pud);
pud_clear(pud);
flush_tlb_kernel_range(addr, addr + PUD_SIZE);
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index ae12fb5559cb..aefc100d79a7 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -1111,14 +1111,13 @@ static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
unsigned long start, unsigned long end)
-
{
unsigned long pid;
unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
unsigned long page_size = 1UL << page_shift;
unsigned long nr_pages = (end - start) >> page_shift;
bool fullmm = (end == TLB_FLUSH_ALL);
- bool flush_pid;
+ bool flush_pid, flush_pwc = false;
enum tlb_flush_type type;
pid = mm->context.id;
@@ -1137,8 +1136,16 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
flush_pid = nr_pages > tlb_single_page_flush_ceiling;
else
flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
+ /*
+ * full pid flush already does the PWC flush. if it is not full pid
+ * flush check the range is more than PMD and force a pwc flush
+ * mremap() depends on this behaviour.
+ */
+ if (!flush_pid && (end - start) >= PMD_SIZE)
+ flush_pwc = true;
if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
+ unsigned long type = H_RPTI_TYPE_TLB;
unsigned long tgt = H_RPTI_TARGET_CMMU;
unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
@@ -1146,19 +1153,20 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
if (atomic_read(&mm->context.copros) > 0)
tgt |= H_RPTI_TARGET_NMMU;
- pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
- start, end);
+ if (flush_pwc)
+ type |= H_RPTI_TYPE_PWC;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
} else if (flush_pid) {
+ /*
+ * We are now flushing a range larger than PMD size force a RIC_FLUSH_ALL
+ */
if (type == FLUSH_TYPE_LOCAL) {
- _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
} else {
if (cputlb_use_tlbie()) {
- if (mm_needs_flush_escalation(mm))
- _tlbie_pid(pid, RIC_FLUSH_ALL);
- else
- _tlbie_pid(pid, RIC_FLUSH_TLB);
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
} else {
- _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
}
}
} else {
@@ -1174,6 +1182,9 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
if (type == FLUSH_TYPE_LOCAL) {
asm volatile("ptesync": : :"memory");
+ if (flush_pwc)
+ /* For PWC, only one flush is needed */
+ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbiel_va_range(hstart, hend, pid,
@@ -1181,6 +1192,8 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
ppc_after_tlbiel_barrier();
} else if (cputlb_use_tlbie()) {
asm volatile("ptesync": : :"memory");
+ if (flush_pwc)
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbie_va_range(hstart, hend, pid,
@@ -1188,10 +1201,10 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
} else {
_tlbiel_va_range_multicast(mm,
- start, end, pid, page_size, mmu_virtual_psize, false);
+ start, end, pid, page_size, mmu_virtual_psize, flush_pwc);
if (hflush)
_tlbiel_va_range_multicast(mm,
- hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
+ hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc);
}
}
out:
@@ -1265,9 +1278,6 @@ void radix__flush_all_lpid_guest(unsigned int lpid)
_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}
-static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long end, int psize);
-
void radix__tlb_flush(struct mmu_gather *tlb)
{
int psize = 0;
@@ -1374,8 +1384,8 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}
-static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long end, int psize)
+void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
{
__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index cc6e2f94517f..78c8cf01db5f 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -105,7 +105,7 @@ struct page *p4d_page(p4d_t p4d)
VM_WARN_ON(!p4d_huge(p4d));
return pte_page(p4d_pte(p4d));
}
- return virt_to_page(p4d_page_vaddr(p4d));
+ return virt_to_page(p4d_pgtable(p4d));
}
#endif
@@ -115,7 +115,7 @@ struct page *pud_page(pud_t pud)
VM_WARN_ON(!pud_huge(pud));
return pte_page(pud_pte(pud));
}
- return virt_to_page(pud_page_vaddr(pud));
+ return virt_to_page(pud_pgtable(pud));
}
/*
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 0e1bb1cedd13..6794145603de 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -102,6 +102,8 @@ config PPC_BOOK3S_64
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_HUGETLBFS
select ARCH_SUPPORTS_NUMA_BALANCING
+ select HAVE_MOVE_PMD
+ select HAVE_MOVE_PUD
select IRQ_WORK
select PPC_MM_SLICES
select PPC_HAVE_KUEP
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 469a70bd8da6..c711ccd7047e 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -26,8 +26,8 @@ config RISCV
select ARCH_HAS_KCOV
select ARCH_HAS_MMIOWB
select ARCH_HAS_PTE_SPECIAL
- select ARCH_HAS_SET_DIRECT_MAP
- select ARCH_HAS_SET_MEMORY
+ select ARCH_HAS_SET_DIRECT_MAP if MMU
+ select ARCH_HAS_SET_MEMORY if MMU
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index f3b0da64c6c8..0e863f3f7187 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -60,9 +60,9 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}
-static inline unsigned long pud_page_vaddr(pud_t pud)
+static inline pmd_t *pud_pgtable(pud_t pud)
{
- return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
+ return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
}
static inline struct page *pud_page(pud_t pud)
diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h
index 977ee6181dab..6c316093a1e5 100644
--- a/arch/riscv/include/asm/unistd.h
+++ b/arch/riscv/include/asm/unistd.h
@@ -9,6 +9,7 @@
*/
#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_MEMFD_SECRET
#include <uapi/asm/unistd.h>
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 9a1b7a0603b2..8e318f207ac6 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -264,10 +264,7 @@ static void __init parse_dtb(void)
void __init setup_arch(char **cmdline_p)
{
parse_dtb();
- init_mm.start_code = (unsigned long) _stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) _end;
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
*cmdline_p = boot_command_line;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b58ee83f30e3..5486d82470b5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1028,10 +1028,7 @@ void __init setup_arch(char **cmdline_p)
ROOT_DEV = Root_RAM0;
- init_mm.start_code = (unsigned long) _text;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) _end;
+ setup_initial_init_mm(_text, _etext, _edata, _end);
if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
nospec_auto_detect();
diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h
index 82d74472dfcd..56bf35c2f29c 100644
--- a/arch/sh/include/asm/pgtable-3level.h
+++ b/arch/sh/include/asm/pgtable-3level.h
@@ -32,9 +32,9 @@ typedef struct { unsigned long long pmd; } pmd_t;
#define pmd_val(x) ((x).pmd)
#define __pmd(x) ((pmd_t) { (x) } )
-static inline unsigned long pud_page_vaddr(pud_t pud)
+static inline pmd_t *pud_pgtable(pud_t pud)
{
- return pud_val(pud);
+ return (pmd_t *)pud_val(pud);
}
/* only used by the stubbed out hugetlb gup code, should never be called */
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 4144be650d41..1fcb6659822a 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -294,10 +294,7 @@ void __init setup_arch(char **cmdline_p)
if (!MOUNT_ROOT_RDONLY)
root_mountflags &= ~MS_RDONLY;
- init_mm.start_code = (unsigned long) _text;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) _end;
+ setup_initial_init_mm(_text, _etext, _edata, _end);
code_resource.start = virt_to_phys(_text);
code_resource.end = virt_to_phys(_etext)-1;
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index ebaf374b55ab..ffccfe3b22ed 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -151,13 +151,13 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
return (unsigned long)__nocache_va(v << 4);
}
-static inline unsigned long pud_page_vaddr(pud_t pud)
+static inline pmd_t *pud_pgtable(pud_t pud)
{
if (srmmu_device_memory(pud_val(pud))) {
- return ~0;
+ return (pmd_t *)~0;
} else {
unsigned long v = pud_val(pud) & SRMMU_PTD_PMASK;
- return (unsigned long)__nocache_va(v << 4);
+ return (pmd_t *)__nocache_va(v << 4);
}
}
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index e0ee48ec3903..4679e45c8348 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -841,23 +841,23 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
return ((unsigned long) __va(pfn << PAGE_SHIFT));
}
-static inline unsigned long pud_page_vaddr(pud_t pud)
+static inline pmd_t *pud_pgtable(pud_t pud)
{
pte_t pte = __pte(pud_val(pud));
unsigned long pfn;
pfn = pte_pfn(pte);
- return ((unsigned long) __va(pfn << PAGE_SHIFT));
+ return ((pmd_t *) __va(pfn << PAGE_SHIFT));
}
#define pmd_page(pmd) virt_to_page((void *)pmd_page_vaddr(pmd))
-#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
+#define pud_page(pud) virt_to_page((void *)pud_pgtable(pud))
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
#define pud_present(pud) (pud_val(pud) != 0U)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
-#define p4d_page_vaddr(p4d) \
- ((unsigned long) __va(p4d_val(p4d)))
+#define p4d_pgtable(p4d) \
+ ((pud_t *) __va(p4d_val(p4d)))
#define p4d_present(p4d) (p4d_val(p4d) != 0U)
#define p4d_clear(p4dp) (p4d_val(*(p4dp)) = 0UL)
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index 9289a86643a9..cb896e6121c8 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -83,7 +83,7 @@ static inline void pud_clear (pud_t *pud)
}
#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK)
-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
+#define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & PAGE_MASK))
static inline unsigned long pte_pfn(pte_t pte)
{
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index fba2f615119a..ce763a12311c 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -451,3 +451,4 @@
444 i386 landlock_create_ruleset sys_landlock_create_ruleset
445 i386 landlock_add_rule sys_landlock_add_rule
446 i386 landlock_restrict_self sys_landlock_restrict_self
+447 i386 memfd_secret sys_memfd_secret
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index af973e400053..f6b57799c1ea 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -368,6 +368,7 @@
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
+447 common memfd_secret sys_memfd_secret
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index ec0d8e106646..448cd01eb3ec 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -836,9 +836,9 @@ static inline int pud_present(pud_t pud)
return pud_flags(pud) & _PAGE_PRESENT;
}
-static inline unsigned long pud_page_vaddr(pud_t pud)
+static inline pmd_t *pud_pgtable(pud_t pud)
{
- return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud));
+ return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud));
}
/*
@@ -877,9 +877,9 @@ static inline int p4d_present(p4d_t p4d)
return p4d_flags(p4d) & _PAGE_PRESENT;
}
-static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+static inline pud_t *p4d_pgtable(p4d_t p4d)
{
- return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
+ return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
}
/*
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 299c20f0a38b..ea4fe192189d 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -69,7 +69,7 @@ static void printk_stack_address(unsigned long address, int reliable,
const char *log_lvl)
{
touch_nmi_watchdog();
- printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
+ printk("%s %s%pBb\n", log_lvl, reliable ? "" : "? ", (void *)address);
}
static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9f1d9215a9fb..bff3a784aec5 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -847,10 +847,7 @@ void __init setup_arch(char **cmdline_p)
if (!boot_params.hdr.root_flags)
root_mountflags &= ~MS_RDONLY;
- init_mm.start_code = (unsigned long) _text;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = _brk_end;
+ setup_initial_init_mm(_text, _etext, _edata, (void *)_brk_end);
code_resource.start = __pa_symbol(_text);
code_resource.end = __pa_symbol(_etext)-1;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 65ea58527176..ddeaba947eb3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -194,8 +194,8 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end)
spin_lock(pgt_lock);
if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
- BUG_ON(p4d_page_vaddr(*p4d)
- != p4d_page_vaddr(*p4d_ref));
+ BUG_ON(p4d_pgtable(*p4d)
+ != p4d_pgtable(*p4d_ref));
if (p4d_none(*p4d))
set_p4d(p4d, *p4d_ref);
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 156cd235659f..ad8a5c586a35 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1134,7 +1134,7 @@ static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
unsigned long start, unsigned long end)
{
if (unmap_pte_range(pmd, start, end))
- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+ if (try_to_free_pmd_page(pud_pgtable(*pud)))
pud_clear(pud);
}
@@ -1178,7 +1178,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
* Try again to free the PMD page if haven't succeeded above.
*/
if (!pud_none(*pud))
- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+ if (try_to_free_pmd_page(pud_pgtable(*pud)))
pud_clear(pud);
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1303ff6ef7be..3364fe62b903 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -801,7 +801,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
pte_t *pte;
int i;
- pmd = (pmd_t *)pud_page_vaddr(*pud);
+ pmd = pud_pgtable(*pud);
pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
if (!pmd_sv)
return 0;
diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h
index 2f6b1befb129..03b7dae47dd4 100644
--- a/include/asm-generic/pgtable-nop4d.h
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -41,7 +41,7 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
#define __p4d(x) ((p4d_t) { __pgd(x) })
#define pgd_page(pgd) (p4d_page((p4d_t){ pgd }))
-#define pgd_page_vaddr(pgd) (p4d_page_vaddr((p4d_t){ pgd }))
+#define pgd_page_vaddr(pgd) ((unsigned long)(p4d_pgtable((p4d_t){ pgd })))
/*
* allocating and freeing a p4d is trivial: the 1-entry p4d is
diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h
index 3e13acd019ae..10789cf51d16 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-generic/pgtable-nopmd.h
@@ -51,7 +51,7 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address)
#define __pmd(x) ((pmd_t) { __pud(x) } )
#define pud_page(pud) (pmd_page((pmd_t){ pud }))
-#define pud_page_vaddr(pud) (pmd_page_vaddr((pmd_t){ pud }))
+#define pud_pgtable(pud) ((pmd_t *)(pmd_page_vaddr((pmd_t){ pud })))
/*
* allocating and freeing a pmd is trivial: the 1-entry pmd is
diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h
index a9d751fbda9e..eb70c6d7ceff 100644
--- a/include/asm-generic/pgtable-nopud.h
+++ b/include/asm-generic/pgtable-nopud.h
@@ -49,7 +49,7 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
#define __pud(x) ((pud_t) { __p4d(x) })
#define p4d_page(p4d) (pud_page((pud_t){ p4d }))
-#define p4d_page_vaddr(p4d) (pud_page_vaddr((pud_t){ p4d }))
+#define p4d_pgtable(p4d) ((pud_t *)(pud_pgtable((pud_t){ p4d })))
/*
* allocating and freeing a pud is trivial: the 1-entry pud is
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 6bdd94cff4e2..abe089c27529 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -214,10 +214,10 @@ static inline struct xbc_node * __init xbc_node_get_subkey(struct xbc_node *node
* @value: Iterated value of array entry.
*
* Iterate array entries of given @key under @node. Each array entry node
- * is stroed to @anode and @value. If the @node doesn't have @key node,
+ * is stored to @anode and @value. If the @node doesn't have @key node,
* it does nothing.
* Note that even if the found key node has only one value (not array)
- * this executes block once. Hoever, if the found key node has no value
+ * this executes block once. However, if the found key node has no value
* (key-only node), this does nothing. So don't use this for testing the
* key-value pair existence.
*/
diff --git a/include/linux/buildid.h b/include/linux/buildid.h
index 40232f90db6e..3b7a0ff4642f 100644
--- a/include/linux/buildid.h
+++ b/include/linux/buildid.h
@@ -8,5 +8,13 @@
int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
__u32 *size);
+int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size);
+
+#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE)
+extern unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX];
+void init_vmlinux_build_id(void);
+#else
+static inline void init_vmlinux_build_id(void) { }
+#endif
#endif
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 4221888bdcd6..c24098c7acca 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -35,12 +35,12 @@ enum compact_result {
COMPACT_CONTINUE,
/*
- * The full zone was compacted scanned but wasn't successfull to compact
+ * The full zone was compacted scanned but wasn't successful to compact
* suitable pages.
*/
COMPACT_COMPLETE,
/*
- * direct compaction has scanned part of the zone but wasn't successfull
+ * direct compaction has scanned part of the zone but wasn't successful
* to compact suitable pages.
*/
COMPACT_PARTIAL_SKIPPED,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index bfc4690de4f4..f3689a52bfd0 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -259,7 +259,7 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool
/**
* for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
* @cpu: the (optionally unsigned) integer iterator
- * @mask: the cpumask poiter
+ * @mask: the cpumask pointer
* @start: the start location
*
* The implementation does not assume any bit in @mask is set (including @start).
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 206bde8308b2..de62a722431e 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -38,8 +38,12 @@ phys_addr_t paddr_vmcoreinfo_note(void);
#define VMCOREINFO_OSRELEASE(value) \
vmcoreinfo_append_str("OSRELEASE=%s\n", value)
-#define VMCOREINFO_BUILD_ID(value) \
- vmcoreinfo_append_str("BUILD-ID=%s\n", value)
+#define VMCOREINFO_BUILD_ID() \
+ ({ \
+ static_assert(sizeof(vmlinux_build_id) == 20); \
+ vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \
+ })
+
#define VMCOREINFO_PAGESIZE(value) \
vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
#define VMCOREINFO_SYMBOL(name) \
@@ -69,10 +73,6 @@ extern unsigned char *vmcoreinfo_data;
extern size_t vmcoreinfo_size;
extern u32 *vmcoreinfo_note;
-/* raw contents of kernel .notes section */
-extern const void __start_notes __weak;
-extern const void __stop_notes __weak;
-
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len);
void final_note(Elf_Word *buf);
diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 8d2dde23e9fb..32444686b6ff 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -18,7 +18,7 @@ enum debug_obj_state {
struct debug_obj_descr;
/**
- * struct debug_obj - representaion of an tracked object
+ * struct debug_obj - representation of an tracked object
* @node: hlist node to link the object into the tracker list
* @state: tracked object state
* @astate: current active state
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 866a0fa104c4..2fd2e91d5107 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -113,7 +113,7 @@ int hmm_range_fault(struct hmm_range *range);
* HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range
*
* When waiting for mmu notifiers we need some kind of time out otherwise we
- * could potentialy wait for ever, 1000ms ie 1s sounds like a long time to
+ * could potentially wait for ever, 1000ms ie 1s sounds like a long time to
* wait already.
*/
#define HMM_RANGE_DEFAULT_TIMEOUT 1000
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8e0f32f935bd..f7ca1a3870ea 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -51,7 +51,7 @@ struct hugepage_subpool {
long count;
long max_hpages; /* Maximum huge pages or -1 if no maximum. */
long used_hpages; /* Used count against maximum, includes */
- /* both alloced and reserved pages. */
+ /* both allocated and reserved pages. */
struct hstate *hstate;
long min_hpages; /* Minimum huge pages or -1 if no minimum. */
long rsv_hpages; /* Pages reserved against global pool to */
@@ -85,7 +85,7 @@ struct resv_map {
* by a resv_map's lock. The set of regions within the resv_map represent
* reservations for huge pages, or huge pages that have already been
* instantiated within the map. The from and to elements are huge page
- * indicies into the associated mapping. from indicates the starting index
+ * indices into the associated mapping. from indicates the starting index
* of the region. to represents the first index past the end of the region.
*
* For example, a file region structure with from == 0 and to == 4 represents
@@ -797,7 +797,7 @@ static inline bool hugepage_migration_supported(struct hstate *h)
* It determines whether or not a huge page should be placed on
* movable zone or not. Movability of any huge page should be
* required only if huge page size is supported for migration.
- * There wont be any reason for the huge page to be movable if
+ * There won't be any reason for the huge page to be movable if
* it is not migratable to start with. Also the size of the huge
* page should be large enough to be placed under a movable zone
* and still feasible enough to be migratable. Just the presence
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 465060acc981..a1d6fc82d7f0 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -7,6 +7,7 @@
#define _LINUX_KALLSYMS_H
#include <linux/errno.h>
+#include <linux/buildid.h>
#include <linux/kernel.h>
#include <linux/stddef.h>
#include <linux/mm.h>
@@ -15,8 +16,10 @@
#include <asm/sections.h>
#define KSYM_NAME_LEN 128
-#define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \
- 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1)
+#define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s %s]") + \
+ (KSYM_NAME_LEN - 1) + \
+ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + \
+ (BUILD_ID_SIZE_MAX * 2) + 1)
struct cred;
struct module;
@@ -91,8 +94,10 @@ const char *kallsyms_lookup(unsigned long addr,
/* Look up a kernel symbol and return it in a text buffer. */
extern int sprint_symbol(char *buffer, unsigned long address);
+extern int sprint_symbol_build_id(char *buffer, unsigned long address);
extern int sprint_symbol_no_offset(char *buffer, unsigned long address);
extern int sprint_backtrace(char *buffer, unsigned long address);
+extern int sprint_backtrace_build_id(char *buffer, unsigned long address);
int lookup_symbol_name(unsigned long addr, char *symname);
int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name);
@@ -128,6 +133,12 @@ static inline int sprint_symbol(char *buffer, unsigned long addr)
return 0;
}
+static inline int sprint_symbol_build_id(char *buffer, unsigned long address)
+{
+ *buffer = '\0';
+ return 0;
+}
+
static inline int sprint_symbol_no_offset(char *buffer, unsigned long addr)
{
*buffer = '\0';
@@ -140,6 +151,12 @@ static inline int sprint_backtrace(char *buffer, unsigned long addr)
return 0;
}
+static inline int sprint_backtrace_build_id(char *buffer, unsigned long addr)
+{
+ *buffer = '\0';
+ return 0;
+}
+
static inline int lookup_symbol_name(unsigned long addr, char *symname)
{
return -ERANGE;
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 9dcaa3e582c9..1b5fceb565df 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -146,7 +146,7 @@ typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item,
* @lru: the lru pointer.
* @nid: the node id to scan from.
* @memcg: the cgroup to scan from.
- * @isolate: callback function that is resposible for deciding what to do with
+ * @isolate: callback function that is responsible for deciding what to do with
* the item currently being scanned
* @cb_arg: opaque type that will be passed to @isolate
* @nr_to_walk: how many items to scan.
@@ -172,7 +172,7 @@ unsigned long list_lru_walk_one(struct list_lru *lru,
* @lru: the lru pointer.
* @nid: the node id to scan from.
* @memcg: the cgroup to scan from.
- * @isolate: callback function that is resposible for deciding what to do with
+ * @isolate: callback function that is responsible for deciding what to do with
* the item currently being scanned
* @cb_arg: opaque type that will be passed to @isolate
* @nr_to_walk: how many items to scan.
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 429d67d815ce..07add7882a5d 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -32,7 +32,7 @@ This header file (and its .c file; kernel-doc of functions see there)
Because of this later property, it is called "lru_cache".
As it actually Tracks Objects in an Active SeT, we could also call it
toast (incidentally that is what may happen to the data on the
- backend storage uppon next resync, if we don't get it right).
+ backend storage upon next resync, if we don't get it right).
What for?
@@ -152,7 +152,7 @@ struct lc_element {
* for paranoia, and for "lc_element_to_index" */
unsigned lc_index;
/* if we want to track a larger set of objects,
- * it needs to become arch independend u64 */
+ * it needs to become an architecture independent u64 */
unsigned lc_number;
/* special label when on free list */
#define LC_FREE (~0U)
@@ -263,7 +263,7 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char
*
* Allows (expects) the set to be "dirty". Note that the reference counts and
* order on the active and lru lists may still change. Used to serialize
- * changing transactions. Returns true if we aquired the lock.
+ * changing transactions. Returns true if we acquired the lock.
*/
static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
{
@@ -275,7 +275,7 @@ static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
* @lc: the lru cache to operate on
*
* Note that the reference counts and order on the active and lru lists may
- * still change. Only works on a "clean" set. Returns true if we aquired the
+ * still change. Only works on a "clean" set. Returns true if we acquired the
* lock, which means there are no pending changes, and any further attempt to
* change the set will not succeed until the next lc_unlock().
*/
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 788a0b1323d0..57453dba41b9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -238,6 +238,9 @@ int __add_to_page_cache_locked(struct page *page, struct address_space *mapping,
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
+void setup_initial_init_mm(void *start_code, void *end_code,
+ void *end_data, void *brk);
+
/*
* Linux kernel virtual memory manager primitives.
* The idea being to have a "virtual" mm in the same way
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 6692da8d121d..45fc2c81e370 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -33,7 +33,7 @@ struct mmu_interval_notifier;
*
* @MMU_NOTIFY_SOFT_DIRTY: soft dirty accounting (still same page and same
* access flags). User should soft dirty the page in the end callback to make
- * sure that anyone relying on soft dirtyness catch pages that might be written
+ * sure that anyone relying on soft dirtiness catch pages that might be written
* through non CPU mappings.
*
* @MMU_NOTIFY_RELEASE: used during mmu_interval_notifier invalidate to signal
@@ -167,7 +167,7 @@ struct mmu_notifier_ops {
* decrease the refcount. If the refcount is decreased on
* invalidate_range_start() then the VM can free pages as page
* table entries are removed. If the refcount is only
- * droppped on invalidate_range_end() then the driver itself
+ * dropped on invalidate_range_end() then the driver itself
* will drop the last refcount but it must take care to flush
* any secondary tlb before doing the final free on the
* page. Pages will no longer be referenced by the linux
@@ -196,7 +196,7 @@ struct mmu_notifier_ops {
* If invalidate_range() is used to manage a non-CPU TLB with
* shared page-tables, it not necessary to implement the
* invalidate_range_start()/end() notifiers, as
- * invalidate_range() alread catches the points in time when an
+ * invalidate_range() already catches the points in time when an
* external TLB range needs to be flushed. For more in depth
* discussion on this see Documentation/vm/mmu_notifier.rst
*
@@ -369,7 +369,7 @@ mmu_interval_read_retry(struct mmu_interval_notifier *interval_sub,
* mmu_interval_read_retry() will return true.
*
* False is not reliable and only suggests a collision may not have
- * occured. It can be called many times and does not have to hold the user
+ * occurred. It can be called many times and does not have to hold the user
* provided lock.
*
* This call can be used as part of loops and other expensive operations to
diff --git a/include/linux/module.h b/include/linux/module.h
index 8100bb477d86..8a298d820dbc 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/stat.h>
+#include <linux/buildid.h>
#include <linux/compiler.h>
#include <linux/cache.h>
#include <linux/kmod.h>
@@ -369,6 +370,11 @@ struct module {
/* Unique handle for this module */
char name[MODULE_NAME_LEN];
+#ifdef CONFIG_STACKTRACE_BUILD_ID
+ /* Module build ID */
+ unsigned char build_id[BUILD_ID_SIZE_MAX];
+#endif
+
/* Sysfs stuff. */
struct module_kobject mkobj;
struct module_attribute *modinfo_attrs;
@@ -636,7 +642,7 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr);
const char *module_address_lookup(unsigned long addr,
unsigned long *symbolsize,
unsigned long *offset,
- char **modname,
+ char **modname, const unsigned char **modbuildid,
char *namebuf);
int lookup_module_symbol_name(unsigned long addr, char *symname);
int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name);
@@ -740,6 +746,7 @@ static inline const char *module_address_lookup(unsigned long addr,
unsigned long *symbolsize,
unsigned long *offset,
char **modname,
+ const unsigned char **modbuildid,
char *namebuf)
{
return NULL;
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index ac398e143c9a..567c3ddba2c4 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -119,7 +119,7 @@ static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m)
* The inline keyword gives the compiler room to decide to inline, or
* not inline a function as it sees best. However, as these functions
* are called in both __init and non-__init functions, if they are not
- * inlined we will end up with a section mis-match error (of the type of
+ * inlined we will end up with a section mismatch error (of the type of
* freeable items not being freed). So we must use __always_inline here
* to fix the problem. If other functions in the future also end up in
* this situation they will also need to be annotated as __always_inline
@@ -515,7 +515,7 @@ static inline int node_random(const nodemask_t *mask)
#define for_each_online_node(node) for_each_node_state(node, N_ONLINE)
/*
- * For nodemask scrach area.
+ * For nodemask scratch area.
* NODEMASK_ALLOC(type, name) allocates an object with a specified type and
* name.
*/
@@ -528,7 +528,7 @@ static inline int node_random(const nodemask_t *mask)
#define NODEMASK_FREE(m) do {} while (0)
#endif
-/* A example struture for using NODEMASK_ALLOC, used in mempolicy. */
+/* Example structure for using NODEMASK_ALLOC, used in mempolicy. */
struct nodemask_scratch {
nodemask_t mask1;
nodemask_t mask2;
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index dff7040f629a..af1071535de8 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -412,7 +412,7 @@ do { \
* instead.
*
* If there is no other protection through preempt disable and/or disabling
- * interupts then one of these RMW operations can show unexpected behavior
+ * interrupts then one of these RMW operations can show unexpected behavior
* because the execution thread was rescheduled on another processor or an
* interrupt occurred and the same percpu variable was modified from the
* interrupt context.
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 16c35a728b4c..ae16a9856305 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -213,7 +213,7 @@ static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
* percpu_ref_get - increment a percpu refcount
* @ref: percpu_ref to get
*
- * Analagous to atomic_long_inc().
+ * Analogous to atomic_long_inc().
*
* This function is safe to call as long as @ref is between init and exit.
*/
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e82660f7b9e4..d147480cdefc 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -106,7 +106,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
#ifndef pmd_offset
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
- return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
+ return pud_pgtable(*pud) + pmd_index(address);
}
#define pmd_offset pmd_offset
#endif
@@ -114,7 +114,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#ifndef pud_offset
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
- return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
+ return p4d_pgtable(*p4d) + pud_index(address);
}
#define pud_offset pud_offset
#endif
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 6f70572b2938..ecf87484814f 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -474,7 +474,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter)
* Iterates over sg entries mapping page-by-page. On each successful
* iteration, @miter->page points to the mapped page and
* @miter->length bytes of data can be accessed at @miter->addr. As
- * long as an interation is enclosed between start and stop, the user
+ * long as an iteration is enclosed between start and stop, the user
* is free to choose control structure and when to stop.
*
* @miter->consumed is set to @miter->length on each iteration. It
diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
new file mode 100644
index 000000000000..21c3771e6a56
--- /dev/null
+++ b/include/linux/secretmem.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_SECRETMEM_H
+#define _LINUX_SECRETMEM_H
+
+#ifdef CONFIG_SECRETMEM
+
+extern const struct address_space_operations secretmem_aops;
+
+static inline bool page_is_secretmem(struct page *page)
+{
+ struct address_space *mapping;
+
+ /*
+ * Using page_mapping() is quite slow because of the actual call
+ * instruction and repeated compound_head(page) inside the
+ * page_mapping() function.
+ * We know that secretmem pages are not compound and LRU so we can
+ * save a couple of cycles here.
+ */
+ if (PageCompound(page) || !PageLRU(page))
+ return false;
+
+ mapping = (struct address_space *)
+ ((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
+
+ if (mapping != page->mapping)
+ return false;
+
+ return mapping->a_ops == &secretmem_aops;
+}
+
+bool vma_is_secretmem(struct vm_area_struct *vma);
+bool secretmem_active(void);
+
+#else
+
+static inline bool vma_is_secretmem(struct vm_area_struct *vma)
+{
+ return false;
+}
+
+static inline bool page_is_secretmem(struct page *page)
+{
+ return false;
+}
+
+static inline bool secretmem_active(void)
+{
+ return false;
+}
+
+#endif /* CONFIG_SECRETMEM */
+
+#endif /* _LINUX_SECRETMEM_H */
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index fe1aa4e54680..f36be5166c19 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -28,7 +28,19 @@ static inline bool kernel_page_present(struct page *page)
{
return true;
}
+#else /* CONFIG_ARCH_HAS_SET_DIRECT_MAP */
+/*
+ * Some architectures, e.g. ARM64 can disable direct map modifications at
+ * boot time. Let them overrive this query.
+ */
+#ifndef can_set_direct_map
+static inline bool can_set_direct_map(void)
+{
+ return true;
+}
+#define can_set_direct_map can_set_direct_map
#endif
+#endif /* CONFIG_ARCH_HAS_SET_DIRECT_MAP */
#ifndef set_mce_nospec
static inline int set_mce_nospec(unsigned long pfn, bool unmap)
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 1eac79ce57d4..9814fff58a69 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -4,7 +4,7 @@
/*
* This struct is used to pass information from page reclaim to the shrinkers.
- * We consolidate the values for easier extention later.
+ * We consolidate the values for easier extension later.
*
* The 'gfpmask' refers to the allocation we are currently trying to
* fulfil.
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 586128d5c3b8..69c9a7010081 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1050,6 +1050,7 @@ asmlinkage long sys_landlock_create_ruleset(const struct landlock_ruleset_attr _
asmlinkage long sys_landlock_add_rule(int ruleset_fd, enum landlock_rule_type rule_type,
const void __user *rule_attr, __u32 flags);
asmlinkage long sys_landlock_restrict_self(int ruleset_fd, __u32 flags);
+asmlinkage long sys_memfd_secret(unsigned int flags);
/*
* Architecture-specific system calls
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 1dabd6f22486..2644425b6dce 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -29,7 +29,7 @@ struct notifier_block; /* in notifier.h */
#define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */
/*
- * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
+ * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC.
*
* If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after
* shadow memory has been mapped. It's used to handle allocation errors so that
@@ -247,7 +247,7 @@ static inline void set_vm_flush_reset_perms(void *addr)
extern long vread(char *buf, char *addr, unsigned long count);
/*
- * Internals. Dont't use..
+ * Internals. Don't use..
*/
extern struct list_head vmap_area_list;
extern __init void vm_area_add_early(struct vm_struct *vm);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index f211961ce1da..a9d6fcd95f42 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -873,8 +873,13 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
#define __NR_landlock_restrict_self 446
__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
+#ifdef __ARCH_WANT_MEMFD_SECRET
+#define __NR_memfd_secret 447
+__SYSCALL(__NR_memfd_secret, sys_memfd_secret)
+#endif
+
#undef __NR_syscalls
-#define __NR_syscalls 447
+#define __NR_syscalls 448
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index f3956fc11de6..35687dcb1a42 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -97,5 +97,6 @@
#define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
#define Z3FOLD_MAGIC 0x33
#define PPC_CMM_MAGIC 0xc7571590
+#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */
#endif /* __LINUX_MAGIC_H__ */
diff --git a/init/Kconfig b/init/Kconfig
index 55f9f7738ebb..bb0d6e6262b1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1847,6 +1847,7 @@ config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
depends on SLUB && SYSFS
+ select STACKDEPOT if STACKTRACE_SUPPORT
help
SLUB has extensive debug support features. Disabling these can
result in significant savings in code size. This also disables
diff --git a/init/main.c b/init/main.c
index af521b30a3b8..f5b8246e8aa1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -45,6 +45,7 @@
#include <linux/srcu.h>
#include <linux/moduleparam.h>
#include <linux/kallsyms.h>
+#include <linux/buildid.h>
#include <linux/writeback.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
@@ -913,6 +914,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
set_task_stack_end_magic(&init_task);
smp_setup_processor_id();
debug_objects_early_init();
+ init_vmlinux_build_id();
cgroup_init_early();
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index da449c1cdca7..eb53f5ec62c9 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -4,6 +4,7 @@
* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
*/
+#include <linux/buildid.h>
#include <linux/crash_core.h>
#include <linux/utsname.h>
#include <linux/vmalloc.h>
@@ -378,53 +379,6 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
}
EXPORT_SYMBOL(paddr_vmcoreinfo_note);
-#define NOTES_SIZE (&__stop_notes - &__start_notes)
-#define BUILD_ID_MAX SHA1_DIGEST_SIZE
-#define NT_GNU_BUILD_ID 3
-
-struct elf_note_section {
- struct elf_note n_hdr;
- u8 n_data[];
-};
-
-/*
- * Add build ID from .notes section as generated by the GNU ld(1)
- * or LLVM lld(1) --build-id option.
- */
-static void add_build_id_vmcoreinfo(void)
-{
- char build_id[BUILD_ID_MAX * 2 + 1];
- int n_remain = NOTES_SIZE;
-
- while (n_remain >= sizeof(struct elf_note)) {
- const struct elf_note_section *note_sec =
- &__start_notes + NOTES_SIZE - n_remain;
- const u32 n_namesz = note_sec->n_hdr.n_namesz;
-
- if (note_sec->n_hdr.n_type == NT_GNU_BUILD_ID &&
- n_namesz != 0 &&
- !strcmp((char *)&note_sec->n_data[0], "GNU")) {
- if (note_sec->n_hdr.n_descsz <= BUILD_ID_MAX) {
- const u32 n_descsz = note_sec->n_hdr.n_descsz;
- const u8 *s = &note_sec->n_data[n_namesz];
-
- s = PTR_ALIGN(s, 4);
- bin2hex(build_id, s, n_descsz);
- build_id[2 * n_descsz] = '\0';
- VMCOREINFO_BUILD_ID(build_id);
- return;
- }
- pr_warn("Build ID is too large to include in vmcoreinfo: %u > %u\n",
- note_sec->n_hdr.n_descsz,
- BUILD_ID_MAX);
- return;
- }
- n_remain -= sizeof(struct elf_note) +
- ALIGN(note_sec->n_hdr.n_namesz, 4) +
- ALIGN(note_sec->n_hdr.n_descsz, 4);
- }
-}
-
static int __init crash_save_vmcoreinfo_init(void)
{
vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
@@ -443,7 +397,7 @@ static int __init crash_save_vmcoreinfo_init(void)
}
VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
- add_build_id_vmcoreinfo();
+ VMCOREINFO_BUILD_ID();
VMCOREINFO_PAGESIZE(PAGE_SIZE);
VMCOREINFO_SYMBOL(init_uts_ns);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index c851ca0ed357..0ba87982d017 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -25,7 +25,10 @@
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
+#include <linux/build_bug.h>
#include <linux/compiler.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
/*
* These will be re-linked against their real values
@@ -297,21 +300,14 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
get_symbol_pos(addr, symbolsize, offset);
return 1;
}
- return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf) ||
+ return !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf) ||
!!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
}
-/*
- * Lookup an address
- * - modname is set to NULL if it's in the kernel.
- * - We guarantee that the returned name is valid until we reschedule even if.
- * It resides in a module.
- * - We also guarantee that modname will be valid until rescheduled.
- */
-const char *kallsyms_lookup(unsigned long addr,
- unsigned long *symbolsize,
- unsigned long *offset,
- char **modname, char *namebuf)
+static const char *kallsyms_lookup_buildid(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset, char **modname,
+ const unsigned char **modbuildid, char *namebuf)
{
const char *ret;
@@ -327,6 +323,8 @@ const char *kallsyms_lookup(unsigned long addr,
namebuf, KSYM_NAME_LEN);
if (modname)
*modname = NULL;
+ if (modbuildid)
+ *modbuildid = NULL;
ret = namebuf;
goto found;
@@ -334,7 +332,7 @@ const char *kallsyms_lookup(unsigned long addr,
/* See if it's in a module or a BPF JITed image. */
ret = module_address_lookup(addr, symbolsize, offset,
- modname, namebuf);
+ modname, modbuildid, namebuf);
if (!ret)
ret = bpf_address_lookup(addr, symbolsize,
offset, modname, namebuf);
@@ -348,6 +346,22 @@ found:
return ret;
}
+/*
+ * Lookup an address
+ * - modname is set to NULL if it's in the kernel.
+ * - We guarantee that the returned name is valid until we reschedule even if.
+ * It resides in a module.
+ * - We also guarantee that modname will be valid until rescheduled.
+ */
+const char *kallsyms_lookup(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset,
+ char **modname, char *namebuf)
+{
+ return kallsyms_lookup_buildid(addr, symbolsize, offset, modname,
+ NULL, namebuf);
+}
+
int lookup_symbol_name(unsigned long addr, char *symname)
{
int res;
@@ -404,15 +418,17 @@ found:
/* Look up a kernel symbol and return it in a text buffer. */
static int __sprint_symbol(char *buffer, unsigned long address,
- int symbol_offset, int add_offset)
+ int symbol_offset, int add_offset, int add_buildid)
{
char *modname;
+ const unsigned char *buildid;
const char *name;
unsigned long offset, size;
int len;
address += symbol_offset;
- name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
+ name = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid,
+ buffer);
if (!name)
return sprintf(buffer, "0x%lx", address - symbol_offset);
@@ -424,8 +440,19 @@ static int __sprint_symbol(char *buffer, unsigned long address,
if (add_offset)
len += sprintf(buffer + len, "+%#lx/%#lx", offset, size);
- if (modname)
- len += sprintf(buffer + len, " [%s]", modname);
+ if (modname) {
+ len += sprintf(buffer + len, " [%s", modname);
+#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
+ if (add_buildid && buildid) {
+ /* build ID should match length of sprintf */
+#if IS_ENABLED(CONFIG_MODULES)
+ static_assert(sizeof(typeof_member(struct module, build_id)) == 20);
+#endif
+ len += sprintf(buffer + len, " %20phN", buildid);
+ }
+#endif
+ len += sprintf(buffer + len, "]");
+ }
return len;
}
@@ -443,11 +470,28 @@ static int __sprint_symbol(char *buffer, unsigned long address,
*/
int sprint_symbol(char *buffer, unsigned long address)
{
- return __sprint_symbol(buffer, address, 0, 1);
+ return __sprint_symbol(buffer, address, 0, 1, 0);
}
EXPORT_SYMBOL_GPL(sprint_symbol);
/**
+ * sprint_symbol_build_id - Look up a kernel symbol and return it in a text buffer
+ * @buffer: buffer to be stored
+ * @address: address to lookup
+ *
+ * This function looks up a kernel symbol with @address and stores its name,
+ * offset, size, module name and module build ID to @buffer if possible. If no
+ * symbol was found, just saves its @address as is.
+ *
+ * This function returns the number of bytes stored in @buffer.
+ */
+int sprint_symbol_build_id(char *buffer, unsigned long address)
+{
+ return __sprint_symbol(buffer, address, 0, 1, 1);
+}
+EXPORT_SYMBOL_GPL(sprint_symbol_build_id);
+
+/**
* sprint_symbol_no_offset - Look up a kernel symbol and return it in a text buffer
* @buffer: buffer to be stored
* @address: address to lookup
@@ -460,7 +504,7 @@ EXPORT_SYMBOL_GPL(sprint_symbol);
*/
int sprint_symbol_no_offset(char *buffer, unsigned long address)
{
- return __sprint_symbol(buffer, address, 0, 0);
+ return __sprint_symbol(buffer, address, 0, 0, 0);
}
EXPORT_SYMBOL_GPL(sprint_symbol_no_offset);
@@ -480,7 +524,27 @@ EXPORT_SYMBOL_GPL(sprint_symbol_no_offset);
*/
int sprint_backtrace(char *buffer, unsigned long address)
{
- return __sprint_symbol(buffer, address, -1, 1);
+ return __sprint_symbol(buffer, address, -1, 1, 0);
+}
+
+/**
+ * sprint_backtrace_build_id - Look up a backtrace symbol and return it in a text buffer
+ * @buffer: buffer to be stored
+ * @address: address to lookup
+ *
+ * This function is for stack backtrace and does the same thing as
+ * sprint_symbol() but with modified/decreased @address. If there is a
+ * tail-call to the function marked "noreturn", gcc optimized out code after
+ * the call so that the stack-saved return address could point outside of the
+ * caller. This function ensures that kallsyms will find the original caller
+ * by decreasing @address. This function also appends the module build ID to
+ * the @buffer if @address is within a kernel module.
+ *
+ * This function returns the number of bytes stored in @buffer.
+ */
+int sprint_backtrace_build_id(char *buffer, unsigned long address)
+{
+ return __sprint_symbol(buffer, address, -1, 1, 1);
}
/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
diff --git a/kernel/module.c b/kernel/module.c
index 64bd61b2d3ad..ed13917ea5f3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -13,6 +13,7 @@
#include <linux/trace_events.h>
#include <linux/init.h>
#include <linux/kallsyms.h>
+#include <linux/buildid.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
@@ -1465,6 +1466,13 @@ resolve_symbol_wait(struct module *mod,
return ksym;
}
+#ifdef CONFIG_KALLSYMS
+static inline bool sect_empty(const Elf_Shdr *sect)
+{
+ return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
+}
+#endif
+
/*
* /sys/module/foo/sections stuff
* J. Corbet <corbet@lwn.net>
@@ -1472,11 +1480,6 @@ resolve_symbol_wait(struct module *mod,
#ifdef CONFIG_SYSFS
#ifdef CONFIG_KALLSYMS
-static inline bool sect_empty(const Elf_Shdr *sect)
-{
- return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
-}
-
struct module_sect_attr {
struct bin_attribute battr;
unsigned long address;
@@ -2797,6 +2800,26 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
}
#endif /* CONFIG_KALLSYMS */
+#if IS_ENABLED(CONFIG_KALLSYMS) && IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
+static void init_build_id(struct module *mod, const struct load_info *info)
+{
+ const Elf_Shdr *sechdr;
+ unsigned int i;
+
+ for (i = 0; i < info->hdr->e_shnum; i++) {
+ sechdr = &info->sechdrs[i];
+ if (!sect_empty(sechdr) && sechdr->sh_type == SHT_NOTE &&
+ !build_id_parse_buf((void *)sechdr->sh_addr, mod->build_id,
+ sechdr->sh_size))
+ break;
+ }
+}
+#else
+static void init_build_id(struct module *mod, const struct load_info *info)
+{
+}
+#endif
+
static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug, unsigned int num)
{
if (!debug)
@@ -4021,6 +4044,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
goto free_arch_cleanup;
}
+ init_build_id(mod, info);
dynamic_debug_setup(mod, info->debug, info->num_debug);
/* Ftrace init must be called in the MODULE_STATE_UNFORMED state */
@@ -4254,6 +4278,7 @@ const char *module_address_lookup(unsigned long addr,
unsigned long *size,
unsigned long *offset,
char **modname,
+ const unsigned char **modbuildid,
char *namebuf)
{
const char *ret = NULL;
@@ -4264,6 +4289,13 @@ const char *module_address_lookup(unsigned long addr,
if (mod) {
if (modname)
*modname = mod->name;
+ if (modbuildid) {
+#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
+ *modbuildid = mod->build_id;
+#else
+ *modbuildid = NULL;
+#endif
+ }
ret = find_kallsyms_symbol(mod, addr, size, offset);
}
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index da0b41914177..559acef3fddb 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -31,6 +31,7 @@
#include <linux/genhd.h>
#include <linux/ktime.h>
#include <linux/security.h>
+#include <linux/secretmem.h>
#include <trace/events/power.h>
#include "power.h"
@@ -81,7 +82,9 @@ void hibernate_release(void)
bool hibernation_available(void)
{
- return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION);
+ return nohibernate == 0 &&
+ !security_locked_down(LOCKDOWN_HIBERNATION) &&
+ !secretmem_active();
}
/**
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index dad4d994641e..30971b1dd4a9 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -358,6 +358,8 @@ COND_SYSCALL(pkey_mprotect);
COND_SYSCALL(pkey_alloc);
COND_SYSCALL(pkey_free);
+/* memfd_secret */
+COND_SYSCALL(memfd_secret);
/*
* Architecture specific weak syscall entries.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8acc01d7d816..2987925efe7d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -35,6 +35,17 @@ config PRINTK_CALLER
no option to enable/disable at the kernel command line parameter or
sysfs interface.
+config STACKTRACE_BUILD_ID
+ bool "Show build ID information in stacktraces"
+ depends on PRINTK
+ help
+ Selecting this option adds build ID information for symbols in
+ stacktraces printed with the printk format '%p[SR]b'.
+
+ This option is intended for distros where debuginfo is not easily
+ accessible but can be downloaded given the build ID of the vmlinux or
+ kernel module where the function is located.
+
config CONSOLE_LOGLEVEL_DEFAULT
int "Default console loglevel (1-15)"
range 1 15
@@ -1282,7 +1293,7 @@ config PROVE_RAW_LOCK_NESTING
option expect lockdep splats until these problems have been fully
addressed which is work in progress. This config switch allows to
identify and analyze these problems. It will be removed and the
- check permanentely enabled once the main issues have been fixed.
+ check permanently enabled once the main issues have been fixed.
If unsure, select N.
@@ -1448,7 +1459,7 @@ config DEBUG_LOCKING_API_SELFTESTS
Say Y here if you want the kernel to run a short self-test during
bootup. The self-test checks whether common types of locking bugs
are detected by debugging mechanisms or not. (if you disable
- lock debugging then those bugs wont be detected of course.)
+ lock debugging then those bugs won't be detected of course.)
The following locking APIs are covered: spinlocks, rwlocks,
mutexes and rwsems.
@@ -1928,7 +1939,7 @@ config FAIL_IO_TIMEOUT
thus exercising the error handling.
Only works with drivers that use the generic timeout handling,
- for others it wont do anything.
+ for others it won't do anything.
config FAIL_FUTEX
bool "Fault-injection capability for futexes"
diff --git a/lib/asn1_encoder.c b/lib/asn1_encoder.c
index 41e71aae3ef6..27bbe891714f 100644
--- a/lib/asn1_encoder.c
+++ b/lib/asn1_encoder.c
@@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(asn1_encode_oid);
/**
* asn1_encode_length() - encode a length to follow an ASN.1 tag
* @data: pointer to encode at
- * @data_len: pointer to remaning length (adjusted by routine)
+ * @data_len: pointer to remaining length (adjusted by routine)
* @len: length to encode
*
* This routine can encode lengths up to 65535 using the ASN.1 rules.
diff --git a/lib/buildid.c b/lib/buildid.c
index 6156997c3895..dfc62625cae4 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -1,36 +1,31 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/buildid.h>
+#include <linux/cache.h>
#include <linux/elf.h>
+#include <linux/kernel.h>
#include <linux/pagemap.h>
#define BUILD_ID 3
+
/*
* Parse build id from the note segment. This logic can be shared between
* 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are
* identical.
*/
-static inline int parse_build_id(void *page_addr,
- unsigned char *build_id,
- __u32 *size,
- void *note_start,
- Elf32_Word note_size)
+static int parse_build_id_buf(unsigned char *build_id,
+ __u32 *size,
+ const void *note_start,
+ Elf32_Word note_size)
{
Elf32_Word note_offs = 0, new_offs;
- /* check for overflow */
- if (note_start < page_addr || note_start + note_size < note_start)
- return -EINVAL;
-
- /* only supports note that fits in the first page */
- if (note_start + note_size > page_addr + PAGE_SIZE)
- return -EINVAL;
-
while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
if (nhdr->n_type == BUILD_ID &&
nhdr->n_namesz == sizeof("GNU") &&
+ !strcmp((char *)(nhdr + 1), "GNU") &&
nhdr->n_descsz > 0 &&
nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
memcpy(build_id,
@@ -49,11 +44,29 @@ static inline int parse_build_id(void *page_addr,
break;
note_offs = new_offs;
}
+
return -EINVAL;
}
+static inline int parse_build_id(const void *page_addr,
+ unsigned char *build_id,
+ __u32 *size,
+ const void *note_start,
+ Elf32_Word note_size)
+{
+ /* check for overflow */
+ if (note_start < page_addr || note_start + note_size < note_start)
+ return -EINVAL;
+
+ /* only supports note that fits in the first page */
+ if (note_start + note_size > page_addr + PAGE_SIZE)
+ return -EINVAL;
+
+ return parse_build_id_buf(build_id, size, note_start, note_size);
+}
+
/* Parse build ID from 32-bit ELF */
-static int get_build_id_32(void *page_addr, unsigned char *build_id,
+static int get_build_id_32(const void *page_addr, unsigned char *build_id,
__u32 *size)
{
Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
@@ -78,7 +91,7 @@ static int get_build_id_32(void *page_addr, unsigned char *build_id,
}
/* Parse build ID from 64-bit ELF */
-static int get_build_id_64(void *page_addr, unsigned char *build_id,
+static int get_build_id_64(const void *page_addr, unsigned char *build_id,
__u32 *size)
{
Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
@@ -108,7 +121,7 @@ static int get_build_id_64(void *page_addr, unsigned char *build_id,
* @build_id: buffer to store build id, at least BUILD_ID_SIZE long
* @size: returns actual build id size in case of success
*
- * Returns 0 on success, otherwise error (< 0).
+ * Return: 0 on success, -EINVAL otherwise
*/
int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
__u32 *size)
@@ -147,3 +160,32 @@ out:
put_page(page);
return ret;
}
+
+/**
+ * build_id_parse_buf - Get build ID from a buffer
+ * @buf: Elf note section(s) to parse
+ * @buf_size: Size of @buf in bytes
+ * @build_id: Build ID parsed from @buf, at least BUILD_ID_SIZE_MAX long
+ *
+ * Return: 0 on success, -EINVAL otherwise
+ */
+int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size)
+{
+ return parse_build_id_buf(build_id, NULL, buf, buf_size);
+}
+
+#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE)
+unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX] __ro_after_init;
+
+/**
+ * init_vmlinux_build_id - Compute and stash the running kernel's build ID
+ */
+void __init init_vmlinux_build_id(void)
+{
+ extern const void __start_notes __weak;
+ extern const void __stop_notes __weak;
+ unsigned int size = &__stop_notes - &__start_notes;
+
+ build_id_parse_buf(&__start_notes, vmlinux_build_id, size);
+}
+#endif
diff --git a/lib/devres.c b/lib/devres.c
index bdb06898a977..b0e1c6702c71 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -355,7 +355,7 @@ static void pcim_iomap_release(struct device *gendev, void *res)
* detach.
*
* This function might sleep when the table is first allocated but can
- * be safely called without context and guaranteed to succed once
+ * be safely called without context and guaranteed to succeed once
* allocated.
*/
void __iomem * const *pcim_iomap_table(struct pci_dev *pdev)
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index 27f16872320d..cd3387bb34e5 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -5,6 +5,7 @@
*/
#include <linux/kernel.h>
+#include <linux/buildid.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
@@ -36,6 +37,14 @@ void __init dump_stack_set_arch_desc(const char *fmt, ...)
va_end(args);
}
+#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
+#define BUILD_ID_FMT " %20phN"
+#define BUILD_ID_VAL vmlinux_build_id
+#else
+#define BUILD_ID_FMT "%s"
+#define BUILD_ID_VAL ""
+#endif
+
/**
* dump_stack_print_info - print generic debug info for dump_stack()
* @log_lvl: log level
@@ -45,13 +54,13 @@ void __init dump_stack_set_arch_desc(const char *fmt, ...)
*/
void dump_stack_print_info(const char *log_lvl)
{
- printk("%sCPU: %d PID: %d Comm: %.20s %s%s %s %.*s\n",
+ printk("%sCPU: %d PID: %d Comm: %.20s %s%s %s %.*s" BUILD_ID_FMT "\n",
log_lvl, raw_smp_processor_id(), current->pid, current->comm,
kexec_crash_loaded() ? "Kdump: loaded " : "",
print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
+ init_utsname()->version, BUILD_ID_VAL);
if (dump_stack_arch_desc_str[0] != '\0')
printk("%sHardware name: %s\n",
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index d3ce78298832..cb5abb42c16a 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -991,7 +991,7 @@ static int ddebug_dyndbg_param_cb(char *param, char *val,
ddebug_exec_queries((val ? val : "+p"), modname);
- return 0; /* query failure shouldnt stop module load */
+ return 0; /* query failure shouldn't stop module load */
}
/* handle both dyndbg and $module.dyndbg params at boot */
diff --git a/lib/fonts/font_pearl_8x8.c b/lib/fonts/font_pearl_8x8.c
index b1678ed0017c..ae98ca17982e 100644
--- a/lib/fonts/font_pearl_8x8.c
+++ b/lib/fonts/font_pearl_8x8.c
@@ -3,7 +3,7 @@
/* */
/* Font file generated by cpi2fnt */
/* ------------------------------ */
-/* Combined with the alpha-numeric */
+/* Combined with the alphanumeric */
/* portion of Greg Harp's old PEARL */
/* font (from earlier versions of */
/* linux-m86k) by John Shifflett */
diff --git a/lib/kfifo.c b/lib/kfifo.c
index 70dab9ac7827..12f5a347aa13 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -415,7 +415,7 @@ static unsigned int __kfifo_peek_n(struct __kfifo *fifo, size_t recsize)
)
/*
- * __kfifo_poke_n internal helper function for storeing the length of
+ * __kfifo_poke_n internal helper function for storing the length of
* the record into the fifo
*/
static void __kfifo_poke_n(struct __kfifo *fifo, unsigned int n, size_t recsize)
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 1e1e37762799..0fb59e92ca2d 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -104,7 +104,7 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head,
* @head: the list to sort
* @cmp: the elements comparison function
*
- * The comparison funtion @cmp must return > 0 if @a should sort after
+ * The comparison function @cmp must return > 0 if @a should sort after
* @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should
* sort before @b *or* their original order should be preserved. It is
* always called with the element that came first in the input in @a,
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 1d051ef66afe..86029ad5ead4 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -619,7 +619,7 @@ static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype,
* Validates all attributes in the specified attribute stream against the
* specified policy. Validation depends on the validate flags passed, see
* &enum netlink_validation for more details on that.
- * See documenation of struct nla_policy for more details.
+ * See documentation of struct nla_policy for more details.
*
* Returns 0 on success or a negative error code.
*/
@@ -633,7 +633,7 @@ int __nla_validate(const struct nlattr *head, int len, int maxtype,
EXPORT_SYMBOL(__nla_validate);
/**
- * nla_policy_len - Determin the max. length of a policy
+ * nla_policy_len - Determine the max. length of a policy
* @policy: policy to use
* @n: number of policies
*
diff --git a/lib/oid_registry.c b/lib/oid_registry.c
index 3dfaa836e7c5..e592d48b1974 100644
--- a/lib/oid_registry.c
+++ b/lib/oid_registry.c
@@ -124,7 +124,7 @@ EXPORT_SYMBOL_GPL(parse_OID);
* @bufsize: The size of the buffer
*
* The OID is rendered into the buffer in "a.b.c.d" format and the number of
- * bytes is returned. -EBADMSG is returned if the data could not be intepreted
+ * bytes is returned. -EBADMSG is returned if the data could not be interpreted
* and -ENOBUFS if the buffer was too small.
*/
int sprint_oid(const void *data, size_t datasize, char *buffer, size_t bufsize)
diff --git a/lib/pldmfw/pldmfw.c b/lib/pldmfw/pldmfw.c
index e5d4b3b2af81..6e77eb6d8e72 100644
--- a/lib/pldmfw/pldmfw.c
+++ b/lib/pldmfw/pldmfw.c
@@ -82,7 +82,7 @@ pldm_check_fw_space(struct pldmfw_priv *data, size_t offset, size_t length)
* @bytes_to_move: number of bytes to move the offset forward by
*
* Check that there is enough space past the current offset, and then move the
- * offset forward by this ammount.
+ * offset forward by this amount.
*
* Returns: zero on success, or -EFAULT if the image is too small to fit the
* expected length.
diff --git a/lib/reed_solomon/test_rslib.c b/lib/reed_solomon/test_rslib.c
index 4eb29f365ece..d9d1c33aebda 100644
--- a/lib/reed_solomon/test_rslib.c
+++ b/lib/reed_solomon/test_rslib.c
@@ -385,7 +385,7 @@ static void test_bc(struct rs_control *rs, int len, int errs,
/*
* We check that the returned word is actually a
- * codeword. The obious way to do this would be to
+ * codeword. The obvious way to do this would be to
* compute the syndrome, but we don't want to replicate
* that code here. However, all the codes are in
* systematic form, and therefore we can encode the
diff --git a/lib/refcount.c b/lib/refcount.c
index ebac8b7d15a7..a207a8f22b3c 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -164,7 +164,7 @@ EXPORT_SYMBOL(refcount_dec_and_lock);
* @flags: saved IRQ-flags if the is acquired
*
* Same as refcount_dec_and_lock() above except that the spinlock is acquired
- * with disabled interupts.
+ * with disabled interrupts.
*
* Return: true and hold spinlock if able to decrement refcount to 0, false
* otherwise
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index c949c1e3b87c..e12bbfb240b8 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -703,7 +703,7 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
*
* Returns zero if successful.
*
- * Returns -EAGAIN if resize event occured. Note that the iterator
+ * Returns -EAGAIN if resize event occurred. Note that the iterator
* will rewind back to the beginning and you may use it immediately
* by calling rhashtable_walk_next.
*
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 47b3691058eb..b25db9be938a 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -583,7 +583,7 @@ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
/*
* Once the clear bit is set, the bit may be allocated out.
*
- * Orders READ/WRITE on the asssociated instance(such as request
+ * Orders READ/WRITE on the associated instance(such as request
* of blk_mq) by this bit for avoiding race with re-allocation,
* and its pair is the memory barrier implied in __sbitmap_get_word.
*
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index a59778946404..27efa6178153 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -38,7 +38,7 @@ EXPORT_SYMBOL(sg_next);
* @sg: The scatterlist
*
* Description:
- * Allows to know how many entries are in sg, taking into acount
+ * Allows to know how many entries are in sg, taking into account
* chaining as well
*
**/
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(sg_nents);
*
* Description:
* Determines the number of entries in sg that are required to meet
- * the supplied length, taking into acount chaining as well
+ * the supplied length, taking into account chaining as well
*
* Returns:
* the number of sg entries needed, negative error on failure
diff --git a/lib/seq_buf.c b/lib/seq_buf.c
index 6dafde851333..0a68f7aa85d6 100644
--- a/lib/seq_buf.c
+++ b/lib/seq_buf.c
@@ -289,7 +289,7 @@ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc)
}
/**
- * seq_buf_to_user - copy the squence buffer to user space
+ * seq_buf_to_user - copy the sequence buffer to user space
* @s: seq_buf descriptor
* @ubuf: The userspace memory location to copy to
* @cnt: The amount to copy
diff --git a/lib/sort.c b/lib/sort.c
index 3ad454411997..aa18153864d2 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -51,7 +51,7 @@ static bool is_aligned(const void *base, size_t size, unsigned char align)
* which basically all CPUs have, to minimize loop overhead computations.
*
* For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
- * bottom of the loop, even though the zero flag is stil valid from the
+ * bottom of the loop, even though the zero flag is still valid from the
* subtract (since the intervening mov instructions don't alter the flags).
* Gcc 8.1.0 doesn't have that problem.
*/
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index df9179f4f441..0a2e417f83cb 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -11,7 +11,7 @@
* Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc
* and free stacks repeat a lot, we save about 100x space.
* Stacks are never removed from depot, so we store them contiguously one after
- * another in a contiguos memory allocation.
+ * another in a contiguous memory allocation.
*
* Author: Alexander Potapenko <glider@google.com>
* Copyright (C) 2016 Google, Inc.
diff --git a/lib/test_bitops.c b/lib/test_bitops.c
index 471141ddd691..3b7bcbee84db 100644
--- a/lib/test_bitops.c
+++ b/lib/test_bitops.c
@@ -15,7 +15,7 @@
* get_count_order/long
*/
-/* use an enum because thats the most common BITMAP usage */
+/* use an enum because that's the most common BITMAP usage */
enum bitops_fun {
BITOPS_4 = 4,
BITOPS_7 = 7,
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 4dc4dcbecd12..d500320778c7 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1095,7 +1095,7 @@ static struct bpf_test tests[] = {
{
"RET_A",
.u.insns = {
- /* check that unitialized X and A contain zeros */
+ /* check that uninitialized X and A contain zeros */
BPF_STMT(BPF_MISC | BPF_TXA, 0),
BPF_STMT(BPF_RET | BPF_A, 0)
},
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index f5f54766cbc2..8f7b0b2f6e11 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -651,7 +651,7 @@ static void kasan_global_oob(struct kunit *test)
{
/*
* Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS
- * from failing here and panicing the kernel, access the array via a
+ * from failing here and panicking the kernel, access the array via a
* volatile pointer, which will prevent the compiler from being able to
* determine the array bounds.
*
diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index 38c250fbace3..ce1589391413 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -286,7 +286,7 @@ static int tally_work_test(struct kmod_test_device_info *info)
* If this ran it means *all* tasks were created fine and we
* are now just collecting results.
*
- * Only propagate errors, do not override with a subsequent sucess case.
+ * Only propagate errors, do not override with a subsequent success case.
*/
static void tally_up_work(struct kmod_test_device *test_dev)
{
@@ -543,7 +543,7 @@ static int trigger_config_run(struct kmod_test_device *test_dev)
* wrong with the setup of the test. If the test setup went fine
* then userspace must just check the result of config->test_result.
* One issue with relying on the return from a call in the kernel
- * is if the kernel returns a possitive value using this trigger
+ * is if the kernel returns a positive value using this trigger
* will not return the value to userspace, it would be lost.
*
* By not relying on capturing the return value of tests we are using
@@ -585,7 +585,7 @@ trigger_config_store(struct device *dev,
* Note: any return > 0 will be treated as success
* and the error value will not be available to userspace.
* Do not rely on trying to send to userspace a test value
- * return value as possitive return errors will be lost.
+ * return value as positive return errors will be lost.
*/
if (WARN_ON(ret > 0))
return -EINVAL;
diff --git a/lib/test_scanf.c b/lib/test_scanf.c
index 48ff5747a4da..84fe09eaf55e 100644
--- a/lib/test_scanf.c
+++ b/lib/test_scanf.c
@@ -600,7 +600,7 @@ static void __init numbers_prefix_overflow(void)
/*
* 0x prefix in a field of width 2 using %i conversion: first field
* converts to 0. Next field scan starts at the character after "0x",
- * which will convert if can be intepreted as decimal but will fail
+ * which will convert if can be interpreted as decimal but will fail
* if it contains any hex digits (since no 0x prefix).
*/
test_number_prefix(long long, "0x67", "%2lli%lli", 0, 67, 2, check_ll);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 2926cc27623f..26c83943748a 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -993,8 +993,12 @@ char *symbol_string(char *buf, char *end, void *ptr,
value = (unsigned long)ptr;
#ifdef CONFIG_KALLSYMS
- if (*fmt == 'B')
+ if (*fmt == 'B' && fmt[1] == 'b')
+ sprint_backtrace_build_id(sym, value);
+ else if (*fmt == 'B')
sprint_backtrace(sym, value);
+ else if (*fmt == 'S' && (fmt[1] == 'b' || (fmt[1] == 'R' && fmt[2] == 'b')))
+ sprint_symbol_build_id(sym, value);
else if (*fmt != 's')
sprint_symbol(sym, value);
else
@@ -2263,9 +2267,11 @@ early_param("no_hash_pointers", no_hash_pointers_enable);
* - 'S' For symbolic direct pointers (or function descriptors) with offset
* - 's' For symbolic direct pointers (or function descriptors) without offset
* - '[Ss]R' as above with __builtin_extract_return_addr() translation
+ * - 'S[R]b' as above with module build ID (for use in backtraces)
* - '[Ff]' %pf and %pF were obsoleted and later removed in favor of
* %ps and %pS. Be careful when re-using these specifiers.
* - 'B' For backtraced symbolic direct pointers with offset
+ * - 'Bb' as above with module build ID (for use in backtraces)
* - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref]
* - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201]
* - 'b[l]' For a bitmap, the number of bits is determined by the field
@@ -3417,7 +3423,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
while (*fmt) {
/* skip any white space in format */
- /* white space in format matchs any amount of
+ /* white space in format matches any amount of
* white space, including none, in the input.
*/
if (isspace(*fmt)) {
diff --git a/mm/Kconfig b/mm/Kconfig
index a02498c0e13d..40a9bfcd5062 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -885,4 +885,8 @@ config KMAP_LOCAL
# struct io_mapping based helper. Selected by drivers that need them
config IO_MAPPING
bool
+
+config SECRETMEM
+ def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
+
endmenu
diff --git a/mm/Makefile b/mm/Makefile
index 74b47c354682..e3436741d539 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -113,6 +113,7 @@ obj-$(CONFIG_CMA) += cma.o
obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
+obj-$(CONFIG_SECRETMEM) += secretmem.o
obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
diff --git a/mm/gup.c b/mm/gup.c
index 728d996767cb..42b8b1fa6521 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -10,6 +10,7 @@
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+#include <linux/secretmem.h>
#include <linux/sched/signal.h>
#include <linux/rwsem.h>
@@ -855,6 +856,9 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
struct follow_page_context ctx = { NULL };
struct page *page;
+ if (vma_is_secretmem(vma))
+ return NULL;
+
page = follow_page_mask(vma, address, foll_flags, &ctx);
if (ctx.pgmap)
put_dev_pagemap(ctx.pgmap);
@@ -988,6 +992,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
return -EOPNOTSUPP;
+ if (vma_is_secretmem(vma))
+ return -EFAULT;
+
if (write) {
if (!(vm_flags & VM_WRITE)) {
if (!(gup_flags & FOLL_FORCE))
@@ -2170,6 +2177,11 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
if (!head)
goto pte_unmap;
+ if (unlikely(page_is_secretmem(page))) {
+ put_compound_head(head, 1, flags);
+ goto pte_unmap;
+ }
+
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
put_compound_head(head, 1, flags);
goto pte_unmap;
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 153162669f80..b4a6f38fb51d 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -40,3 +40,12 @@ struct mm_struct init_mm = {
.cpu_bitmap = CPU_BITS_NONE,
INIT_MM_CONTEXT(init_mm)
};
+
+void setup_initial_init_mm(void *start_code, void *end_code,
+ void *end_data, void *brk)
+{
+ init_mm.start_code = (unsigned long)start_code;
+ init_mm.end_code = (unsigned long)end_code;
+ init_mm.end_data = (unsigned long)end_data;
+ init_mm.brk = (unsigned long)brk;
+}
diff --git a/mm/internal.h b/mm/internal.h
index 2d7c9a2e0118..31ff935b2547 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -360,6 +360,9 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
extern void mlock_vma_page(struct page *page);
extern unsigned int munlock_vma_page(struct page *page);
+extern int mlock_future_check(struct mm_struct *mm, unsigned long flags,
+ unsigned long len);
+
/*
* Clear the page's PageMlocked(). This can be useful in a situation where
* we want to unconditionally remove a page from the pagecache -- e.g.,
diff --git a/mm/mlock.c b/mm/mlock.c
index 0d639bf48794..16d2ee160d43 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -23,6 +23,7 @@
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
+#include <linux/secretmem.h>
#include "internal.h"
@@ -503,7 +504,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
- vma_is_dax(vma))
+ vma_is_dax(vma) || vma_is_secretmem(vma))
/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
goto out;
diff --git a/mm/mmap.c b/mm/mmap.c
index aa9de981b659..ca54d36d203a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1352,9 +1352,8 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
return hint;
}
-static inline int mlock_future_check(struct mm_struct *mm,
- unsigned long flags,
- unsigned long len)
+int mlock_future_check(struct mm_struct *mm, unsigned long flags,
+ unsigned long len)
{
unsigned long locked, lock_limit;
diff --git a/mm/mremap.c b/mm/mremap.c
index a369a6100698..5989d3990020 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -25,7 +25,8 @@
#include <linux/userfaultfd_k.h>
#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/pgalloc.h>
#include "internal.h"
@@ -209,6 +210,15 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
drop_rmap_locks(vma);
}
+#ifndef arch_supports_page_table_move
+#define arch_supports_page_table_move arch_supports_page_table_move
+static inline bool arch_supports_page_table_move(void)
+{
+ return IS_ENABLED(CONFIG_HAVE_MOVE_PMD) ||
+ IS_ENABLED(CONFIG_HAVE_MOVE_PUD);
+}
+#endif
+
#ifdef CONFIG_HAVE_MOVE_PMD
static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
@@ -217,6 +227,8 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
struct mm_struct *mm = vma->vm_mm;
pmd_t pmd;
+ if (!arch_supports_page_table_move())
+ return false;
/*
* The destination pmd shouldn't be established, free_pgtables()
* should have released it.
@@ -258,8 +270,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
VM_BUG_ON(!pmd_none(*new_pmd));
- /* Set the new pmd */
- set_pmd_at(mm, new_addr, new_pmd, pmd);
+ pmd_populate(mm, new_pmd, pmd_pgtable(pmd));
flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
@@ -276,7 +287,7 @@ static inline bool move_normal_pmd(struct vm_area_struct *vma,
}
#endif
-#ifdef CONFIG_HAVE_MOVE_PUD
+#if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_HAVE_MOVE_PUD)
static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
{
@@ -284,6 +295,8 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
struct mm_struct *mm = vma->vm_mm;
pud_t pud;
+ if (!arch_supports_page_table_move())
+ return false;
/*
* The destination pud shouldn't be established, free_pgtables()
* should have released it.
@@ -306,8 +319,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
VM_BUG_ON(!pud_none(*new_pud));
- /* Set the new pud */
- set_pud_at(mm, new_addr, new_pud, pud);
+ pud_populate(mm, new_pud, pud_pgtable(pud));
flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
@@ -324,10 +336,61 @@ static inline bool move_normal_pud(struct vm_area_struct *vma,
}
#endif
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
+ unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
+{
+ spinlock_t *old_ptl, *new_ptl;
+ struct mm_struct *mm = vma->vm_mm;
+ pud_t pud;
+
+ /*
+ * The destination pud shouldn't be established, free_pgtables()
+ * should have released it.
+ */
+ if (WARN_ON_ONCE(!pud_none(*new_pud)))
+ return false;
+
+ /*
+ * We don't have to worry about the ordering of src and dst
+ * ptlocks because exclusive mmap_lock prevents deadlock.
+ */
+ old_ptl = pud_lock(vma->vm_mm, old_pud);
+ new_ptl = pud_lockptr(mm, new_pud);
+ if (new_ptl != old_ptl)
+ spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+
+ /* Clear the pud */
+ pud = *old_pud;
+ pud_clear(old_pud);
+
+ VM_BUG_ON(!pud_none(*new_pud));
+
+ /* Set the new pud */
+ /* mark soft_ditry when we add pud level soft dirty support */
+ set_pud_at(mm, new_addr, new_pud, pud);
+ flush_pud_tlb_range(vma, old_addr, old_addr + HPAGE_PUD_SIZE);
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
+ spin_unlock(old_ptl);
+
+ return true;
+}
+#else
+static bool move_huge_pud(struct vm_area_struct *vma, unsigned long old_addr,
+ unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
+{
+ WARN_ON_ONCE(1);
+ return false;
+
+}
+#endif
+
enum pgt_entry {
NORMAL_PMD,
HPAGE_PMD,
NORMAL_PUD,
+ HPAGE_PUD,
};
/*
@@ -347,6 +410,7 @@ static __always_inline unsigned long get_extent(enum pgt_entry entry,
mask = PMD_MASK;
size = PMD_SIZE;
break;
+ case HPAGE_PUD:
case NORMAL_PUD:
mask = PUD_MASK;
size = PUD_SIZE;
@@ -395,6 +459,12 @@ static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
move_huge_pmd(vma, old_addr, new_addr, old_entry,
new_entry);
break;
+ case HPAGE_PUD:
+ moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+ move_huge_pud(vma, old_addr, new_addr, old_entry,
+ new_entry);
+ break;
+
default:
WARN_ON_ONCE(1);
break;
@@ -414,6 +484,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long extent, old_end;
struct mmu_notifier_range range;
pmd_t *old_pmd, *new_pmd;
+ pud_t *old_pud, *new_pud;
old_end = old_addr + len;
flush_cache_range(vma, old_addr, old_end);
@@ -429,17 +500,24 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
* PUD level if possible.
*/
extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr);
- if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
- pud_t *old_pud, *new_pud;
- old_pud = get_old_pud(vma->vm_mm, old_addr);
- if (!old_pud)
+ old_pud = get_old_pud(vma->vm_mm, old_addr);
+ if (!old_pud)
+ continue;
+ new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
+ if (!new_pud)
+ break;
+ if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) {
+ if (extent == HPAGE_PUD_SIZE) {
+ move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr,
+ old_pud, new_pud, need_rmap_locks);
+ /* We ignore and continue on error? */
continue;
- new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
- if (!new_pud)
- break;
+ }
+ } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
+
if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
- old_pud, new_pud, need_rmap_locks))
+ old_pud, new_pud, true))
continue;
}
@@ -466,7 +544,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
* moving at the PMD level if possible.
*/
if (move_pgt_entry(NORMAL_PMD, vma, old_addr, new_addr,
- old_pmd, new_pmd, need_rmap_locks))
+ old_pmd, new_pmd, true))
continue;
}
diff --git a/mm/secretmem.c b/mm/secretmem.c
new file mode 100644
index 000000000000..f77d25467a14
--- /dev/null
+++ b/mm/secretmem.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corporation, 2021
+ *
+ * Author: Mike Rapoport <rppt@linux.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/swap.h>
+#include <linux/mount.h>
+#include <linux/memfd.h>
+#include <linux/bitops.h>
+#include <linux/printk.h>
+#include <linux/pagemap.h>
+#include <linux/syscalls.h>
+#include <linux/pseudo_fs.h>
+#include <linux/secretmem.h>
+#include <linux/set_memory.h>
+#include <linux/sched/signal.h>
+
+#include <uapi/linux/magic.h>
+
+#include <asm/tlbflush.h>
+
+#include "internal.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "secretmem: " fmt
+
+/*
+ * Define mode and flag masks to allow validation of the system call
+ * parameters.
+ */
+#define SECRETMEM_MODE_MASK (0x0)
+#define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK
+
+static bool secretmem_enable __ro_after_init;
+module_param_named(enable, secretmem_enable, bool, 0400);
+MODULE_PARM_DESC(secretmem_enable,
+ "Enable secretmem and memfd_secret(2) system call");
+
+static atomic_t secretmem_users;
+
+bool secretmem_active(void)
+{
+ return !!atomic_read(&secretmem_users);
+}
+
+static vm_fault_t secretmem_fault(struct vm_fault *vmf)
+{
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+ struct inode *inode = file_inode(vmf->vma->vm_file);
+ pgoff_t offset = vmf->pgoff;
+ gfp_t gfp = vmf->gfp_mask;
+ unsigned long addr;
+ struct page *page;
+ int err;
+
+ if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
+ return vmf_error(-EINVAL);
+
+retry:
+ page = find_lock_page(mapping, offset);
+ if (!page) {
+ page = alloc_page(gfp | __GFP_ZERO);
+ if (!page)
+ return VM_FAULT_OOM;
+
+ err = set_direct_map_invalid_noflush(page);
+ if (err) {
+ put_page(page);
+ return vmf_error(err);
+ }
+
+ __SetPageUptodate(page);
+ err = add_to_page_cache_lru(page, mapping, offset, gfp);
+ if (unlikely(err)) {
+ put_page(page);
+ /*
+ * If a split of large page was required, it
+ * already happened when we marked the page invalid
+ * which guarantees that this call won't fail
+ */
+ set_direct_map_default_noflush(page);
+ if (err == -EEXIST)
+ goto retry;
+
+ return vmf_error(err);
+ }
+
+ addr = (unsigned long)page_address(page);
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+ }
+
+ vmf->page = page;
+ return VM_FAULT_LOCKED;
+}
+
+static const struct vm_operations_struct secretmem_vm_ops = {
+ .fault = secretmem_fault,
+};
+
+static int secretmem_release(struct inode *inode, struct file *file)
+{
+ atomic_dec(&secretmem_users);
+ return 0;
+}
+
+static int secretmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long len = vma->vm_end - vma->vm_start;
+
+ if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
+ return -EINVAL;
+
+ if (mlock_future_check(vma->vm_mm, vma->vm_flags | VM_LOCKED, len))
+ return -EAGAIN;
+
+ vma->vm_flags |= VM_LOCKED | VM_DONTDUMP;
+ vma->vm_ops = &secretmem_vm_ops;
+
+ return 0;
+}
+
+bool vma_is_secretmem(struct vm_area_struct *vma)
+{
+ return vma->vm_ops == &secretmem_vm_ops;
+}
+
+static const struct file_operations secretmem_fops = {
+ .release = secretmem_release,
+ .mmap = secretmem_mmap,
+};
+
+static bool secretmem_isolate_page(struct page *page, isolate_mode_t mode)
+{
+ return false;
+}
+
+static int secretmem_migratepage(struct address_space *mapping,
+ struct page *newpage, struct page *page,
+ enum migrate_mode mode)
+{
+ return -EBUSY;
+}
+
+static void secretmem_freepage(struct page *page)
+{
+ set_direct_map_default_noflush(page);
+ clear_highpage(page);
+}
+
+const struct address_space_operations secretmem_aops = {
+ .freepage = secretmem_freepage,
+ .migratepage = secretmem_migratepage,
+ .isolate_page = secretmem_isolate_page,
+};
+
+static struct vfsmount *secretmem_mnt;
+
+static struct file *secretmem_file_create(unsigned long flags)
+{
+ struct file *file = ERR_PTR(-ENOMEM);
+ struct inode *inode;
+
+ inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
+ O_RDWR, &secretmem_fops);
+ if (IS_ERR(file))
+ goto err_free_inode;
+
+ mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+ mapping_set_unevictable(inode->i_mapping);
+
+ inode->i_mapping->a_ops = &secretmem_aops;
+
+ /* pretend we are a normal file with zero size */
+ inode->i_mode |= S_IFREG;
+ inode->i_size = 0;
+
+ return file;
+
+err_free_inode:
+ iput(inode);
+ return file;
+}
+
+SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
+{
+ struct file *file;
+ int fd, err;
+
+ /* make sure local flags do not confict with global fcntl.h */
+ BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
+
+ if (!secretmem_enable)
+ return -ENOSYS;
+
+ if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
+ return -EINVAL;
+
+ fd = get_unused_fd_flags(flags & O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ file = secretmem_file_create(flags);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ goto err_put_fd;
+ }
+
+ file->f_flags |= O_LARGEFILE;
+
+ fd_install(fd, file);
+ atomic_inc(&secretmem_users);
+ return fd;
+
+err_put_fd:
+ put_unused_fd(fd);
+ return err;
+}
+
+static int secretmem_init_fs_context(struct fs_context *fc)
+{
+ return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type secretmem_fs = {
+ .name = "secretmem",
+ .init_fs_context = secretmem_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+static int secretmem_init(void)
+{
+ int ret = 0;
+
+ if (!secretmem_enable)
+ return ret;
+
+ secretmem_mnt = kern_mount(&secretmem_fs);
+ if (IS_ERR(secretmem_mnt))
+ ret = PTR_ERR(secretmem_mnt);
+
+ /* prevent secretmem mappings from ever getting PROT_EXEC */
+ secretmem_mnt->mnt_flags |= MNT_NOEXEC;
+
+ return ret;
+}
+fs_initcall(secretmem_init);
diff --git a/mm/slub.c b/mm/slub.c
index 2ee43ff667a5..dc863c1ea324 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -26,6 +26,7 @@
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
+#include <linux/stackdepot.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/kfence.h>
@@ -220,8 +221,8 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
#define TRACK_ADDRS_COUNT 16
struct track {
unsigned long addr; /* Called from address */
-#ifdef CONFIG_STACKTRACE
- unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
+#ifdef CONFIG_STACKDEPOT
+ depot_stack_handle_t handle;
#endif
int cpu; /* Was running on cpu */
int pid; /* Pid context */
@@ -625,22 +626,27 @@ static struct track *get_track(struct kmem_cache *s, void *object,
return kasan_reset_tag(p + alloc);
}
+#ifdef CONFIG_STACKDEPOT
+static depot_stack_handle_t save_stack_depot_trace(gfp_t flags)
+{
+ unsigned long entries[TRACK_ADDRS_COUNT];
+ depot_stack_handle_t handle;
+ unsigned int nr_entries;
+
+ nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 4);
+ handle = stack_depot_save(entries, nr_entries, flags);
+ return handle;
+}
+#endif
+
static void set_track(struct kmem_cache *s, void *object,
enum track_item alloc, unsigned long addr)
{
struct track *p = get_track(s, object, alloc);
if (addr) {
-#ifdef CONFIG_STACKTRACE
- unsigned int nr_entries;
-
- metadata_access_enable();
- nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
- TRACK_ADDRS_COUNT, 3);
- metadata_access_disable();
-
- if (nr_entries < TRACK_ADDRS_COUNT)
- p->addrs[nr_entries] = 0;
+#ifdef CONFIG_STACKDEPOT
+ p->handle = save_stack_depot_trace(GFP_NOWAIT);
#endif
p->addr = addr;
p->cpu = smp_processor_id();
@@ -667,14 +673,19 @@ static void print_track(const char *s, struct track *t, unsigned long pr_time)
pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
-#ifdef CONFIG_STACKTRACE
+#ifdef CONFIG_STACKDEPOT
{
- int i;
- for (i = 0; i < TRACK_ADDRS_COUNT; i++)
- if (t->addrs[i])
- pr_err("\t%pS\n", (void *)t->addrs[i]);
- else
- break;
+ depot_stack_handle_t handle;
+ unsigned long *entries;
+ unsigned int nr_entries;
+
+ handle = READ_ONCE(t->handle);
+ if (!handle) {
+ pr_err("object allocation/free stack trace missing\n");
+ } else {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ stack_trace_print(entries, nr_entries, 0);
+ }
}
#endif
}
@@ -4048,18 +4059,26 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
objp = fixup_red_left(s, objp);
trackp = get_track(s, objp, TRACK_ALLOC);
kpp->kp_ret = (void *)trackp->addr;
-#ifdef CONFIG_STACKTRACE
- for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
- kpp->kp_stack[i] = (void *)trackp->addrs[i];
- if (!kpp->kp_stack[i])
- break;
- }
+#ifdef CONFIG_STACKDEPOT
+ {
+ depot_stack_handle_t handle;
+ unsigned long *entries;
+ unsigned int nr_entries;
- trackp = get_track(s, objp, TRACK_FREE);
- for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
- kpp->kp_free_stack[i] = (void *)trackp->addrs[i];
- if (!kpp->kp_free_stack[i])
- break;
+ handle = READ_ONCE(trackp->handle);
+ if (handle) {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++)
+ kpp->kp_stack[i] = (void *)entries[i];
+ }
+
+ trackp = get_track(s, objp, TRACK_FREE);
+ handle = READ_ONCE(trackp->handle);
+ if (handle) {
+ nr_entries = stack_depot_fetch(handle, &entries);
+ for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++)
+ kpp->kp_free_stack[i] = (void *)entries[i];
+ }
}
#endif
#endif
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh
index a18b47695f55..b7609958ee36 100755
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -40,6 +40,10 @@ cat << EOF
#define __IGNORE_setrlimit /* setrlimit */
#endif
+#ifndef __ARCH_WANT_MEMFD_SECRET
+#define __IGNORE_memfd_secret
+#endif
+
/* Missing flags argument */
#define __IGNORE_renameat /* renameat2 */
diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh
index 90398347e366..5fbad61fe490 100755
--- a/scripts/decode_stacktrace.sh
+++ b/scripts/decode_stacktrace.sh
@@ -3,11 +3,10 @@
# (c) 2014, Sasha Levin <sasha.levin@oracle.com>
#set -x
-if [[ $# < 1 ]]; then
+usage() {
echo "Usage:"
- echo " $0 -r <release> | <vmlinux> [base path] [modules path]"
- exit 1
-fi
+ echo " $0 -r <release> | <vmlinux> [<base path>|auto] [<modules path>]"
+}
if [[ $1 == "-r" ]] ; then
vmlinux=""
@@ -24,6 +23,7 @@ if [[ $1 == "-r" ]] ; then
if [[ $vmlinux == "" ]] ; then
echo "ERROR! vmlinux image for release $release is not found" >&2
+ usage
exit 2
fi
else
@@ -31,12 +31,35 @@ else
basepath=${2-auto}
modpath=$3
release=""
+ debuginfod=
+
+ # Can we use debuginfod-find?
+ if type debuginfod-find >/dev/null 2>&1 ; then
+ debuginfod=${1-only}
+ fi
+
+ if [[ $vmlinux == "" && -z $debuginfod ]] ; then
+ echo "ERROR! vmlinux image must be specified" >&2
+ usage
+ exit 1
+ fi
fi
declare -A cache
declare -A modcache
find_module() {
+ if [[ -n $debuginfod ]] ; then
+ if [[ -n $modbuildid ]] ; then
+ debuginfod-find debuginfo $modbuildid && return
+ fi
+
+ # Only using debuginfod so don't try to find vmlinux module path
+ if [[ $debuginfod == "only" ]] ; then
+ return
+ fi
+ fi
+
if [[ "$modpath" != "" ]] ; then
for fn in $(find "$modpath" -name "${module//_/[-_]}.ko*") ; do
if readelf -WS "$fn" | grep -qwF .debug_line ; then
@@ -51,7 +74,7 @@ find_module() {
find_module && return
if [[ $release == "" ]] ; then
- release=$(gdb -ex 'print init_uts_ns.name.release' -ex 'quit' -quiet -batch "$vmlinux" | sed -n 's/\$1 = "\(.*\)".*/\1/p')
+ release=$(gdb -ex 'print init_uts_ns.name.release' -ex 'quit' -quiet -batch "$vmlinux" 2>/dev/null | sed -n 's/\$1 = "\(.*\)".*/\1/p')
fi
for dn in {/usr/lib/debug,}/lib/modules/$release ; do
@@ -105,7 +128,7 @@ parse_symbol() {
if [[ "${cache[$module,$name]+isset}" == "isset" ]]; then
local base_addr=${cache[$module,$name]}
else
- local base_addr=$(nm "$objfile" | awk '$3 == "'$name'" && ($2 == "t" || $2 == "T") {print $1; exit}')
+ local base_addr=$(nm "$objfile" 2>/dev/null | awk '$3 == "'$name'" && ($2 == "t" || $2 == "T") {print $1; exit}')
if [[ $base_addr == "" ]] ; then
# address not found
return
@@ -129,7 +152,7 @@ parse_symbol() {
if [[ "${cache[$module,$address]+isset}" == "isset" ]]; then
local code=${cache[$module,$address]}
else
- local code=$(${CROSS_COMPILE}addr2line -i -e "$objfile" "$address")
+ local code=$(${CROSS_COMPILE}addr2line -i -e "$objfile" "$address" 2>/dev/null)
cache[$module,$address]=$code
fi
@@ -150,6 +173,27 @@ parse_symbol() {
symbol="$segment$name ($code)"
}
+debuginfod_get_vmlinux() {
+ local vmlinux_buildid=${1##* }
+
+ if [[ $vmlinux != "" ]]; then
+ return
+ fi
+
+ if [[ $vmlinux_buildid =~ ^[0-9a-f]+ ]]; then
+ vmlinux=$(debuginfod-find debuginfo $vmlinux_buildid)
+ if [[ $? -ne 0 ]] ; then
+ echo "ERROR! vmlinux image not found via debuginfod-find" >&2
+ usage
+ exit 2
+ fi
+ return
+ fi
+ echo "ERROR! Build ID for vmlinux not found. Try passing -r or specifying vmlinux" >&2
+ usage
+ exit 2
+}
+
decode_code() {
local scripts=`dirname "${BASH_SOURCE[0]}"`
@@ -157,6 +201,14 @@ decode_code() {
}
handle_line() {
+ if [[ $basepath == "auto" && $vmlinux != "" ]] ; then
+ module=""
+ symbol="kernel_init+0x0/0x0"
+ parse_symbol
+ basepath=${symbol#kernel_init (}
+ basepath=${basepath%/init/main.c:*)}
+ fi
+
local words
# Tokenize
@@ -182,16 +234,28 @@ handle_line() {
fi
done
+ if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then
+ words[$last-1]="${words[$last-1]} ${words[$last]}"
+ unset words[$last]
+ last=$(( $last - 1 ))
+ fi
+
if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then
module=${words[$last]}
module=${module#\[}
module=${module%\]}
+ modbuildid=${module#* }
+ module=${module% *}
+ if [[ $modbuildid == $module ]]; then
+ modbuildid=
+ fi
symbol=${words[$last-1]}
unset words[$last-1]
else
# The symbol is the last element, process it
symbol=${words[$last]}
module=
+ modbuildid=
fi
unset words[$last]
@@ -201,14 +265,6 @@ handle_line() {
echo "${words[@]}" "$symbol $module"
}
-if [[ $basepath == "auto" ]] ; then
- module=""
- symbol="kernel_init+0x0/0x0"
- parse_symbol
- basepath=${symbol#kernel_init (}
- basepath=${basepath%/init/main.c:*)}
-fi
-
while read line; do
# Let's see if we have an address in the line
if [[ $line =~ \[\<([^]]+)\>\] ]] ||
@@ -218,6 +274,9 @@ while read line; do
# Is it a code line?
elif [[ $line == *Code:* ]]; then
decode_code "$line"
+ # Is it a version line?
+ elif [[ -n $debuginfod && $line =~ PID:\ [0-9]+\ Comm: ]]; then
+ debuginfod_get_vmlinux "$line"
else
# Nothing special in this line, show it as is
echo "$line"
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index d683a49d07d5..f0fd80ef17df 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -24,5 +24,6 @@ va_128TBswitch
map_fixed_noreplace
write_to_hugetlbfs
hmm-tests
+memfd_secret
local_config.*
split_huge_page_test
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 812bc03e3142..521243770f26 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -35,6 +35,7 @@ TEST_GEN_FILES += madv_populate
TEST_GEN_FILES += map_fixed_noreplace
TEST_GEN_FILES += map_hugetlb
TEST_GEN_FILES += map_populate
+TEST_GEN_FILES += memfd_secret
TEST_GEN_FILES += mlock-random-test
TEST_GEN_FILES += mlock2-tests
TEST_GEN_FILES += mremap_dontunmap
@@ -135,7 +136,7 @@ warn_32bit_failure:
endif
endif
-$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
+$(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
$(OUTPUT)/gup_test: ../../../../mm/gup_test.h
diff --git a/tools/testing/selftests/vm/memfd_secret.c b/tools/testing/selftests/vm/memfd_secret.c
new file mode 100644
index 000000000000..93e7e7ffed33
--- /dev/null
+++ b/tools/testing/selftests/vm/memfd_secret.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corporation, 2021
+ *
+ * Author: Mike Rapoport <rppt@linux.ibm.com>
+ */
+
+#define _GNU_SOURCE
+#include <sys/uio.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/resource.h>
+#include <sys/capability.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "../kselftest.h"
+
+#define fail(fmt, ...) ksft_test_result_fail(fmt, ##__VA_ARGS__)
+#define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__)
+#define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__)
+
+#ifdef __NR_memfd_secret
+
+#define PATTERN 0x55
+
+static const int prot = PROT_READ | PROT_WRITE;
+static const int mode = MAP_SHARED;
+
+static unsigned long page_size;
+static unsigned long mlock_limit_cur;
+static unsigned long mlock_limit_max;
+
+static int memfd_secret(unsigned int flags)
+{
+ return syscall(__NR_memfd_secret, flags);
+}
+
+static void test_file_apis(int fd)
+{
+ char buf[64];
+
+ if ((read(fd, buf, sizeof(buf)) >= 0) ||
+ (write(fd, buf, sizeof(buf)) >= 0) ||
+ (pread(fd, buf, sizeof(buf), 0) >= 0) ||
+ (pwrite(fd, buf, sizeof(buf), 0) >= 0))
+ fail("unexpected file IO\n");
+ else
+ pass("file IO is blocked as expected\n");
+}
+
+static void test_mlock_limit(int fd)
+{
+ size_t len;
+ char *mem;
+
+ len = mlock_limit_cur;
+ mem = mmap(NULL, len, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("unable to mmap secret memory\n");
+ return;
+ }
+ munmap(mem, len);
+
+ len = mlock_limit_max * 2;
+ mem = mmap(NULL, len, prot, mode, fd, 0);
+ if (mem != MAP_FAILED) {
+ fail("unexpected mlock limit violation\n");
+ munmap(mem, len);
+ return;
+ }
+
+ pass("mlock limit is respected\n");
+}
+
+static void try_process_vm_read(int fd, int pipefd[2])
+{
+ struct iovec liov, riov;
+ char buf[64];
+ char *mem;
+
+ if (read(pipefd[0], &mem, sizeof(mem)) < 0) {
+ fail("pipe write: %s\n", strerror(errno));
+ exit(KSFT_FAIL);
+ }
+
+ liov.iov_len = riov.iov_len = sizeof(buf);
+ liov.iov_base = buf;
+ riov.iov_base = mem;
+
+ if (process_vm_readv(getppid(), &liov, 1, &riov, 1, 0) < 0) {
+ if (errno == ENOSYS)
+ exit(KSFT_SKIP);
+ exit(KSFT_PASS);
+ }
+
+ exit(KSFT_FAIL);
+}
+
+static void try_ptrace(int fd, int pipefd[2])
+{
+ pid_t ppid = getppid();
+ int status;
+ char *mem;
+ long ret;
+
+ if (read(pipefd[0], &mem, sizeof(mem)) < 0) {
+ perror("pipe write");
+ exit(KSFT_FAIL);
+ }
+
+ ret = ptrace(PTRACE_ATTACH, ppid, 0, 0);
+ if (ret) {
+ perror("ptrace_attach");
+ exit(KSFT_FAIL);
+ }
+
+ ret = waitpid(ppid, &status, WUNTRACED);
+ if ((ret != ppid) || !(WIFSTOPPED(status))) {
+ fprintf(stderr, "weird waitppid result %ld stat %x\n",
+ ret, status);
+ exit(KSFT_FAIL);
+ }
+
+ if (ptrace(PTRACE_PEEKDATA, ppid, mem, 0))
+ exit(KSFT_PASS);
+
+ exit(KSFT_FAIL);
+}
+
+static void check_child_status(pid_t pid, const char *name)
+{
+ int status;
+
+ waitpid(pid, &status, 0);
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == KSFT_SKIP) {
+ skip("%s is not supported\n", name);
+ return;
+ }
+
+ if ((WIFEXITED(status) && WEXITSTATUS(status) == KSFT_PASS) ||
+ WIFSIGNALED(status)) {
+ pass("%s is blocked as expected\n", name);
+ return;
+ }
+
+ fail("%s: unexpected memory access\n", name);
+}
+
+static void test_remote_access(int fd, const char *name,
+ void (*func)(int fd, int pipefd[2]))
+{
+ int pipefd[2];
+ pid_t pid;
+ char *mem;
+
+ if (pipe(pipefd)) {
+ fail("pipe failed: %s\n", strerror(errno));
+ return;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ fail("fork failed: %s\n", strerror(errno));
+ return;
+ }
+
+ if (pid == 0) {
+ func(fd, pipefd);
+ return;
+ }
+
+ mem = mmap(NULL, page_size, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("Unable to mmap secret memory\n");
+ return;
+ }
+
+ ftruncate(fd, page_size);
+ memset(mem, PATTERN, page_size);
+
+ if (write(pipefd[1], &mem, sizeof(mem)) < 0) {
+ fail("pipe write: %s\n", strerror(errno));
+ return;
+ }
+
+ check_child_status(pid, name);
+}
+
+static void test_process_vm_read(int fd)
+{
+ test_remote_access(fd, "process_vm_read", try_process_vm_read);
+}
+
+static void test_ptrace(int fd)
+{
+ test_remote_access(fd, "ptrace", try_ptrace);
+}
+
+static int set_cap_limits(rlim_t max)
+{
+ struct rlimit new;
+ cap_t cap = cap_init();
+
+ new.rlim_cur = max;
+ new.rlim_max = max;
+ if (setrlimit(RLIMIT_MEMLOCK, &new)) {
+ perror("setrlimit() returns error");
+ return -1;
+ }
+
+ /* drop capabilities including CAP_IPC_LOCK */
+ if (cap_set_proc(cap)) {
+ perror("cap_set_proc() returns error");
+ return -2;
+ }
+
+ return 0;
+}
+
+static void prepare(void)
+{
+ struct rlimit rlim;
+
+ page_size = sysconf(_SC_PAGE_SIZE);
+ if (!page_size)
+ ksft_exit_fail_msg("Failed to get page size %s\n",
+ strerror(errno));
+
+ if (getrlimit(RLIMIT_MEMLOCK, &rlim))
+ ksft_exit_fail_msg("Unable to detect mlock limit: %s\n",
+ strerror(errno));
+
+ mlock_limit_cur = rlim.rlim_cur;
+ mlock_limit_max = rlim.rlim_max;
+
+ printf("page_size: %ld, mlock.soft: %ld, mlock.hard: %ld\n",
+ page_size, mlock_limit_cur, mlock_limit_max);
+
+ if (page_size > mlock_limit_cur)
+ mlock_limit_cur = page_size;
+ if (page_size > mlock_limit_max)
+ mlock_limit_max = page_size;
+
+ if (set_cap_limits(mlock_limit_max))
+ ksft_exit_fail_msg("Unable to set mlock limit: %s\n",
+ strerror(errno));
+}
+
+#define NUM_TESTS 4
+
+int main(int argc, char *argv[])
+{
+ int fd;
+
+ prepare();
+
+ ksft_print_header();
+ ksft_set_plan(NUM_TESTS);
+
+ fd = memfd_secret(0);
+ if (fd < 0) {
+ if (errno == ENOSYS)
+ ksft_exit_skip("memfd_secret is not supported\n");
+ else
+ ksft_exit_fail_msg("memfd_secret failed: %s\n",
+ strerror(errno));
+ }
+
+ test_mlock_limit(fd);
+ test_file_apis(fd);
+ test_process_vm_read(fd);
+ test_ptrace(fd);
+
+ close(fd);
+
+ ksft_exit(!ksft_get_fail_cnt());
+}
+
+#else /* __NR_memfd_secret */
+
+int main(int argc, char *argv[])
+{
+ printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_memfd_secret */
diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index 9c391d016922..0624d1bd71b5 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -45,14 +45,15 @@ enum {
_4MB = 4ULL << 20,
_1GB = 1ULL << 30,
_2GB = 2ULL << 30,
- PTE = _4KB,
PMD = _2MB,
PUD = _1GB,
};
+#define PTE page_size
+
#define MAKE_TEST(source_align, destination_align, size, \
overlaps, should_fail, test_name) \
-{ \
+(struct test){ \
.name = test_name, \
.config = { \
.src_alignment = source_align, \
@@ -74,9 +75,10 @@ static void *get_source_mapping(struct config c)
retry:
addr += c.src_alignment;
src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
if (src_addr == MAP_FAILED) {
- if (errno == EPERM)
+ if (errno == EPERM || errno == EEXIST)
goto retry;
goto error;
}
@@ -252,12 +254,17 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
return 0;
}
+#define MAX_TEST 13
+#define MAX_PERF_TEST 3
int main(int argc, char **argv)
{
int failures = 0;
int i, run_perf_tests;
unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
unsigned int pattern_seed;
+ struct test test_cases[MAX_TEST];
+ struct test perf_test_cases[MAX_PERF_TEST];
+ int page_size;
time_t t;
pattern_seed = (unsigned int) time(&t);
@@ -268,56 +275,59 @@ int main(int argc, char **argv)
ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
threshold_mb, pattern_seed);
- struct test test_cases[] = {
- /* Expected mremap failures */
- MAKE_TEST(_4KB, _4KB, _4KB, OVERLAPPING, EXPECT_FAILURE,
- "mremap - Source and Destination Regions Overlapping"),
- MAKE_TEST(_4KB, _1KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
- "mremap - Destination Address Misaligned (1KB-aligned)"),
- MAKE_TEST(_1KB, _4KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
- "mremap - Source Address Misaligned (1KB-aligned)"),
-
- /* Src addr PTE aligned */
- MAKE_TEST(PTE, PTE, _8KB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "8KB mremap - Source PTE-aligned, Destination PTE-aligned"),
-
- /* Src addr 1MB aligned */
- MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2MB mremap - Source 1MB-aligned, Destination PTE-aligned"),
- MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned"),
-
- /* Src addr PMD aligned */
- MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "4MB mremap - Source PMD-aligned, Destination PTE-aligned"),
- MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "4MB mremap - Source PMD-aligned, Destination 1MB-aligned"),
- MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "4MB mremap - Source PMD-aligned, Destination PMD-aligned"),
-
- /* Src addr PUD aligned */
- MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2GB mremap - Source PUD-aligned, Destination PTE-aligned"),
- MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2GB mremap - Source PUD-aligned, Destination 1MB-aligned"),
- MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2GB mremap - Source PUD-aligned, Destination PMD-aligned"),
- MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2GB mremap - Source PUD-aligned, Destination PUD-aligned"),
- };
-
- struct test perf_test_cases[] = {
- /*
- * mremap 1GB region - Page table level aligned time
- * comparison.
- */
- MAKE_TEST(PTE, PTE, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "1GB mremap - Source PTE-aligned, Destination PTE-aligned"),
- MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "1GB mremap - Source PMD-aligned, Destination PMD-aligned"),
- MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "1GB mremap - Source PUD-aligned, Destination PUD-aligned"),
- };
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Expected mremap failures */
+ test_cases[0] = MAKE_TEST(page_size, page_size, page_size,
+ OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Source and Destination Regions Overlapping");
+
+ test_cases[1] = MAKE_TEST(page_size, page_size/4, page_size,
+ NON_OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Destination Address Misaligned (1KB-aligned)");
+ test_cases[2] = MAKE_TEST(page_size/4, page_size, page_size,
+ NON_OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Source Address Misaligned (1KB-aligned)");
+
+ /* Src addr PTE aligned */
+ test_cases[3] = MAKE_TEST(PTE, PTE, PTE * 2,
+ NON_OVERLAPPING, EXPECT_SUCCESS,
+ "8KB mremap - Source PTE-aligned, Destination PTE-aligned");
+
+ /* Src addr 1MB aligned */
+ test_cases[4] = MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2MB mremap - Source 1MB-aligned, Destination PTE-aligned");
+ test_cases[5] = MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
+
+ /* Src addr PMD aligned */
+ test_cases[6] = MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination PTE-aligned");
+ test_cases[7] = MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination 1MB-aligned");
+ test_cases[8] = MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination PMD-aligned");
+
+ /* Src addr PUD aligned */
+ test_cases[9] = MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PTE-aligned");
+ test_cases[10] = MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination 1MB-aligned");
+ test_cases[11] = MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PMD-aligned");
+ test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PUD-aligned");
+
+ perf_test_cases[0] = MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PTE-aligned, Destination PTE-aligned");
+ /*
+ * mremap 1GB region - Page table level aligned time
+ * comparison.
+ */
+ perf_test_cases[1] = MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PMD-aligned, Destination PMD-aligned");
+ perf_test_cases[2] = MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PUD-aligned, Destination PUD-aligned");
run_perf_tests = (threshold_mb == VALIDATION_NO_THRESHOLD) ||
(threshold_mb * _1MB >= _1GB);
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 955782d138ab..d09a6b71f1e9 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -362,4 +362,21 @@ else
exitcode=1
fi
+echo "running memfd_secret test"
+echo "------------------------------------"
+./memfd_secret
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+exit $exitcode
+
exit $exitcode