diff options
Diffstat (limited to 'arch/riscv/kernel')
82 files changed, 4652 insertions, 1987 deletions
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 5e591f831638..c7b542573407 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -9,8 +9,8 @@ CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) endif -CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) -CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall_table.o += $(call cc-disable-warning, override-init) +CFLAGS_compat_syscall_table.o += $(call cc-disable-warning, override-init) ifdef CONFIG_KEXEC_CORE AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) @@ -20,26 +20,34 @@ endif ifdef CONFIG_RISCV_ALTERNATIVE_EARLY CFLAGS_alternative.o := -mcmodel=medany CFLAGS_cpufeature.o := -mcmodel=medany +CFLAGS_sbi_ecall.o := -mcmodel=medany ifdef CONFIG_FTRACE CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_sbi_ecall.o = $(CC_FLAGS_FTRACE) endif ifdef CONFIG_RELOCATABLE CFLAGS_alternative.o += -fno-pie CFLAGS_cpufeature.o += -fno-pie +CFLAGS_sbi_ecall.o += -fno-pie endif ifdef CONFIG_KASAN KASAN_SANITIZE_alternative.o := n KASAN_SANITIZE_cpufeature.o := n +KASAN_SANITIZE_sbi_ecall.o := n +endif +ifdef CONFIG_FORTIFY_SOURCE +CFLAGS_alternative.o += -D__NO_FORTIFY +CFLAGS_cpufeature.o += -D__NO_FORTIFY +CFLAGS_sbi_ecall.o += -D__NO_FORTIFY endif endif -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-y += head.o obj-y += soc.o obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o -obj-y += copy-unaligned.o obj-y += cpu.o obj-y += cpufeature.o obj-y += entry.o @@ -59,12 +67,19 @@ obj-y += riscv_ksyms.o obj-y += stacktrace.o obj-y += cacheinfo.o obj-y += patch.o +obj-y += vendor_extensions.o +obj-y += vendor_extensions/ obj-y += probes/ obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o +obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o +obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o +obj-$(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS) += vec-copy-unaligned.o + obj-$(CONFIG_FPU) += fpu.o +obj-$(CONFIG_FPU) += kernel_mode_fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o obj-$(CONFIG_SMP) += smpboot.o @@ -83,7 +98,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o -obj-$(CONFIG_RISCV_SBI) += sbi.o +obj-$(CONFIG_RISCV_SBI) += sbi.o sbi_ecall.o ifeq ($(CONFIG_RISCV_SBI), y) obj-$(CONFIG_SMP) += sbi-ipi.o obj-$(CONFIG_SMP) += cpu_ops_sbi.o @@ -92,7 +107,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o -obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o +obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o kexec_image.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o @@ -107,3 +122,6 @@ obj-$(CONFIG_COMPAT) += compat_vdso/ obj-$(CONFIG_64BIT) += pi/ obj-$(CONFIG_ACPI) += acpi.o +obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o + +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += bugs.o diff --git a/arch/riscv/kernel/Makefile.syscalls b/arch/riscv/kernel/Makefile.syscalls new file mode 100644 index 000000000000..9668fd1faf60 --- /dev/null +++ b/arch/riscv/kernel/Makefile.syscalls @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 + +syscall_abis_32 += riscv memfd_secret +syscall_abis_64 += riscv rlimit memfd_secret diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index e619edc8b0cc..3f6d5a6789e8 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -17,7 +17,9 @@ #include <linux/efi.h> #include <linux/io.h> #include <linux/memblock.h> +#include <linux/of_fdt.h> #include <linux/pci.h> +#include <linux/serial_core.h> int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; @@ -131,7 +133,7 @@ void __init acpi_boot_table_init(void) if (param_acpi_off || (!param_acpi_on && !param_acpi_force && efi.acpi20 == EFI_INVALID_TABLE_ADDR)) - return; + goto done; /* * ACPI is disabled at this point. Enable it in order to parse @@ -151,6 +153,14 @@ void __init acpi_boot_table_init(void) if (!param_acpi_force) disable_acpi(); } + +done: + if (acpi_disabled) { + if (earlycon_acpi_spcr_enable) + early_init_dt_scan_chosen_stdout(); + } else { + acpi_parse_spcr(earlycon_acpi_spcr_enable, true); + } } static int acpi_parse_madt_rintc(union acpi_subtable_headers *header, const unsigned long end) @@ -191,11 +201,6 @@ struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu) return &cpu_madt_rintc[cpu]; } -u32 get_acpi_id_for_cpu(int cpu) -{ - return acpi_cpu_get_madt_rintc(cpu)->uid; -} - /* * __acpi_map_table() will be called before paging_init(), so early_ioremap() * or early_memremap() should be called here to for ACPI table mapping. @@ -205,7 +210,7 @@ void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size) if (!size) return NULL; - return early_ioremap(phys, size); + return early_memremap(phys, size); } void __init __acpi_unmap_table(void __iomem *map, unsigned long size) @@ -213,7 +218,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) if (!map || !size) return; - early_iounmap(map, size); + early_memunmap(map, size); } void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) @@ -300,35 +305,32 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) } } - return ioremap_prot(phys, size, pgprot_val(prot)); + return ioremap_prot(phys, size, prot); } #ifdef CONFIG_PCI /* - * These interfaces are defined just to enable building ACPI core. - * TODO: Update it with actual implementation when external interrupt - * controller support is added in RISC-V ACPI. + * raw_pci_read/write - Platform-specific PCI config space access. */ -int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, - int reg, int len, u32 *val) +int raw_pci_read(unsigned int domain, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *val) { - return PCIBIOS_DEVICE_NOT_FOUND; -} + struct pci_bus *b = pci_find_bus(domain, bus); -int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, - int reg, int len, u32 val) -{ - return PCIBIOS_DEVICE_NOT_FOUND; + if (!b) + return PCIBIOS_DEVICE_NOT_FOUND; + return b->ops->read(b, devfn, reg, len, val); } -int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) +int raw_pci_write(unsigned int domain, unsigned int bus, + unsigned int devfn, int reg, int len, u32 val) { - return -1; -} + struct pci_bus *b = pci_find_bus(domain, bus); -struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) -{ - return NULL; + if (!b) + return PCIBIOS_DEVICE_NOT_FOUND; + return b->ops->write(b, devfn, reg, len, val); } + #endif /* CONFIG_PCI */ diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c new file mode 100644 index 000000000000..130769e3a99c --- /dev/null +++ b/arch/riscv/kernel/acpi_numa.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACPI 6.6 based NUMA setup for RISCV + * Lots of code was borrowed from arch/arm64/kernel/acpi_numa.c + * + * Copyright 2004 Andi Kleen, SuSE Labs. + * Copyright (C) 2013-2016, Linaro Ltd. + * Author: Hanjun Guo <hanjun.guo@linaro.org> + * Copyright (C) 2024 Intel Corporation. + * + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. + * + * Called from acpi_numa_init while reading the SRAT and SLIT tables. + * Assumes all memory regions belonging to a single proximity domain + * are in one chunk. Holes between them will be included in the node. + */ + +#define pr_fmt(fmt) "ACPI: NUMA: " fmt + +#include <linux/acpi.h> +#include <linux/bitmap.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/topology.h> + +#include <asm/numa.h> + +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; + +static int __init acpi_numa_get_nid(unsigned int cpu) +{ + return acpi_early_node_map[cpu]; +} + +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + +static int __init acpi_parse_rintc_pxm(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_srat_rintc_affinity *pa; + int cpu, pxm, node; + + if (srat_disabled()) + return -EINVAL; + + pa = (struct acpi_srat_rintc_affinity *)header; + if (!pa) + return -EINVAL; + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return 0; + + pxm = pa->proximity_domain; + node = pxm_to_node(pxm); + + /* + * If we can't map the UID to a logical cpu this + * means that the UID is not part of possible cpus + * so we do not need a NUMA mapping for it, skip + * the SRAT entry and keep parsing. + */ + cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid); + if (cpu < 0) + return 0; + + acpi_early_node_map[cpu] = node; + pr_info("SRAT: PXM %d -> HARTID 0x%lx -> Node %d\n", pxm, + cpuid_to_hartid_map(cpu), node); + + return 0; +} + +void __init acpi_map_cpus_to_nodes(void) +{ + int i; + + /* + * In ACPI, SMP and CPU NUMA information is provided in separate + * static tables, namely the MADT and the SRAT. + * + * Thus, it is simpler to first create the cpu logical map through + * an MADT walk and then map the logical cpus to their node ids + * as separate steps. + */ + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_RINTC_AFFINITY, acpi_parse_rintc_pxm, 0); + + for (i = 0; i < nr_cpu_ids; i++) + early_map_cpu_to_node(i, acpi_numa_get_nid(i)); +} + +/* Callback for Proximity Domain -> logical node ID mapping */ +void __init acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) +{ + int pxm, node; + + if (srat_disabled()) + return; + + if (pa->header.length < sizeof(struct acpi_srat_rintc_affinity)) { + pr_err("SRAT: Invalid SRAT header length: %d\n", pa->header.length); + bad_srat(); + return; + } + + if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED)) + return; + + pxm = pa->proximity_domain; + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE) { + pr_err("SRAT: Too many proximity domains %d\n", pxm); + bad_srat(); + return; + } + + node_set(node, numa_nodes_parsed); +} diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c index 319a1da0358b..7eb3cb1215c6 100644 --- a/arch/riscv/kernel/alternative.c +++ b/arch/riscv/kernel/alternative.c @@ -18,7 +18,7 @@ #include <asm/sbi.h> #include <asm/csr.h> #include <asm/insn.h> -#include <asm/patch.h> +#include <asm/text-patching.h> struct cpu_manufacturer_info_t { unsigned long vendor_id; @@ -43,7 +43,7 @@ static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info switch (cpu_mfr_info->vendor_id) { #ifdef CONFIG_ERRATA_ANDES - case ANDESTECH_VENDOR_ID: + case ANDES_VENDOR_ID: cpu_mfr_info->patch_func = andes_errata_patch_func; break; #endif diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index a03129f40c46..6e8c0d6feae9 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -4,11 +4,10 @@ * Copyright (C) 2017 SiFive */ -#define GENERATING_ASM_OFFSETS - #include <linux/kbuild.h> #include <linux/mm.h> #include <linux/sched.h> +#include <linux/ftrace.h> #include <linux/suspend.h> #include <asm/kvm_host.h> #include <asm/thread_info.h> @@ -35,13 +34,20 @@ void asm_offsets(void) OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]); OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]); OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]); - OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags); + OFFSET(TASK_THREAD_SUM, task_struct, thread.sum); + + OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu); OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); #ifdef CONFIG_SHADOW_CALL_STACK OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp); #endif +#ifdef CONFIG_64BIT + OFFSET(TASK_TI_A0, task_struct, thread_info.a0); + OFFSET(TASK_TI_A1, task_struct, thread_info.a1); + OFFSET(TASK_TI_A2, task_struct, thread_info.a2); +#endif OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu); OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]); @@ -341,6 +347,10 @@ void asm_offsets(void) offsetof(struct task_struct, thread.s[11]) - offsetof(struct task_struct, thread.ra) ); + DEFINE(TASK_THREAD_SUM_RA, + offsetof(struct task_struct, thread.sum) + - offsetof(struct task_struct, thread.ra) + ); DEFINE(TASK_THREAD_F0_F0, offsetof(struct task_struct, thread.fstate.f[0]) @@ -488,4 +498,34 @@ void asm_offsets(void) DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN)); OFFSET(STACKFRAME_FP, stackframe, fp); OFFSET(STACKFRAME_RA, stackframe, ra); +#ifdef CONFIG_FUNCTION_TRACER + DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func)); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call)); +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS + DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct __arch_ftrace_regs), STACK_ALIGN)); + DEFINE(FREGS_EPC, offsetof(struct __arch_ftrace_regs, epc)); + DEFINE(FREGS_RA, offsetof(struct __arch_ftrace_regs, ra)); + DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp)); + DEFINE(FREGS_S0, offsetof(struct __arch_ftrace_regs, s0)); + DEFINE(FREGS_T1, offsetof(struct __arch_ftrace_regs, t1)); +#ifdef CONFIG_CC_IS_CLANG + DEFINE(FREGS_T2, offsetof(struct __arch_ftrace_regs, t2)); + DEFINE(FREGS_T3, offsetof(struct __arch_ftrace_regs, t3)); + DEFINE(FREGS_T4, offsetof(struct __arch_ftrace_regs, t4)); + DEFINE(FREGS_T5, offsetof(struct __arch_ftrace_regs, t5)); + DEFINE(FREGS_T6, offsetof(struct __arch_ftrace_regs, t6)); +#endif + DEFINE(FREGS_A0, offsetof(struct __arch_ftrace_regs, a0)); + DEFINE(FREGS_A1, offsetof(struct __arch_ftrace_regs, a1)); + DEFINE(FREGS_A2, offsetof(struct __arch_ftrace_regs, a2)); + DEFINE(FREGS_A3, offsetof(struct __arch_ftrace_regs, a3)); + DEFINE(FREGS_A4, offsetof(struct __arch_ftrace_regs, a4)); + DEFINE(FREGS_A5, offsetof(struct __arch_ftrace_regs, a5)); + DEFINE(FREGS_A6, offsetof(struct __arch_ftrace_regs, a6)); + DEFINE(FREGS_A7, offsetof(struct __arch_ftrace_regs, a7)); +#endif } diff --git a/arch/riscv/kernel/bugs.c b/arch/riscv/kernel/bugs.c new file mode 100644 index 000000000000..3655fe7d678c --- /dev/null +++ b/arch/riscv/kernel/bugs.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Rivos Inc. + */ + +#include <linux/cpu.h> +#include <linux/device.h> +#include <linux/sprintf.h> + +#include <asm/bugs.h> +#include <asm/vendor_extensions/thead.h> + +static enum mitigation_state ghostwrite_state; + +void ghostwrite_set_vulnerable(void) +{ + ghostwrite_state = VULNERABLE; +} + +/* + * Vendor extension alternatives will use the value set at the time of boot + * alternative patching, thus this must be called before boot alternatives are + * patched (and after extension probing) to be effective. + * + * Returns true if mitgated, false otherwise. + */ +bool ghostwrite_enable_mitigation(void) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR) && + ghostwrite_state == VULNERABLE && !cpu_mitigations_off()) { + disable_xtheadvector(); + ghostwrite_state = MITIGATED; + return true; + } + + return false; +} + +enum mitigation_state ghostwrite_get_state(void) +{ + return ghostwrite_state; +} + +ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) { + switch (ghostwrite_state) { + case UNAFFECTED: + return sprintf(buf, "Not affected\n"); + case MITIGATED: + return sprintf(buf, "Mitigation: xtheadvector disabled\n"); + case VULNERABLE: + fallthrough; + default: + return sprintf(buf, "Vulnerable\n"); + } + } else { + return sprintf(buf, "Not affected\n"); + } +} diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 09e9b88110d1..26b085dbdd07 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -3,6 +3,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/acpi.h> #include <linux/cpu.h> #include <linux/of.h> #include <asm/cacheinfo.h> @@ -64,27 +65,55 @@ uintptr_t get_cache_geometry(u32 level, enum cache_type type) } static void ci_leaf_init(struct cacheinfo *this_leaf, - struct device_node *node, enum cache_type type, unsigned int level) { this_leaf->level = level; this_leaf->type = type; } +int init_cache_level(unsigned int cpu) +{ + return init_of_cache_level(cpu); +} + int populate_cache_leaves(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; - struct device_node *np = of_cpu_device_node_get(cpu); - struct device_node *prev = NULL; + struct device_node *np, *prev; int levels = 1, level = 1; - if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + if (!acpi_disabled) { + int ret, fw_levels, split_levels; + + ret = acpi_get_cache_info(cpu, &fw_levels, &split_levels); + if (ret) + return ret; + + BUG_ON((split_levels > fw_levels) || + (split_levels + fw_levels > this_cpu_ci->num_leaves)); + + for (; level <= this_cpu_ci->num_levels; level++) { + if (level <= split_levels) { + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + } else { + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + } + } + return 0; + } + + np = of_cpu_device_node_get(cpu); + if (!np) + return -ENOENT; + + if (of_property_present(np, "cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + if (of_property_present(np, "i-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (of_property_present(np, "d-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); prev = np; while ((np = of_find_next_cache_node(np))) { @@ -96,12 +125,12 @@ int populate_cache_leaves(unsigned int cpu) break; if (level <= levels) break; - if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + if (of_property_present(np, "cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); + if (of_property_present(np, "i-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (of_property_present(np, "d-cache-size")) + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); levels = level; } of_node_put(np); diff --git a/arch/riscv/kernel/compat_syscall_table.c b/arch/riscv/kernel/compat_syscall_table.c index ad7f2d712f5f..e884c069e88f 100644 --- a/arch/riscv/kernel/compat_syscall_table.c +++ b/arch/riscv/kernel/compat_syscall_table.c @@ -8,9 +8,11 @@ #include <asm-generic/syscalls.h> #include <asm/syscall.h> +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) + #undef __SYSCALL #define __SYSCALL(nr, call) asmlinkage long __riscv_##call(const struct pt_regs *); -#include <asm/unistd.h> +#include <asm/syscall_table_32.h> #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = __riscv_##call, @@ -19,5 +21,5 @@ asmlinkage long compat_sys_rt_sigreturn(void); void * const compat_sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = __riscv_sys_ni_syscall, -#include <asm/unistd.h> +#include <asm/syscall_table_32.h> }; diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 62fa393b2eb2..24e37d1ef7ec 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -34,12 +34,6 @@ obj-compat_vdso := $(addprefix $(obj)/, $(obj-compat_vdso)) obj-y += compat_vdso.o CPPFLAGS_compat_vdso.lds += -P -C -DCOMPAT_VDSO -U$(ARCH) -# Disable profiling and instrumentation for VDSO code -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n - # Force dependency $(obj)/compat_vdso.o: $(obj)/compat_vdso.so @@ -58,7 +52,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # Generate VDSO offsets using helper script -gen-compat_vdsosym := $(srctree)/$(src)/gen_compat_vdso_offsets.sh +gen-compat_vdsosym := $(src)/gen_compat_vdso_offsets.sh quiet_cmd_compat_vdsosym = VDSOSYM $@ cmd_compat_vdsosym = $(NM) $< | $(gen-compat_vdsosym) | LC_ALL=C sort > $@ @@ -74,5 +68,5 @@ quiet_cmd_compat_vdsold = VDSOLD $@ rm $@.tmp # actual build commands -quiet_cmd_compat_vdsoas = VDSOAS $@ +quiet_cmd_compat_vdsoas = VDSOAS $@ cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $< diff --git a/arch/riscv/kernel/copy-unaligned.h b/arch/riscv/kernel/copy-unaligned.h index e3d70d35b708..85d4d11450cb 100644 --- a/arch/riscv/kernel/copy-unaligned.h +++ b/arch/riscv/kernel/copy-unaligned.h @@ -10,4 +10,9 @@ void __riscv_copy_words_unaligned(void *dst, const void *src, size_t size); void __riscv_copy_bytes_unaligned(void *dst, const void *src, size_t size); +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS +void __riscv_copy_vec_words_unaligned(void *dst, const void *src, size_t size); +void __riscv_copy_vec_bytes_unaligned(void *dst, const void *src, size_t size); +#endif + #endif /* __RISCV_KERNEL_COPY_UNALIGNED_H */ diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index 28b58fc5ad19..a1e38ecfc8be 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -58,7 +58,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) if (cpu_ops->cpu_is_stopped) ret = cpu_ops->cpu_is_stopped(cpu); if (ret) - pr_warn("CPU%d may not have stopped: %d\n", cpu, ret); + pr_warn("CPU%u may not have stopped: %d\n", cpu, ret); } /* diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index d11d6320fb0d..f6b13e9f5e6c 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -16,6 +16,7 @@ #include <asm/sbi.h> #include <asm/smp.h> #include <asm/pgtable.h> +#include <asm/vendor_extensions.h> bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { @@ -139,6 +140,34 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) return -1; } +unsigned long __init riscv_get_marchid(void) +{ + struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); + +#if IS_ENABLED(CONFIG_RISCV_SBI) + ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); +#elif IS_ENABLED(CONFIG_RISCV_M_MODE) + ci->marchid = csr_read(CSR_MARCHID); +#else + ci->marchid = 0; +#endif + return ci->marchid; +} + +unsigned long __init riscv_get_mvendorid(void) +{ + struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); + +#if IS_ENABLED(CONFIG_RISCV_SBI) + ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); +#elif IS_ENABLED(CONFIG_RISCV_M_MODE) + ci->mvendorid = csr_read(CSR_MVENDORID); +#else + ci->mvendorid = 0; +#endif + return ci->mvendorid; +} + DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); unsigned long riscv_cached_mvendorid(unsigned int cpu_id) @@ -170,12 +199,16 @@ static int riscv_cpuinfo_starting(unsigned int cpu) struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); #if IS_ENABLED(CONFIG_RISCV_SBI) - ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); - ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); + if (!ci->mvendorid) + ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); + if (!ci->marchid) + ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); ci->mimpid = sbi_spec_is_0_1() ? 0 : sbi_get_mimpid(); #elif IS_ENABLED(CONFIG_RISCV_M_MODE) - ci->mvendorid = csr_read(CSR_MVENDORID); - ci->marchid = csr_read(CSR_MARCHID); + if (!ci->mvendorid) + ci->mvendorid = csr_read(CSR_MVENDORID); + if (!ci->marchid) + ci->marchid = csr_read(CSR_MARCHID); ci->mimpid = csr_read(CSR_MIMPID); #else ci->mvendorid = 0; @@ -203,7 +236,33 @@ arch_initcall(riscv_cpuinfo_init); #ifdef CONFIG_PROC_FS -static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) +#define ALL_CPUS -1 + +static void print_vendor_isa(struct seq_file *f, int cpu) +{ + struct riscv_isavendorinfo *vendor_bitmap; + struct riscv_isa_vendor_ext_data_list *ext_list; + const struct riscv_isa_ext_data *ext_data; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + ext_list = riscv_isa_vendor_ext_list[i]; + ext_data = riscv_isa_vendor_ext_list[i]->ext_data; + + if (cpu == ALL_CPUS) + vendor_bitmap = &ext_list->all_harts_isa_bitmap; + else + vendor_bitmap = &ext_list->per_hart_isa_bitmap[cpu]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + if (!__riscv_isa_extension_available(vendor_bitmap->isa, ext_data[j].id)) + continue; + + seq_printf(f, "_%s", ext_data[j].name); + } + } +} + +static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap, int cpu) { if (IS_ENABLED(CONFIG_32BIT)) @@ -222,6 +281,8 @@ static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) seq_printf(f, "%s", riscv_isa_ext[i].name); } + print_vendor_isa(f, cpu); + seq_puts(f, "\n"); } @@ -284,7 +345,7 @@ static int c_show(struct seq_file *m, void *v) * line. */ seq_puts(m, "isa\t\t: "); - print_isa(m, NULL); + print_isa(m, NULL, ALL_CPUS); print_mmu(m); if (acpi_disabled) { @@ -306,7 +367,7 @@ static int c_show(struct seq_file *m, void *v) * additional extensions not present across all harts. */ seq_puts(m, "hart isa\t: "); - print_isa(m, hart_isa[cpu_id].isa); + print_isa(m, hart_isa[cpu_id].isa, cpu_id); seq_puts(m, "\n"); return 0; diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index 1cc7df740edd..e6fbaaf54956 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -72,7 +72,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) /* Make sure tidle is updated */ smp_mb(); bdata->task_ptr = tidle; - bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE; + bdata->stack_ptr = task_pt_regs(tidle); /* Make sure boot data is updated */ smp_mb(); hsm_data = __pa(bdata); diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c index 613872b0a21a..24869eb88908 100644 --- a/arch/riscv/kernel/cpu_ops_spinwait.c +++ b/arch/riscv/kernel/cpu_ops_spinwait.c @@ -34,8 +34,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid, /* Make sure tidle is updated */ smp_mb(); - WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], - task_stack_page(tidle) + THREAD_SIZE); + WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], task_pt_regs(tidle)); WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle); } diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 79a5a35fab96..743d53415572 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -11,30 +11,29 @@ #include <linux/cpu.h> #include <linux/cpuhotplug.h> #include <linux/ctype.h> -#include <linux/jump_label.h> #include <linux/log2.h> #include <linux/memory.h> #include <linux/module.h> #include <linux/of.h> #include <asm/acpi.h> #include <asm/alternative.h> +#include <asm/bugs.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwcap.h> +#include <asm/text-patching.h> #include <asm/hwprobe.h> -#include <asm/patch.h> #include <asm/processor.h> #include <asm/sbi.h> #include <asm/vector.h> - -#include "copy-unaligned.h" +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> #define NUM_ALPHA_EXTS ('z' - 'a' + 1) -#define MISALIGNED_ACCESS_JIFFIES_LG2 1 -#define MISALIGNED_BUFFER_SIZE 0x4000 -#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) -#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) +static bool any_cpu_has_zicboz; +static bool any_cpu_has_zicbop; +static bool any_cpu_has_zicbom; unsigned long elf_hwcap __read_mostly; @@ -44,10 +43,7 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; /* Per-cpu ISA extensions. */ struct riscv_isainfo hart_isa[NR_CPUS]; -/* Performance information */ -DEFINE_PER_CPU(long, misaligned_access_speed); - -static cpumask_t fast_misaligned_access; +u32 thead_vlenb_of; /** * riscv_isa_extension_base() - Get base extension word @@ -59,9 +55,7 @@ static cpumask_t fast_misaligned_access; */ unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap) { - if (!isa_bitmap) - return riscv_isa[0]; - return isa_bitmap[0]; + return !isa_bitmap ? riscv_isa[0] : isa_bitmap[0]; } EXPORT_SYMBOL_GPL(riscv_isa_extension_base); @@ -82,55 +76,208 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned i if (bit >= RISCV_ISA_EXT_MAX) return false; - return test_bit(bit, bmap) ? true : false; + return test_bit(bit, bmap); } EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); -static bool riscv_isa_extension_check(int id) +static int riscv_ext_f_depends(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) { - switch (id) { - case RISCV_ISA_EXT_ZICBOM: - if (!riscv_cbom_block_size) { - pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n"); - return false; - } else if (!is_power_of_2(riscv_cbom_block_size)) { - pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n"); - return false; - } - return true; - case RISCV_ISA_EXT_ZICBOZ: - if (!riscv_cboz_block_size) { - pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n"); - return false; - } else if (!is_power_of_2(riscv_cboz_block_size)) { - pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n"); - return false; - } - return true; - case RISCV_ISA_EXT_INVALID: - return false; + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_f)) + return 0; + + return -EPROBE_DEFER; +} + +static int riscv_ext_zicbom_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!riscv_cbom_block_size) { + pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n"); + return -EINVAL; + } + if (!is_power_of_2(riscv_cbom_block_size)) { + pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n"); + return -EINVAL; + } + + any_cpu_has_zicbom = true; + return 0; +} + +static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!riscv_cboz_block_size) { + pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n"); + return -EINVAL; } + if (!is_power_of_2(riscv_cboz_block_size)) { + pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n"); + return -EINVAL; + } + any_cpu_has_zicboz = true; + return 0; +} - return true; +static int riscv_ext_zicbop_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!riscv_cbop_block_size) { + pr_err("Zicbop detected in ISA string, disabling as no cbop-block-size found\n"); + return -EINVAL; + } + if (!is_power_of_2(riscv_cbop_block_size)) { + pr_err("Zicbop disabled as cbop-block-size present, but is not a power-of-2\n"); + return -EINVAL; + } + any_cpu_has_zicbop = true; + return 0; } -#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size) { \ - .name = #_name, \ - .property = #_name, \ - .id = _id, \ - .subset_ext_ids = _subset_exts, \ - .subset_ext_size = _subset_exts_size \ +static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_FPU)) + return -EINVAL; + + /* + * Due to extension ordering, d is checked before f, so no deferral + * is required. + */ + if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d)) { + pr_warn_once("This kernel does not support systems with F but not D\n"); + return -EINVAL; + } + + return 0; } -#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0) +static int riscv_ext_d_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_FPU)) + return -EINVAL; + + return 0; +} -/* Used to declare pure "lasso" extension (Zk for instance) */ -#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \ - _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, ARRAY_SIZE(_bundled_exts)) +static int riscv_ext_vector_x_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; -/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */ -#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \ - _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts)) + return 0; +} + +static int riscv_ext_vector_float_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + if (!IS_ENABLED(CONFIG_FPU)) + return -EINVAL; + + /* + * The kernel doesn't support systems that don't implement both of + * F and D, so if any of the vector extensions that do floating point + * are to be usable, both floating point extensions need to be usable. + * + * Since this function validates vector only, and v/Zve* are probed + * after f/d, there's no need for a deferral here. + */ + if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d)) + return -EINVAL; + + return 0; +} + +static int riscv_ext_vector_crypto_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + /* + * It isn't the kernel's job to check that the binding is correct, so + * it should be enough to check that any of the vector extensions are + * enabled, which in-turn means that vector is usable in this kernel + */ + if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVE32X)) + return -EPROBE_DEFER; + + return 0; +} + +static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA)) + return 0; + + return -EPROBE_DEFER; +} +static int riscv_ext_zcd_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA) && + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d)) + return 0; + + return -EPROBE_DEFER; +} + +static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (IS_ENABLED(CONFIG_64BIT)) + return -EINVAL; + + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA) && + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_f)) + return 0; + + return -EPROBE_DEFER; +} + +static int riscv_vector_f_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) + return -EINVAL; + + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVE32F)) + return 0; + + return -EPROBE_DEFER; +} + +static int riscv_ext_zvfbfwma_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZFBFMIN) && + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVFBFMIN)) + return 0; + + return -EPROBE_DEFER; +} + +static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + /* SVADE has already been detected, use SVADE only */ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_SVADE)) + return -EOPNOTSUPP; + + return 0; +} + +static const unsigned int riscv_a_exts[] = { + RISCV_ISA_EXT_ZAAMO, + RISCV_ISA_EXT_ZALRSC, +}; static const unsigned int riscv_zk_bundled_exts[] = { RISCV_ISA_EXT_ZBKB, @@ -202,6 +349,40 @@ static const unsigned int riscv_zvbb_exts[] = { RISCV_ISA_EXT_ZVKB }; +#define RISCV_ISA_EXT_ZVE64F_IMPLY_LIST \ + RISCV_ISA_EXT_ZVE64X, \ + RISCV_ISA_EXT_ZVE32F, \ + RISCV_ISA_EXT_ZVE32X + +#define RISCV_ISA_EXT_ZVE64D_IMPLY_LIST \ + RISCV_ISA_EXT_ZVE64F, \ + RISCV_ISA_EXT_ZVE64F_IMPLY_LIST + +#define RISCV_ISA_EXT_V_IMPLY_LIST \ + RISCV_ISA_EXT_ZVE64D, \ + RISCV_ISA_EXT_ZVE64D_IMPLY_LIST + +static const unsigned int riscv_zve32f_exts[] = { + RISCV_ISA_EXT_ZVE32X +}; + +static const unsigned int riscv_zve64f_exts[] = { + RISCV_ISA_EXT_ZVE64F_IMPLY_LIST +}; + +static const unsigned int riscv_zve64d_exts[] = { + RISCV_ISA_EXT_ZVE64D_IMPLY_LIST +}; + +static const unsigned int riscv_v_exts[] = { + RISCV_ISA_EXT_V_IMPLY_LIST +}; + +static const unsigned int riscv_zve64x_exts[] = { + RISCV_ISA_EXT_ZVE32X, + RISCV_ISA_EXT_ZVE64X +}; + /* * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V * privileged ISA, the existence of the CSRs is implied by any extension which @@ -213,6 +394,21 @@ static const unsigned int riscv_xlinuxenvcfg_exts[] = { }; /* + * Zc* spec states that: + * - C always implies Zca + * - C+F implies Zcf (RV32 only) + * - C+D implies Zcd + * + * These extensions will be enabled and then validated depending on the + * availability of F/D RV32. + */ +static const unsigned int riscv_c_exts[] = { + RISCV_ISA_EXT_ZCA, + RISCV_ISA_EXT_ZCF, + RISCV_ISA_EXT_ZCD, +}; + +/* * The canonical order of ISA extension names in the ISA string is defined in * chapter 27 of the unprivileged specification. * @@ -254,15 +450,17 @@ static const unsigned int riscv_xlinuxenvcfg_exts[] = { const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(i, RISCV_ISA_EXT_i), __RISCV_ISA_EXT_DATA(m, RISCV_ISA_EXT_m), - __RISCV_ISA_EXT_DATA(a, RISCV_ISA_EXT_a), - __RISCV_ISA_EXT_DATA(f, RISCV_ISA_EXT_f), - __RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d), + __RISCV_ISA_EXT_SUPERSET(a, RISCV_ISA_EXT_a, riscv_a_exts), + __RISCV_ISA_EXT_DATA_VALIDATE(f, RISCV_ISA_EXT_f, riscv_ext_f_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(d, RISCV_ISA_EXT_d, riscv_ext_d_validate), __RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q), - __RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c), - __RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v), + __RISCV_ISA_EXT_SUPERSET(c, RISCV_ISA_EXT_c, riscv_c_exts), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate), __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), - __RISCV_ISA_EXT_SUPERSET(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts), - __RISCV_ISA_EXT_SUPERSET(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zicbop, RISCV_ISA_EXT_ZICBOP, riscv_ext_zicbop_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate), + __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), @@ -270,10 +468,21 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zihintntl, RISCV_ISA_EXT_ZIHINTNTL), __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM), + __RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP), + __RISCV_ISA_EXT_DATA(zaamo, RISCV_ISA_EXT_ZAAMO), + __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA), __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS), + __RISCV_ISA_EXT_DATA(zalrsc, RISCV_ISA_EXT_ZALRSC), + __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA), + __RISCV_ISA_EXT_DATA_VALIDATE(zfbfmin, RISCV_ISA_EXT_ZFBFMIN, riscv_ext_f_depends), __RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH), __RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN), + __RISCV_ISA_EXT_DATA(zca, RISCV_ISA_EXT_ZCA), + __RISCV_ISA_EXT_DATA_VALIDATE(zcb, RISCV_ISA_EXT_ZCB, riscv_ext_zca_depends), + __RISCV_ISA_EXT_DATA_VALIDATE(zcd, RISCV_ISA_EXT_ZCD, riscv_ext_zcd_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zcf, RISCV_ISA_EXT_ZCF, riscv_ext_zcf_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zcmop, RISCV_ISA_EXT_ZCMOP, riscv_ext_zca_depends), __RISCV_ISA_EXT_DATA(zba, RISCV_ISA_EXT_ZBA), __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB), __RISCV_ISA_EXT_DATA(zbc, RISCV_ISA_EXT_ZBC), @@ -292,63 +501,134 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zksed, RISCV_ISA_EXT_ZKSED), __RISCV_ISA_EXT_DATA(zksh, RISCV_ISA_EXT_ZKSH), __RISCV_ISA_EXT_DATA(ztso, RISCV_ISA_EXT_ZTSO), - __RISCV_ISA_EXT_SUPERSET(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts), - __RISCV_ISA_EXT_DATA(zvbc, RISCV_ISA_EXT_ZVBC), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvbc, RISCV_ISA_EXT_ZVBC, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve32f, RISCV_ISA_EXT_ZVE32F, riscv_zve32f_exts, riscv_ext_vector_float_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zve32x, RISCV_ISA_EXT_ZVE32X, riscv_ext_vector_x_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts, riscv_ext_vector_float_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64f, RISCV_ISA_EXT_ZVE64F, riscv_zve64f_exts, riscv_ext_vector_float_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts, riscv_ext_vector_x_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvfbfmin, RISCV_ISA_EXT_ZVFBFMIN, riscv_vector_f_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvfbfwma, RISCV_ISA_EXT_ZVFBFWMA, riscv_ext_zvfbfwma_validate), __RISCV_ISA_EXT_DATA(zvfh, RISCV_ISA_EXT_ZVFH), __RISCV_ISA_EXT_DATA(zvfhmin, RISCV_ISA_EXT_ZVFHMIN), - __RISCV_ISA_EXT_DATA(zvkb, RISCV_ISA_EXT_ZVKB), - __RISCV_ISA_EXT_DATA(zvkg, RISCV_ISA_EXT_ZVKG), - __RISCV_ISA_EXT_BUNDLE(zvkn, riscv_zvkn_bundled_exts), - __RISCV_ISA_EXT_BUNDLE(zvknc, riscv_zvknc_bundled_exts), - __RISCV_ISA_EXT_DATA(zvkned, RISCV_ISA_EXT_ZVKNED), - __RISCV_ISA_EXT_BUNDLE(zvkng, riscv_zvkng_bundled_exts), - __RISCV_ISA_EXT_DATA(zvknha, RISCV_ISA_EXT_ZVKNHA), - __RISCV_ISA_EXT_DATA(zvknhb, RISCV_ISA_EXT_ZVKNHB), - __RISCV_ISA_EXT_BUNDLE(zvks, riscv_zvks_bundled_exts), - __RISCV_ISA_EXT_BUNDLE(zvksc, riscv_zvksc_bundled_exts), - __RISCV_ISA_EXT_DATA(zvksed, RISCV_ISA_EXT_ZVKSED), - __RISCV_ISA_EXT_DATA(zvksh, RISCV_ISA_EXT_ZVKSH), - __RISCV_ISA_EXT_BUNDLE(zvksg, riscv_zvksg_bundled_exts), - __RISCV_ISA_EXT_DATA(zvkt, RISCV_ISA_EXT_ZVKT), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkb, RISCV_ISA_EXT_ZVKB, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkg, RISCV_ISA_EXT_ZVKG, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvkn, riscv_zvkn_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvknc, riscv_zvknc_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkned, RISCV_ISA_EXT_ZVKNED, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvkng, riscv_zvkng_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvknha, RISCV_ISA_EXT_ZVKNHA, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvknhb, RISCV_ISA_EXT_ZVKNHB, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvks, riscv_zvks_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvksc, riscv_zvksc_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvksed, RISCV_ISA_EXT_ZVKSED, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvksh, RISCV_ISA_EXT_ZVKSH, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvksg, riscv_zvksg_bundled_exts, riscv_ext_vector_crypto_validate), + __RISCV_ISA_EXT_DATA_VALIDATE(zvkt, RISCV_ISA_EXT_ZVKT, riscv_ext_vector_crypto_validate), __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA), + __RISCV_ISA_EXT_DATA(smmpm, RISCV_ISA_EXT_SMMPM), + __RISCV_ISA_EXT_SUPERSET(smnpm, RISCV_ISA_EXT_SMNPM, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(smstateen, RISCV_ISA_EXT_SMSTATEEN), __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), + __RISCV_ISA_EXT_SUPERSET(ssnpm, RISCV_ISA_EXT_SSNPM, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), + __RISCV_ISA_EXT_DATA(svade, RISCV_ISA_EXT_SVADE), + __RISCV_ISA_EXT_DATA_VALIDATE(svadu, RISCV_ISA_EXT_SVADU, riscv_ext_svadu_validate), __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), + __RISCV_ISA_EXT_DATA(svvptc, RISCV_ISA_EXT_SVVPTC), }; const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext); -static void __init match_isa_ext(const struct riscv_isa_ext_data *ext, const char *name, - const char *name_end, struct riscv_isainfo *isainfo) +static void riscv_isa_set_ext(const struct riscv_isa_ext_data *ext, unsigned long *bitmap) { - if ((name_end - name == strlen(ext->name)) && - !strncasecmp(name, ext->name, name_end - name)) { - /* - * If this is a bundle, enable all the ISA extensions that - * comprise the bundle. - */ - if (ext->subset_ext_size) { - for (int i = 0; i < ext->subset_ext_size; i++) { - if (riscv_isa_extension_check(ext->subset_ext_ids[i])) - set_bit(ext->subset_ext_ids[i], isainfo->isa); + if (ext->id != RISCV_ISA_EXT_INVALID) + set_bit(ext->id, bitmap); + + for (int i = 0; i < ext->subset_ext_size; i++) { + if (ext->subset_ext_ids[i] != RISCV_ISA_EXT_INVALID) + set_bit(ext->subset_ext_ids[i], bitmap); + } +} + +static const struct riscv_isa_ext_data *riscv_get_isa_ext_data(unsigned int ext_id) +{ + for (int i = 0; i < riscv_isa_ext_count; i++) { + if (riscv_isa_ext[i].id == ext_id) + return &riscv_isa_ext[i]; + } + + return NULL; +} + +/* + * "Resolve" a source ISA bitmap into one that matches kernel configuration as + * well as correct extension dependencies. Some extensions depends on specific + * kernel configuration to be usable (V needs CONFIG_RISCV_ISA_V for instance) + * and this function will actually validate all the extensions provided in + * source_isa into the resolved_isa based on extensions validate() callbacks. + */ +static void __init riscv_resolve_isa(unsigned long *source_isa, + unsigned long *resolved_isa, unsigned long *this_hwcap, + unsigned long *isa2hwcap) +{ + bool loop; + const struct riscv_isa_ext_data *ext; + DECLARE_BITMAP(prev_resolved_isa, RISCV_ISA_EXT_MAX); + int max_loop_count = riscv_isa_ext_count, ret; + unsigned int bit; + + do { + loop = false; + if (max_loop_count-- < 0) { + pr_err("Failed to reach a stable ISA state\n"); + return; + } + bitmap_copy(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX); + for_each_set_bit(bit, source_isa, RISCV_ISA_EXT_MAX) { + ext = riscv_get_isa_ext_data(bit); + + if (ext && ext->validate) { + ret = ext->validate(ext, resolved_isa); + if (ret == -EPROBE_DEFER) { + loop = true; + continue; + } else if (ret) { + /* Disable the extension entirely */ + clear_bit(bit, source_isa); + continue; + } } + + set_bit(bit, resolved_isa); + /* No need to keep it in source isa now that it is enabled */ + clear_bit(bit, source_isa); + + /* Single letter extensions get set in hwcap */ + if (bit < RISCV_ISA_EXT_BASE) + *this_hwcap |= isa2hwcap[bit]; } + } while (loop && !bitmap_equal(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX)); +} - /* - * This is valid even for bundle extensions which uses the RISCV_ISA_EXT_INVALID id - * (rejected by riscv_isa_extension_check()). - */ - if (riscv_isa_extension_check(ext->id)) - set_bit(ext->id, isainfo->isa); +static void __init match_isa_ext(const char *name, const char *name_end, unsigned long *bitmap) +{ + for (int i = 0; i < riscv_isa_ext_count; i++) { + const struct riscv_isa_ext_data *ext = &riscv_isa_ext[i]; + + if ((name_end - name == strlen(ext->name)) && + !strncasecmp(name, ext->name, name_end - name)) { + riscv_isa_set_ext(ext, bitmap); + break; + } } } -static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct riscv_isainfo *isainfo, - unsigned long *isa2hwcap, const char *isa) +static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap) { /* * For all possible cpus, we have already validated in @@ -361,9 +641,24 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc while (*isa) { const char *ext = isa++; const char *ext_end = isa; - bool ext_long = false, ext_err = false; + bool ext_err = false; switch (*ext) { + case 'x': + case 'X': + if (acpi_disabled) + pr_warn_once("Vendor extensions are ignored in riscv,isa. Use riscv,isa-extensions instead."); + /* + * To skip an extension, we find its end. + * As multi-letter extensions must be split from other multi-letter + * extensions with an "_", the end of a multi-letter extension will + * either be the null character or the "_" at the start of the next + * multi-letter extension. + */ + for (; *isa && *isa != '_'; ++isa) + ; + ext_err = true; + break; case 's': /* * Workaround for invalid single-letter 's' & 'u' (QEMU). @@ -379,8 +674,6 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc } fallthrough; case 'S': - case 'x': - case 'X': case 'z': case 'Z': /* @@ -401,7 +694,6 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc * character itself while eliminating the extensions version number. * A simple re-increment solves this problem. */ - ext_long = true; for (; *isa && *isa != '_'; ++isa) if (unlikely(!isalnum(*isa))) ext_err = true; @@ -481,17 +773,8 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc if (unlikely(ext_err)) continue; - if (!ext_long) { - int nr = tolower(*ext) - 'a'; - if (riscv_isa_extension_check(nr)) { - *this_hwcap |= isa2hwcap[nr]; - set_bit(nr, isainfo->isa); - } - } else { - for (int i = 0; i < riscv_isa_ext_count; i++) - match_isa_ext(&riscv_isa_ext[i], ext, ext_end, isainfo); - } + match_isa_ext(ext, ext_end, bitmap); } } @@ -503,6 +786,8 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) struct acpi_table_header *rhct; acpi_status status; unsigned int cpu; + u64 boot_vendorid; + u64 boot_archid; if (!acpi_disabled) { status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct); @@ -510,9 +795,13 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) return; } + boot_vendorid = riscv_get_mvendorid(); + boot_archid = riscv_get_marchid(); + for_each_possible_cpu(cpu) { struct riscv_isainfo *isainfo = &hart_isa[cpu]; unsigned long this_hwcap = 0; + DECLARE_BITMAP(source_isa, RISCV_ISA_EXT_MAX) = { 0 }; if (acpi_disabled) { node = of_cpu_device_node_get(cpu); @@ -535,7 +824,7 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) } } - riscv_parse_isa_string(&this_hwcap, isainfo, isa2hwcap, isa); + riscv_parse_isa_string(isa, source_isa); /* * These ones were as they were part of the base ISA when the @@ -543,10 +832,10 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) * unconditionally where `i` is in riscv,isa on DT systems. */ if (acpi_disabled) { - set_bit(RISCV_ISA_EXT_ZICSR, isainfo->isa); - set_bit(RISCV_ISA_EXT_ZIFENCEI, isainfo->isa); - set_bit(RISCV_ISA_EXT_ZICNTR, isainfo->isa); - set_bit(RISCV_ISA_EXT_ZIHPM, isainfo->isa); + set_bit(RISCV_ISA_EXT_ZICSR, source_isa); + set_bit(RISCV_ISA_EXT_ZIFENCEI, source_isa); + set_bit(RISCV_ISA_EXT_ZICNTR, source_isa); + set_bit(RISCV_ISA_EXT_ZIHPM, source_isa); } /* @@ -557,12 +846,13 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) * CPU cores with the ratified spec will contain non-zero * marchid. */ - if (acpi_disabled && riscv_cached_mvendorid(cpu) == THEAD_VENDOR_ID && - riscv_cached_marchid(cpu) == 0x0) { + if (acpi_disabled && boot_vendorid == THEAD_VENDOR_ID && boot_archid == 0x0) { this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v]; - clear_bit(RISCV_ISA_EXT_v, isainfo->isa); + clear_bit(RISCV_ISA_EXT_v, source_isa); } + riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap); + /* * All "okay" hart should have same isa. Set HWCAP based on * common capabilities of every "okay" hart, in case they don't @@ -583,14 +873,111 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) acpi_put_table((struct acpi_table_header *)rhct); } +static void __init riscv_fill_cpu_vendor_ext(struct device_node *cpu_node, int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + for (int j = 0; j < ext_list->ext_data_count; j++) { + const struct riscv_isa_ext_data ext = ext_list->ext_data[j]; + struct riscv_isavendorinfo *isavendorinfo = &ext_list->per_hart_isa_bitmap[cpu]; + + if (of_property_match_string(cpu_node, "riscv,isa-extensions", + ext.property) < 0) + continue; + + /* + * Assume that subset extensions are all members of the + * same vendor. + */ + if (ext.subset_ext_size) + for (int k = 0; k < ext.subset_ext_size; k++) + set_bit(ext.subset_ext_ids[k], isavendorinfo->isa); + + set_bit(ext.id, isavendorinfo->isa); + } + } +} + +/* + * Populate all_harts_isa_bitmap for each vendor with all of the extensions that + * are shared across CPUs for that vendor. + */ +static void __init riscv_fill_vendor_ext_list(int cpu) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT)) + return; + + for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) { + struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i]; + + if (!ext_list->is_initialized) { + bitmap_copy(ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + ext_list->is_initialized = true; + } else { + bitmap_and(ext_list->all_harts_isa_bitmap.isa, + ext_list->all_harts_isa_bitmap.isa, + ext_list->per_hart_isa_bitmap[cpu].isa, + RISCV_ISA_VENDOR_EXT_MAX); + } + } +} + +static int has_thead_homogeneous_vlenb(void) +{ + int cpu; + u32 prev_vlenb = 0; + u32 vlenb; + + /* Ignore thead,vlenb property if xtheavector is not enabled in the kernel */ + if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) + return 0; + + for_each_possible_cpu(cpu) { + struct device_node *cpu_node; + + cpu_node = of_cpu_device_node_get(cpu); + if (!cpu_node) { + pr_warn("Unable to find cpu node\n"); + return -ENOENT; + } + + if (of_property_read_u32(cpu_node, "thead,vlenb", &vlenb)) { + of_node_put(cpu_node); + + if (prev_vlenb) + return -ENOENT; + continue; + } + + if (prev_vlenb && vlenb != prev_vlenb) { + of_node_put(cpu_node); + return -ENOENT; + } + + prev_vlenb = vlenb; + of_node_put(cpu_node); + } + + thead_vlenb_of = vlenb; + return 0; +} + static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) { unsigned int cpu; + bool mitigated; for_each_possible_cpu(cpu) { unsigned long this_hwcap = 0; struct device_node *cpu_node; struct riscv_isainfo *isainfo = &hart_isa[cpu]; + DECLARE_BITMAP(source_isa, RISCV_ISA_EXT_MAX) = { 0 }; cpu_node = of_cpu_device_node_get(cpu); if (!cpu_node) { @@ -610,22 +997,12 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) ext->property) < 0) continue; - if (ext->subset_ext_size) { - for (int j = 0; j < ext->subset_ext_size; j++) { - if (riscv_isa_extension_check(ext->subset_ext_ids[i])) - set_bit(ext->subset_ext_ids[j], isainfo->isa); - } - } - - if (riscv_isa_extension_check(ext->id)) { - set_bit(ext->id, isainfo->isa); - - /* Only single letter extensions get set in hwcap */ - if (strnlen(riscv_isa_ext[i].name, 2) == 1) - this_hwcap |= isa2hwcap[riscv_isa_ext[i].id]; - } + riscv_isa_set_ext(ext, source_isa); } + riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap); + riscv_fill_cpu_vendor_ext(cpu_node, cpu); + of_node_put(cpu_node); /* @@ -641,6 +1018,19 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap) bitmap_copy(riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); else bitmap_and(riscv_isa, riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX); + + riscv_fill_vendor_ext_list(cpu); + } + + /* + * Execute ghostwrite mitigation immediately after detecting extensions + * to disable xtheadvector if necessary. + */ + mitigated = ghostwrite_enable_mitigation(); + + if (!mitigated && has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) { + pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n"); + disable_xtheadvector(); } if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) @@ -695,15 +1085,12 @@ void __init riscv_fill_hwcap(void) elf_hwcap &= ~COMPAT_HWCAP_ISA_F; } - if (elf_hwcap & COMPAT_HWCAP_ISA_V) { - riscv_v_setup_vsize(); + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) || + has_xtheadvector_no_alternatives()) { /* - * ISA string in device tree might have 'v' flag, but - * CONFIG_RISCV_ISA_V is disabled in kernel. - * Clear V flag in elf_hwcap if CONFIG_RISCV_ISA_V is disabled. + * This cannot fail when called on the boot hart */ - if (!IS_ENABLED(CONFIG_RISCV_ISA_V)) - elf_hwcap &= ~COMPAT_HWCAP_ISA_V; + riscv_v_setup_vsize(); } memset(print_str, 0, sizeof(print_str)); @@ -731,251 +1118,21 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -static int check_unaligned_access(void *param) -{ - int cpu = smp_processor_id(); - u64 start_cycles, end_cycles; - u64 word_cycles; - u64 byte_cycles; - int ratio; - unsigned long start_jiffies, now; - struct page *page = param; - void *dst; - void *src; - long speed = RISCV_HWPROBE_MISALIGNED_SLOW; - - if (check_unaligned_access_emulated(cpu)) - return 0; - - /* Make an unaligned destination buffer. */ - dst = (void *)((unsigned long)page_address(page) | 0x1); - /* Unalign src as well, but differently (off by 1 + 2 = 3). */ - src = dst + (MISALIGNED_BUFFER_SIZE / 2); - src += 2; - word_cycles = -1ULL; - /* Do a warmup. */ - __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); - preempt_disable(); - start_jiffies = jiffies; - while ((now = jiffies) == start_jiffies) - cpu_relax(); - - /* - * For a fixed amount of time, repeatedly try the function, and take - * the best time in cycles as the measurement. - */ - while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { - start_cycles = get_cycles64(); - /* Ensure the CSR read can't reorder WRT to the copy. */ - mb(); - __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); - /* Ensure the copy ends before the end time is snapped. */ - mb(); - end_cycles = get_cycles64(); - if ((end_cycles - start_cycles) < word_cycles) - word_cycles = end_cycles - start_cycles; - } - - byte_cycles = -1ULL; - __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); - start_jiffies = jiffies; - while ((now = jiffies) == start_jiffies) - cpu_relax(); - - while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { - start_cycles = get_cycles64(); - mb(); - __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); - mb(); - end_cycles = get_cycles64(); - if ((end_cycles - start_cycles) < byte_cycles) - byte_cycles = end_cycles - start_cycles; - } - - preempt_enable(); - - /* Don't divide by zero. */ - if (!word_cycles || !byte_cycles) { - pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", - cpu); - - return 0; - } - - if (word_cycles < byte_cycles) - speed = RISCV_HWPROBE_MISALIGNED_FAST; - - ratio = div_u64((byte_cycles * 100), word_cycles); - pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", - cpu, - ratio / 100, - ratio % 100, - (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); - - per_cpu(misaligned_access_speed, cpu) = speed; - - /* - * Set the value of fast_misaligned_access of a CPU. These operations - * are atomic to avoid race conditions. - */ - if (speed == RISCV_HWPROBE_MISALIGNED_FAST) - cpumask_set_cpu(cpu, &fast_misaligned_access); - else - cpumask_clear_cpu(cpu, &fast_misaligned_access); - - return 0; -} - -static void check_unaligned_access_nonboot_cpu(void *param) -{ - unsigned int cpu = smp_processor_id(); - struct page **pages = param; - - if (smp_processor_id() != 0) - check_unaligned_access(pages[cpu]); -} - -DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key); - -static void modify_unaligned_access_branches(cpumask_t *mask, int weight) -{ - if (cpumask_weight(mask) == weight) - static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key); - else - static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key); -} - -static void set_unaligned_access_static_branches_except_cpu(int cpu) -{ - /* - * Same as set_unaligned_access_static_branches, except excludes the - * given CPU from the result. When a CPU is hotplugged into an offline - * state, this function is called before the CPU is set to offline in - * the cpumask, and thus the CPU needs to be explicitly excluded. - */ - - cpumask_t fast_except_me; - - cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask); - cpumask_clear_cpu(cpu, &fast_except_me); - - modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1); -} - -static void set_unaligned_access_static_branches(void) -{ - /* - * This will be called after check_unaligned_access_all_cpus so the - * result of unaligned access speed for all CPUs will be available. - * - * To avoid the number of online cpus changing between reading - * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be - * held before calling this function. - */ - - cpumask_t fast_and_online; - - cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask); - - modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); -} - -static int lock_and_set_unaligned_access_static_branch(void) -{ - cpus_read_lock(); - set_unaligned_access_static_branches(); - cpus_read_unlock(); - - return 0; -} - -arch_initcall_sync(lock_and_set_unaligned_access_static_branch); - -static int riscv_online_cpu(unsigned int cpu) -{ - static struct page *buf; - - /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) - goto exit; - - buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!buf) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return -ENOMEM; - } - - check_unaligned_access(buf); - __free_pages(buf, MISALIGNED_BUFFER_ORDER); - -exit: - set_unaligned_access_static_branches(); - - return 0; -} - -static int riscv_offline_cpu(unsigned int cpu) -{ - set_unaligned_access_static_branches_except_cpu(cpu); - - return 0; -} - -/* Measure unaligned access on all CPUs present at boot in parallel. */ -static int check_unaligned_access_all_cpus(void) +void __init riscv_user_isa_enable(void) { - unsigned int cpu; - unsigned int cpu_count = num_possible_cpus(); - struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), - GFP_KERNEL); - - if (!bufs) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return 0; - } - - /* - * Allocate separate buffers for each CPU so there's no fighting over - * cache lines. - */ - for_each_cpu(cpu, cpu_online_mask) { - bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!bufs[cpu]) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - goto out; - } - } - - /* Check everybody except 0, who stays behind to tend jiffies. */ - on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); - - /* Check core 0. */ - smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - - /* - * Setup hotplug callbacks for any new CPUs that come online or go - * offline. - */ - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu, riscv_offline_cpu); - -out: - unaligned_emulation_finish(); - for_each_cpu(cpu, cpu_online_mask) { - if (bufs[cpu]) - __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); - } - - kfree(bufs); - return 0; -} - -arch_initcall(check_unaligned_access_all_cpus); - -void riscv_user_isa_enable(void) -{ - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) - csr_set(CSR_ENVCFG, ENVCFG_CBZE); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOZ)) + current->thread.envcfg |= ENVCFG_CBZE; + else if (any_cpu_has_zicboz) + pr_warn("Zicboz disabled as it is unavailable on some harts\n"); + + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOM)) + current->thread.envcfg |= ENVCFG_CBCFE; + else if (any_cpu_has_zicbom) + pr_warn("Zicbom disabled as it is unavailable on some harts\n"); + + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOP) && + any_cpu_has_zicbop) + pr_warn("Zicbop disabled as it is unavailable on some harts\n"); } #ifdef CONFIG_RISCV_ALTERNATIVE @@ -1018,28 +1175,45 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, { struct alt_entry *alt; void *oldptr, *altptr; - u16 id, value; + u16 id, value, vendor; if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return; for (alt = begin; alt < end; alt++) { - if (alt->vendor_id != 0) - continue; - id = PATCH_ID_CPUFEATURE_ID(alt->patch_id); + vendor = PATCH_ID_CPUFEATURE_ID(alt->vendor_id); - if (id >= RISCV_ISA_EXT_MAX) { - WARN(1, "This extension id:%d is not in ISA extension list", id); - continue; - } + /* + * Any alternative with a patch_id that is less than + * RISCV_ISA_EXT_MAX is interpreted as a standard extension. + * + * Any alternative with patch_id that is greater than or equal + * to RISCV_VENDOR_EXT_ALTERNATIVES_BASE is interpreted as a + * vendor extension. + */ + if (id < RISCV_ISA_EXT_MAX) { + /* + * This patch should be treated as errata so skip + * processing here. + */ + if (alt->vendor_id != 0) + continue; - if (!__riscv_isa_extension_available(NULL, id)) - continue; + if (!__riscv_isa_extension_available(NULL, id)) + continue; - value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); - if (!riscv_cpufeature_patch_check(id, value)) + value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); + if (!riscv_cpufeature_patch_check(id, value)) + continue; + } else if (id >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE) { + if (!__riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor, + id - RISCV_VENDOR_EXT_ALTERNATIVES_BASE)) + continue; + } else { + WARN(1, "This extension id:%d is not in ISA extension list", id); continue; + } oldptr = ALT_OLD_PTR(alt); altptr = ALT_ALT_PTR(alt); diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S index 515b2dfbca75..2efc3aaf4a8c 100644 --- a/arch/riscv/kernel/efi-header.S +++ b/arch/riscv/kernel/efi-header.S @@ -9,7 +9,7 @@ #include <asm/set_memory.h> .macro __EFI_PE_HEADER - .long PE_MAGIC + .long IMAGE_NT_SIGNATURE coff_header: #ifdef CONFIG_64BIT .short IMAGE_FILE_MACHINE_RISCV64 // Machine @@ -27,9 +27,9 @@ coff_header: optional_header: #ifdef CONFIG_64BIT - .short PE_OPT_MAGIC_PE32PLUS // PE32+ format + .short IMAGE_NT_OPTIONAL_HDR64_MAGIC // PE32+ format #else - .short PE_OPT_MAGIC_PE32 // PE32 format + .short IMAGE_NT_OPTIONAL_HDR32_MAGIC // PE32 format #endif .byte 0x02 // MajorLinkerVersion .byte 0x14 // MinorLinkerVersion @@ -64,7 +64,7 @@ extra_header_fields: .long efi_header_end - _start // SizeOfHeaders .long 0 // CheckSum .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem - .short 0 // DllCharacteristics + .short IMAGE_DLLCHARACTERISTICS_NX_COMPAT // DllCharacteristics .quad 0 // SizeOfStackReserve .quad 0 // SizeOfStackCommit .quad 0 // SizeOfHeapReserve diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c deleted file mode 100644 index 54260c16f991..000000000000 --- a/arch/riscv/kernel/elf_kexec.c +++ /dev/null @@ -1,475 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Load ELF vmlinux file for the kexec_file_load syscall. - * - * Copyright (C) 2021 Huawei Technologies Co, Ltd. - * - * Author: Liao Chang (liaochang1@huawei.com) - * - * Based on kexec-tools' kexec-elf-riscv.c, heavily modified - * for kernel. - */ - -#define pr_fmt(fmt) "kexec_image: " fmt - -#include <linux/elf.h> -#include <linux/kexec.h> -#include <linux/slab.h> -#include <linux/of.h> -#include <linux/libfdt.h> -#include <linux/types.h> -#include <linux/memblock.h> -#include <asm/setup.h> - -int arch_kimage_file_post_load_cleanup(struct kimage *image) -{ - kvfree(image->arch.fdt); - image->arch.fdt = NULL; - - vfree(image->elf_headers); - image->elf_headers = NULL; - image->elf_headers_sz = 0; - - return kexec_image_post_load_cleanup_default(image); -} - -static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, - struct kexec_elf_info *elf_info, unsigned long old_pbase, - unsigned long new_pbase) -{ - int i; - int ret = 0; - size_t size; - struct kexec_buf kbuf; - const struct elf_phdr *phdr; - - kbuf.image = image; - - for (i = 0; i < ehdr->e_phnum; i++) { - phdr = &elf_info->proghdrs[i]; - if (phdr->p_type != PT_LOAD) - continue; - - size = phdr->p_filesz; - if (size > phdr->p_memsz) - size = phdr->p_memsz; - - kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; - kbuf.bufsz = size; - kbuf.buf_align = phdr->p_align; - kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; - kbuf.memsz = phdr->p_memsz; - kbuf.top_down = false; - ret = kexec_add_buffer(&kbuf); - if (ret) - break; - } - - return ret; -} - -/* - * Go through the available phsyical memory regions and find one that hold - * an image of the specified size. - */ -static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, - struct elfhdr *ehdr, struct kexec_elf_info *elf_info, - unsigned long *old_pbase, unsigned long *new_pbase) -{ - int i; - int ret; - struct kexec_buf kbuf; - const struct elf_phdr *phdr; - unsigned long lowest_paddr = ULONG_MAX; - unsigned long lowest_vaddr = ULONG_MAX; - - for (i = 0; i < ehdr->e_phnum; i++) { - phdr = &elf_info->proghdrs[i]; - if (phdr->p_type != PT_LOAD) - continue; - - if (lowest_paddr > phdr->p_paddr) - lowest_paddr = phdr->p_paddr; - - if (lowest_vaddr > phdr->p_vaddr) - lowest_vaddr = phdr->p_vaddr; - } - - kbuf.image = image; - kbuf.buf_min = lowest_paddr; - kbuf.buf_max = ULONG_MAX; - - /* - * Current riscv boot protocol requires 2MB alignment for - * RV64 and 4MB alignment for RV32 - * - */ - kbuf.buf_align = PMD_SIZE; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); - kbuf.top_down = false; - ret = arch_kexec_locate_mem_hole(&kbuf); - if (!ret) { - *old_pbase = lowest_paddr; - *new_pbase = kbuf.mem; - image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; - } - return ret; -} - -#ifdef CONFIG_CRASH_DUMP -static int get_nr_ram_ranges_callback(struct resource *res, void *arg) -{ - unsigned int *nr_ranges = arg; - - (*nr_ranges)++; - return 0; -} - -static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) -{ - struct crash_mem *cmem = arg; - - cmem->ranges[cmem->nr_ranges].start = res->start; - cmem->ranges[cmem->nr_ranges].end = res->end; - cmem->nr_ranges++; - - return 0; -} - -static int prepare_elf_headers(void **addr, unsigned long *sz) -{ - struct crash_mem *cmem; - unsigned int nr_ranges; - int ret; - - nr_ranges = 1; /* For exclusion of crashkernel region */ - walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); - - cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); - if (!cmem) - return -ENOMEM; - - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; - ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); - if (ret) - goto out; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (!ret) - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; -} - -static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, - unsigned long cmdline_len) -{ - int elfcorehdr_strlen; - char *cmdline_ptr; - - cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); - if (!cmdline_ptr) - return NULL; - - elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", - image->elf_load_addr); - - if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { - pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n"); - kfree(cmdline_ptr); - return NULL; - } - - memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); - /* Ensure it's nul terminated */ - cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; - return cmdline_ptr; -} -#endif - -static void *elf_kexec_load(struct kimage *image, char *kernel_buf, - unsigned long kernel_len, char *initrd, - unsigned long initrd_len, char *cmdline, - unsigned long cmdline_len) -{ - int ret; - void *fdt; - unsigned long old_kernel_pbase = ULONG_MAX; - unsigned long new_kernel_pbase = 0UL; - unsigned long initrd_pbase = 0UL; - unsigned long kernel_start; - struct elfhdr ehdr; - struct kexec_buf kbuf; - struct kexec_elf_info elf_info; - char *modified_cmdline = NULL; - - ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); - if (ret) - return ERR_PTR(ret); - - ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info, - &old_kernel_pbase, &new_kernel_pbase); - if (ret) - goto out; - kernel_start = image->start; - - /* Add the kernel binary to the image */ - ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, - old_kernel_pbase, new_kernel_pbase); - if (ret) - goto out; - - kbuf.image = image; - kbuf.buf_min = new_kernel_pbase + kernel_len; - kbuf.buf_max = ULONG_MAX; - -#ifdef CONFIG_CRASH_DUMP - /* Add elfcorehdr */ - if (image->type == KEXEC_TYPE_CRASH) { - void *headers; - unsigned long headers_sz; - ret = prepare_elf_headers(&headers, &headers_sz); - if (ret) { - pr_err("Preparing elf core header failed\n"); - goto out; - } - - kbuf.buffer = headers; - kbuf.bufsz = headers_sz; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf.memsz = headers_sz; - kbuf.buf_align = ELF_CORE_HEADER_ALIGN; - kbuf.top_down = true; - - ret = kexec_add_buffer(&kbuf); - if (ret) { - vfree(headers); - goto out; - } - image->elf_headers = headers; - image->elf_load_addr = kbuf.mem; - image->elf_headers_sz = headers_sz; - - kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - image->elf_load_addr, kbuf.bufsz, kbuf.memsz); - - /* Setup cmdline for kdump kernel case */ - modified_cmdline = setup_kdump_cmdline(image, cmdline, - cmdline_len); - if (!modified_cmdline) { - pr_err("Setting up cmdline for kdump kernel failed\n"); - ret = -EINVAL; - goto out; - } - cmdline = modified_cmdline; - } -#endif - -#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY - /* Add purgatory to the image */ - kbuf.top_down = true; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - ret = kexec_load_purgatory(image, &kbuf); - if (ret) { - pr_err("Error loading purgatory ret=%d\n", ret); - goto out; - } - kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem); - - ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", - &kernel_start, - sizeof(kernel_start), 0); - if (ret) - pr_err("Error update purgatory ret=%d\n", ret); -#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */ - - /* Add the initrd to the image */ - if (initrd != NULL) { - kbuf.buffer = initrd; - kbuf.bufsz = kbuf.memsz = initrd_len; - kbuf.buf_align = PAGE_SIZE; - kbuf.top_down = true; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - ret = kexec_add_buffer(&kbuf); - if (ret) - goto out; - initrd_pbase = kbuf.mem; - kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase); - } - - /* Add the DTB to the image */ - fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase, - initrd_len, cmdline, 0); - if (!fdt) { - pr_err("Error setting up the new device tree.\n"); - ret = -EINVAL; - goto out; - } - - fdt_pack(fdt); - kbuf.buffer = fdt; - kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); - kbuf.buf_align = PAGE_SIZE; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf.top_down = true; - ret = kexec_add_buffer(&kbuf); - if (ret) { - pr_err("Error add DTB kbuf ret=%d\n", ret); - goto out_free_fdt; - } - /* Cache the fdt buffer address for memory cleanup */ - image->arch.fdt = fdt; - kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem); - goto out; - -out_free_fdt: - kvfree(fdt); -out: - kfree(modified_cmdline); - kexec_free_elf_info(&elf_info); - return ret ? ERR_PTR(ret) : NULL; -} - -#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) -#define RISCV_IMM_BITS 12 -#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS) -#define RISCV_CONST_HIGH_PART(x) \ - (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1)) -#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x)) - -#define ENCODE_ITYPE_IMM(x) \ - (RV_X(x, 0, 12) << 20) -#define ENCODE_BTYPE_IMM(x) \ - ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \ - (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) -#define ENCODE_UTYPE_IMM(x) \ - (RV_X(x, 12, 20) << 12) -#define ENCODE_JTYPE_IMM(x) \ - ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \ - (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) -#define ENCODE_CBTYPE_IMM(x) \ - ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \ - (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12)) -#define ENCODE_CJTYPE_IMM(x) \ - ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \ - (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \ - (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12)) -#define ENCODE_UJTYPE_IMM(x) \ - (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \ - (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32)) -#define ENCODE_UITYPE_IMM(x) \ - (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32)) - -#define CLEAN_IMM(type, x) \ - ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x)) - -int arch_kexec_apply_relocations_add(struct purgatory_info *pi, - Elf_Shdr *section, - const Elf_Shdr *relsec, - const Elf_Shdr *symtab) -{ - const char *strtab, *name, *shstrtab; - const Elf_Shdr *sechdrs; - Elf64_Rela *relas; - int i, r_type; - - /* String & section header string table */ - sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; - strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; - shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; - - relas = (void *)pi->ehdr + relsec->sh_offset; - - for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { - const Elf_Sym *sym; /* symbol to relocate */ - unsigned long addr; /* final location after relocation */ - unsigned long val; /* relocated symbol value */ - unsigned long sec_base; /* relocated symbol value */ - void *loc; /* tmp location to modify */ - - sym = (void *)pi->ehdr + symtab->sh_offset; - sym += ELF64_R_SYM(relas[i].r_info); - - if (sym->st_name) - name = strtab + sym->st_name; - else - name = shstrtab + sechdrs[sym->st_shndx].sh_name; - - loc = pi->purgatory_buf; - loc += section->sh_offset; - loc += relas[i].r_offset; - - if (sym->st_shndx == SHN_ABS) - sec_base = 0; - else if (sym->st_shndx >= pi->ehdr->e_shnum) { - pr_err("Invalid section %d for symbol %s\n", - sym->st_shndx, name); - return -ENOEXEC; - } else - sec_base = pi->sechdrs[sym->st_shndx].sh_addr; - - val = sym->st_value; - val += sec_base; - val += relas[i].r_addend; - - addr = section->sh_addr + relas[i].r_offset; - - r_type = ELF64_R_TYPE(relas[i].r_info); - - switch (r_type) { - case R_RISCV_BRANCH: - *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | - ENCODE_BTYPE_IMM(val - addr); - break; - case R_RISCV_JAL: - *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | - ENCODE_JTYPE_IMM(val - addr); - break; - /* - * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I - * sym is expected to be next to R_RISCV_PCREL_HI20 - * in purgatory relsec. Handle it like R_RISCV_CALL - * sym, instead of searching the whole relsec. - */ - case R_RISCV_PCREL_HI20: - case R_RISCV_CALL_PLT: - case R_RISCV_CALL: - *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | - ENCODE_UJTYPE_IMM(val - addr); - break; - case R_RISCV_RVC_BRANCH: - *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | - ENCODE_CBTYPE_IMM(val - addr); - break; - case R_RISCV_RVC_JUMP: - *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | - ENCODE_CJTYPE_IMM(val - addr); - break; - case R_RISCV_ADD32: - *(u32 *)loc += val; - break; - case R_RISCV_SUB32: - *(u32 *)loc -= val; - break; - /* It has been applied by R_RISCV_PCREL_HI20 sym */ - case R_RISCV_PCREL_LO12_I: - case R_RISCV_ALIGN: - case R_RISCV_RELAX: - break; - default: - pr_err("Unknown rela relocation: %d\n", r_type); - return -ENOEXEC; - } - } - return 0; -} - -const struct kexec_file_ops elf_kexec_ops = { - .probe = kexec_elf_probe, - .load = elf_kexec_load, -}; diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 9d1a305d5508..75656afa2d6b 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -19,6 +19,79 @@ .section .irqentry.text, "ax" +.macro new_vmalloc_check + REG_S a0, TASK_TI_A0(tp) + csrr a0, CSR_CAUSE + /* Exclude IRQs */ + blt a0, zero, .Lnew_vmalloc_restore_context_a0 + + REG_S a1, TASK_TI_A1(tp) + /* Only check new_vmalloc if we are in page/protection fault */ + li a1, EXC_LOAD_PAGE_FAULT + beq a0, a1, .Lnew_vmalloc_kernel_address + li a1, EXC_STORE_PAGE_FAULT + beq a0, a1, .Lnew_vmalloc_kernel_address + li a1, EXC_INST_PAGE_FAULT + bne a0, a1, .Lnew_vmalloc_restore_context_a1 + +.Lnew_vmalloc_kernel_address: + /* Is it a kernel address? */ + csrr a0, CSR_TVAL + bge a0, zero, .Lnew_vmalloc_restore_context_a1 + + /* Check if a new vmalloc mapping appeared that could explain the trap */ + REG_S a2, TASK_TI_A2(tp) + /* + * Computes: + * a0 = &new_vmalloc[BIT_WORD(cpu)] + * a1 = BIT_MASK(cpu) + */ + REG_L a2, TASK_TI_CPU(tp) + /* + * Compute the new_vmalloc element position: + * (cpu / 64) * 8 = (cpu >> 6) << 3 + */ + srli a1, a2, 6 + slli a1, a1, 3 + la a0, new_vmalloc + add a0, a0, a1 + /* + * Compute the bit position in the new_vmalloc element: + * bit_pos = cpu % 64 = cpu - (cpu / 64) * 64 = cpu - (cpu >> 6) << 6 + * = cpu - ((cpu >> 6) << 3) << 3 + */ + slli a1, a1, 3 + sub a1, a2, a1 + /* Compute the "get mask": 1 << bit_pos */ + li a2, 1 + sll a1, a2, a1 + + /* Check the value of new_vmalloc for this cpu */ + REG_L a2, 0(a0) + and a2, a2, a1 + beq a2, zero, .Lnew_vmalloc_restore_context + + /* Atomically reset the current cpu bit in new_vmalloc */ + amoxor.d a0, a1, (a0) + + /* Only emit a sfence.vma if the uarch caches invalid entries */ + ALTERNATIVE("sfence.vma", "nop", 0, RISCV_ISA_EXT_SVVPTC, 1) + + REG_L a0, TASK_TI_A0(tp) + REG_L a1, TASK_TI_A1(tp) + REG_L a2, TASK_TI_A2(tp) + csrw CSR_SCRATCH, x0 + sret + +.Lnew_vmalloc_restore_context: + REG_L a2, TASK_TI_A2(tp) +.Lnew_vmalloc_restore_context_a1: + REG_L a1, TASK_TI_A1(tp) +.Lnew_vmalloc_restore_context_a0: + REG_L a0, TASK_TI_A0(tp) +.endm + + SYM_CODE_START(handle_exception) /* * If coming from userspace, preserve the user thread pointer and load @@ -30,6 +103,20 @@ SYM_CODE_START(handle_exception) .Lrestore_kernel_tpsp: csrr tp, CSR_SCRATCH + +#ifdef CONFIG_64BIT + /* + * The RISC-V kernel does not eagerly emit a sfence.vma after each + * new vmalloc mapping, which may result in exceptions: + * - if the uarch caches invalid entries, the new mapping would not be + * observed by the page table walker and an invalidation is needed. + * - if the uarch does not cache invalid entries, a reordered access + * could "miss" the new mapping and traps: in that case, we only need + * to retry the access, no sfence.vma is required. + */ + new_vmalloc_check +#endif + REG_S sp, TASK_TI_KERNEL_SP(tp) #ifdef CONFIG_VMAP_STACK @@ -88,7 +175,6 @@ SYM_CODE_START(handle_exception) call riscv_v_context_nesting_start #endif move a0, sp /* pt_regs */ - la ra, ret_from_exception /* * MSB of cause differentiates between @@ -97,7 +183,8 @@ SYM_CODE_START(handle_exception) bge s4, zero, 1f /* Handle interrupts */ - tail do_irq + call do_irq + j ret_from_exception 1: /* Handle other exceptions */ slli t0, s4, RISCV_LGPTR @@ -105,12 +192,16 @@ SYM_CODE_START(handle_exception) la t2, excp_vect_table_end add t0, t1, t0 /* Check if exception code lies within bounds */ - bgeu t0, t2, 1f - REG_L t0, 0(t0) - jr t0 -1: - tail do_trap_unknown + bgeu t0, t2, 3f + REG_L t1, 0(t0) +2: jalr t1 + j ret_from_exception +3: + + la t1, do_trap_unknown + j 2b SYM_CODE_END(handle_exception) +ASM_NOKPROBE(handle_exception) /* * The ret_from_exception must be called with interrupt disabled. Here is the @@ -129,6 +220,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #endif bnez s0, 1f +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + call stackleak_erase_on_task_stack +#endif + /* Save unwound kernel stack pointer in thread_info */ addi s0, sp, PT_SIZE_ON_STACK REG_S s0, TASK_TI_KERNEL_SP(tp) @@ -183,7 +278,9 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #else sret #endif +SYM_INNER_LABEL(ret_from_exception_end, SYM_L_GLOBAL) SYM_CODE_END(ret_from_exception) +ASM_NOKPROBE(ret_from_exception) #ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) @@ -219,19 +316,24 @@ SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) move a0, sp tail handle_bad_stack SYM_CODE_END(handle_kernel_stack_overflow) +ASM_NOKPROBE(handle_kernel_stack_overflow) #endif -SYM_CODE_START(ret_from_fork) +SYM_CODE_START(ret_from_fork_kernel_asm) + call schedule_tail + move a0, s1 /* fn_arg */ + move a1, s0 /* fn */ + move a2, sp /* pt_regs */ + call ret_from_fork_kernel + j ret_from_exception +SYM_CODE_END(ret_from_fork_kernel_asm) + +SYM_CODE_START(ret_from_fork_user_asm) call schedule_tail - beqz s0, 1f /* not from kernel thread */ - /* Call fn(arg) */ - move a0, s1 - jalr s0 -1: move a0, sp /* pt_regs */ - la ra, ret_from_exception - tail syscall_exit_to_user_mode -SYM_CODE_END(ret_from_fork) + call ret_from_fork_user + j ret_from_exception +SYM_CODE_END(ret_from_fork_user_asm) #ifdef CONFIG_IRQ_STACKS /* @@ -299,9 +401,18 @@ SYM_FUNC_START(__switch_to) REG_S s9, TASK_THREAD_S9_RA(a3) REG_S s10, TASK_THREAD_S10_RA(a3) REG_S s11, TASK_THREAD_S11_RA(a3) + + /* save the user space access flag */ + csrr s0, CSR_STATUS + REG_S s0, TASK_THREAD_SUM_RA(a3) + /* Save the kernel shadow call stack pointer */ scs_save_current /* Restore context from next->thread */ + REG_L s0, TASK_THREAD_SUM_RA(a4) + li s1, SR_SUM + and s0, s0, s1 + csrs CSR_STATUS, s0 REG_L ra, TASK_THREAD_RA_RA(a4) REG_L sp, TASK_THREAD_SP_RA(a4) REG_L s0, TASK_THREAD_S0_RA(a4) diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index 2c543f130f93..f74f6b60e347 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -170,7 +170,7 @@ SYM_FUNC_END(__fstate_restore) __access_func(f31) -#ifdef CONFIG_RISCV_MISALIGNED +#ifdef CONFIG_RISCV_SCALAR_MISALIGNED /* * Disable compressed instructions set to keep a constant offset between FP @@ -211,7 +211,7 @@ SYM_FUNC_START(put_f64_reg) SYM_FUNC_END(put_f64_reg) /* - * put_f64_reg - Get a 64 bits FP register value and returned it or store it to + * get_f64_reg - Get a 64 bits FP register value and returned it or store it to * a pointer. * a0 = FP register index to be retrieved * a1 = If xlen == 32, pointer which should be loaded with the FP register value @@ -224,4 +224,4 @@ SYM_FUNC_START(get_f64_reg) fp_access_epilogue SYM_FUNC_END(get_f64_reg) -#endif /* CONFIG_RISCV_MISALIGNED */ +#endif /* CONFIG_RISCV_SCALAR_MISALIGNED */ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index f5aa24d9e1c1..4c6c24380cfd 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -8,98 +8,129 @@ #include <linux/ftrace.h> #include <linux/uaccess.h> #include <linux/memory.h> +#include <linux/irqflags.h> +#include <linux/stop_machine.h> #include <asm/cacheflush.h> -#include <asm/patch.h> +#include <asm/text-patching.h> #ifdef CONFIG_DYNAMIC_FTRACE -void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) +unsigned long ftrace_call_adjust(unsigned long addr) { - mutex_lock(&text_mutex); + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return addr + 8 + MCOUNT_AUIPC_SIZE; - /* - * The code sequences we use for ftrace can't be patched while the - * kernel is running, so we need to use stop_machine() to modify them - * for now. This doesn't play nice with text_mutex, we use this flag - * to elide the check. - */ - riscv_patch_in_stop_machine = true; + return addr + MCOUNT_AUIPC_SIZE; } -void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) +unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip) { - riscv_patch_in_stop_machine = false; + return fentry_ip - MCOUNT_AUIPC_SIZE; +} + +void arch_ftrace_update_code(int command) +{ + mutex_lock(&text_mutex); + command |= FTRACE_MAY_SLEEP; + ftrace_modify_all_code(command); mutex_unlock(&text_mutex); + flush_icache_all(); } -static int ftrace_check_current_call(unsigned long hook_pos, - unsigned int *expected) +static int __ftrace_modify_call(unsigned long source, unsigned long target, bool validate) { + unsigned int call[2], offset; unsigned int replaced[2]; - unsigned int nops[2] = {NOP4, NOP4}; - /* we expect nops at the hook position */ - if (!expected) - expected = nops; + offset = target - source; + call[1] = to_jalr_t0(offset); + + if (validate) { + call[0] = to_auipc_t0(offset); + /* + * Read the text we want to modify; + * return must be -EFAULT on read error + */ + if (copy_from_kernel_nofault(replaced, (void *)source, 2 * MCOUNT_INSN_SIZE)) + return -EFAULT; + + if (replaced[0] != call[0]) { + pr_err("%p: expected (%08x) but got (%08x)\n", + (void *)source, call[0], replaced[0]); + return -EINVAL; + } + } - /* - * Read the text we want to modify; - * return must be -EFAULT on read error - */ - if (copy_from_kernel_nofault(replaced, (void *)hook_pos, - MCOUNT_INSN_SIZE)) - return -EFAULT; + /* Replace the jalr at once. Return -EPERM on write error. */ + if (patch_insn_write((void *)(source + MCOUNT_AUIPC_SIZE), call + 1, MCOUNT_JALR_SIZE)) + return -EPERM; - /* - * Make sure it is what we expect it to be; - * return must be -EINVAL on failed comparison - */ - if (memcmp(expected, replaced, sizeof(replaced))) { - pr_err("%p: expected (%08x %08x) but got (%08x %08x)\n", - (void *)hook_pos, expected[0], expected[1], replaced[0], - replaced[1]); - return -EINVAL; + return 0; +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS +static const struct ftrace_ops *riscv64_rec_get_ops(struct dyn_ftrace *rec) +{ + const struct ftrace_ops *ops = NULL; + + if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + ops = ftrace_find_unique_ops(rec); + WARN_ON_ONCE(!ops); } - return 0; + if (!ops) + ops = &ftrace_list_ops; + + return ops; } -static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, - bool enable, bool ra) +static int ftrace_rec_set_ops(const struct dyn_ftrace *rec, const struct ftrace_ops *ops) { - unsigned int call[2]; - unsigned int nops[2] = {NOP4, NOP4}; + unsigned long literal = ALIGN_DOWN(rec->ip - 12, 8); - if (ra) - make_call_ra(hook_pos, target, call); - else - make_call_t0(hook_pos, target, call); + return patch_text_nosync((void *)literal, &ops, sizeof(ops)); +} - /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */ - if (patch_text_nosync - ((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE)) - return -EPERM; +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, &ftrace_nop_ops); +} - return 0; +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, riscv64_rec_get_ops(rec)); } +#else +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; } +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; } +#endif int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned int call[2]; + unsigned long distance, orig_addr, pc = rec->ip - MCOUNT_AUIPC_SIZE; + int ret; - make_call_t0(rec->ip, addr, call); + ret = ftrace_rec_update_ops(rec); + if (ret) + return ret; - if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE)) - return -EPERM; + orig_addr = (unsigned long)&ftrace_caller; + distance = addr > orig_addr ? addr - orig_addr : orig_addr - addr; + if (distance > JALR_RANGE) + addr = FTRACE_ADDR; - return 0; + return __ftrace_modify_call(pc, addr, false); } -int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, - unsigned long addr) +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int nops[2] = {NOP4, NOP4}; + u32 nop4 = RISCV_INSN_NOP4; + int ret; + + ret = ftrace_rec_set_nop_ops(rec); + if (ret) + return ret; - if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) + if (patch_insn_write((void *)rec->ip, &nop4, MCOUNT_NOP4_SIZE)) return -EPERM; return 0; @@ -114,43 +145,71 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, */ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { - int out; + unsigned long pc = rec->ip - MCOUNT_AUIPC_SIZE; + unsigned int nops[2], offset; + int ret; + + ret = ftrace_rec_set_nop_ops(rec); + if (ret) + return ret; + + offset = (unsigned long) &ftrace_caller - pc; + nops[0] = to_auipc_t0(offset); + nops[1] = RISCV_INSN_NOP4; mutex_lock(&text_mutex); - out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); + ret = patch_insn_write((void *)pc, nops, 2 * MCOUNT_INSN_SIZE); mutex_unlock(&text_mutex); - return out; + return ret; } +ftrace_func_t ftrace_call_dest = ftrace_stub; int ftrace_update_ftrace_func(ftrace_func_t func) { - int ret = __ftrace_modify_call((unsigned long)&ftrace_call, - (unsigned long)func, true, true); - if (!ret) { - ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call, - (unsigned long)func, true, true); - } + /* + * When using CALL_OPS, the function to call is associated with the + * call site, and we don't have a global function pointer to update. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return 0; - return ret; + WRITE_ONCE(ftrace_call_dest, func); + /* + * The data fence ensure that the update to ftrace_call_dest happens + * before the write to function_trace_op later in the generic ftrace. + * If the sequence is not enforced, then an old ftrace_call_dest may + * race loading a new function_trace_op set in ftrace_modify_all_code + */ + smp_wmb(); + /* + * Updating ftrace dpes not take stop_machine path, so irqs should not + * be disabled. + */ + WARN_ON(irqs_disabled()); + smp_call_function(ftrace_sync_ipi, NULL, 1); + return 0; } -#endif -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#else /* CONFIG_DYNAMIC_FTRACE */ +unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - unsigned int call[2]; - unsigned long caller = rec->ip; + unsigned long caller = rec->ip - MCOUNT_AUIPC_SIZE; int ret; - make_call_t0(caller, old_addr, call); - ret = ftrace_check_current_call(caller, call); - + ret = ftrace_rec_update_ops(rec); if (ret) return ret; - return __ftrace_modify_call(caller, addr, true, false); + return __ftrace_modify_call(caller, FTRACE_ADDR, true); } #endif @@ -178,28 +237,25 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, } #ifdef CONFIG_DYNAMIC_FTRACE -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - struct pt_regs *regs = arch_ftrace_get_regs(fregs); - unsigned long *parent = (unsigned long *)®s->ra; + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long frame_pointer = arch_ftrace_regs(fregs)->s0; + unsigned long *parent = &arch_ftrace_regs(fregs)->ra; + unsigned long old; - prepare_ftrace_return(parent, ip, frame_pointer(regs)); -} -#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ -extern void ftrace_graph_call(void); -int ftrace_enable_ftrace_graph_caller(void) -{ - return __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, true, true); -} + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; -int ftrace_disable_ftrace_graph_caller(void) -{ - return __ftrace_modify_call((unsigned long)&ftrace_graph_call, - (unsigned long)&prepare_ftrace_return, false, true); + /* + * We don't suffer access faults, so no extra fault-recovery assembly + * is needed here. + */ + old = *parent; + + if (!function_graph_enter_regs(old, ip, frame_pointer, parent, fregs)) + *parent = return_hooker; } -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 4236a69c35cb..bdf3352acf4c 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -131,6 +131,12 @@ secondary_start_sbi: csrw CSR_IE, zero csrw CSR_IP, zero +#ifndef CONFIG_RISCV_M_MODE + /* Enable time CSR */ + li t0, 0x2 + csrw CSR_SCOUNTEREN, t0 +#endif + /* Load the global pointer */ load_global_pointer @@ -165,10 +171,21 @@ secondary_start_sbi: #endif call .Lsetup_trap_vector scs_load_current - tail smp_callin + call smp_callin #endif /* CONFIG_SMP */ .align 2 +.Lsecondary_park: + /* + * Park this hart if we: + * - have too many harts on CONFIG_RISCV_BOOT_SPINWAIT + * - receive an early trap, before setup_trap_vector finished + * - fail in smp_callin(), as a successful one wouldn't return + */ + wfi + j .Lsecondary_park + +.align 2 .Lsetup_trap_vector: /* Set trap vector to exception handler */ la a0, handle_exception @@ -181,12 +198,6 @@ secondary_start_sbi: csrw CSR_SCRATCH, zero ret -.align 2 -.Lsecondary_park: - /* We lack SMP support or have too many harts, so park this hart */ - wfi - j .Lsecondary_park - SYM_CODE_END(_start) SYM_CODE_START(_start_kernel) @@ -221,6 +232,10 @@ SYM_CODE_START(_start_kernel) * to hand it to us. */ csrr a0, CSR_MHARTID +#else + /* Enable time CSR */ + li t0, 0x2 + csrw CSR_SCOUNTEREN, t0 #endif /* CONFIG_RISCV_M_MODE */ /* Load the global pointer */ @@ -300,6 +315,9 @@ SYM_CODE_START(_start_kernel) #else mv a0, a1 #endif /* CONFIG_BUILTIN_DTB */ + /* Set trap vector to spin forever to help debug */ + la a3, .Lsecondary_park + csrw CSR_TVEC, a3 call setup_vm #ifdef CONFIG_MMU la a0, early_pg_dir diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index e6694759dbd0..b4c1a6a3fbd2 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -9,13 +9,14 @@ #include <linux/memory.h> #include <linux/mutex.h> #include <asm/bug.h> -#include <asm/patch.h> +#include <asm/cacheflush.h> +#include <asm/text-patching.h> +#include <asm/insn-def.h> -#define RISCV_INSN_NOP 0x00000013U #define RISCV_INSN_JAL 0x0000006fU -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)jump_entry_code(entry); u32 insn; @@ -24,7 +25,7 @@ void arch_jump_label_transform(struct jump_entry *entry, long offset = jump_entry_target(entry) - jump_entry_code(entry); if (WARN_ON(offset & 1 || offset < -524288 || offset >= 524288)) - return; + return true; insn = RISCV_INSN_JAL | (((u32)offset & GENMASK(19, 12)) << (12 - 12)) | @@ -32,10 +33,23 @@ void arch_jump_label_transform(struct jump_entry *entry, (((u32)offset & GENMASK(10, 1)) << (21 - 1)) | (((u32)offset & GENMASK(20, 20)) << (31 - 20)); } else { - insn = RISCV_INSN_NOP; + insn = RISCV_INSN_NOP4; } - mutex_lock(&text_mutex); - patch_text_nosync(addr, &insn, sizeof(insn)); - mutex_unlock(&text_mutex); + if (early_boot_irqs_disabled) { + riscv_patch_in_stop_machine = 1; + patch_insn_write(addr, &insn, sizeof(insn)); + riscv_patch_in_stop_machine = 0; + } else { + mutex_lock(&text_mutex); + patch_insn_write(addr, &insn, sizeof(insn)); + mutex_unlock(&text_mutex); + } + + return true; +} + +void arch_jump_label_transform_apply(void) +{ + flush_icache_all(); } diff --git a/arch/riscv/kernel/kernel_mode_fpu.c b/arch/riscv/kernel/kernel_mode_fpu.c new file mode 100644 index 000000000000..0ac8348876c4 --- /dev/null +++ b/arch/riscv/kernel/kernel_mode_fpu.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 SiFive + */ + +#include <linux/export.h> +#include <linux/preempt.h> + +#include <asm/csr.h> +#include <asm/fpu.h> +#include <asm/processor.h> +#include <asm/switch_to.h> + +void kernel_fpu_begin(void) +{ + preempt_disable(); + fstate_save(current, task_pt_regs(current)); + csr_set(CSR_SSTATUS, SR_FS); +} +EXPORT_SYMBOL_GPL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + csr_clear(CSR_SSTATUS, SR_FS); + fstate_restore(current, task_pt_regs(current)); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_end); diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c index 6afe80c7f03a..99972a48e86b 100644 --- a/arch/riscv/kernel/kernel_mode_vector.c +++ b/arch/riscv/kernel/kernel_mode_vector.c @@ -143,7 +143,7 @@ static int riscv_v_start_kernel_context(bool *is_nested) /* Transfer the ownership of V from user to kernel, then save */ riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY); - if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) { + if (__riscv_v_vstate_check(task_pt_regs(current)->status, DIRTY)) { uvstate = ¤t->thread.vstate; __riscv_v_vstate_save(uvstate, uvstate->datap); } @@ -160,7 +160,7 @@ asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs) return; depth = riscv_v_ctx_get_depth(); - if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY) + if (depth == 0 && __riscv_v_vstate_check(regs->status, DIRTY)) riscv_preempt_v_set_dirty(); riscv_v_ctx_depth_inc(); @@ -208,7 +208,7 @@ void kernel_vector_begin(void) { bool nested = false; - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; BUG_ON(!may_use_simd()); @@ -236,7 +236,7 @@ EXPORT_SYMBOL_GPL(kernel_vector_begin); */ void kernel_vector_end(void) { - if (WARN_ON(!has_vector())) + if (WARN_ON(!(has_vector() || has_xtheadvector()))) return; riscv_v_disable(); diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c new file mode 100644 index 000000000000..f4755d49b89e --- /dev/null +++ b/arch/riscv/kernel/kexec_elf.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Copyright (C) 2021 Huawei Technologies Co, Ltd. + * + * Author: Liao Chang (liaochang1@huawei.com) + * + * Based on kexec-tools' kexec-elf-riscv.c, heavily modified + * for kernel. + */ + +#define pr_fmt(fmt) "kexec_image: " fmt + +#include <linux/elf.h> +#include <linux/kexec.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/libfdt.h> +#include <linux/types.h> +#include <linux/memblock.h> +#include <asm/setup.h> + +static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, + struct kexec_elf_info *elf_info, unsigned long old_pbase, + unsigned long new_pbase) +{ + int i; + int ret = 0; + size_t size; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + + kbuf.image = image; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; + kbuf.bufsz = size; + kbuf.buf_align = phdr->p_align; + kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; + kbuf.memsz = phdr->p_memsz; + kbuf.top_down = false; + ret = kexec_add_buffer(&kbuf); + if (ret) + break; + } + + return ret; +} + +/* + * Go through the available phsyical memory regions and find one that hold + * an image of the specified size. + */ +static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, + struct elfhdr *ehdr, struct kexec_elf_info *elf_info, + unsigned long *old_pbase, unsigned long *new_pbase) +{ + int i; + int ret; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + unsigned long lowest_paddr = ULONG_MAX; + unsigned long lowest_vaddr = ULONG_MAX; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + if (lowest_paddr > phdr->p_paddr) + lowest_paddr = phdr->p_paddr; + + if (lowest_vaddr > phdr->p_vaddr) + lowest_vaddr = phdr->p_vaddr; + } + + kbuf.image = image; + kbuf.buf_min = lowest_paddr; + kbuf.buf_max = ULONG_MAX; + + /* + * Current riscv boot protocol requires 2MB alignment for + * RV64 and 4MB alignment for RV32 + * + */ + kbuf.buf_align = PMD_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); + kbuf.top_down = false; + ret = arch_kexec_locate_mem_hole(&kbuf); + if (!ret) { + *old_pbase = lowest_paddr; + *new_pbase = kbuf.mem; + image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; + } + return ret; +} + +static void *elf_kexec_load(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + unsigned long old_kernel_pbase = ULONG_MAX; + unsigned long new_kernel_pbase = 0UL; + struct elfhdr ehdr; + struct kexec_elf_info elf_info; + + ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); + if (ret) + return ERR_PTR(ret); + + ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info, + &old_kernel_pbase, &new_kernel_pbase); + if (ret) + goto out; + + /* Add the kernel binary to the image */ + ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, + old_kernel_pbase, new_kernel_pbase); + if (ret) + goto out; + + ret = load_extra_segments(image, image->start, kernel_len, + initrd, initrd_len, cmdline, cmdline_len); +out: + kexec_free_elf_info(&elf_info); + return ret ? ERR_PTR(ret) : NULL; +} + +const struct kexec_file_ops elf_kexec_ops = { + .probe = kexec_elf_probe, + .load = elf_kexec_load, +}; diff --git a/arch/riscv/kernel/kexec_image.c b/arch/riscv/kernel/kexec_image.c new file mode 100644 index 000000000000..26a81774a78a --- /dev/null +++ b/arch/riscv/kernel/kexec_image.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V Kexec image loader + * + */ + +#define pr_fmt(fmt) "kexec_file(Image): " fmt + +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kexec.h> +#include <linux/pe.h> +#include <linux/string.h> +#include <asm/byteorder.h> +#include <asm/image.h> + +static int image_probe(const char *kernel_buf, unsigned long kernel_len) +{ + const struct riscv_image_header *h = (const struct riscv_image_header *)kernel_buf; + + if (!h || kernel_len < sizeof(*h)) + return -EINVAL; + + /* According to Documentation/riscv/boot-image-header.rst, + * use "magic2" field to check when version >= 0.2. + */ + + if (h->version >= RISCV_HEADER_VERSION && + memcmp(&h->magic2, RISCV_IMAGE_MAGIC2, sizeof(h->magic2))) + return -EINVAL; + + return 0; +} + +static void *image_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + struct riscv_image_header *h; + u64 flags; + bool be_image, be_kernel; + struct kexec_buf kbuf; + int ret; + + /* Check Image header */ + h = (struct riscv_image_header *)kernel; + if (!h->image_size) { + ret = -EINVAL; + goto out; + } + + /* Check endianness */ + flags = le64_to_cpu(h->flags); + be_image = riscv_image_flag_field(flags, RISCV_IMAGE_FLAG_BE); + be_kernel = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + if (be_image != be_kernel) { + ret = -EINVAL; + goto out; + } + + /* Load the kernel image */ + kbuf.image = image; + kbuf.buf_min = 0; + kbuf.buf_max = ULONG_MAX; + kbuf.top_down = false; + + kbuf.buffer = kernel; + kbuf.bufsz = kernel_len; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = le64_to_cpu(h->image_size); + kbuf.buf_align = le64_to_cpu(h->text_offset); + + ret = kexec_add_buffer(&kbuf); + if (ret) { + pr_err("Error add kernel image ret=%d\n", ret); + goto out; + } + + image->start = kbuf.mem; + + pr_info("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + kbuf.mem, kbuf.bufsz, kbuf.memsz); + + ret = load_extra_segments(image, kbuf.mem, kbuf.memsz, + initrd, initrd_len, cmdline, cmdline_len); + +out: + return ret ? ERR_PTR(ret) : NULL; +} + +const struct kexec_file_ops image_kexec_ops = { + .probe = image_probe, + .load = image_load, +}; diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index 2e0266ae6bd7..9f3db3503dab 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -254,6 +254,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) regs->epc = pc; } +noinline void arch_kgdb_breakpoint(void) +{ + asm(".global kgdb_compiled_break\n" + "kgdb_compiled_break: ebreak\n"); +} + void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, char *remcom_out_buffer) { diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ed9cad20c039..2306ce3e5f22 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -114,37 +114,6 @@ void machine_shutdown(void) #endif } -static void machine_kexec_mask_interrupts(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - int ret; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - /* - * First try to remove the active state. If this - * fails, try to EOI the interrupt. - */ - ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); - - if (ret && irqd_irq_inprogress(&desc->irq_data) && - chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - /* * machine_crash_shutdown - Prepare to kexec after a kernel crash * diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index b0bf8c1722c0..e36104af2e24 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -7,8 +7,369 @@ * Author: Liao Chang (liaochang1@huawei.com) */ #include <linux/kexec.h> +#include <linux/elf.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/libfdt.h> +#include <linux/types.h> +#include <linux/memblock.h> +#include <linux/vmalloc.h> +#include <asm/setup.h> const struct kexec_file_ops * const kexec_file_loaders[] = { &elf_kexec_ops, + &image_kexec_ops, NULL }; + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kvfree(image->arch.fdt); + image->arch.fdt = NULL; + + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + + return kexec_image_post_load_cleanup_default(image); +} + +#ifdef CONFIG_CRASH_DUMP +static int get_nr_ram_ranges_callback(struct resource *res, void *arg) +{ + unsigned int *nr_ranges = arg; + + (*nr_ranges)++; + return 0; +} + +static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) +{ + struct crash_mem *cmem = arg; + + cmem->ranges[cmem->nr_ranges].start = res->start; + cmem->ranges[cmem->nr_ranges].end = res->end; + cmem->nr_ranges++; + + return 0; +} + +static int prepare_elf_headers(void **addr, unsigned long *sz) +{ + struct crash_mem *cmem; + unsigned int nr_ranges; + int ret; + + nr_ranges = 1; /* For exclusion of crashkernel region */ + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + + cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); + if (!cmem) + return -ENOMEM; + + cmem->max_nr_ranges = nr_ranges; + cmem->nr_ranges = 0; + ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); + if (ret) + goto out; + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (!ret) + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + +out: + kfree(cmem); + return ret; +} + +static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len) +{ + int elfcorehdr_strlen; + char *cmdline_ptr; + + cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); + if (!cmdline_ptr) + return NULL; + + elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", + image->elf_load_addr); + + if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { + pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n"); + kfree(cmdline_ptr); + return NULL; + } + + memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); + /* Ensure it's nul terminated */ + cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; + return cmdline_ptr; +} +#endif + +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RISCV_IMM_BITS 12 +#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS) +#define RISCV_CONST_HIGH_PART(x) \ + (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1)) +#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x)) + +#define ENCODE_ITYPE_IMM(x) \ + (RV_X(x, 0, 12) << 20) +#define ENCODE_BTYPE_IMM(x) \ + ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \ + (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) +#define ENCODE_UTYPE_IMM(x) \ + (RV_X(x, 12, 20) << 12) +#define ENCODE_JTYPE_IMM(x) \ + ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \ + (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) +#define ENCODE_CBTYPE_IMM(x) \ + ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12)) +#define ENCODE_CJTYPE_IMM(x) \ + ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \ + (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12)) +#define ENCODE_UJTYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \ + (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32)) +#define ENCODE_UITYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32)) + +#define CLEAN_IMM(type, x) \ + ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x)) + +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab) +{ + const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; + Elf64_Rela *relas; + int i, r_type; + + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; + + relas = (void *)pi->ehdr + relsec->sh_offset; + + for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { + const Elf_Sym *sym; /* symbol to relocate */ + unsigned long addr; /* final location after relocation */ + unsigned long val; /* relocated symbol value */ + unsigned long sec_base; /* relocated symbol value */ + void *loc; /* tmp location to modify */ + + sym = (void *)pi->ehdr + symtab->sh_offset; + sym += ELF64_R_SYM(relas[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + loc = pi->purgatory_buf; + loc += section->sh_offset; + loc += relas[i].r_offset; + + if (sym->st_shndx == SHN_ABS) + sec_base = 0; + else if (sym->st_shndx >= pi->ehdr->e_shnum) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } else + sec_base = pi->sechdrs[sym->st_shndx].sh_addr; + + val = sym->st_value; + val += sec_base; + val += relas[i].r_addend; + + addr = section->sh_addr + relas[i].r_offset; + + r_type = ELF64_R_TYPE(relas[i].r_info); + + switch (r_type) { + case R_RISCV_BRANCH: + *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | + ENCODE_BTYPE_IMM(val - addr); + break; + case R_RISCV_JAL: + *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | + ENCODE_JTYPE_IMM(val - addr); + break; + /* + * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I + * sym is expected to be next to R_RISCV_PCREL_HI20 + * in purgatory relsec. Handle it like R_RISCV_CALL + * sym, instead of searching the whole relsec. + */ + case R_RISCV_PCREL_HI20: + case R_RISCV_CALL_PLT: + case R_RISCV_CALL: + *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | + ENCODE_UJTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_BRANCH: + *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | + ENCODE_CBTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_JUMP: + *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | + ENCODE_CJTYPE_IMM(val - addr); + break; + case R_RISCV_ADD16: + *(u16 *)loc += val; + break; + case R_RISCV_SUB16: + *(u16 *)loc -= val; + break; + case R_RISCV_ADD32: + *(u32 *)loc += val; + break; + case R_RISCV_SUB32: + *(u32 *)loc -= val; + break; + /* It has been applied by R_RISCV_PCREL_HI20 sym */ + case R_RISCV_PCREL_LO12_I: + case R_RISCV_ALIGN: + case R_RISCV_RELAX: + break; + case R_RISCV_64: + *(u64 *)loc = val; + break; + default: + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } + } + return 0; +} + + +int load_extra_segments(struct kimage *image, unsigned long kernel_start, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + void *fdt; + unsigned long initrd_pbase = 0UL; + struct kexec_buf kbuf; + char *modified_cmdline = NULL; + + kbuf.image = image; + kbuf.buf_min = kernel_start + kernel_len; + kbuf.buf_max = ULONG_MAX; + +#ifdef CONFIG_CRASH_DUMP + /* Add elfcorehdr */ + if (image->type == KEXEC_TYPE_CRASH) { + void *headers; + unsigned long headers_sz; + ret = prepare_elf_headers(&headers, &headers_sz); + if (ret) { + pr_err("Preparing elf core header failed\n"); + goto out; + } + + kbuf.buffer = headers; + kbuf.bufsz = headers_sz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = headers_sz; + kbuf.buf_align = ELF_CORE_HEADER_ALIGN; + kbuf.top_down = true; + + ret = kexec_add_buffer(&kbuf); + if (ret) { + vfree(headers); + goto out; + } + image->elf_headers = headers; + image->elf_load_addr = kbuf.mem; + image->elf_headers_sz = headers_sz; + + kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->elf_load_addr, kbuf.bufsz, kbuf.memsz); + + /* Setup cmdline for kdump kernel case */ + modified_cmdline = setup_kdump_cmdline(image, cmdline, + cmdline_len); + if (!modified_cmdline) { + pr_err("Setting up cmdline for kdump kernel failed\n"); + ret = -EINVAL; + goto out; + } + cmdline = modified_cmdline; + } +#endif + +#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY + /* Add purgatory to the image */ + kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_load_purgatory(image, &kbuf); + if (ret) { + pr_err("Error loading purgatory ret=%d\n", ret); + goto out; + } + kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem); + + ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", + &kernel_start, + sizeof(kernel_start), 0); + if (ret) + pr_err("Error update purgatory ret=%d\n", ret); +#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */ + + /* Add the initrd to the image */ + if (initrd != NULL) { + kbuf.buffer = initrd; + kbuf.bufsz = kbuf.memsz = initrd_len; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + initrd_pbase = kbuf.mem; + kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase); + } + + /* Add the DTB to the image */ + fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase, + initrd_len, cmdline, 0); + if (!fdt) { + pr_err("Error setting up the new device tree.\n"); + ret = -EINVAL; + goto out; + } + + fdt_pack(fdt); + kbuf.buffer = fdt; + kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); + kbuf.buf_align = PAGE_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.top_down = true; + ret = kexec_add_buffer(&kbuf); + if (ret) { + pr_err("Error add DTB kbuf ret=%d\n", ret); + goto out_free_fdt; + } + /* Cache the fdt buffer address for memory cleanup */ + image->arch.fdt = fdt; + kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem); + goto out; + +out_free_fdt: + kvfree(fdt); +out: + kfree(modified_cmdline); + return ret; +} diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index b7561288e8da..48f6c4f7dca0 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -13,7 +13,6 @@ .text -#define FENTRY_RA_OFFSET 8 #define ABI_SIZE_ON_STACK 80 #define ABI_A0 0 #define ABI_A1 8 @@ -56,204 +55,153 @@ addi sp, sp, ABI_SIZE_ON_STACK .endm -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - /** -* SAVE_ABI_REGS - save regs against the pt_regs struct -* -* @all: tell if saving all the regs -* -* If all is set, all the regs will be saved, otherwise only ABI -* related regs (a0-a7,epc,ra and optional s0) will be saved. +* SAVE_ABI_REGS - save regs against the ftrace_regs struct * * After the stack is established, * * 0(sp) stores the PC of the traced function which can be accessed -* by &(fregs)->regs->epc in tracing function. Note that the real -* function entry address should be computed with -FENTRY_RA_OFFSET. +* by &(fregs)->epc in tracing function. * * 8(sp) stores the function return address (i.e. parent IP) that -* can be accessed by &(fregs)->regs->ra in tracing function. +* can be accessed by &(fregs)->ra in tracing function. * * The other regs are saved at the respective localtion and accessed -* by the respective pt_regs member. +* by the respective ftrace_regs member. * * Here is the layout of stack for your reference. * * PT_SIZE_ON_STACK -> +++++++++ * + ..... + -* + t3-t6 + -* + s2-s11+ * + a0-a7 + --++++-> ftrace_caller saved -* + s1 + + -* + s0 + --+ -* + t0-t2 + + -* + tp + + -* + gp + + +* + t1 + --++++-> direct tramp address +* + s0 + --+ // frame pointer * + sp + + * + ra + --+ // parent IP * sp -> + epc + --+ // PC * +++++++++ **/ - .macro SAVE_ABI_REGS, all=0 - addi sp, sp, -PT_SIZE_ON_STACK - - REG_S t0, PT_EPC(sp) - REG_S x1, PT_RA(sp) - - // save the ABI regs - - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - - // save the leftover regs - - .if \all == 1 - REG_S x2, PT_SP(sp) - REG_S x3, PT_GP(sp) - REG_S x4, PT_TP(sp) - REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x8, PT_S0(sp) - REG_S x9, PT_S1(sp) - REG_S x18, PT_S2(sp) - REG_S x19, PT_S3(sp) - REG_S x20, PT_S4(sp) - REG_S x21, PT_S5(sp) - REG_S x22, PT_S6(sp) - REG_S x23, PT_S7(sp) - REG_S x24, PT_S8(sp) - REG_S x25, PT_S9(sp) - REG_S x26, PT_S10(sp) - REG_S x27, PT_S11(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) - - // save s0 if FP_TEST defined - - .else + .macro SAVE_ABI_REGS + addi sp, sp, -FREGS_SIZE_ON_STACK + REG_S t0, FREGS_EPC(sp) + REG_S x1, FREGS_RA(sp) #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - REG_S x8, PT_S0(sp) + REG_S x8, FREGS_S0(sp) #endif - .endif + REG_S x6, FREGS_T1(sp) +#ifdef CONFIG_CC_IS_CLANG + REG_S x7, FREGS_T2(sp) + REG_S x28, FREGS_T3(sp) + REG_S x29, FREGS_T4(sp) + REG_S x30, FREGS_T5(sp) + REG_S x31, FREGS_T6(sp) +#endif + // save the arguments + REG_S x10, FREGS_A0(sp) + REG_S x11, FREGS_A1(sp) + REG_S x12, FREGS_A2(sp) + REG_S x13, FREGS_A3(sp) + REG_S x14, FREGS_A4(sp) + REG_S x15, FREGS_A5(sp) + REG_S x16, FREGS_A6(sp) + REG_S x17, FREGS_A7(sp) + mv a0, sp + addi a0, a0, FREGS_SIZE_ON_STACK + REG_S a0, FREGS_SP(sp) // Put original SP on stack .endm - .macro RESTORE_ABI_REGS, all=0 - REG_L t0, PT_EPC(sp) - REG_L x1, PT_RA(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - - .if \all == 1 - REG_L x2, PT_SP(sp) - REG_L x3, PT_GP(sp) - REG_L x4, PT_TP(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x8, PT_S0(sp) - REG_L x9, PT_S1(sp) - REG_L x18, PT_S2(sp) - REG_L x19, PT_S3(sp) - REG_L x20, PT_S4(sp) - REG_L x21, PT_S5(sp) - REG_L x22, PT_S6(sp) - REG_L x23, PT_S7(sp) - REG_L x24, PT_S8(sp) - REG_L x25, PT_S9(sp) - REG_L x26, PT_S10(sp) - REG_L x27, PT_S11(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) - - .else + .macro RESTORE_ABI_REGS + REG_L t0, FREGS_EPC(sp) + REG_L x1, FREGS_RA(sp) #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - REG_L x8, PT_S0(sp) + REG_L x8, FREGS_S0(sp) #endif - .endif - addi sp, sp, PT_SIZE_ON_STACK + REG_L x6, FREGS_T1(sp) +#ifdef CONFIG_CC_IS_CLANG + REG_L x7, FREGS_T2(sp) + REG_L x28, FREGS_T3(sp) + REG_L x29, FREGS_T4(sp) + REG_L x30, FREGS_T5(sp) + REG_L x31, FREGS_T6(sp) +#endif + // restore the arguments + REG_L x10, FREGS_A0(sp) + REG_L x11, FREGS_A1(sp) + REG_L x12, FREGS_A2(sp) + REG_L x13, FREGS_A3(sp) + REG_L x14, FREGS_A4(sp) + REG_L x15, FREGS_A5(sp) + REG_L x16, FREGS_A6(sp) + REG_L x17, FREGS_A7(sp) + + addi sp, sp, FREGS_SIZE_ON_STACK .endm .macro PREPARE_ARGS - addi a0, t0, -FENTRY_RA_OFFSET + addi a0, t0, -MCOUNT_JALR_SIZE // ip (callsite's jalr insn) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + mv a1, ra // parent_ip + REG_L a2, -16(t0) // op + REG_L ra, FTRACE_OPS_FUNC(a2) // op->func +#else la a1, function_trace_op - REG_L a2, 0(a1) - mv a1, ra - mv a3, sp + REG_L a2, 0(a1) // op + mv a1, ra // parent_ip +#endif + mv a3, sp // regs .endm -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ - -#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS SYM_FUNC_START(ftrace_caller) - SAVE_ABI - - addi a0, t0, -FENTRY_RA_OFFSET - la a1, function_trace_op - REG_L a2, 0(a1) - mv a1, ra - mv a3, sp - -SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) - call ftrace_stub +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + /* + * When CALL_OPS is enabled (2 or 4) nops [8B] are placed before the + * function entry, these are later overwritten with the pointer to the + * associated struct ftrace_ops. + * + * -8: &ftrace_ops of the associated tracer function. + *<ftrace enable>: + * 0: auipc t0/ra, 0x? + * 4: jalr t0/ra, ?(t0/ra) + * + * -8: &ftrace_nop_ops + *<ftrace disable>: + * 0: nop + * 4: nop + * + * t0 is set to ip+8 after the jalr is executed at the callsite, + * so we find the associated op at t0-16. + */ + REG_L t1, -16(t0) // op Should be SZ_REG instead of 16 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - addi a0, sp, ABI_RA - REG_L a1, ABI_T0(sp) - addi a1, a1, -FENTRY_RA_OFFSET -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST - mv a2, s0 +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* + * If the op has a direct call, handle it immediately without + * saving/restoring registers. + */ + REG_L t1, FTRACE_OPS_DIRECT_CALL(t1) + bnez t1, ftrace_caller_direct #endif -SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) - call ftrace_stub #endif - RESTORE_ABI - jr t0 -SYM_FUNC_END(ftrace_caller) - -#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ -SYM_FUNC_START(ftrace_regs_caller) - mv t1, zero - SAVE_ABI_REGS 1 - PREPARE_ARGS - -SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) - call ftrace_stub - - RESTORE_ABI_REGS 1 - bnez t1, .Ldirect - jr t0 -.Ldirect: - jr t1 -SYM_FUNC_END(ftrace_regs_caller) - -SYM_FUNC_START(ftrace_caller) - SAVE_ABI_REGS 0 + SAVE_ABI_REGS PREPARE_ARGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + jalr ra +#else SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) - call ftrace_stub - - RESTORE_ABI_REGS 0 + REG_L ra, ftrace_call_dest + jalr ra, 0(ra) +#endif + RESTORE_ABI_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + bnez t1, ftrace_caller_direct +#endif jr t0 +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +SYM_INNER_LABEL(ftrace_caller_direct, SYM_L_LOCAL) + jr t1 +#endif SYM_FUNC_END(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS SYM_CODE_START(ftrace_stub_direct_tramp) diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 3a42f6287909..da4a4000e57e 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -26,12 +26,12 @@ * register if a0 was not saved. */ .macro SAVE_RET_ABI_STATE - addi sp, sp, -4*SZREG - REG_S s0, 2*SZREG(sp) - REG_S ra, 3*SZREG(sp) - REG_S a0, 1*SZREG(sp) - REG_S a1, 0*SZREG(sp) - addi s0, sp, 4*SZREG + addi sp, sp, -FREGS_SIZE_ON_STACK + REG_S ra, FREGS_RA(sp) + REG_S s0, FREGS_S0(sp) + REG_S a0, FREGS_A0(sp) + REG_S a1, FREGS_A1(sp) + addi s0, sp, FREGS_SIZE_ON_STACK .endm .macro RESTORE_ABI_STATE @@ -41,11 +41,11 @@ .endm .macro RESTORE_RET_ABI_STATE - REG_L ra, 3*SZREG(sp) - REG_L s0, 2*SZREG(sp) - REG_L a0, 1*SZREG(sp) - REG_L a1, 0*SZREG(sp) - addi sp, sp, 4*SZREG + REG_L ra, FREGS_RA(sp) + REG_L s0, FREGS_S0(sp) + REG_L a0, FREGS_A0(sp) + REG_L a1, FREGS_A1(sp) + addi sp, sp, FREGS_SIZE_ON_STACK .endm SYM_TYPED_FUNC_START(ftrace_stub) diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index e264e59e596e..75551ac6504c 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/moduleloader.h> +#include <linux/sort.h> unsigned long module_emit_got_entry(struct module *mod, unsigned long val) { @@ -55,43 +56,70 @@ unsigned long module_emit_plt_entry(struct module *mod, unsigned long val) return (unsigned long)&plt[i]; } -static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y) +#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b)) + +static int cmp_rela(const void *a, const void *b) { - return x->r_info == y->r_info && x->r_addend == y->r_addend; + const Elf_Rela *x = a, *y = b; + int i; + + /* sort by type, symbol index and addend */ + i = cmp_3way(x->r_info, y->r_info); + if (i == 0) + i = cmp_3way(x->r_addend, y->r_addend); + return i; } static bool duplicate_rela(const Elf_Rela *rela, int idx) { - int i; - for (i = 0; i < idx; i++) { - if (is_rela_equal(&rela[i], &rela[idx])) - return true; - } - return false; + /* + * Entries are sorted by type, symbol index and addend. That means + * that, if a duplicate entry exists, it must be in the preceding slot. + */ + return idx > 0 && cmp_rela(rela + idx, rela + idx - 1) == 0; } -static void count_max_entries(Elf_Rela *relas, int num, +static void count_max_entries(const Elf_Rela *relas, size_t num, unsigned int *plts, unsigned int *gots) { - unsigned int type, i; - - for (i = 0; i < num; i++) { - type = ELF_RISCV_R_TYPE(relas[i].r_info); - if (type == R_RISCV_CALL_PLT) { - if (!duplicate_rela(relas, i)) - (*plts)++; - } else if (type == R_RISCV_GOT_HI20) { - if (!duplicate_rela(relas, i)) - (*gots)++; + for (size_t i = 0; i < num; i++) { + if (duplicate_rela(relas, i)) + continue; + + switch (ELF_R_TYPE(relas[i].r_info)) { + case R_RISCV_CALL_PLT: + case R_RISCV_PLT32: + (*plts)++; + break; + case R_RISCV_GOT_HI20: + (*gots)++; + break; + default: + unreachable(); } } } +static bool rela_needs_plt_got_entry(const Elf_Rela *rela) +{ + switch (ELF_R_TYPE(rela->r_info)) { + case R_RISCV_CALL_PLT: + case R_RISCV_GOT_HI20: + case R_RISCV_PLT32: + return true; + default: + return false; + } +} + int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { + size_t num_scratch_relas = 0; unsigned int num_plts = 0; unsigned int num_gots = 0; + Elf_Rela *scratch = NULL; + size_t scratch_size = 0; int i; /* @@ -121,9 +149,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, /* Calculate the maxinum number of entries */ for (i = 0; i < ehdr->e_shnum; i++) { + size_t num_relas = sechdrs[i].sh_size / sizeof(Elf_Rela); Elf_Rela *relas = (void *)ehdr + sechdrs[i].sh_offset; - int num_rela = sechdrs[i].sh_size / sizeof(Elf_Rela); Elf_Shdr *dst_sec = sechdrs + sechdrs[i].sh_info; + size_t scratch_size_needed; if (sechdrs[i].sh_type != SHT_RELA) continue; @@ -132,7 +161,28 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, if (!(dst_sec->sh_flags & SHF_EXECINSTR)) continue; - count_max_entries(relas, num_rela, &num_plts, &num_gots); + /* + * apply_relocate_add() relies on HI20 and LO12 relocation pairs being + * close together, so sort a copy of the section to avoid interfering. + */ + scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch); + if (scratch_size_needed > scratch_size) { + scratch_size = scratch_size_needed; + scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL); + if (!scratch) + return -ENOMEM; + } + + for (size_t j = 0; j < num_relas; j++) + if (rela_needs_plt_got_entry(&relas[j])) + scratch[num_scratch_relas++] = relas[j]; + } + + if (scratch) { + /* sort the accumulated PLT/GOT relocations so duplicates are adjacent */ + sort(scratch, num_scratch_relas, sizeof(*scratch), cmp_rela, NULL); + count_max_entries(scratch, num_scratch_relas, &num_plts, &num_gots); + kvfree(scratch); } mod->arch.plt.shdr->sh_type = SHT_NOBITS; diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 5e5a82644451..7f6147c18033 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/log2.h> #include <linux/moduleloader.h> -#include <linux/vmalloc.h> #include <linux/sizes.h> #include <linux/pgtable.h> #include <asm/alternative.h> @@ -24,7 +23,7 @@ struct used_bucket { struct relocation_head { struct hlist_node node; - struct list_head *rel_entry; + struct list_head rel_entry; void *location; }; @@ -635,7 +634,7 @@ process_accumulated_relocations(struct module *me, location = rel_head_iter->location; list_for_each_entry_safe(rel_entry_iter, rel_entry_iter_tmp, - rel_head_iter->rel_entry, + &rel_head_iter->rel_entry, head) { curr_type = rel_entry_iter->type; reloc_handlers[curr_type].reloc_handler( @@ -649,7 +648,7 @@ process_accumulated_relocations(struct module *me, kfree(bucket_iter); } - kfree(*relocation_hashtable); + kvfree(*relocation_hashtable); } static int add_relocation_to_accumulate(struct module *me, int type, @@ -705,16 +704,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, return -ENOMEM; } - rel_head->rel_entry = - kmalloc(sizeof(struct list_head), GFP_KERNEL); - - if (!rel_head->rel_entry) { - kfree(entry); - kfree(rel_head); - return -ENOMEM; - } - - INIT_LIST_HEAD(rel_head->rel_entry); + INIT_LIST_HEAD(&rel_head->rel_entry); rel_head->location = location; INIT_HLIST_NODE(&rel_head->node); if (!current_head->first) { @@ -723,7 +713,6 @@ static int add_relocation_to_accumulate(struct module *me, int type, if (!bucket) { kfree(entry); - kfree(rel_head->rel_entry); kfree(rel_head); return -ENOMEM; } @@ -736,7 +725,7 @@ static int add_relocation_to_accumulate(struct module *me, int type, } /* Add relocation to head of discovered rel_head */ - list_add_tail(&entry->head, rel_head->rel_entry); + list_add_tail(&entry->head, &rel_head->rel_entry); return 0; } @@ -763,9 +752,10 @@ initialize_relocation_hashtable(unsigned int num_relocations, hashtable_size <<= should_double_size; - *relocation_hashtable = kmalloc_array(hashtable_size, - sizeof(**relocation_hashtable), - GFP_KERNEL); + /* Number of relocations may be large, so kvmalloc it */ + *relocation_hashtable = kvmalloc_array(hashtable_size, + sizeof(**relocation_hashtable), + GFP_KERNEL); if (!*relocation_hashtable) return 0; @@ -788,8 +778,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, int res; unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); struct hlist_head *relocation_hashtable; - struct list_head used_buckets_list; unsigned int hashtable_bits; + LIST_HEAD(used_buckets_list); hashtable_bits = initialize_relocation_hashtable(num_relocations, &relocation_hashtable); @@ -797,8 +787,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, if (!relocation_hashtable) return -ENOMEM; - INIT_LIST_HEAD(&used_buckets_list); - pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -872,7 +860,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } j++; - if (j > sechdrs[relsec].sh_size / sizeof(*rel)) + if (j == num_relocations) j = 0; } while (j_idx != j); @@ -905,17 +893,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } -#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) -void *module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, MODULES_VADDR, - MODULES_END, GFP_KERNEL, - PAGE_KERNEL, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c index 0d6225fd3194..fa6b0339a65d 100644 --- a/arch/riscv/kernel/paravirt.c +++ b/arch/riscv/kernel/paravirt.c @@ -62,7 +62,7 @@ static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi, ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM, lo, hi, flags, 0, 0, 0); if (ret.error) { - if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE) + if (lo == SBI_SHMEM_DISABLE && hi == SBI_SHMEM_DISABLE) pr_warn("Failed to disable steal-time shmem"); else pr_warn("Failed to set steal-time shmem"); @@ -84,8 +84,8 @@ static int pv_time_cpu_online(unsigned int cpu) static int pv_time_cpu_down_prepare(unsigned int cpu) { - return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE, - SBI_STA_SHMEM_DISABLE, 0); + return sbi_sta_steal_time_set_shmem(SBI_SHMEM_DISABLE, + SBI_SHMEM_DISABLE, 0); } static u64 pv_time_steal_clock(int cpu) diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 37e87fdcf6a0..db13c9ddf9e3 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -13,13 +13,13 @@ #include <asm/cacheflush.h> #include <asm/fixmap.h> #include <asm/ftrace.h> -#include <asm/patch.h> +#include <asm/text-patching.h> #include <asm/sections.h> struct patch_insn { void *addr; u32 *insns; - int ninsns; + size_t len; atomic_t cpu_count; }; @@ -54,7 +54,7 @@ static __always_inline void *patch_map(void *addr, const unsigned int fixmap) BUG_ON(!page); return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + - (uintaddr & ~PAGE_MASK)); + offset_in_page(addr)); } static void patch_unmap(int fixmap) @@ -65,8 +65,8 @@ NOKPROBE_SYMBOL(patch_unmap); static int __patch_insn_set(void *addr, u8 c, size_t len) { + bool across_pages = (offset_in_page(addr) + len) > PAGE_SIZE; void *waddr = addr; - bool across_pages = (((uintptr_t)addr & ~PAGE_MASK) + len) > PAGE_SIZE; /* * Only two pages can be mapped at a time for writing. @@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) */ lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -87,19 +89,29 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) memset(waddr, c, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. + */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return 0; } NOKPROBE_SYMBOL(__patch_insn_set); static int __patch_insn_write(void *addr, const void *insn, size_t len) { + bool across_pages = (offset_in_page(addr) + len) > PAGE_SIZE; void *waddr = addr; - bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; int ret; /* @@ -122,6 +134,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) if (!riscv_patch_in_stop_machine) lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -129,11 +143,21 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) ret = copy_to_kernel_nofault(waddr, insn, len); + /* + * We could have just patched a function that is about to be + * called so make sure we don't execute partially patched + * instructions by flushing the icache as soon as possible. + */ + local_flush_icache_range((unsigned long)waddr, + (unsigned long)waddr + len); + patch_unmap(FIX_TEXT_POKE0); if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return ret; } NOKPROBE_SYMBOL(__patch_insn_write); @@ -155,69 +179,70 @@ NOKPROBE_SYMBOL(__patch_insn_write); static int patch_insn_set(void *addr, u8 c, size_t len) { - size_t patched = 0; size_t size; - int ret = 0; + int ret; /* * __patch_insn_set() can only work on 2 pages at a time so call it in a * loop with len <= 2 * PAGE_SIZE. */ - while (patched < len && !ret) { - size = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + patched), len - patched); - ret = __patch_insn_set(addr + patched, c, size); - - patched += size; + while (len) { + size = min(len, PAGE_SIZE * 2 - offset_in_page(addr)); + ret = __patch_insn_set(addr, c, size); + if (ret) + return ret; + + addr += size; + len -= size; } - return ret; + return 0; } NOKPROBE_SYMBOL(patch_insn_set); int patch_text_set_nosync(void *addr, u8 c, size_t len) { - u32 *tp = addr; int ret; - ret = patch_insn_set(tp, c, len); - + ret = patch_insn_set(addr, c, len); if (!ret) - flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len); + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } NOKPROBE_SYMBOL(patch_text_set_nosync); -static int patch_insn_write(void *addr, const void *insn, size_t len) +int patch_insn_write(void *addr, const void *insn, size_t len) { - size_t patched = 0; size_t size; - int ret = 0; + int ret; /* * Copy the instructions to the destination address, two pages at a time * because __patch_insn_write() can only handle len <= 2 * PAGE_SIZE. */ - while (patched < len && !ret) { - size = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + patched), len - patched); - ret = __patch_insn_write(addr + patched, insn + patched, size); - - patched += size; + while (len) { + size = min(len, PAGE_SIZE * 2 - offset_in_page(addr)); + ret = __patch_insn_write(addr, insn, size); + if (ret) + return ret; + + addr += size; + insn += size; + len -= size; } - return ret; + return 0; } NOKPROBE_SYMBOL(patch_insn_write); int patch_text_nosync(void *addr, const void *insns, size_t len) { - u32 *tp = addr; int ret; - ret = patch_insn_write(tp, insns, len); - + ret = patch_insn_write(addr, insns, len); if (!ret) - flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } @@ -226,33 +251,36 @@ NOKPROBE_SYMBOL(patch_text_nosync); static int patch_text_cb(void *data) { struct patch_insn *patch = data; - unsigned long len; - int i, ret = 0; + int ret = 0; if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) { - for (i = 0; ret == 0 && i < patch->ninsns; i++) { - len = GET_INSN_LENGTH(patch->insns[i]); - ret = patch_text_nosync(patch->addr + i * len, - &patch->insns[i], len); - } - atomic_inc(&patch->cpu_count); + ret = patch_insn_write(patch->addr, patch->insns, patch->len); + /* + * Make sure the patching store is effective *before* we + * increment the counter which releases all waiting CPUs + * by using the release variant of atomic increment. The + * release pairs with the call to local_flush_icache_all() + * on the waiting CPU. + */ + atomic_inc_return_release(&patch->cpu_count); } else { while (atomic_read(&patch->cpu_count) <= num_online_cpus()) cpu_relax(); - smp_mb(); + + local_flush_icache_all(); } return ret; } NOKPROBE_SYMBOL(patch_text_cb); -int patch_text(void *addr, u32 *insns, int ninsns) +int patch_text(void *addr, u32 *insns, size_t len) { int ret; struct patch_insn patch = { .addr = addr, .insns = insns, - .ninsns = ninsns, + .len = len, .cpu_count = ATOMIC_INIT(0), }; diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 3348a61de7d9..b465bc9eb870 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -6,37 +6,9 @@ #include <asm/stacktrace.h> -/* - * Get the return address for a single stackframe and return a pointer to the - * next frame tail. - */ -static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, - unsigned long fp, unsigned long reg_ra) +static bool fill_callchain(void *entry, unsigned long pc) { - struct stackframe buftail; - unsigned long ra = 0; - unsigned long __user *user_frame_tail = - (unsigned long __user *)(fp - sizeof(struct stackframe)); - - /* Check accessibility of one struct frame_tail beyond */ - if (!access_ok(user_frame_tail, sizeof(buftail))) - return 0; - if (__copy_from_user_inatomic(&buftail, user_frame_tail, - sizeof(buftail))) - return 0; - - if (reg_ra != 0) - ra = reg_ra; - else - ra = buftail.ra; - - fp = buftail.fp; - if (ra != 0) - perf_callchain_store(entry, ra); - else - return 0; - - return fp; + return perf_callchain_store(entry, pc) == 0; } /* @@ -56,23 +28,21 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - unsigned long fp = 0; + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } - fp = regs->s0; - perf_callchain_store(entry, regs->epc); - - fp = user_backtrace(entry, fp, regs->ra); - while (fp && !(fp & 0x3) && entry->nr < entry->max_stack) - fp = user_backtrace(entry, fp, 0); -} - -static bool fill_callchain(void *entry, unsigned long pc) -{ - return perf_callchain_store(entry, pc) == 0; + arch_stack_walk_user(fill_callchain, entry, regs); } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + if (perf_guest_state()) { + /* TODO: We don't support guest os callchain now */ + return; + } + walk_stackframe(NULL, regs, fill_callchain, entry); } diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile index 07915dc9279e..81d69d45c06c 100644 --- a/arch/riscv/kernel/pi/Makefile +++ b/arch/riscv/kernel/pi/Makefile @@ -5,20 +5,23 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ $(call cc-option,-mbranch-protection=none) \ -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ + -include $(srctree)/include/linux/hidden.h \ -D__DISABLE_EXPORTS -ffreestanding \ -fno-asynchronous-unwind-tables -fno-unwind-tables \ $(call cc-option,-fno-addrsig) +# Disable LTO +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) + KBUILD_CFLAGS += -mcmodel=medany CFLAGS_cmdline_early.o += -D__NO_FORTIFY +CFLAGS_fdt_early.o += -D__NO_FORTIFY +# lib/string.c already defines __NO_FORTIFY +CFLAGS_ctype.o += -D__NO_FORTIFY +CFLAGS_lib-fdt.o += -D__NO_FORTIFY CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY - -GCOV_PROFILE := n -KASAN_SANITIZE := n -KCSAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n +CFLAGS_archrandom_early.o += -D__NO_FORTIFY $(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \ --remove-section=.note.gnu.property \ @@ -35,5 +38,5 @@ $(obj)/string.o: $(srctree)/lib/string.c FORCE $(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE $(call if_changed_rule,cc_o_c) -obj-y := cmdline_early.pi.o fdt_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o +obj-y := cmdline_early.pi.o fdt_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o archrandom_early.pi.o extra-y := $(patsubst %.pi.o,%.o,$(obj-y)) diff --git a/arch/riscv/kernel/pi/archrandom_early.c b/arch/riscv/kernel/pi/archrandom_early.c new file mode 100644 index 000000000000..3f05d3cf3b7b --- /dev/null +++ b/arch/riscv/kernel/pi/archrandom_early.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/csr.h> +#include <linux/processor.h> + +#include "pi.h" + +/* + * To avoid rewriting code include asm/archrandom.h and create macros + * for the functions that won't be included. + */ +#undef riscv_has_extension_unlikely +#define riscv_has_extension_likely(...) false +#undef pr_err_once +#define pr_err_once(...) + +#include <asm/archrandom.h> + +u64 get_kaslr_seed_zkr(const uintptr_t dtb_pa) +{ + unsigned long seed = 0; + + if (!fdt_early_match_extension_isa((const void *)dtb_pa, "zkr")) + return 0; + + if (!csr_seed_long(&seed)) + return 0; + + return seed; +} diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c index f6d4dedffb84..fbcdc9e4e143 100644 --- a/arch/riscv/kernel/pi/cmdline_early.c +++ b/arch/riscv/kernel/pi/cmdline_early.c @@ -6,15 +6,9 @@ #include <asm/pgtable.h> #include <asm/setup.h> -static char early_cmdline[COMMAND_LINE_SIZE]; +#include "pi.h" -/* - * Declare the functions that are exported (but prefixed) here so that LLVM - * does not complain it lacks the 'static' keyword (which, if added, makes - * LLVM complain because the function is actually unused in this file). - */ -u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa); -bool set_nokaslr_from_cmdline(uintptr_t dtb_pa); +static char early_cmdline[COMMAND_LINE_SIZE]; static char *get_early_cmdline(uintptr_t dtb_pa) { diff --git a/arch/riscv/kernel/pi/fdt_early.c b/arch/riscv/kernel/pi/fdt_early.c index 899610e042ab..9bdee2fafe47 100644 --- a/arch/riscv/kernel/pi/fdt_early.c +++ b/arch/riscv/kernel/pi/fdt_early.c @@ -2,13 +2,9 @@ #include <linux/types.h> #include <linux/init.h> #include <linux/libfdt.h> +#include <linux/ctype.h> -/* - * Declare the functions that are exported (but prefixed) here so that LLVM - * does not complain it lacks the 'static' keyword (which, if added, makes - * LLVM complain because the function is actually unused in this file). - */ -u64 get_kaslr_seed(uintptr_t dtb_pa); +#include "pi.h" u64 get_kaslr_seed(uintptr_t dtb_pa) { @@ -28,3 +24,162 @@ u64 get_kaslr_seed(uintptr_t dtb_pa) *prop = 0; return ret; } + +/** + * fdt_device_is_available - check if a device is available for use + * + * @fdt: pointer to the device tree blob + * @node: offset of the node whose property to find + * + * Returns true if the status property is absent or set to "okay" or "ok", + * false otherwise + */ +static bool fdt_device_is_available(const void *fdt, int node) +{ + const char *status; + int statlen; + + status = fdt_getprop(fdt, node, "status", &statlen); + if (!status) + return true; + + if (statlen > 0) { + if (!strcmp(status, "okay") || !strcmp(status, "ok")) + return true; + } + + return false; +} + +/* Copy of fdt_nodename_eq_ */ +static int fdt_node_name_eq(const void *fdt, int offset, + const char *s) +{ + int olen; + int len = strlen(s); + const char *p = fdt_get_name(fdt, offset, &olen); + + if (!p || olen < len) + /* short match */ + return 0; + + if (memcmp(p, s, len) != 0) + return 0; + + if (p[len] == '\0') + return 1; + else if (!memchr(s, '@', len) && (p[len] == '@')) + return 1; + else + return 0; +} + +/** + * isa_string_contains - check if isa string contains an extension + * + * @isa_str: isa string to search + * @ext_name: the extension to search for + * + * Returns true if the extension is in the given isa string, + * false otherwise + */ +static bool isa_string_contains(const char *isa_str, const char *ext_name) +{ + size_t i, single_end, len = strlen(ext_name); + char ext_end; + + /* Error must contain rv32/64 */ + if (strlen(isa_str) < 4) + return false; + + if (len == 1) { + single_end = strcspn(isa_str, "sSxXzZ"); + /* Search for single chars between rv32/64 and multi-letter extensions */ + for (i = 4; i < single_end; i++) { + if (tolower(isa_str[i]) == ext_name[0]) + return true; + } + return false; + } + + /* Skip to start of multi-letter extensions */ + isa_str = strpbrk(isa_str, "sSxXzZ"); + while (isa_str) { + if (strncasecmp(isa_str, ext_name, len) == 0) { + ext_end = isa_str[len]; + /* Check if matches the whole extension. */ + if (ext_end == '\0' || ext_end == '_') + return true; + } + /* Multi-letter extensions must be split from other multi-letter + * extensions with an "_", the end of a multi-letter extension will + * either be the null character or the "_" at the start of the next + * multi-letter extension. + */ + isa_str = strchr(isa_str, '_'); + if (isa_str) + isa_str++; + } + + return false; +} + +/** + * early_cpu_isa_ext_available - check if cpu node has an extension + * + * @fdt: pointer to the device tree blob + * @node: offset of the cpu node + * @ext_name: the extension to search for + * + * Returns true if the cpu node has the extension, + * false otherwise + */ +static bool early_cpu_isa_ext_available(const void *fdt, int node, const char *ext_name) +{ + const void *prop; + int len; + + prop = fdt_getprop(fdt, node, "riscv,isa-extensions", &len); + if (prop && fdt_stringlist_contains(prop, len, ext_name)) + return true; + + prop = fdt_getprop(fdt, node, "riscv,isa", &len); + if (prop && isa_string_contains(prop, ext_name)) + return true; + + return false; +} + +/** + * fdt_early_match_extension_isa - check if all cpu nodes have an extension + * + * @fdt: pointer to the device tree blob + * @ext_name: the extension to search for + * + * Returns true if the all available the cpu nodes have the extension, + * false otherwise + */ +bool fdt_early_match_extension_isa(const void *fdt, const char *ext_name) +{ + int node, parent; + bool ret = false; + + parent = fdt_path_offset(fdt, "/cpus"); + if (parent < 0) + return false; + + fdt_for_each_subnode(node, fdt, parent) { + if (!fdt_node_name_eq(fdt, node, "cpu")) + continue; + + if (!fdt_device_is_available(fdt, node)) + continue; + + if (!early_cpu_isa_ext_available(fdt, node, ext_name)) + return false; + + ret = true; + } + + return ret; +} diff --git a/arch/riscv/kernel/pi/pi.h b/arch/riscv/kernel/pi/pi.h new file mode 100644 index 000000000000..21141d84fea6 --- /dev/null +++ b/arch/riscv/kernel/pi/pi.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _RISCV_PI_H_ +#define _RISCV_PI_H_ + +#include <linux/types.h> + +/* + * The following functions are exported (but prefixed). Declare them here so + * that LLVM does not complain it lacks the 'static' keyword (which, if + * added, makes LLVM complain because the function is unused). + */ + +u64 get_kaslr_seed(uintptr_t dtb_pa); +u64 get_kaslr_seed_zkr(const uintptr_t dtb_pa); +bool set_nokaslr_from_cmdline(uintptr_t dtb_pa); +u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa); + +bool fdt_early_match_extension_isa(const void *fdt, const char *ext_name); + +#endif /* _RISCV_PI_H_ */ diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile index 8265ff497977..d2129f2c61b8 100644 --- a/arch/riscv/kernel/probes/Makefile +++ b/arch/riscv/kernel/probes/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o -obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c deleted file mode 100644 index 7142ec42e889..000000000000 --- a/arch/riscv/kernel/probes/ftrace.c +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <linux/kprobes.h> - -/* Ftrace callback handler for kprobes -- called under preepmt disabled */ -void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct ftrace_regs *fregs) -{ - struct kprobe *p; - struct pt_regs *regs; - struct kprobe_ctlblk *kcb; - int bit; - - bit = ftrace_test_recursion_trylock(ip, parent_ip); - if (bit < 0) - return; - - p = get_kprobe((kprobe_opcode_t *)ip); - if (unlikely(!p) || kprobe_disabled(p)) - goto out; - - regs = ftrace_get_regs(fregs); - kcb = get_kprobe_ctlblk(); - if (kprobe_running()) { - kprobes_inc_nmissed_count(p); - } else { - unsigned long orig_ip = instruction_pointer(regs); - - instruction_pointer_set(regs, ip); - - __this_cpu_write(current_kprobe, p); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) { - /* - * Emulate singlestep (and also recover regs->pc) - * as if there is a nop - */ - instruction_pointer_set(regs, - (unsigned long)p->addr + MCOUNT_INSN_SIZE); - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - instruction_pointer_set(regs, orig_ip); - } - - /* - * If pre_handler returns !0, it changes regs->pc. We have to - * skip emulating post_handler. - */ - __this_cpu_write(current_kprobe, NULL); - } -out: - ftrace_test_recursion_unlock(bit); -} -NOKPROBE_SYMBOL(kprobe_ftrace_handler); - -int arch_prepare_kprobe_ftrace(struct kprobe *p) -{ - p->ainsn.api.insn = NULL; - return 0; -} diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 2f08c14a933d..c0738d6c6498 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -6,12 +6,13 @@ #include <linux/extable.h> #include <linux/slab.h> #include <linux/stop_machine.h> +#include <linux/vmalloc.h> #include <asm/ptrace.h> #include <linux/uaccess.h> #include <asm/sections.h> #include <asm/cacheflush.h> #include <asm/bug.h> -#include <asm/patch.h> +#include <asm/text-patching.h> #include "decode-insn.h" @@ -23,14 +24,13 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { + size_t len = GET_INSN_LENGTH(p->opcode); u32 insn = __BUG_INSN_32; - unsigned long offset = GET_INSN_LENGTH(p->opcode); - p->ainsn.api.restore = (unsigned long)p->addr + offset; + p->ainsn.api.restore = (unsigned long)p->addr + len; - patch_text(p->ainsn.api.insn, &p->opcode, 1); - patch_text((void *)((unsigned long)(p->ainsn.api.insn) + offset), - &insn, 1); + patch_text_nosync(p->ainsn.api.insn, &p->opcode, len); + patch_text_nosync((void *)p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn)); } static void __kprobes arch_prepare_simulate(struct kprobe *p) @@ -104,29 +104,21 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -#ifdef CONFIG_MMU -void *alloc_insn_page(void) -{ - return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_READ_EXEC, - VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - /* install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { - u32 insn = (p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32 ? - __BUG_INSN_32 : __BUG_INSN_16; + size_t len = GET_INSN_LENGTH(p->opcode); + u32 insn = len == 4 ? __BUG_INSN_32 : __BUG_INSN_16; - patch_text(p->addr, &insn, 1); + patch_text(p->addr, &insn, len); } /* remove breakpoint from text */ void __kprobes arch_disarm_kprobe(struct kprobe *p) { - patch_text(p->addr, &p->opcode, 1); + size_t len = GET_INSN_LENGTH(p->opcode); + + patch_text(p->addr, &p->opcode, len); } void __kprobes arch_remove_kprobe(struct kprobe *p) diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 4b3dc8beaf77..cc15f7ca6cc1 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -167,6 +167,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, /* Initialize the slot */ void *kaddr = kmap_atomic(page); void *dst = kaddr + (vaddr & ~PAGE_MASK); + unsigned long start = (unsigned long)dst; memcpy(dst, src, len); @@ -176,13 +177,6 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, *(uprobe_opcode_t *)dst = __BUG_INSN_32; } + flush_icache_range(start, start + len); kunmap_atomic(kaddr); - - /* - * We probably need flush_icache_user_page() but it needs vma. - * This should work on most of architectures by default. If - * architecture needs to do something different it can define - * its own version of the function. - */ - flush_dcache_page(page); } diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 92922dbd5b5c..a0a40889d79a 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 SiFive */ +#include <linux/bitfield.h> #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -15,7 +16,10 @@ #include <linux/tick.h> #include <linux/ptrace.h> #include <linux/uaccess.h> +#include <linux/personality.h> +#include <linux/entry-common.h> +#include <asm/asm-prototypes.h> #include <asm/unistd.h> #include <asm/processor.h> #include <asm/csr.h> @@ -26,8 +30,7 @@ #include <asm/cpuidle.h> #include <asm/vector.h> #include <asm/cpufeature.h> - -register unsigned long gp_in_global __asm__("gp"); +#include <asm/exec.h> #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include <linux/stackprotector.h> @@ -35,9 +38,10 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif -extern asmlinkage void ret_from_fork(void); +extern asmlinkage void ret_from_fork_kernel_asm(void); +extern asmlinkage void ret_from_fork_user_asm(void); -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { cpu_do_idle(); } @@ -56,7 +60,7 @@ int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) if (!unaligned_ctl_available()) return -EINVAL; - return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr); + return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr); } void __show_regs(struct pt_regs *regs) @@ -101,6 +105,13 @@ void show_regs(struct pt_regs *regs) dump_backtrace(regs, NULL, KERN_DEFAULT); } +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_u32_below(PAGE_SIZE); + return sp & ~0xf; +} + #ifdef CONFIG_COMPAT static bool compat_mode_supported __read_mostly; @@ -173,12 +184,16 @@ void flush_thread(void) memset(¤t->thread.vstate, 0, sizeof(struct __riscv_v_ext_state)); clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE); #endif +#ifdef CONFIG_RISCV_ISA_SUPM + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + envcfg_update_bits(current, ENVCFG_PMM, ENVCFG_PMM_PMLEN_0); +#endif } void arch_release_task_struct(struct task_struct *tsk) { /* Free the vector context of datap. */ - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_free(tsk); } @@ -194,6 +209,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } +asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs) +{ + fn(fn_arg); + + syscall_exit_to_user_mode(regs); +} + +asmlinkage void ret_from_fork_user(struct pt_regs *regs) +{ + syscall_exit_to_user_mode(regs); +} + int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; @@ -201,18 +228,22 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); + /* Ensure all threads in this mm have the same pointer masking mode. */ + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM) && p->mm && (clone_flags & CLONE_VM)) + set_bit(MM_CONTEXT_LOCK_PMLEN, &p->mm->context.flags); + memset(&p->thread.s, 0, sizeof(p->thread.s)); /* p->thread holds context to be restored by __switch_to() */ if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; p->thread.s[0] = (unsigned long)args->fn; p->thread.s[1] = (unsigned long)args->fn_arg; + p->thread.ra = (unsigned long)ret_from_fork_kernel_asm; } else { *childregs = *(current_pt_regs()); /* Turn off status.VS */ @@ -222,12 +253,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (clone_flags & CLONE_SETTLS) childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ - p->thread.s[0] = 0; + p->thread.ra = (unsigned long)ret_from_fork_user_asm; } p->thread.riscv_v_flags = 0; - if (has_vector()) + if (has_vector() || has_xtheadvector()) riscv_v_thread_alloc(p); - p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; } @@ -236,3 +266,154 @@ void __init arch_task_cache_init(void) { riscv_v_setup_ctx_cache(); } + +#ifdef CONFIG_RISCV_ISA_SUPM +enum { + PMLEN_0 = 0, + PMLEN_7 = 7, + PMLEN_16 = 16, +}; + +static bool have_user_pmlen_7; +static bool have_user_pmlen_16; + +/* + * Control the relaxed ABI allowing tagged user addresses into the kernel. + */ +static unsigned int tagged_addr_disabled; + +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) +{ + unsigned long valid_mask = PR_PMLEN_MASK | PR_TAGGED_ADDR_ENABLE; + struct thread_info *ti = task_thread_info(task); + struct mm_struct *mm = task->mm; + unsigned long pmm; + u8 pmlen; + + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return -EINVAL; + + if (is_compat_thread(ti)) + return -EINVAL; + + if (arg & ~valid_mask) + return -EINVAL; + + /* + * Prefer the smallest PMLEN that satisfies the user's request, + * in case choosing a larger PMLEN has a performance impact. + */ + pmlen = FIELD_GET(PR_PMLEN_MASK, arg); + if (pmlen == PMLEN_0) { + pmm = ENVCFG_PMM_PMLEN_0; + } else if (pmlen <= PMLEN_7 && have_user_pmlen_7) { + pmlen = PMLEN_7; + pmm = ENVCFG_PMM_PMLEN_7; + } else if (pmlen <= PMLEN_16 && have_user_pmlen_16) { + pmlen = PMLEN_16; + pmm = ENVCFG_PMM_PMLEN_16; + } else { + return -EINVAL; + } + + /* + * Do not allow the enabling of the tagged address ABI if globally + * disabled via sysctl abi.tagged_addr_disabled, if pointer masking + * is disabled for userspace. + */ + if (arg & PR_TAGGED_ADDR_ENABLE && (tagged_addr_disabled || !pmlen)) + return -EINVAL; + + if (!(arg & PR_TAGGED_ADDR_ENABLE)) + pmlen = PMLEN_0; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + if (test_bit(MM_CONTEXT_LOCK_PMLEN, &mm->context.flags) && mm->context.pmlen != pmlen) { + mmap_write_unlock(mm); + return -EBUSY; + } + + envcfg_update_bits(task, ENVCFG_PMM, pmm); + mm->context.pmlen = pmlen; + + mmap_write_unlock(mm); + + return 0; +} + +long get_tagged_addr_ctrl(struct task_struct *task) +{ + struct thread_info *ti = task_thread_info(task); + long ret = 0; + + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return -EINVAL; + + if (is_compat_thread(ti)) + return -EINVAL; + + /* + * The mm context's pmlen is set only when the tagged address ABI is + * enabled, so the effective PMLEN must be extracted from envcfg.PMM. + */ + switch (task->thread.envcfg & ENVCFG_PMM) { + case ENVCFG_PMM_PMLEN_7: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_7); + break; + case ENVCFG_PMM_PMLEN_16: + ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_16); + break; + } + + if (task->mm->context.pmlen) + ret |= PR_TAGGED_ADDR_ENABLE; + + return ret; +} + +static bool try_to_set_pmm(unsigned long value) +{ + csr_set(CSR_ENVCFG, value); + return (csr_read_clear(CSR_ENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value; +} + +/* + * Global sysctl to disable the tagged user addresses support. This control + * only prevents the tagged address ABI enabling via prctl() and does not + * disable it for tasks that already opted in to the relaxed ABI. + */ + +static const struct ctl_table tagged_addr_sysctl_table[] = { + { + .procname = "tagged_addr_disabled", + .mode = 0644, + .data = &tagged_addr_disabled, + .maxlen = sizeof(int), + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +}; + +static int __init tagged_addr_init(void) +{ + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return 0; + + /* + * envcfg.PMM is a WARL field. Detect which values are supported. + * Assume the supported PMLEN values are the same on all harts. + */ + csr_clear(CSR_ENVCFG, ENVCFG_PMM); + have_user_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7); + have_user_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16); + + if (!register_sysctl("abi", tagged_addr_sysctl_table)) + return -EINVAL; + + return 0; +} +core_initcall(tagged_addr_init); +#endif /* CONFIG_RISCV_ISA_SUPM */ diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index e8515aa9d80b..ea67e9fb7a58 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -28,6 +28,9 @@ enum riscv_regset { #ifdef CONFIG_RISCV_ISA_V REGSET_V, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + REGSET_TAGGED_ADDR_CTRL, +#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -152,6 +155,35 @@ static int riscv_vr_set(struct task_struct *target, } #endif +#ifdef CONFIG_RISCV_ISA_SUPM +static int tagged_addr_ctrl_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + long ctrl = get_tagged_addr_ctrl(target); + + if (IS_ERR_VALUE(ctrl)) + return ctrl; + + return membuf_write(&to, &ctrl, sizeof(ctrl)); +} + +static int tagged_addr_ctrl_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + long ctrl; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); + if (ret) + return ret; + + return set_tagged_addr_ctrl(target, ctrl); +} +#endif + static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { .core_note_type = NT_PRSTATUS, @@ -182,6 +214,16 @@ static const struct user_regset riscv_user_regset[] = { .set = riscv_vr_set, }, #endif +#ifdef CONFIG_RISCV_ISA_SUPM + [REGSET_TAGGED_ADDR_CTRL] = { + .core_note_type = NT_RISCV_TAGGED_ADDR_CTRL, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = tagged_addr_ctrl_get, + .set = tagged_addr_ctrl_set, + }, +#endif }; static const struct user_regset_view riscv_user_native_view = { @@ -377,14 +419,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, return ret; } +#else +static const struct user_regset_view compat_riscv_user_native_view = {}; #endif /* CONFIG_COMPAT */ const struct user_regset_view *task_user_regset_view(struct task_struct *task) { -#ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(task, TIF_32BIT)) + if (is_compat_thread(&task->thread_info)) return &compat_riscv_user_native_view; else -#endif return &riscv_user_native_view; } diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index a72879b4249a..5ab1c7e1a6ed 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -12,9 +12,6 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memmove); diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c index a4559695ce62..0cc5559c08d8 100644 --- a/arch/riscv/kernel/sbi-ipi.c +++ b/arch/riscv/kernel/sbi-ipi.c @@ -13,6 +13,9 @@ #include <linux/irqdomain.h> #include <asm/sbi.h> +DEFINE_STATIC_KEY_FALSE(riscv_sbi_for_rfence); +EXPORT_SYMBOL_GPL(riscv_sbi_for_rfence); + static int sbi_ipi_virq; static void sbi_ipi_handle(struct irq_desc *desc) @@ -68,10 +71,16 @@ void __init sbi_ipi_init(void) * the masking/unmasking of virtual IPIs is done * via generic IPI-Mux */ - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, "irqchip/sbi-ipi:starting", sbi_ipi_starting_cpu, NULL); - riscv_ipi_set_virq_range(virq, BITS_PER_BYTE, false); + riscv_ipi_set_virq_range(virq, BITS_PER_BYTE); pr_info("providing IPIs using SBI IPI extension\n"); + + /* + * Use the SBI remote fence extension to avoid + * the extra context switch needed to handle IPIs. + */ + static_branch_enable(&riscv_sbi_for_rfence); } diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index e66e0999a800..53836a9235e3 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -24,51 +24,6 @@ static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) __ro_after_init; -struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, - unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5) -{ - struct sbiret ret; - - register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); - register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); - register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); - register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); - register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); - register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); - register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); - register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); - asm volatile ("ecall" - : "+r" (a0), "+r" (a1) - : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) - : "memory"); - ret.error = a0; - ret.value = a1; - - return ret; -} -EXPORT_SYMBOL(sbi_ecall); - -int sbi_err_map_linux_errno(int err) -{ - switch (err) { - case SBI_SUCCESS: - return 0; - case SBI_ERR_DENIED: - return -EPERM; - case SBI_ERR_INVALID_PARAM: - return -EINVAL; - case SBI_ERR_INVALID_ADDRESS: - return -EFAULT; - case SBI_ERR_NOT_SUPPORTED: - case SBI_ERR_FAILURE: - default: - return -ENOTSUPP; - }; -} -EXPORT_SYMBOL(sbi_err_map_linux_errno); - #ifdef CONFIG_RISCV_SBI_V01 static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask) { @@ -344,6 +299,76 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, return 0; } +static bool sbi_fwft_supported; + +struct fwft_set_req { + u32 feature; + unsigned long value; + unsigned long flags; + atomic_t error; +}; + +static void cpu_sbi_fwft_set(void *arg) +{ + struct fwft_set_req *req = arg; + int ret; + + ret = sbi_fwft_set(req->feature, req->value, req->flags); + if (ret) + atomic_set(&req->error, ret); +} + +/** + * sbi_fwft_set() - Set a feature on the local hart + * @feature: The feature ID to be set + * @value: The feature value to be set + * @flags: FWFT feature set flags + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags) +{ + struct sbiret ret; + + if (!sbi_fwft_supported) + return -EOPNOTSUPP; + + ret = sbi_ecall(SBI_EXT_FWFT, SBI_EXT_FWFT_SET, + feature, value, flags, 0, 0, 0); + + return sbi_err_map_linux_errno(ret.error); +} + +/** + * sbi_fwft_set_cpumask() - Set a feature for the specified cpumask + * @mask: CPU mask of cpus that need the feature to be set + * @feature: The feature ID to be set + * @value: The feature value to be set + * @flags: FWFT feature set flags + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +int sbi_fwft_set_cpumask(const cpumask_t *mask, u32 feature, + unsigned long value, unsigned long flags) +{ + struct fwft_set_req req = { + .feature = feature, + .value = value, + .flags = flags, + .error = ATOMIC_INIT(0), + }; + + if (!sbi_fwft_supported) + return -EOPNOTSUPP; + + if (feature & SBI_FWFT_GLOBAL_FEATURE_BIT) + return -EINVAL; + + on_each_cpu_mask(mask, cpu_sbi_fwft_set, &req, 1); + + return atomic_read(&req.error); +} + /** * sbi_set_timer() - Program the timer for next timer event. * @stime_value: The value after which next timer event should fire. @@ -528,17 +553,6 @@ long sbi_probe_extension(int extid) } EXPORT_SYMBOL(sbi_probe_extension); -static long __sbi_base_ecall(int fid) -{ - struct sbiret ret; - - ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); - if (!ret.error) - return ret.value; - else - return sbi_err_map_linux_errno(ret.error); -} - static inline long sbi_get_spec_version(void) { return __sbi_base_ecall(SBI_EXT_BASE_GET_SPEC_VERSION); @@ -665,7 +679,7 @@ void __init sbi_init(void) } else { __sbi_rfence = __sbi_rfence_v01; } - if ((sbi_spec_version >= sbi_mk_version(0, 3)) && + if (sbi_spec_version >= sbi_mk_version(0, 3) && sbi_probe_extension(SBI_EXT_SRST)) { pr_info("SBI SRST extension detected\n"); pm_power_off = sbi_srst_power_off; @@ -673,11 +687,16 @@ void __init sbi_init(void) sbi_srst_reboot_nb.priority = 192; register_restart_handler(&sbi_srst_reboot_nb); } - if ((sbi_spec_version >= sbi_mk_version(2, 0)) && - (sbi_probe_extension(SBI_EXT_DBCN) > 0)) { + if (sbi_spec_version >= sbi_mk_version(2, 0) && + sbi_probe_extension(SBI_EXT_DBCN) > 0) { pr_info("SBI DBCN extension detected\n"); sbi_debug_console_available = true; } + if (sbi_spec_version >= sbi_mk_version(3, 0) && + sbi_probe_extension(SBI_EXT_FWFT)) { + pr_info("SBI FWFT extension detected\n"); + sbi_fwft_supported = true; + } } else { __sbi_set_timer = __sbi_set_timer_v01; __sbi_send_ipi = __sbi_send_ipi_v01; diff --git a/arch/riscv/kernel/sbi_ecall.c b/arch/riscv/kernel/sbi_ecall.c new file mode 100644 index 000000000000..24aabb4fbde3 --- /dev/null +++ b/arch/riscv/kernel/sbi_ecall.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Rivos Inc. */ + +#include <asm/sbi.h> +#define CREATE_TRACE_POINTS +#include <asm/trace.h> + +long __sbi_base_ecall(int fid) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); + if (!ret.error) + return ret.value; + else + return sbi_err_map_linux_errno(ret.error); +} +EXPORT_SYMBOL(__sbi_base_ecall); + +struct sbiret __sbi_ecall(unsigned long arg0, unsigned long arg1, + unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5, + int fid, int ext) +{ + struct sbiret ret; + + trace_sbi_call(ext, fid); + + register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); + register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); + register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); + register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); + register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); + register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); + register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); + register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); + asm volatile ("ecall" + : "+r" (a0), "+r" (a1) + : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) + : "memory"); + ret.error = a0; + ret.value = a1; + + trace_sbi_return(ext, ret.error, ret.value); + + return ret; +} +EXPORT_SYMBOL(__sbi_ecall); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4f73c0ae44b2..f7c9a1caa83e 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -66,6 +66,9 @@ static struct resource bss_res = { .name = "Kernel bss", }; static struct resource elfcorehdr_res = { .name = "ELF Core hdr", }; #endif +static int num_standard_resources; +static struct resource *standard_resources; + static int __init add_resource(struct resource *parent, struct resource *res) { @@ -139,7 +142,7 @@ static void __init init_resources(void) struct resource *res = NULL; struct resource *mem_res = NULL; size_t mem_res_sz = 0; - int num_resources = 0, res_idx = 0; + int num_resources = 0, res_idx = 0, non_resv_res = 0; int ret = 0; /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */ @@ -147,9 +150,7 @@ static void __init init_resources(void) res_idx = num_resources - 1; mem_res_sz = num_resources * sizeof(*mem_res); - mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); - if (!mem_res) - panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); + mem_res = memblock_alloc_or_panic(mem_res_sz, SMP_CACHE_BYTES); /* * Start by adding the reserved regions, if they overlap @@ -195,6 +196,7 @@ static void __init init_resources(void) /* Add /memory regions to the resource tree */ for_each_mem_region(region) { res = &mem_res[res_idx--]; + non_resv_res++; if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; @@ -212,6 +214,9 @@ static void __init init_resources(void) goto error; } + num_standard_resources = non_resv_res; + standard_resources = &mem_res[res_idx + 1]; + /* Clean-up any unused pre-allocated resources */ if (res_idx >= 0) memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res)); @@ -223,11 +228,38 @@ static void __init init_resources(void) memblock_free(mem_res, mem_res_sz); } +static int __init reserve_memblock_reserved_regions(void) +{ + u64 i, j; + + for (i = 0; i < num_standard_resources; i++) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) + continue; + + for_each_reserved_mem_range(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "Reserved"); + } + } + + return 0; +} +arch_initcall(reserve_memblock_reserved_regions); static void __init parse_dtb(void) { /* Early scan of device tree from init memory */ - if (early_init_dt_scan(dtb_early_va)) { + if (early_init_dt_scan(dtb_early_va, dtb_early_pa)) { const char *name = of_flat_dt_get_machine_name(); if (name) { @@ -237,11 +269,42 @@ static void __init parse_dtb(void) } else { pr_err("No DTB passed to the kernel\n"); } +} + +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS) +DEFINE_STATIC_KEY_TRUE(qspinlock_key); +EXPORT_SYMBOL(qspinlock_key); +#endif + +static void __init riscv_spinlock_init(void) +{ + char *using_ext = NULL; + + if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) { + pr_info("Ticket spinlock: enabled\n"); + return; + } -#ifdef CONFIG_CMDLINE_FORCE - strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); - pr_info("Forcing kernel command line to: %s\n", boot_command_line); + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && + riscv_isa_extension_available(NULL, ZABHA) && + riscv_isa_extension_available(NULL, ZACAS)) { + using_ext = "using Zabha"; + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) { + using_ext = "using Ziccrse"; + } +#if defined(CONFIG_RISCV_COMBO_SPINLOCKS) + else { + static_branch_disable(&qspinlock_key); + pr_info("Ticket spinlock: enabled\n"); + return; + } #endif + + if (!using_ext) + pr_err("Queued spinlock without Zabha or Ziccrse"); + else + pr_info("Queued spinlock %s: enabled\n", using_ext); } extern void __init init_rt_signal_env(void); @@ -281,13 +344,15 @@ void __init setup_arch(char **cmdline_p) setup_smp(); #endif - if (!acpi_disabled) + if (!acpi_disabled) { acpi_init_rintc_map(); + acpi_map_cpus_to_nodes(); + } riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); - init_rt_signal_env(); apply_boot_alternatives(); + init_rt_signal_env(); if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) @@ -295,6 +360,7 @@ void __init setup_arch(char **cmdline_p) riscv_set_dma_cache_alignment(); riscv_user_isa_enable(); + riscv_spinlock_init(); } bool arch_cpu_is_hotpluggable(int cpu) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 501e66debf69..08378fea3a11 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -84,7 +84,7 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec) datap = state + 1; /* datap is designed to be 16 byte aligned for better performance */ - WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16))); + WARN_ON(!IS_ALIGNED((unsigned long)datap, 16)); get_cpu_vector_context(); riscv_v_vstate_save(¤t->thread.vstate, regs); @@ -119,6 +119,13 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) struct __sc_riscv_v_state __user *state = sc_vec; void __user *datap; + /* + * Mark the vstate as clean prior performing the actual copy, + * to avoid getting the vstate incorrectly clobbered by the + * discarded vector state. + */ + riscv_v_vstate_set_restore(current, regs); + /* Copy everything of __sc_riscv_v_state except datap. */ err = __copy_from_user(¤t->thread.vstate, &state->v_state, offsetof(struct __riscv_v_ext_state, datap)); @@ -133,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) * Copy the whole vector content from user space datap. Use * copy_from_user to prevent information leak. */ - err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); - if (unlikely(err)) - return err; - - riscv_v_vstate_set_restore(current, regs); - - return err; + return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); } #else #define save_v_state(task, regs) (0) @@ -188,7 +189,7 @@ static long restore_sigcontext(struct pt_regs *regs, return 0; case RISCV_V_MAGIC: - if (!has_vector() || !riscv_v_vstate_query(regs) || + if (!(has_vector() || has_xtheadvector()) || !riscv_v_vstate_query(regs) || size != riscv_v_sc_size) return -EINVAL; @@ -210,16 +211,10 @@ static size_t get_rt_frame_size(bool cal_all) frame_size = sizeof(*frame); - if (has_vector()) { + if (has_vector() || has_xtheadvector()) { if (cal_all || riscv_v_vstate_query(task_pt_regs(current))) total_context_size += riscv_v_sc_size; } - /* - * Preserved a __riscv_ctx_hdr for END signal context header if an - * extension uses __riscv_extra_ext_header - */ - if (total_context_size) - total_context_size += sizeof(struct __riscv_ctx_hdr); frame_size += total_context_size; @@ -283,7 +278,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, if (has_fpu()) err |= save_fp_state(regs, &sc->sc_fpregs); /* Save the vector state. */ - if (has_vector() && riscv_v_vstate_query(regs)) + if ((has_vector() || has_xtheadvector()) && riscv_v_vstate_query(regs)) err |= save_v_state(regs, (void __user **)&sc_ext_ptr); /* Write zero to fp-reserved space and check it on restore_sigcontext */ err |= __put_user(0, &sc->sc_extdesc.reserved); diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 45dd4035416e..e650dec44817 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -13,6 +13,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/kexec.h> +#include <linux/kgdb.h> #include <linux/percpu.h> #include <linux/profile.h> #include <linux/smp.h> @@ -21,6 +22,7 @@ #include <linux/delay.h> #include <linux/irq.h> #include <linux/irq_work.h> +#include <linux/nmi.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> @@ -33,16 +35,21 @@ enum ipi_message_type { IPI_CPU_CRASH_STOP, IPI_IRQ_WORK, IPI_TIMER, + IPI_CPU_BACKTRACE, + IPI_KGDB_ROUNDUP, IPI_MAX }; unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = { [0 ... NR_CPUS-1] = INVALID_HARTID }; +EXPORT_SYMBOL_GPL(__cpuid_to_hartid_map); void __init smp_setup_processor_id(void) { cpuid_to_hartid_map(0) = boot_cpu_hartid; + + pr_info("Booting Linux on hartid %lu\n", boot_cpu_hartid); } static DEFINE_PER_CPU_READ_MOSTLY(int, ipi_dummy_dev); @@ -113,6 +120,7 @@ void arch_irq_work_raise(void) static irqreturn_t handle_IPI(int irq, void *data) { + unsigned int cpu = smp_processor_id(); int ipi = irq - ipi_virq_base; switch (ipi) { @@ -126,7 +134,7 @@ static irqreturn_t handle_IPI(int irq, void *data) ipi_stop(); break; case IPI_CPU_CRASH_STOP: - ipi_cpu_crash_stop(smp_processor_id(), get_irq_regs()); + ipi_cpu_crash_stop(cpu, get_irq_regs()); break; case IPI_IRQ_WORK: irq_work_run(); @@ -136,8 +144,14 @@ static irqreturn_t handle_IPI(int irq, void *data) tick_receive_broadcast(); break; #endif + case IPI_CPU_BACKTRACE: + nmi_cpu_backtrace(get_irq_regs()); + break; + case IPI_KGDB_ROUNDUP: + kgdb_nmicallback(cpu, get_irq_regs()); + break; default: - pr_warn("CPU%d: unhandled IPI%d\n", smp_processor_id(), ipi); + pr_warn("CPU%d: unhandled IPI%d\n", cpu, ipi); break; } @@ -171,10 +185,7 @@ bool riscv_ipi_have_virq_range(void) return (ipi_virq_base) ? true : false; } -DEFINE_STATIC_KEY_FALSE(riscv_ipi_for_rfence); -EXPORT_SYMBOL_GPL(riscv_ipi_for_rfence); - -void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) +void riscv_ipi_set_virq_range(int virq, int nr) { int i, err; @@ -197,12 +208,6 @@ void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) /* Enabled IPIs for boot CPU immediately */ riscv_ipi_enable(); - - /* Update RFENCE static key */ - if (use_for_rfence) - static_branch_enable(&riscv_ipi_for_rfence); - else - static_branch_disable(&riscv_ipi_for_rfence); } static const char * const ipi_names[] = { @@ -212,6 +217,8 @@ static const char * const ipi_names[] = { [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", [IPI_TIMER] = "Timer broadcast interrupts", + [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", + [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", }; void show_ipi_stats(struct seq_file *p, int prec) @@ -332,3 +339,29 @@ void arch_smp_send_reschedule(int cpu) send_ipi_single(cpu, IPI_RESCHEDULE); } EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); + +static void riscv_backtrace_ipi(cpumask_t *mask) +{ + send_ipi_mask(mask, IPI_CPU_BACKTRACE); +} + +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) +{ + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, riscv_backtrace_ipi); +} + +#ifdef CONFIG_KGDB +void kgdb_roundup_cpus(void) +{ + int this_cpu = raw_smp_processor_id(); + int cpu; + + for_each_online_cpu(cpu) { + /* No need to roundup ourselves */ + if (cpu == this_cpu) + continue; + + send_ipi_single(cpu, IPI_KGDB_ROUNDUP); + } +} +#endif diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index c4ed7d977f57..601a321e0f17 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -26,9 +26,8 @@ #include <linux/sched/task_stack.h> #include <linux/sched/mm.h> -#include <asm/cpufeature.h> +#include <asm/cacheflush.h> #include <asm/cpu_ops.h> -#include <asm/cpufeature.h> #include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/numa.h> @@ -97,7 +96,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un if (hart == cpuid_to_hartid_map(0)) { BUG_ON(found_boot_cpu); found_boot_cpu = true; - early_map_cpu_to_node(0, acpi_numa_get_nid(cpu_count)); return 0; } @@ -107,7 +105,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un } cpuid_to_hartid_map(cpu_count) = hart; - early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count)); cpu_count++; return 0; @@ -215,6 +212,15 @@ asmlinkage __visible void smp_callin(void) struct mm_struct *mm = &init_mm; unsigned int curr_cpuid = smp_processor_id(); + if (has_vector()) { + /* + * Return as early as possible so the hart with a mismatching + * vlen won't boot. + */ + if (riscv_v_setup_vsize()) + return; + } + /* All kernel threads share the same mm context. */ mmgrab(mm); current->active_mm = mm; @@ -225,19 +231,17 @@ asmlinkage __visible void smp_callin(void) riscv_ipi_enable(); numa_add_cpu(curr_cpuid); - set_cpu_online(curr_cpuid, 1); - if (has_vector()) { - if (riscv_v_setup_vsize()) - elf_hwcap &= ~COMPAT_HWCAP_ISA_V; - } + pr_debug("CPU%u: Booted secondary hartid %lu\n", curr_cpuid, + cpuid_to_hartid_map(curr_cpuid)); - riscv_user_isa_enable(); + set_cpu_online(curr_cpuid, true); /* - * Remote TLB flushes are ignored while the CPU is offline, so emit - * a local TLB flush right now just in case. + * Remote cache and TLB flushes are ignored while the CPU is offline, + * so flush them both right now just in case. */ + local_flush_icache_all(); local_flush_tlb_all(); complete(&cpu_running); /* diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 64a9c093aef9..3fe9e6edef8f 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,12 +16,24 @@ #ifdef CONFIG_FRAME_POINTER -extern asmlinkage void ret_from_exception(void); +extern asmlinkage void handle_exception(void); +extern unsigned long ret_from_exception_end; + +static inline int fp_is_valid(unsigned long fp, unsigned long sp) +{ + unsigned long low, high; + + low = sp + sizeof(struct stackframe); + high = ALIGN(sp, THREAD_SIZE); + + return !(fp < low || fp > high || fp & 0x07); +} void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; + int graph_idx = 0; int level = 0; if (regs) { @@ -41,29 +53,28 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, } for (;;) { - unsigned long low, high; struct stackframe *frame; if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc)))) break; - /* Validate frame pointer */ - low = sp + sizeof(struct stackframe); - high = ALIGN(sp, THREAD_SIZE); - if (unlikely(fp < low || fp > high || fp & 0x7)) + if (unlikely(!fp_is_valid(fp, sp))) break; + /* Unwind stack frame */ frame = (struct stackframe *)fp - 1; sp = fp; - if (regs && (regs->epc == pc) && (frame->fp & 0x7)) { + if (regs && (regs->epc == pc) && fp_is_valid(frame->ra, sp)) { + /* We hit function where ra is not saved on the stack */ fp = frame->ra; pc = regs->ra; } else { fp = frame->fp; - pc = ftrace_graph_ret_addr(current, NULL, frame->ra, + pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); - if (pc == (unsigned long)ret_from_exception) { - if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + if (pc >= (unsigned long)handle_exception && + pc < (unsigned long)&ret_from_exception_end) { + if (unlikely(!fn(arg, pc))) break; pc = ((struct pt_regs *)sp)->epc; @@ -148,8 +159,51 @@ unsigned long __get_wchan(struct task_struct *task) return pc; } -noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, +noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); } + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static unsigned long unwind_user_frame(stack_trace_consume_fn consume_entry, + void *cookie, unsigned long fp, + unsigned long reg_ra) +{ + struct stackframe buftail; + unsigned long ra = 0; + unsigned long __user *user_frame_tail = + (unsigned long __user *)(fp - sizeof(struct stackframe)); + + /* Check accessibility of one struct frame_tail beyond */ + if (!access_ok(user_frame_tail, sizeof(buftail))) + return 0; + if (__copy_from_user_inatomic(&buftail, user_frame_tail, + sizeof(buftail))) + return 0; + + ra = reg_ra ? : buftail.ra; + + fp = buftail.fp; + if (!ra || !consume_entry(cookie, ra)) + return 0; + + return fp; +} + +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) +{ + unsigned long fp = 0; + + fp = regs->s0; + if (!consume_entry(cookie, regs->epc)) + return; + + fp = unwind_user_frame(consume_entry, cookie, fp, regs->ra); + while (fp && !(fp & 0x7)) + fp = unwind_user_frame(consume_entry, cookie, fp, 0); +} diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 299795341e8a..24b3f57d467f 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -14,8 +14,7 @@ void suspend_save_csrs(struct suspend_context *context) { - context->scratch = csr_read(CSR_SCRATCH); - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG)) context->envcfg = csr_read(CSR_ENVCFG); context->tvec = csr_read(CSR_TVEC); context->ie = csr_read(CSR_IE); @@ -31,19 +30,33 @@ void suspend_save_csrs(struct suspend_context *context) */ #ifdef CONFIG_MMU + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SSTC)) { + context->stimecmp = csr_read(CSR_STIMECMP); +#if __riscv_xlen < 64 + context->stimecmph = csr_read(CSR_STIMECMPH); +#endif + } + context->satp = csr_read(CSR_SATP); #endif } void suspend_restore_csrs(struct suspend_context *context) { - csr_write(CSR_SCRATCH, context->scratch); - if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + csr_write(CSR_SCRATCH, 0); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG)) csr_write(CSR_ENVCFG, context->envcfg); csr_write(CSR_TVEC, context->tvec); csr_write(CSR_IE, context->ie); #ifdef CONFIG_MMU + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SSTC)) { + csr_write(CSR_STIMECMP, context->stimecmp); +#if __riscv_xlen < 64 + csr_write(CSR_STIMECMPH, context->stimecmph); +#endif + } + csr_write(CSR_SATP, context->satp); #endif } @@ -132,4 +145,53 @@ static int __init sbi_system_suspend_init(void) } arch_initcall(sbi_system_suspend_init); + +static int sbi_suspend_finisher(unsigned long suspend_type, + unsigned long resume_addr, + unsigned long opaque) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_SUSPEND, + suspend_type, resume_addr, opaque, 0, 0, 0); + + return (ret.error) ? sbi_err_map_linux_errno(ret.error) : 0; +} + +int riscv_sbi_hart_suspend(u32 state) +{ + if (state & SBI_HSM_SUSP_NON_RET_BIT) + return cpu_suspend(state, sbi_suspend_finisher); + else + return sbi_suspend_finisher(state, 0, 0); +} + +bool riscv_sbi_suspend_state_is_valid(u32 state) +{ + if (state > SBI_HSM_SUSPEND_RET_DEFAULT && + state < SBI_HSM_SUSPEND_RET_PLATFORM) + return false; + + if (state > SBI_HSM_SUSPEND_NON_RET_DEFAULT && + state < SBI_HSM_SUSPEND_NON_RET_PLATFORM) + return false; + + return true; +} + +bool riscv_sbi_hsm_is_supported(void) +{ + /* + * The SBI HSM suspend function is only available when: + * 1) SBI version is 0.3 or higher + * 2) SBI HSM extension is available + */ + if (sbi_spec_version < sbi_mk_version(0, 3) || + !sbi_probe_extension(SBI_EXT_HSM)) { + pr_info("HSM suspend not available\n"); + return false; + } + + return true; +} #endif /* CONFIG_RISCV_SBI */ diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index a7c56b41efd2..0b170e18a2be 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -8,11 +8,15 @@ #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/hwprobe.h> +#include <asm/processor.h> +#include <asm/delay.h> #include <asm/sbi.h> #include <asm/switch_to.h> #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/vector.h> +#include <asm/vendor_extensions/sifive_hwprobe.h> +#include <asm/vendor_extensions/thead_hwprobe.h> #include <vdso/vsyscall.h> @@ -69,7 +73,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, if (riscv_isa_extension_available(NULL, c)) pair->value |= RISCV_HWPROBE_IMA_C; - if (has_vector()) + if (has_vector() && riscv_isa_extension_available(NULL, v)) pair->value |= RISCV_HWPROBE_IMA_V; /* @@ -92,29 +96,53 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, * regardless of the kernel's configuration, as no other checks, besides * presence in the hart_isa bitmap, are made. */ + EXT_KEY(ZAAMO); + EXT_KEY(ZABHA); + EXT_KEY(ZACAS); + EXT_KEY(ZALRSC); + EXT_KEY(ZAWRS); EXT_KEY(ZBA); EXT_KEY(ZBB); - EXT_KEY(ZBS); - EXT_KEY(ZICBOZ); EXT_KEY(ZBC); - EXT_KEY(ZBKB); EXT_KEY(ZBKC); EXT_KEY(ZBKX); + EXT_KEY(ZBS); + EXT_KEY(ZCA); + EXT_KEY(ZCB); + EXT_KEY(ZCMOP); + EXT_KEY(ZICBOM); + EXT_KEY(ZICBOZ); + EXT_KEY(ZICNTR); + EXT_KEY(ZICOND); + EXT_KEY(ZIHINTNTL); + EXT_KEY(ZIHINTPAUSE); + EXT_KEY(ZIHPM); + EXT_KEY(ZIMOP); EXT_KEY(ZKND); EXT_KEY(ZKNE); EXT_KEY(ZKNH); EXT_KEY(ZKSED); EXT_KEY(ZKSH); EXT_KEY(ZKT); - EXT_KEY(ZIHINTNTL); EXT_KEY(ZTSO); - EXT_KEY(ZACAS); - EXT_KEY(ZICOND); + /* + * All the following extensions must depend on the kernel + * support of V. + */ if (has_vector()) { EXT_KEY(ZVBB); EXT_KEY(ZVBC); + EXT_KEY(ZVE32F); + EXT_KEY(ZVE32X); + EXT_KEY(ZVE64D); + EXT_KEY(ZVE64F); + EXT_KEY(ZVE64X); + EXT_KEY(ZVFBFMIN); + EXT_KEY(ZVFBFWMA); + EXT_KEY(ZVFH); + EXT_KEY(ZVFHMIN); EXT_KEY(ZVKB); EXT_KEY(ZVKG); EXT_KEY(ZVKNED); @@ -123,15 +151,19 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZVKSED); EXT_KEY(ZVKSH); EXT_KEY(ZVKT); - EXT_KEY(ZVFH); - EXT_KEY(ZVFHMIN); } if (has_fpu()) { + EXT_KEY(ZCD); + EXT_KEY(ZCF); + EXT_KEY(ZFA); + EXT_KEY(ZFBFMIN); EXT_KEY(ZFH); EXT_KEY(ZFHMIN); - EXT_KEY(ZFA); } + + if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM)) + EXT_KEY(SUPM); #undef EXT_KEY } @@ -139,7 +171,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, pair->value &= ~missing; } -static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext) { struct riscv_hwprobe pair; @@ -147,6 +179,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) return (pair.value & ext); } +#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) static u64 hwprobe_misaligned(const struct cpumask *cpus) { int cpu; @@ -159,16 +192,65 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) perf = this_perf; if (perf != this_perf) { - perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + perf = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; break; } } if (perf == -1ULL) - return RISCV_HWPROBE_MISALIGNED_UNKNOWN; + return RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; return perf; } +#else +static u64 hwprobe_misaligned(const struct cpumask *cpus) +{ + if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; + + if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available()) + return RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; + + return RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; +} +#endif + +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus) +{ + int cpu; + u64 perf = -1ULL; + + /* Return if supported or not even if speed wasn't probed */ + for_each_cpu(cpu, cpus) { + int this_perf = per_cpu(vector_misaligned_access, cpu); + + if (perf == -1ULL) + perf = this_perf; + + if (perf != this_perf) { + perf = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + break; + } + } + + if (perf == -1ULL) + return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + + return perf; +} +#else +static u64 hwprobe_vec_misaligned(const struct cpumask *cpus) +{ + if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + + if (IS_ENABLED(CONFIG_RISCV_SLOW_VECTOR_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + + return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; +} +#endif static void hwprobe_one_pair(struct riscv_hwprobe *pair, const struct cpumask *cpus) @@ -194,14 +276,39 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, break; case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: pair->value = hwprobe_misaligned(cpus); break; + case RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF: + pair->value = hwprobe_vec_misaligned(cpus); + break; + case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE: pair->value = 0; if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ)) pair->value = riscv_cboz_block_size; break; + case RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE: + pair->value = 0; + if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOM)) + pair->value = riscv_cbom_block_size; + break; + case RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS: + pair->value = user_max_virt_addr(); + break; + + case RISCV_HWPROBE_KEY_TIME_CSR_FREQ: + pair->value = riscv_timebase; + break; + + case RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0: + hwprobe_isa_vendor_ext_sifive_0(pair, cpus); + break; + + case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0: + hwprobe_isa_vendor_ext_thead_0(pair, cpus); + break; /* * For forward compatibility, unknown keys don't fail the whole @@ -362,8 +469,7 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, static int __init init_hwprobe_vdso_data(void) { - struct vdso_data *vd = __arch_get_k_vdso_data(); - struct arch_vdso_data *avd = &vd->arch_data; + struct vdso_arch_data *avd = vdso_k_arch_data; u64 id_bitsmash = 0; struct riscv_hwprobe pair; int key; diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index f1c1416a9f1e..d77afe05578f 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -7,7 +7,6 @@ #include <linux/syscalls.h> #include <asm/cacheflush.h> -#include <asm-generic/mman-common.h> static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -24,7 +23,7 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, #ifdef CONFIG_64BIT SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0); } @@ -33,7 +32,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, offset) + unsigned long, fd, unsigned long, offset) { /* * Note that the shift for mmap2 is constant (12), diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index dda913764903..6f1a36cb0f3f 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -9,14 +9,16 @@ #include <asm-generic/syscalls.h> #include <asm/syscall.h> +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) + #undef __SYSCALL #define __SYSCALL(nr, call) asmlinkage long __riscv_##call(const struct pt_regs *); -#include <asm/unistd.h> +#include <asm/syscall_table.h> #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = __riscv_##call, void * const sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = __riscv_sys_ni_syscall, -#include <asm/unistd.h> +#include <asm/syscall_table.h> }; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index a1b9be3c4332..9c83848797a7 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -6,6 +6,7 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/randomize_kstack.h> #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/signal.h> @@ -34,7 +35,7 @@ int show_unhandled_signals = 1; -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns) { @@ -80,7 +81,7 @@ void die(struct pt_regs *regs, const char *str) oops_enter(); - spin_lock_irqsave(&die_lock, flags); + raw_spin_lock_irqsave(&die_lock, flags); console_verbose(); bust_spinlocks(1); @@ -99,7 +100,7 @@ void die(struct pt_regs *regs, const char *str) bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - spin_unlock_irqrestore(&die_lock, flags); + raw_spin_unlock_irqrestore(&die_lock, flags); oops_exit(); if (in_interrupt()) @@ -121,7 +122,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); pr_cont("\n"); __show_regs(regs); - dump_instr(KERN_EMERG, regs); + dump_instr(KERN_INFO, regs); } force_sig_fault(signo, code, (void __user *)addr); @@ -197,47 +198,57 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re DO_ERROR_INFO(do_trap_load_fault, SIGSEGV, SEGV_ACCERR, "load access fault"); -asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) +enum misaligned_access_type { + MISALIGNED_STORE, + MISALIGNED_LOAD, +}; +static const struct { + const char *type_str; + int (*handler)(struct pt_regs *regs); +} misaligned_handler[] = { + [MISALIGNED_STORE] = { + .type_str = "Oops - store (or AMO) address misaligned", + .handler = handle_misaligned_store, + }, + [MISALIGNED_LOAD] = { + .type_str = "Oops - load address misaligned", + .handler = handle_misaligned_load, + }, +}; + +static void do_trap_misaligned(struct pt_regs *regs, enum misaligned_access_type type) { + irqentry_state_t state; + if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); + local_irq_enable(); + } else { + state = irqentry_nmi_enter(regs); + } - if (handle_misaligned_load(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - load address misaligned"); + if (misaligned_handler[type].handler(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + misaligned_handler[type].type_str); + if (user_mode(regs)) { + local_irq_disable(); irqentry_exit_to_user_mode(regs); } else { - irqentry_state_t state = irqentry_nmi_enter(regs); - - if (handle_misaligned_load(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - load address misaligned"); - irqentry_nmi_exit(regs, state); } } -asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs) +asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) { - if (user_mode(regs)) { - irqentry_enter_from_user_mode(regs); - - if (handle_misaligned_store(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - store (or AMO) address misaligned"); - - irqentry_exit_to_user_mode(regs); - } else { - irqentry_state_t state = irqentry_nmi_enter(regs); - - if (handle_misaligned_store(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - store (or AMO) address misaligned"); + do_trap_misaligned(regs, MISALIGNED_LOAD); +} - irqentry_nmi_exit(regs, state); - } +asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs) +{ + do_trap_misaligned(regs, MISALIGNED_STORE); } + DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); DO_ERROR_INFO(do_trap_ecall_s, @@ -310,22 +321,36 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) } } -asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) +asmlinkage __visible __trap_section __no_stack_protector +void do_trap_ecall_u(struct pt_regs *regs) { if (user_mode(regs)) { long syscall = regs->a7; regs->epc += 4; regs->orig_a0 = regs->a0; + regs->a0 = -ENOSYS; riscv_v_vstate_discard(regs); syscall = syscall_enter_from_user_mode(regs, syscall); + add_random_kstack_offset(); + if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else if (syscall != -1) - regs->a0 = -ENOSYS; + + /* + * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), + * so the maximum stack offset is 1k bytes (10 bits). + * + * The actual entropy will be further reduced by the compiler when + * applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned + * for RV32I or RV64I. + * + * The resulting 6 bits of entropy is seen in SP[9:4]. + */ + choose_random_kstack_offset(get_random_u16()); syscall_exit_to_user_mode(regs); } else { diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 8ded225e8c5b..dd8e4af6583f 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -16,6 +16,8 @@ #include <asm/entry-common.h> #include <asm/hwprobe.h> #include <asm/cpufeature.h> +#include <asm/sbi.h> +#include <asm/vector.h> #define INSN_MATCH_LB 0x3 #define INSN_MASK_LB 0x707f @@ -87,6 +89,13 @@ #define INSN_MATCH_C_FSWSP 0xe002 #define INSN_MASK_C_FSWSP 0xe003 +#define INSN_MATCH_C_LHU 0x8400 +#define INSN_MASK_C_LHU 0xfc43 +#define INSN_MATCH_C_LH 0x8440 +#define INSN_MASK_C_LH 0xfc43 +#define INSN_MATCH_C_SH 0x8c00 +#define INSN_MASK_C_SH 0xfc43 + #define INSN_LEN(insn) ((((insn) & 0x3) < 0x3) ? 2 : 4) #if defined(CONFIG_64BIT) @@ -136,8 +145,6 @@ #define REG_PTR(insn, pos, regs) \ (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)) -#define GET_RM(insn) (((insn) >> 12) & 7) - #define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) #define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) #define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) @@ -264,86 +271,14 @@ static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, #define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs)) #define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs)) -#ifdef CONFIG_RISCV_M_MODE -static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) -{ - u8 val; - - asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr)); - *r_val = val; - - return 0; -} - -static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) -{ - asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr)); - - return 0; -} - -static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn) -{ - register ulong __mepc asm ("a2") = mepc; - ulong val, rvc_mask = 3, tmp; - - asm ("and %[tmp], %[addr], 2\n" - "bnez %[tmp], 1f\n" -#if defined(CONFIG_64BIT) - __stringify(LWU) " %[insn], (%[addr])\n" -#else - __stringify(LW) " %[insn], (%[addr])\n" -#endif - "and %[tmp], %[insn], %[rvc_mask]\n" - "beq %[tmp], %[rvc_mask], 2f\n" - "sll %[insn], %[insn], %[xlen_minus_16]\n" - "srl %[insn], %[insn], %[xlen_minus_16]\n" - "j 2f\n" - "1:\n" - "lhu %[insn], (%[addr])\n" - "and %[tmp], %[insn], %[rvc_mask]\n" - "bne %[tmp], %[rvc_mask], 2f\n" - "lhu %[tmp], 2(%[addr])\n" - "sll %[tmp], %[tmp], 16\n" - "add %[insn], %[insn], %[tmp]\n" - "2:" - : [insn] "=&r" (val), [tmp] "=&r" (tmp) - : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask), - [xlen_minus_16] "i" (XLEN_MINUS_16)); - - *r_insn = val; - - return 0; -} -#else -static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) -{ - if (user_mode(regs)) { - return __get_user(*r_val, (u8 __user *)addr); - } else { - *r_val = *addr; - return 0; - } -} - -static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) -{ - if (user_mode(regs)) { - return __put_user(val, (u8 __user *)addr); - } else { - *addr = val; - return 0; - } -} - -#define __read_insn(regs, insn, insn_addr) \ +#define __read_insn(regs, insn, insn_addr, type) \ ({ \ int __ret; \ \ if (user_mode(regs)) { \ - __ret = __get_user(insn, insn_addr); \ + __ret = get_user(insn, (type __user *) insn_addr); \ } else { \ - insn = *(__force u16 *)insn_addr; \ + insn = *(type *)insn_addr; \ __ret = 0; \ } \ \ @@ -356,9 +291,8 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) if (epc & 0x2) { ulong tmp = 0; - u16 __user *insn_addr = (u16 __user *)epc; - if (__read_insn(regs, insn, insn_addr)) + if (__read_insn(regs, insn, epc, u16)) return -EFAULT; /* __get_user() uses regular "lw" which sign extend the loaded * value make sure to clear higher order bits in case we "or" it @@ -369,16 +303,14 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) *r_insn = insn; return 0; } - insn_addr++; - if (__read_insn(regs, tmp, insn_addr)) + epc += sizeof(u16); + if (__read_insn(regs, tmp, epc, u16)) return -EFAULT; *r_insn = (tmp << 16) | insn; return 0; } else { - u32 __user *insn_addr = (u32 __user *)epc; - - if (__read_insn(regs, insn, insn_addr)) + if (__read_insn(regs, insn, epc, u32)) return -EFAULT; if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) { *r_insn = insn; @@ -390,7 +322,6 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) return 0; } } -#endif union reg_data { u8 data_bytes[8]; @@ -398,22 +329,47 @@ union reg_data { u64 data_u64; }; -static bool unaligned_ctl __read_mostly; - /* sysctl hooks */ int unaligned_enabled __read_mostly = 1; /* Enabled by default */ -int handle_misaligned_load(struct pt_regs *regs) +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +static int handle_vector_misaligned_load(struct pt_regs *regs) +{ + unsigned long epc = regs->epc; + unsigned long insn; + + if (get_insn(regs, epc, &insn)) + return -1; + + /* Only return 0 when in check_vector_unaligned_access_emulated */ + if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + regs->epc = epc + INSN_LEN(insn); + return 0; + } + + /* If vector instruction we don't emulate it yet */ + regs->epc = epc; + return -1; +} +#else +static int handle_vector_misaligned_load(struct pt_regs *regs) +{ + return -1; +} +#endif + +static int handle_scalar_misaligned_load(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; unsigned long insn; unsigned long addr = regs->badaddr; - int i, fp = 0, shift = 0, len = 0; + int fp = 0, shift = 0, len = 0; perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); - *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; if (!unaligned_enabled) return -1; @@ -481,6 +437,13 @@ int handle_misaligned_load(struct pt_regs *regs) fp = 1; len = 4; #endif + } else if ((insn & INSN_MASK_C_LHU) == INSN_MATCH_C_LHU) { + len = 2; + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LH) == INSN_MATCH_C_LH) { + len = 2; + shift = 8 * (sizeof(ulong) - len); + insn = RVC_RS2S(insn) << SH_RD; } else { regs->epc = epc; return -1; @@ -490,9 +453,11 @@ int handle_misaligned_load(struct pt_regs *regs) return -EOPNOTSUPP; val.data_u64 = 0; - for (i = 0; i < len; i++) { - if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i])) + if (user_mode(regs)) { + if (copy_from_user_nofault(&val, (u8 __user *)addr, len)) return -1; + } else { + memcpy(&val, (u8 *)addr, len); } if (!fp) @@ -507,13 +472,13 @@ int handle_misaligned_load(struct pt_regs *regs) return 0; } -int handle_misaligned_store(struct pt_regs *regs) +static int handle_scalar_misaligned_store(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; unsigned long insn; unsigned long addr = regs->badaddr; - int i, len = 0, fp = 0; + int len = 0, fp = 0; perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); @@ -578,6 +543,9 @@ int handle_misaligned_store(struct pt_regs *regs) len = 4; val.data_ulong = GET_F32_RS2C(insn, regs); #endif + } else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) { + len = 2; + val.data_ulong = GET_RS2S(insn, regs); } else { regs->epc = epc; return -1; @@ -586,9 +554,11 @@ int handle_misaligned_store(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_FPU) && fp) return -EOPNOTSUPP; - for (i = 0; i < len; i++) { - if (store_u8(regs, (void *)(addr + i), val.data_bytes[i])) + if (user_mode(regs)) { + if (copy_to_user_nofault((u8 __user *)addr, &val, len)) return -1; + } else { + memcpy((u8 *)addr, &val, len); } regs->epc = epc + INSN_LEN(insn); @@ -596,52 +566,225 @@ int handle_misaligned_store(struct pt_regs *regs) return 0; } -bool check_unaligned_access_emulated(int cpu) +int handle_misaligned_load(struct pt_regs *regs) +{ + unsigned long epc = regs->epc; + unsigned long insn; + + if (IS_ENABLED(CONFIG_RISCV_VECTOR_MISALIGNED)) { + if (get_insn(regs, epc, &insn)) + return -1; + + if (insn_is_vector(insn)) + return handle_vector_misaligned_load(regs); + } + + if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED)) + return handle_scalar_misaligned_load(regs); + + return -1; +} + +int handle_misaligned_store(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED)) + return handle_scalar_misaligned_store(regs); + + return -1; +} + +#ifdef CONFIG_RISCV_VECTOR_MISALIGNED +void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused) +{ + long *mas_ptr = this_cpu_ptr(&vector_misaligned_access); + unsigned long tmp_var; + + *mas_ptr = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + + kernel_vector_begin(); + /* + * In pre-13.0.0 versions of GCC, vector registers cannot appear in + * the clobber list. This inline asm clobbers v0, but since we do not + * currently build the kernel with V enabled, the v0 clobber arg is not + * needed (as the compiler will not emit vector code itself). If the kernel + * is changed to build with V enabled, the clobber arg will need to be + * added here. + */ + __asm__ __volatile__ ( + ".balign 4\n\t" + ".option push\n\t" + ".option arch, +zve32x\n\t" + " vsetivli zero, 1, e16, m1, ta, ma\n\t" // Vectors of 16b + " vle16.v v0, (%[ptr])\n\t" // Load bytes + ".option pop\n\t" + : : [ptr] "r" ((u8 *)&tmp_var + 1)); + kernel_vector_end(); +} + +bool __init check_vector_unaligned_access_emulated_all_cpus(void) +{ + int cpu; + + /* + * While being documented as very slow, schedule_on_each_cpu() is used since + * kernel_vector_begin() expects irqs to be enabled or it will panic() + */ + schedule_on_each_cpu(check_vector_unaligned_access_emulated); + + for_each_online_cpu(cpu) + if (per_cpu(vector_misaligned_access, cpu) + == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return false; + + return true; +} +#else +bool __init check_vector_unaligned_access_emulated_all_cpus(void) +{ + return false; +} +#endif + +static bool all_cpus_unaligned_scalar_access_emulated(void) +{ + int cpu; + + for_each_online_cpu(cpu) + if (per_cpu(misaligned_access_speed, cpu) != + RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED) + return false; + + return true; +} + +#ifdef CONFIG_RISCV_SCALAR_MISALIGNED + +static bool unaligned_ctl __read_mostly; + +static void check_unaligned_access_emulated(void *arg __always_unused) { + int cpu = smp_processor_id(); long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); unsigned long tmp_var, tmp_val; - bool misaligned_emu_detected; - *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + *mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; __asm__ __volatile__ ( " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); +} + +static int cpu_online_check_unaligned_access_emulated(unsigned int cpu) +{ + long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); + + check_unaligned_access_emulated(NULL); - misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); /* * If unaligned_ctl is already set, this means that we detected that all * CPUS uses emulated misaligned access at boot time. If that changed * when hotplugging the new cpu, this is something we don't handle. */ - if (unlikely(unaligned_ctl && !misaligned_emu_detected)) { + if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED))) { pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n"); - while (true) - cpu_relax(); + return -EINVAL; } - return misaligned_emu_detected; + return 0; } -void unaligned_emulation_finish(void) +bool __init check_unaligned_access_emulated_all_cpus(void) { - int cpu; - /* * We can only support PR_UNALIGN controls if all CPUs have misaligned * accesses emulated since tasks requesting such control can run on any * CPU. */ - for_each_present_cpu(cpu) { - if (per_cpu(misaligned_access_speed, cpu) != - RISCV_HWPROBE_MISALIGNED_EMULATED) { - return; - } - } + on_each_cpu(check_unaligned_access_emulated, NULL, 1); + + if (!all_cpus_unaligned_scalar_access_emulated()) + return false; + unaligned_ctl = true; + return true; } bool unaligned_ctl_available(void) { return unaligned_ctl; } +#else +bool __init check_unaligned_access_emulated_all_cpus(void) +{ + return false; +} +static int cpu_online_check_unaligned_access_emulated(unsigned int cpu) +{ + return 0; +} +#endif + +static bool misaligned_traps_delegated; + +#ifdef CONFIG_RISCV_SBI + +static int cpu_online_sbi_unaligned_setup(unsigned int cpu) +{ + if (sbi_fwft_set(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0) && + misaligned_traps_delegated) { + pr_crit("Misaligned trap delegation non homogeneous (expected delegated)"); + return -EINVAL; + } + + return 0; +} + +void __init unaligned_access_init(void) +{ + int ret; + + ret = sbi_fwft_set_online_cpus(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0); + if (ret) + return; + + misaligned_traps_delegated = true; + pr_info("SBI misaligned access exception delegation ok\n"); + /* + * Note that we don't have to take any specific action here, if + * the delegation is successful, then + * check_unaligned_access_emulated() will verify that indeed the + * platform traps on misaligned accesses. + */ +} +#else +void __init unaligned_access_init(void) {} + +static int cpu_online_sbi_unaligned_setup(unsigned int cpu __always_unused) +{ + return 0; +} + +#endif + +int cpu_online_unaligned_access_init(unsigned int cpu) +{ + int ret; + + ret = cpu_online_sbi_unaligned_setup(cpu); + if (ret) + return ret; + + return cpu_online_check_unaligned_access_emulated(cpu); +} + +bool misaligned_traps_can_delegate(void) +{ + /* + * Either we successfully requested misaligned traps delegation for all + * CPUs, or the SBI does not implement the FWFT extension but delegated + * the exception by default. + */ + return misaligned_traps_delegated || + all_cpus_unaligned_scalar_access_emulated(); +} +EXPORT_SYMBOL_GPL(misaligned_traps_can_delegate); diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c new file mode 100644 index 000000000000..ae2068425fbc --- /dev/null +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -0,0 +1,492 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Rivos Inc. + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/jump_label.h> +#include <linux/kthread.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/types.h> +#include <asm/cpufeature.h> +#include <asm/hwprobe.h> +#include <asm/vector.h> + +#include "copy-unaligned.h" + +#define MISALIGNED_ACCESS_JIFFIES_LG2 1 +#define MISALIGNED_BUFFER_SIZE 0x4000 +#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) +#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) + +DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; +DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + +static long unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; +static long unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN; + +static cpumask_t fast_misaligned_access; + +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS +static int check_unaligned_access(void *param) +{ + int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page = param; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; + + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) + return 0; + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + /* Do a warmup. */ + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + preempt_disable(); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. + */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + mb(); + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + preempt_enable(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", + cpu); + + return 0; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) ? "fast" : "slow"); + + per_cpu(misaligned_access_speed, cpu) = speed; + + /* + * Set the value of fast_misaligned_access of a CPU. These operations + * are atomic to avoid race conditions. + */ + if (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) + cpumask_set_cpu(cpu, &fast_misaligned_access); + else + cpumask_clear_cpu(cpu, &fast_misaligned_access); + + return 0; +} + +static void __init check_unaligned_access_nonboot_cpu(void *param) +{ + unsigned int cpu = smp_processor_id(); + struct page **pages = param; + + if (smp_processor_id() != 0) + check_unaligned_access(pages[cpu]); +} + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static void __init check_unaligned_access_speed_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. + */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + +out: + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); +} +#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ +static void __init check_unaligned_access_speed_all_cpus(void) +{ +} +#endif + +DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); + +static void modify_unaligned_access_branches(cpumask_t *mask, int weight) +{ + if (cpumask_weight(mask) == weight) + static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key); + else + static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key); +} + +static void set_unaligned_access_static_branches_except_cpu(int cpu) +{ + /* + * Same as set_unaligned_access_static_branches, except excludes the + * given CPU from the result. When a CPU is hotplugged into an offline + * state, this function is called before the CPU is set to offline in + * the cpumask, and thus the CPU needs to be explicitly excluded. + */ + + cpumask_t fast_except_me; + + cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask); + cpumask_clear_cpu(cpu, &fast_except_me); + + modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1); +} + +static void set_unaligned_access_static_branches(void) +{ + /* + * This will be called after check_unaligned_access_all_cpus so the + * result of unaligned access speed for all CPUs will be available. + * + * To avoid the number of online cpus changing between reading + * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be + * held before calling this function. + */ + + cpumask_t fast_and_online; + + cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask); + + modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); +} + +static int __init lock_and_set_unaligned_access_static_branch(void) +{ + cpus_read_lock(); + set_unaligned_access_static_branches(); + cpus_read_unlock(); + + return 0; +} + +arch_initcall_sync(lock_and_set_unaligned_access_static_branch); + +static int riscv_online_cpu(unsigned int cpu) +{ + int ret = cpu_online_unaligned_access_init(cpu); + + if (ret) + return ret; + + /* We are already set since the last check */ + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { + goto exit; + } else if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { + per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param; + goto exit; + } + +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS + { + static struct page *buf; + + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); + } +#endif + +exit: + set_unaligned_access_static_branches(); + + return 0; +} + +static int riscv_offline_cpu(unsigned int cpu) +{ + set_unaligned_access_static_branches_except_cpu(cpu); + + return 0; +} + +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS +static void check_vector_unaligned_access(struct work_struct *work __always_unused) +{ + int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return; + + page = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!page) { + pr_warn("Allocation failure, not measuring vector misaligned performance\n"); + return; + } + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + + /* Do a warmup. */ + kernel_vector_begin(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. + */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + kernel_vector_end(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n", + cpu); + + goto free; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of vector byte access time to vector unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow"); + + per_cpu(vector_misaligned_access, cpu) = speed; + +free: + __free_pages(page, MISALIGNED_BUFFER_ORDER); +} + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + schedule_on_each_cpu(check_vector_unaligned_access); + + return 0; +} +#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ +static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + return 0; +} +#endif + +static int riscv_online_cpu_vec(unsigned int cpu) +{ + if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; + return 0; + } + +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return 0; + + check_vector_unaligned_access_emulated(NULL); + check_vector_unaligned_access(NULL); +#endif + + return 0; +} + +static const char * const speed_str[] __initconst = { NULL, NULL, "slow", "fast", "unsupported" }; + +static int __init set_unaligned_scalar_speed_param(char *str) +{ + if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_FAST])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED])) + unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED; + else + return -EINVAL; + + return 1; +} +__setup("unaligned_scalar_speed=", set_unaligned_scalar_speed_param); + +static int __init set_unaligned_vector_speed_param(char *str) +{ + if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_FAST])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED])) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + else + return -EINVAL; + + return 1; +} +__setup("unaligned_vector_speed=", set_unaligned_vector_speed_param); + +static int __init check_unaligned_access_all_cpus(void) +{ + int cpu; + + unaligned_access_init(); + + if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { + pr_info("scalar unaligned access speed set to '%s' (%lu) by command line\n", + speed_str[unaligned_scalar_speed_param], unaligned_scalar_speed_param); + for_each_online_cpu(cpu) + per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param; + } else if (!check_unaligned_access_emulated_all_cpus()) { + check_unaligned_access_speed_all_cpus(); + } + + if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + if (!has_vector() && + unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) { + pr_warn("vector support is not available, ignoring unaligned_vector_speed=%s\n", + speed_str[unaligned_vector_speed_param]); + } else { + pr_info("vector unaligned access speed set to '%s' (%lu) by command line\n", + speed_str[unaligned_vector_speed_param], unaligned_vector_speed_param); + } + } + + if (!has_vector()) + unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; + + if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + for_each_online_cpu(cpu) + per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; + } else if (!check_vector_unaligned_access_emulated_all_cpus() && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"); + } + + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. + */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, riscv_offline_cpu); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu_vec, NULL); + + return 0; +} + +arch_initcall(check_unaligned_access_all_cpus); diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 98315b98256d..3a8e038b10a2 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -13,33 +13,17 @@ #include <linux/err.h> #include <asm/page.h> #include <asm/vdso.h> -#include <linux/time_namespace.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <vdso/vsyscall.h> -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - -enum rv_vdso_map { - RV_VDSO_MAP_VVAR, - RV_VDSO_MAP_VDSO, -}; - -#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) - -static union vdso_data_store vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = vdso_data_store.data; +#define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT) struct __vdso_info { const char *name; const char *vdso_code_start; const char *vdso_code_end; unsigned long vdso_pages; - /* Data Mapping */ - struct vm_special_mapping *dm; /* Code Mapping */ struct vm_special_mapping *cm; }; @@ -86,112 +70,29 @@ static void __init __vdso_init(struct __vdso_info *vdso_info) vdso_info->cm->pages = vdso_pagelist; } -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -/* - * The vvar mapping contains data for a specific time namespace, so when a task - * changes namespace we must unmap its vvar data for the old namespace. - * Subsequent faults will map in data for the new namespace. - * - * For more details see timens_setup_vdso_data(). - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - - mmap_read_lock(mm); - - for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, vdso_info.dm)) - zap_vma_pages(vma); -#ifdef CONFIG_COMPAT - if (vma_is_special_mapping(vma, compat_vdso_info.dm)) - zap_vma_pages(vma); -#endif - } - - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long pfn; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - if (timens_page) - pfn = page_to_pfn(timens_page); - else - pfn = sym_to_pfn(vdso_data); - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = sym_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - - return vmf_insert_pfn(vma, vmf->address, pfn); -} - -static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = { - [RV_VDSO_MAP_VVAR] = { - .name = "[vvar]", - .fault = vvar_fault, - }, - [RV_VDSO_MAP_VDSO] = { - .name = "[vdso]", - .mremap = vdso_mremap, - }, +static struct vm_special_mapping rv_vdso_map __ro_after_init = { + .name = "[vdso]", + .mremap = vdso_mremap, }; static struct __vdso_info vdso_info __ro_after_init = { .name = "vdso", .vdso_code_start = vdso_start, .vdso_code_end = vdso_end, - .dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR], - .cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO], + .cm = &rv_vdso_map, }; #ifdef CONFIG_COMPAT -static struct vm_special_mapping rv_compat_vdso_maps[] __ro_after_init = { - [RV_VDSO_MAP_VVAR] = { - .name = "[vvar]", - .fault = vvar_fault, - }, - [RV_VDSO_MAP_VDSO] = { - .name = "[vdso]", - .mremap = vdso_mremap, - }, +static struct vm_special_mapping rv_compat_vdso_map __ro_after_init = { + .name = "[vdso]", + .mremap = vdso_mremap, }; static struct __vdso_info compat_vdso_info __ro_after_init = { .name = "compat_vdso", .vdso_code_start = compat_vdso_start, .vdso_code_end = compat_vdso_end, - .dm = &rv_compat_vdso_maps[RV_VDSO_MAP_VVAR], - .cm = &rv_compat_vdso_maps[RV_VDSO_MAP_VDSO], + .cm = &rv_compat_vdso_map, }; #endif @@ -214,7 +115,7 @@ static int __setup_additional_pages(struct mm_struct *mm, unsigned long vdso_base, vdso_text_len, vdso_mapping_len; void *ret; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); vdso_text_len = vdso_info->vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ @@ -226,8 +127,7 @@ static int __setup_additional_pages(struct mm_struct *mm, goto up_fail; } - ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE, - (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info->dm); + ret = vdso_install_vvar_mapping(mm, vdso_base); if (IS_ERR(ret)) goto up_fail; @@ -236,7 +136,7 @@ static int __setup_additional_pages(struct mm_struct *mm, ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), + (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_SEALED_SYSMAP), vdso_info->cm); if (IS_ERR(ret)) diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 9b517fe1b8a8..9ebb5e590f93 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -2,7 +2,7 @@ # Copied from arch/tile/kernel/vdso/Makefile # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include # Symbols present in the vdso vdso-syms = rt_sigreturn ifdef CONFIG_64BIT @@ -13,16 +13,29 @@ vdso-syms += flush_icache vdso-syms += hwprobe vdso-syms += sys_hwprobe +ifdef CONFIG_VDSO_GETRANDOM +vdso-syms += getrandom +endif + # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o +ifdef CONFIG_VDSO_GETRANDOM +obj-vdso += vgetrandom-chacha.o +endif + ccflags-y := -fno-stack-protector ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -fno-builtin ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) endif +ifneq ($(c-getrandom-y),) + CFLAGS_getrandom.o += -fPIC -include $(c-getrandom-y) +endif + CFLAGS_hwprobe.o += -fPIC # Build rules @@ -37,21 +50,17 @@ endif # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) - -# Disable profiling and instrumentation for VDSO code -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n +CFLAGS_REMOVE_getrandom.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) +CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Force dependency $(obj)/vdso.o: $(obj)/vdso.so # link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE - $(call if_changed,vdsold) -LDFLAGS_vdso.so.dbg = -shared -S -soname=linux-vdso.so.1 \ - --build-id=sha1 --hash-style=both --eh-frame-hdr + $(call if_changed,vdsold_and_check) +LDFLAGS_vdso.so.dbg = -shared -soname=linux-vdso.so.1 \ + --build-id=sha1 --eh-frame-hdr # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -59,7 +68,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +gen-vdsosym := $(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ @@ -69,7 +78,8 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE # actual build commands # The DSO images are built using a special linker script # Make sure only to export the intended __vdso_xxx symbol offsets. -quiet_cmd_vdsold = VDSOLD $@ - cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ +quiet_cmd_vdsold_and_check = VDSOLD $@ + cmd_vdsold_and_check = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ - rm $@.tmp + rm $@.tmp && \ + $(cmd_vdso_check) diff --git a/arch/riscv/kernel/vdso/getrandom.c b/arch/riscv/kernel/vdso/getrandom.c new file mode 100644 index 000000000000..f21922e8cebd --- /dev/null +++ b/arch/riscv/kernel/vdso/getrandom.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved. + */ +#include <linux/types.h> + +ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); +} diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index 1e926e4b5881..2ddeba6c68dd 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -16,8 +16,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count, size_t cpusetsize, unsigned long *cpus, unsigned int flags) { - const struct vdso_data *vd = __arch_get_vdso_data(); - const struct arch_vdso_data *avd = &vd->arch_data; + const struct vdso_arch_data *avd = &vdso_u_arch_data; bool all_cpus = !cpusetsize && !cpus; struct riscv_hwprobe *p = pairs; struct riscv_hwprobe *end = pairs + pair_count; @@ -51,8 +50,7 @@ static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count, size_t cpusetsize, unsigned long *cpus, unsigned int flags) { - const struct vdso_data *vd = __arch_get_vdso_data(); - const struct arch_vdso_data *avd = &vd->arch_data; + const struct vdso_arch_data *avd = &vdso_u_arch_data; struct riscv_hwprobe *p = pairs; struct riscv_hwprobe *end = pairs + pair_count; unsigned char *c = (unsigned char *)cpus; diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index cbe2a179331d..7c15b0f4ee3b 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -4,15 +4,14 @@ */ #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_ARCH(riscv) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -81,6 +80,9 @@ VERSION #ifndef COMPAT_VDSO __vdso_riscv_hwprobe; #endif +#if defined(CONFIG_VDSO_GETRANDOM) && !defined(COMPAT_VDSO) + __vdso_getrandom; +#endif local: *; }; } diff --git a/arch/riscv/kernel/vdso/vgetrandom-chacha.S b/arch/riscv/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..5f0dad8f2373 --- /dev/null +++ b/arch/riscv/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved. + * + * Based on arch/loongarch/vdso/vgetrandom-chacha.S. + */ + +#include <asm/asm.h> +#include <linux/linkage.h> + +.text + +.macro ROTRI rd rs imm + slliw t0, \rs, 32 - \imm + srliw \rd, \rs, \imm + or \rd, \rd, t0 +.endm + +.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3 + \op \d0, \d0, \s0 + \op \d1, \d1, \s1 + \op \d2, \d2, \s2 + \op \d3, \d3, \s3 +.endm + +/* + * a0: output bytes + * a1: 32-byte key input + * a2: 8-byte counter input/output + * a3: number of 64-byte blocks to write to output + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) + +#define output a0 +#define key a1 +#define counter a2 +#define nblocks a3 +#define i a4 +#define state0 s0 +#define state1 s1 +#define state2 s2 +#define state3 s3 +#define state4 s4 +#define state5 s5 +#define state6 s6 +#define state7 s7 +#define state8 s8 +#define state9 s9 +#define state10 s10 +#define state11 s11 +#define state12 a5 +#define state13 a6 +#define state14 a7 +#define state15 t1 +#define cnt t2 +#define copy0 t3 +#define copy1 t4 +#define copy2 t5 +#define copy3 t6 + +/* Packs to be used with OP_4REG */ +#define line0 state0, state1, state2, state3 +#define line1 state4, state5, state6, state7 +#define line2 state8, state9, state10, state11 +#define line3 state12, state13, state14, state15 + +#define line1_perm state5, state6, state7, state4 +#define line2_perm state10, state11, state8, state9 +#define line3_perm state15, state12, state13, state14 + +#define copy copy0, copy1, copy2, copy3 + +#define _16 16, 16, 16, 16 +#define _20 20, 20, 20, 20 +#define _24 24, 24, 24, 24 +#define _25 25, 25, 25, 25 + + /* + * The ABI requires s0-s9 saved. + * This does not violate the stack-less requirement: no sensitive data + * is spilled onto the stack. + */ + addi sp, sp, -12*SZREG + REG_S s0, (sp) + REG_S s1, SZREG(sp) + REG_S s2, 2*SZREG(sp) + REG_S s3, 3*SZREG(sp) + REG_S s4, 4*SZREG(sp) + REG_S s5, 5*SZREG(sp) + REG_S s6, 6*SZREG(sp) + REG_S s7, 7*SZREG(sp) + REG_S s8, 8*SZREG(sp) + REG_S s9, 9*SZREG(sp) + REG_S s10, 10*SZREG(sp) + REG_S s11, 11*SZREG(sp) + + ld cnt, (counter) + + li copy0, 0x61707865 + li copy1, 0x3320646e + li copy2, 0x79622d32 + li copy3, 0x6b206574 + +.Lblock: + /* state[0,1,2,3] = "expand 32-byte k" */ + mv state0, copy0 + mv state1, copy1 + mv state2, copy2 + mv state3, copy3 + + /* state[4,5,..,11] = key */ + lw state4, (key) + lw state5, 4(key) + lw state6, 8(key) + lw state7, 12(key) + lw state8, 16(key) + lw state9, 20(key) + lw state10, 24(key) + lw state11, 28(key) + + /* state[12,13] = counter */ + mv state12, cnt + srli state13, cnt, 32 + + /* state[14,15] = 0 */ + mv state14, zero + mv state15, zero + + li i, 10 +.Lpermute: + /* odd round */ + OP_4REG addw line0, line1 + OP_4REG xor line3, line0 + OP_4REG ROTRI line3, _16 + + OP_4REG addw line2, line3 + OP_4REG xor line1, line2 + OP_4REG ROTRI line1, _20 + + OP_4REG addw line0, line1 + OP_4REG xor line3, line0 + OP_4REG ROTRI line3, _24 + + OP_4REG addw line2, line3 + OP_4REG xor line1, line2 + OP_4REG ROTRI line1, _25 + + /* even round */ + OP_4REG addw line0, line1_perm + OP_4REG xor line3_perm, line0 + OP_4REG ROTRI line3_perm, _16 + + OP_4REG addw line2_perm, line3_perm + OP_4REG xor line1_perm, line2_perm + OP_4REG ROTRI line1_perm, _20 + + OP_4REG addw line0, line1_perm + OP_4REG xor line3_perm, line0 + OP_4REG ROTRI line3_perm, _24 + + OP_4REG addw line2_perm, line3_perm + OP_4REG xor line1_perm, line2_perm + OP_4REG ROTRI line1_perm, _25 + + addi i, i, -1 + bnez i, .Lpermute + + /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */ + OP_4REG addw line0, copy + sw state0, (output) + sw state1, 4(output) + sw state2, 8(output) + sw state3, 12(output) + + /* from now on state[0,1,2,3] are scratch registers */ + + /* state[0,1,2,3] = lo(key) */ + lw state0, (key) + lw state1, 4(key) + lw state2, 8(key) + lw state3, 12(key) + + /* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */ + OP_4REG addw line1, line0 + sw state4, 16(output) + sw state5, 20(output) + sw state6, 24(output) + sw state7, 28(output) + + /* state[0,1,2,3] = hi(key) */ + lw state0, 16(key) + lw state1, 20(key) + lw state2, 24(key) + lw state3, 28(key) + + /* output[8,9,10,11] = tmp[0,1,2,3] + state[8,9,10,11] */ + OP_4REG addw line2, line0 + sw state8, 32(output) + sw state9, 36(output) + sw state10, 40(output) + sw state11, 44(output) + + /* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */ + addw state12, state12, cnt + srli state0, cnt, 32 + addw state13, state13, state0 + sw state12, 48(output) + sw state13, 52(output) + sw state14, 56(output) + sw state15, 60(output) + + /* ++counter */ + addi cnt, cnt, 1 + + /* output += 64 */ + addi output, output, 64 + /* --nblocks */ + addi nblocks, nblocks, -1 + bnez nblocks, .Lblock + + /* counter = [cnt_lo, cnt_hi] */ + sd cnt, (counter) + + /* Zero out the potentially sensitive regs, in case nothing uses these + * again. As at now copy[0,1,2,3] just contains "expand 32-byte k" and + * state[0,...,11] are s0-s11 those we'll restore in the epilogue, we + * only need to zero state[12,...,15]. + */ + mv state12, zero + mv state13, zero + mv state14, zero + mv state15, zero + + REG_L s0, (sp) + REG_L s1, SZREG(sp) + REG_L s2, 2*SZREG(sp) + REG_L s3, 3*SZREG(sp) + REG_L s4, 4*SZREG(sp) + REG_L s5, 5*SZREG(sp) + REG_L s6, 6*SZREG(sp) + REG_L s7, 7*SZREG(sp) + REG_L s8, 8*SZREG(sp) + REG_L s9, 9*SZREG(sp) + REG_L s10, 10*SZREG(sp) + REG_L s11, 11*SZREG(sp) + addi sp, sp, 12*SZREG + + ret +SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S new file mode 100644 index 000000000000..7ce4de6f6e69 --- /dev/null +++ b/arch/riscv/kernel/vec-copy-unaligned.S @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2024 Rivos Inc. */ + +#include <linux/args.h> +#include <linux/linkage.h> +#include <asm/asm.h> + + .text + +#define WORD_EEW 32 + +#define WORD_SEW CONCATENATE(e, WORD_EEW) +#define VEC_L CONCATENATE(vle, WORD_EEW).v +#define VEC_S CONCATENATE(vse, WORD_EEW).v + +/* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */ +/* Performs a memcpy without aligning buffers, using word loads and stores. */ +/* Note: The size is truncated to a multiple of WORD_EEW */ +SYM_FUNC_START(__riscv_copy_vec_words_unaligned) + andi a4, a2, ~(WORD_EEW-1) + beqz a4, 2f + add a3, a1, a4 + .option push + .option arch, +zve32x +1: + vsetivli t0, 8, WORD_SEW, m8, ta, ma + VEC_L v0, (a1) + VEC_S v0, (a0) + addi a0, a0, WORD_EEW + addi a1, a1, WORD_EEW + bltu a1, a3, 1b + +2: + .option pop + ret +SYM_FUNC_END(__riscv_copy_vec_words_unaligned) + +/* void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t) */ +/* Performs a memcpy without aligning buffers, using only byte accesses. */ +/* Note: The size is truncated to a multiple of 8 */ +SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned) + andi a4, a2, ~(8-1) + beqz a4, 2f + add a3, a1, a4 + .option push + .option arch, +zve32x +1: + vsetivli t0, 8, e8, m8, ta, ma + vle8.v v0, (a1) + vse8.v v0, (a0) + addi a0, a0, 8 + addi a1, a1, 8 + bltu a1, a3, 1b + +2: + .option pop + ret +SYM_FUNC_END(__riscv_copy_vec_bytes_unaligned) diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 6727d1d3b8f2..184f780c932d 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -33,7 +33,17 @@ int riscv_v_setup_vsize(void) { unsigned long this_vsize; - /* There are 32 vector registers with vlenb length. */ + /* + * There are 32 vector registers with vlenb length. + * + * If the thead,vlenb property was provided by the firmware, use that + * instead of probing the CSRs. + */ + if (thead_vlenb_of) { + riscv_v_vsize = thead_vlenb_of * 32; + return 0; + } + riscv_v_enable(); this_vsize = csr_read(CSR_VLENB) * 32; riscv_v_disable(); @@ -53,7 +63,7 @@ int riscv_v_setup_vsize(void) void __init riscv_v_setup_ctx_cache(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx", @@ -66,7 +76,7 @@ void __init riscv_v_setup_ctx_cache(void) #endif } -static bool insn_is_vector(u32 insn_buf) +bool insn_is_vector(u32 insn_buf) { u32 opcode = insn_buf & __INSN_OPCODE_MASK; u32 width, csr; @@ -173,8 +183,11 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) u32 __user *epc = (u32 __user *)regs->epc; u32 insn = (u32)regs->badaddr; + if (!(has_vector() || has_xtheadvector())) + return false; + /* Do not handle if V is not supported, or disabled */ - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V)) + if (!riscv_v_vstate_ctrl_user_allowed()) return false; /* If V has been enabled then it is not the first-use trap */ @@ -213,7 +226,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return; next = riscv_v_ctrl_get_next(tsk); @@ -235,7 +248,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk) long riscv_v_vstate_ctrl_get_current(void) { - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK; @@ -246,7 +259,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) bool inherit; int cur, next; - if (!has_vector()) + if (!(has_vector() || has_xtheadvector())) return -EINVAL; if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK) @@ -284,7 +297,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg) #ifdef CONFIG_SYSCTL -static struct ctl_table riscv_v_default_vstate_table[] = { +static const struct ctl_table riscv_v_default_vstate_table[] = { { .procname = "riscv_v_default_allow", .data = &riscv_v_implicit_uacc, @@ -296,7 +309,7 @@ static struct ctl_table riscv_v_default_vstate_table[] = { static int __init riscv_v_sysctl_init(void) { - if (has_vector()) + if (has_vector() || has_xtheadvector()) if (!register_sysctl("abi", riscv_v_default_vstate_table)) return -EINVAL; return 0; @@ -306,7 +319,7 @@ static int __init riscv_v_sysctl_init(void) static int __init riscv_v_sysctl_init(void) { return 0; } #endif /* ! CONFIG_SYSCTL */ -static int riscv_v_init(void) +static int __init riscv_v_init(void) { return riscv_v_sysctl_init(); } diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c new file mode 100644 index 000000000000..92d8ff81f42c --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Rivos, Inc + */ + +#include <asm/vendorid_list.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> +#include <asm/vendor_extensions/sifive.h> +#include <asm/vendor_extensions/thead.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = { +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + &riscv_isa_vendor_ext_list_andes, +#endif +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE + &riscv_isa_vendor_ext_list_sifive, +#endif +#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + &riscv_isa_vendor_ext_list_thead, +#endif +}; + +const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list); + +/** + * __riscv_isa_vendor_extension_available() - Check whether given vendor + * extension is available or not. + * + * @cpu: check if extension is available on this cpu + * @vendor: vendor that the extension is a member of + * @bit: bit position of the desired extension + * Return: true or false + * + * NOTE: When cpu is -1, will check if extension is available on all cpus + */ +bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit) +{ + struct riscv_isavendorinfo *bmap; + struct riscv_isavendorinfo *cpu_bmap; + + switch (vendor) { + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES + case ANDES_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_andes.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; + break; + #endif + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE + case SIFIVE_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_sifive.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap; + break; + #endif + #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD + case THEAD_VENDOR_ID: + bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap; + cpu_bmap = riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap; + break; + #endif + default: + return false; + } + + if (cpu != -1) + bmap = &cpu_bmap[cpu]; + + if (bit >= RISCV_ISA_VENDOR_EXT_MAX) + return false; + + return test_bit(bit, bmap->isa); +} +EXPORT_SYMBOL_GPL(__riscv_isa_vendor_extension_available); diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile new file mode 100644 index 000000000000..a4eca96d1c8a --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive_hwprobe.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o +obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o diff --git a/arch/riscv/kernel/vendor_extensions/andes.c b/arch/riscv/kernel/vendor_extensions/andes.c new file mode 100644 index 000000000000..51f302b6d503 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/andes.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/andes.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +/* All Andes vendor extensions supported in Linux */ +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_andes[] = { + __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_VENDOR_EXT_XANDESPMU), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_andes), + .ext_data = riscv_isa_vendor_ext_andes, +}; diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c new file mode 100644 index 000000000000..1411337dc1e6 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/sifive.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/sifive.h> + +#include <linux/array_size.h> +#include <linux/types.h> + +/* All SiFive vendor extensions supported in Linux */ +const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = { + __RISCV_ISA_EXT_DATA(xsfvfnrclipxfqf, RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF), + __RISCV_ISA_EXT_DATA(xsfvfwmaccqqq, RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ), + __RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD), + __RISCV_ISA_EXT_DATA(xsfvqmaccqoq, RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_sifive), + .ext_data = riscv_isa_vendor_ext_sifive, +}; diff --git a/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c new file mode 100644 index 000000000000..1f77f6309763 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/vendor_extensions/sifive.h> +#include <asm/vendor_extensions/sifive_hwprobe.h> +#include <asm/vendor_extensions/vendor_hwprobe.h> + +#include <linux/cpumask.h> +#include <linux/types.h> + +#include <uapi/asm/hwprobe.h> +#include <uapi/asm/vendor/sifive.h> + +void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cpumask *cpus) +{ + VENDOR_EXTENSION_SUPPORTED(pair, cpus, + riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap, { + VENDOR_EXT_KEY(XSFVQMACCDOD); + VENDOR_EXT_KEY(XSFVQMACCQOQ); + VENDOR_EXT_KEY(XSFVFNRCLIPXFQF); + VENDOR_EXT_KEY(XSFVFWMACCQQQ); + }); +} diff --git a/arch/riscv/kernel/vendor_extensions/thead.c b/arch/riscv/kernel/vendor_extensions/thead.c new file mode 100644 index 000000000000..519dbf70710a --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/cpufeature.h> +#include <asm/vendor_extensions.h> +#include <asm/vendor_extensions/thead.h> + +#include <linux/array_size.h> +#include <linux/cpumask.h> +#include <linux/types.h> + +/* All T-Head vendor extensions supported in Linux */ +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_thead[] = { + __RISCV_ISA_EXT_DATA(xtheadvector, RISCV_ISA_VENDOR_EXT_XTHEADVECTOR), +}; + +struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead = { + .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_thead), + .ext_data = riscv_isa_vendor_ext_thead, +}; + +void disable_xtheadvector(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap[cpu].isa); + + clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap.isa); +} diff --git a/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c new file mode 100644 index 000000000000..2eba34011786 --- /dev/null +++ b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <asm/vendor_extensions/thead.h> +#include <asm/vendor_extensions/thead_hwprobe.h> +#include <asm/vendor_extensions/vendor_hwprobe.h> + +#include <linux/cpumask.h> +#include <linux/types.h> + +#include <uapi/asm/hwprobe.h> +#include <uapi/asm/vendor/thead.h> + +void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus) +{ + VENDOR_EXTENSION_SUPPORTED(pair, cpus, + riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap, { + VENDOR_EXT_KEY(XTHEADVECTOR); + }); +} diff --git a/arch/riscv/kernel/vmcore_info.c b/arch/riscv/kernel/vmcore_info.c index 6d7a22522d63..d5e448aa90e7 100644 --- a/arch/riscv/kernel/vmcore_info.c +++ b/arch/riscv/kernel/vmcore_info.c @@ -19,6 +19,13 @@ void arch_crash_save_vmcoreinfo(void) #endif #endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); +#ifdef CONFIG_XIP_KERNEL + /* TODO: Communicate with crash-utility developers on the information to + * export. The XIP case is more complicated, because the virtual-physical + * address offset depends on whether the address is in ROM or in RAM. + */ +#else vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", kernel_map.va_kernel_pa_offset); +#endif } diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index 8c3daa1b0531..a7611789bad5 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -14,6 +14,7 @@ #include <asm/page.h> #include <asm/cache.h> #include <asm/thread_info.h> +#include <asm/set_memory.h> OUTPUT_ARCH(riscv) ENTRY(_start) @@ -65,10 +66,10 @@ SECTIONS * From this point, stuff is considered writable and will be copied to RAM */ __data_loc = ALIGN(PAGE_SIZE); /* location in file */ - . = KERNEL_LINK_ADDR + XIP_OFFSET; /* location in memory */ + . = ALIGN(SECTION_ALIGN); /* location in memory */ #undef LOAD_OFFSET -#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK)) +#define LOAD_OFFSET (KERNEL_LINK_ADDR + _sdata - __data_loc) _sdata = .; /* Start of data section */ _data = .; diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 002ca58dd998..61bd5ba6680a 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -97,6 +97,9 @@ SECTIONS { EXIT_DATA } + + RUNTIME_CONST_VARIABLES + PERCPU_SECTION(L1_CACHE_BYTES) .rel.dyn : { |