Diffstat (limited to 'arch/riscv/kernel')
-rw-r--r--  arch/riscv/kernel/Makefile | 30
-rw-r--r--  arch/riscv/kernel/Makefile.syscalls | 4
-rw-r--r--  arch/riscv/kernel/acpi.c | 54
-rw-r--r--  arch/riscv/kernel/acpi_numa.c | 131
-rw-r--r--  arch/riscv/kernel/alternative.c | 4
-rw-r--r--  arch/riscv/kernel/asm-offsets.c | 46
-rw-r--r--  arch/riscv/kernel/bugs.c | 60
-rw-r--r--  arch/riscv/kernel/cacheinfo.c | 59
-rw-r--r--  arch/riscv/kernel/compat_syscall_table.c | 6
-rw-r--r--  arch/riscv/kernel/compat_vdso/Makefile | 10
-rw-r--r--  arch/riscv/kernel/copy-unaligned.h | 5
-rw-r--r--  arch/riscv/kernel/cpu-hotplug.c | 2
-rw-r--r--  arch/riscv/kernel/cpu.c | 75
-rw-r--r--  arch/riscv/kernel/cpu_ops_sbi.c | 2
-rw-r--r--  arch/riscv/kernel/cpu_ops_spinwait.c | 3
-rw-r--r--  arch/riscv/kernel/cpufeature.c | 962
-rw-r--r--  arch/riscv/kernel/efi-header.S | 8
-rw-r--r--  arch/riscv/kernel/elf_kexec.c | 475
-rw-r--r--  arch/riscv/kernel/entry.S | 143
-rw-r--r--  arch/riscv/kernel/fpu.S | 6
-rw-r--r--  arch/riscv/kernel/ftrace.c | 240
-rw-r--r--  arch/riscv/kernel/head.S | 32
-rw-r--r--  arch/riscv/kernel/jump_label.c | 32
-rw-r--r--  arch/riscv/kernel/kernel_mode_fpu.c | 28
-rw-r--r--  arch/riscv/kernel/kernel_mode_vector.c | 8
-rw-r--r--  arch/riscv/kernel/kexec_elf.c | 144
-rw-r--r--  arch/riscv/kernel/kexec_image.c | 96
-rw-r--r--  arch/riscv/kernel/kgdb.c | 6
-rw-r--r--  arch/riscv/kernel/machine_kexec.c | 31
-rw-r--r--  arch/riscv/kernel/machine_kexec_file.c | 361
-rw-r--r--  arch/riscv/kernel/mcount-dyn.S | 264
-rw-r--r--  arch/riscv/kernel/mcount.S | 22
-rw-r--r--  arch/riscv/kernel/module-sections.c | 92
-rw-r--r--  arch/riscv/kernel/module.c | 45
-rw-r--r--  arch/riscv/kernel/paravirt.c | 6
-rw-r--r--  arch/riscv/kernel/patch.c | 110
-rw-r--r--  arch/riscv/kernel/perf_callchain.c | 54
-rw-r--r--  arch/riscv/kernel/pi/Makefile | 17
-rw-r--r--  arch/riscv/kernel/pi/archrandom_early.c | 30
-rw-r--r--  arch/riscv/kernel/pi/cmdline_early.c | 10
-rw-r--r--  arch/riscv/kernel/pi/fdt_early.c | 167
-rw-r--r--  arch/riscv/kernel/pi/pi.h | 20
-rw-r--r--  arch/riscv/kernel/probes/Makefile | 1
-rw-r--r--  arch/riscv/kernel/probes/ftrace.c | 62
-rw-r--r--  arch/riscv/kernel/probes/kprobes.c | 32
-rw-r--r--  arch/riscv/kernel/probes/uprobes.c | 10
-rw-r--r--  arch/riscv/kernel/process.c | 201
-rw-r--r--  arch/riscv/kernel/ptrace.c | 48
-rw-r--r--  arch/riscv/kernel/riscv_ksyms.c | 3
-rw-r--r--  arch/riscv/kernel/sbi-ipi.c | 13
-rw-r--r--  arch/riscv/kernel/sbi.c | 137
-rw-r--r--  arch/riscv/kernel/sbi_ecall.c | 48
-rw-r--r--  arch/riscv/kernel/setup.c | 86
-rw-r--r--  arch/riscv/kernel/signal.c | 29
-rw-r--r--  arch/riscv/kernel/smp.c | 57
-rw-r--r--  arch/riscv/kernel/smpboot.c | 28
-rw-r--r--  arch/riscv/kernel/stacktrace.c | 76
-rw-r--r--  arch/riscv/kernel/suspend.c | 70
-rw-r--r--  arch/riscv/kernel/sys_hwprobe.c | 136
-rw-r--r--  arch/riscv/kernel/sys_riscv.c | 5
-rw-r--r--  arch/riscv/kernel/syscall_table.c | 6
-rw-r--r--  arch/riscv/kernel/traps.c | 93
-rw-r--r--  arch/riscv/kernel/traps_misaligned.c | 369
-rw-r--r--  arch/riscv/kernel/unaligned_access_speed.c | 492
-rw-r--r--  arch/riscv/kernel/vdso.c | 126
-rw-r--r--  arch/riscv/kernel/vdso/Makefile | 38
-rw-r--r--  arch/riscv/kernel/vdso/getrandom.c | 10
-rw-r--r--  arch/riscv/kernel/vdso/hwprobe.c | 6
-rw-r--r--  arch/riscv/kernel/vdso/vdso.lds.S | 10
-rw-r--r--  arch/riscv/kernel/vdso/vgetrandom-chacha.S | 249
-rw-r--r--  arch/riscv/kernel/vec-copy-unaligned.S | 58
-rw-r--r--  arch/riscv/kernel/vector.c | 33
-rw-r--r--  arch/riscv/kernel/vendor_extensions.c | 76
-rw-r--r--  arch/riscv/kernel/vendor_extensions/Makefile | 7
-rw-r--r--  arch/riscv/kernel/vendor_extensions/andes.c | 18
-rw-r--r--  arch/riscv/kernel/vendor_extensions/sifive.c | 21
-rw-r--r--  arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c | 22
-rw-r--r--  arch/riscv/kernel/vendor_extensions/thead.c | 29
-rw-r--r--  arch/riscv/kernel/vendor_extensions/thead_hwprobe.c | 19
-rw-r--r--  arch/riscv/kernel/vmcore_info.c | 7
-rw-r--r--  arch/riscv/kernel/vmlinux-xip.lds.S | 5
-rw-r--r--  arch/riscv/kernel/vmlinux.lds.S | 3
82 files changed, 4652 insertions, 1987 deletions
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 5e591f831638..c7b542573407 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -9,8 +9,8 @@ CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
endif
-CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,)
-CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,)
+CFLAGS_syscall_table.o += $(call cc-disable-warning, override-init)
+CFLAGS_compat_syscall_table.o += $(call cc-disable-warning, override-init)
ifdef CONFIG_KEXEC_CORE
AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax)
@@ -20,26 +20,34 @@ endif
ifdef CONFIG_RISCV_ALTERNATIVE_EARLY
CFLAGS_alternative.o := -mcmodel=medany
CFLAGS_cpufeature.o := -mcmodel=medany
+CFLAGS_sbi_ecall.o := -mcmodel=medany
ifdef CONFIG_FTRACE
CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_sbi_ecall.o = $(CC_FLAGS_FTRACE)
endif
ifdef CONFIG_RELOCATABLE
CFLAGS_alternative.o += -fno-pie
CFLAGS_cpufeature.o += -fno-pie
+CFLAGS_sbi_ecall.o += -fno-pie
endif
ifdef CONFIG_KASAN
KASAN_SANITIZE_alternative.o := n
KASAN_SANITIZE_cpufeature.o := n
+KASAN_SANITIZE_sbi_ecall.o := n
+endif
+ifdef CONFIG_FORTIFY_SOURCE
+CFLAGS_alternative.o += -D__NO_FORTIFY
+CFLAGS_cpufeature.o += -D__NO_FORTIFY
+CFLAGS_sbi_ecall.o += -D__NO_FORTIFY
endif
endif
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
obj-y += head.o
obj-y += soc.o
obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o
-obj-y += copy-unaligned.o
obj-y += cpu.o
obj-y += cpufeature.o
obj-y += entry.o
@@ -59,12 +67,19 @@ obj-y += riscv_ksyms.o
obj-y += stacktrace.o
obj-y += cacheinfo.o
obj-y += patch.o
+obj-y += vendor_extensions.o
+obj-y += vendor_extensions/
obj-y += probes/
obj-y += tests/
obj-$(CONFIG_MMU) += vdso.o vdso/
obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
+obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o
+obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o
+obj-$(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS) += vec-copy-unaligned.o
+
obj-$(CONFIG_FPU) += fpu.o
+obj-$(CONFIG_FPU) += kernel_mode_fpu.o
obj-$(CONFIG_RISCV_ISA_V) += vector.o
obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
obj-$(CONFIG_SMP) += smpboot.o
@@ -83,7 +98,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
-obj-$(CONFIG_RISCV_SBI) += sbi.o
+obj-$(CONFIG_RISCV_SBI) += sbi.o sbi_ecall.o
ifeq ($(CONFIG_RISCV_SBI), y)
obj-$(CONFIG_SMP) += sbi-ipi.o
obj-$(CONFIG_SMP) += cpu_ops_sbi.o
@@ -92,7 +107,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o
-obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o
+obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o kexec_image.o machine_kexec_file.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
@@ -107,3 +122,6 @@ obj-$(CONFIG_COMPAT) += compat_vdso/
obj-$(CONFIG_64BIT) += pi/
obj-$(CONFIG_ACPI) += acpi.o
+obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
+
+obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += bugs.o
diff --git a/arch/riscv/kernel/Makefile.syscalls b/arch/riscv/kernel/Makefile.syscalls
new file mode 100644
index 000000000000..9668fd1faf60
--- /dev/null
+++ b/arch/riscv/kernel/Makefile.syscalls
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 += riscv memfd_secret
+syscall_abis_64 += riscv rlimit memfd_secret
diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c
index e619edc8b0cc..3f6d5a6789e8 100644
--- a/arch/riscv/kernel/acpi.c
+++ b/arch/riscv/kernel/acpi.c
@@ -17,7 +17,9 @@
#include <linux/efi.h>
#include <linux/io.h>
#include <linux/memblock.h>
+#include <linux/of_fdt.h>
#include <linux/pci.h>
+#include <linux/serial_core.h>
int acpi_noirq = 1; /* skip ACPI IRQ initialization */
int acpi_disabled = 1;
@@ -131,7 +133,7 @@ void __init acpi_boot_table_init(void)
if (param_acpi_off ||
(!param_acpi_on && !param_acpi_force &&
efi.acpi20 == EFI_INVALID_TABLE_ADDR))
- return;
+ goto done;
/*
* ACPI is disabled at this point. Enable it in order to parse
@@ -151,6 +153,14 @@ void __init acpi_boot_table_init(void)
if (!param_acpi_force)
disable_acpi();
}
+
+done:
+ if (acpi_disabled) {
+ if (earlycon_acpi_spcr_enable)
+ early_init_dt_scan_chosen_stdout();
+ } else {
+ acpi_parse_spcr(earlycon_acpi_spcr_enable, true);
+ }
}
static int acpi_parse_madt_rintc(union acpi_subtable_headers *header, const unsigned long end)
@@ -191,11 +201,6 @@ struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu)
return &cpu_madt_rintc[cpu];
}
-u32 get_acpi_id_for_cpu(int cpu)
-{
- return acpi_cpu_get_madt_rintc(cpu)->uid;
-}
-
/*
* __acpi_map_table() will be called before paging_init(), so early_ioremap()
* or early_memremap() should be called here to for ACPI table mapping.
@@ -205,7 +210,7 @@ void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size)
if (!size)
return NULL;
- return early_ioremap(phys, size);
+ return early_memremap(phys, size);
}
void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
@@ -213,7 +218,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
if (!map || !size)
return;
- early_iounmap(map, size);
+ early_memunmap(map, size);
}
void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
@@ -300,35 +305,32 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
}
}
- return ioremap_prot(phys, size, pgprot_val(prot));
+ return ioremap_prot(phys, size, prot);
}
#ifdef CONFIG_PCI
/*
- * These interfaces are defined just to enable building ACPI core.
- * TODO: Update it with actual implementation when external interrupt
- * controller support is added in RISC-V ACPI.
+ * raw_pci_read/write - Platform-specific PCI config space access.
*/
-int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
- int reg, int len, u32 *val)
+int raw_pci_read(unsigned int domain, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *val)
{
- return PCIBIOS_DEVICE_NOT_FOUND;
-}
+ struct pci_bus *b = pci_find_bus(domain, bus);
-int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
- int reg, int len, u32 val)
-{
- return PCIBIOS_DEVICE_NOT_FOUND;
+ if (!b)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ return b->ops->read(b, devfn, reg, len, val);
}
-int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
+int raw_pci_write(unsigned int domain, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 val)
{
- return -1;
-}
+ struct pci_bus *b = pci_find_bus(domain, bus);
-struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
-{
- return NULL;
+ if (!b)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ return b->ops->write(b, devfn, reg, len, val);
}
+
#endif /* CONFIG_PCI */
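The raw_pci_read()/raw_pci_write() stubs that used to return PCIBIOS_DEVICE_NOT_FOUND unconditionally are replaced above with a lookup-then-dispatch implementation: resolve the pci_bus for the domain/bus pair, then forward to its ops. Below is a minimal, self-contained sketch of that pattern; every mock_* name and the error constant are hypothetical stand-ins, not kernel APIs.

```c
/* Lookup-then-dispatch sketch: find the bus object, fail with a
 * "not found" code if absent, otherwise forward to its accessor. */
#include <stdio.h>

#define MOCK_DEVICE_NOT_FOUND 0x86 /* stand-in for PCIBIOS_DEVICE_NOT_FOUND */

struct mock_bus;
struct mock_ops {
	int (*read)(struct mock_bus *b, int devfn, int reg, int len, unsigned int *val);
};
struct mock_bus {
	int domain, busnr;
	const struct mock_ops *ops;
};

static int mock_read(struct mock_bus *b, int devfn, int reg, int len, unsigned int *val)
{
	*val = 0xdeadbeef; /* pretend config-space read */
	return 0;
}

static const struct mock_ops ops = { .read = mock_read };
static struct mock_bus bus0 = { .domain = 0, .busnr = 0, .ops = &ops };

static struct mock_bus *mock_find_bus(int domain, int busnr)
{
	return (domain == 0 && busnr == 0) ? &bus0 : NULL;
}

static int mock_raw_read(int domain, int busnr, int devfn, int reg, int len, unsigned int *val)
{
	struct mock_bus *b = mock_find_bus(domain, busnr);

	if (!b) /* no bus enumerated yet: same error the old stub returned */
		return MOCK_DEVICE_NOT_FOUND;
	return b->ops->read(b, devfn, reg, len, val);
}

int main(void)
{
	unsigned int val;
	int ret = mock_raw_read(0, 0, 0, 0, 4, &val);

	printf("ret=%d val=0x%x\n", ret, val);
	return 0;
}
```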
diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c
new file mode 100644
index 000000000000..130769e3a99c
--- /dev/null
+++ b/arch/riscv/kernel/acpi_numa.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACPI 6.6 based NUMA setup for RISCV
+ * Lots of code was borrowed from arch/arm64/kernel/acpi_numa.c
+ *
+ * Copyright 2004 Andi Kleen, SuSE Labs.
+ * Copyright (C) 2013-2016, Linaro Ltd.
+ * Author: Hanjun Guo <hanjun.guo@linaro.org>
+ * Copyright (C) 2024 Intel Corporation.
+ *
+ * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
+ *
+ * Called from acpi_numa_init while reading the SRAT and SLIT tables.
+ * Assumes all memory regions belonging to a single proximity domain
+ * are in one chunk. Holes between them will be included in the node.
+ */
+
+#define pr_fmt(fmt) "ACPI: NUMA: " fmt
+
+#include <linux/acpi.h>
+#include <linux/bitmap.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/topology.h>
+
+#include <asm/numa.h>
+
+static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE };
+
+static int __init acpi_numa_get_nid(unsigned int cpu)
+{
+ return acpi_early_node_map[cpu];
+}
+
+static inline int get_cpu_for_acpi_id(u32 uid)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ if (uid == get_acpi_id_for_cpu(cpu))
+ return cpu;
+
+ return -EINVAL;
+}
+
+static int __init acpi_parse_rintc_pxm(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_srat_rintc_affinity *pa;
+ int cpu, pxm, node;
+
+ if (srat_disabled())
+ return -EINVAL;
+
+ pa = (struct acpi_srat_rintc_affinity *)header;
+ if (!pa)
+ return -EINVAL;
+
+ if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED))
+ return 0;
+
+ pxm = pa->proximity_domain;
+ node = pxm_to_node(pxm);
+
+ /*
+ * If we can't map the UID to a logical cpu this
+ * means that the UID is not part of possible cpus
+ * so we do not need a NUMA mapping for it, skip
+ * the SRAT entry and keep parsing.
+ */
+ cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid);
+ if (cpu < 0)
+ return 0;
+
+ acpi_early_node_map[cpu] = node;
+ pr_info("SRAT: PXM %d -> HARTID 0x%lx -> Node %d\n", pxm,
+ cpuid_to_hartid_map(cpu), node);
+
+ return 0;
+}
+
+void __init acpi_map_cpus_to_nodes(void)
+{
+ int i;
+
+ /*
+ * In ACPI, SMP and CPU NUMA information is provided in separate
+ * static tables, namely the MADT and the SRAT.
+ *
+ * Thus, it is simpler to first create the cpu logical map through
+ * an MADT walk and then map the logical cpus to their node ids
+ * as separate steps.
+ */
+ acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat),
+ ACPI_SRAT_TYPE_RINTC_AFFINITY, acpi_parse_rintc_pxm, 0);
+
+ for (i = 0; i < nr_cpu_ids; i++)
+ early_map_cpu_to_node(i, acpi_numa_get_nid(i));
+}
+
+/* Callback for Proximity Domain -> logical node ID mapping */
+void __init acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa)
+{
+ int pxm, node;
+
+ if (srat_disabled())
+ return;
+
+ if (pa->header.length < sizeof(struct acpi_srat_rintc_affinity)) {
+ pr_err("SRAT: Invalid SRAT header length: %d\n", pa->header.length);
+ bad_srat();
+ return;
+ }
+
+ if (!(pa->flags & ACPI_SRAT_RINTC_ENABLED))
+ return;
+
+ pxm = pa->proximity_domain;
+ node = acpi_map_pxm_to_node(pxm);
+
+ if (node == NUMA_NO_NODE) {
+ pr_err("SRAT: Too many proximity domains %d\n", pxm);
+ bad_srat();
+ return;
+ }
+
+ node_set(node, numa_nodes_parsed);
+}
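acpi_parse_rintc_pxm() above performs a reverse lookup from the ACPI processor UID to a logical cpu and records that cpu's node, skipping UIDs that are not possible cpus. A toy, self-contained illustration of that two-step mapping follows, with made-up UID and proximity-domain data; for brevity the proximity domain is used directly as the node id, where the real code converts it via pxm_to_node().

```c
/* UID -> logical cpu (reverse lookup), then cpu -> node. */
#include <stdio.h>

#define NR_MOCK_CPUS 4
#define NO_NODE (-1)

/* Pretend an earlier MADT walk produced this cpu -> UID table. */
static const unsigned int cpu_to_uid[NR_MOCK_CPUS] = { 10, 11, 20, 21 };
static int early_node_map[NR_MOCK_CPUS] = { NO_NODE, NO_NODE, NO_NODE, NO_NODE };

static int cpu_for_uid(unsigned int uid)
{
	for (int cpu = 0; cpu < NR_MOCK_CPUS; cpu++)
		if (cpu_to_uid[cpu] == uid)
			return cpu;
	return -1; /* UID is not a possible cpu: caller skips the SRAT entry */
}

int main(void)
{
	/* Pretend SRAT RINTC affinity entries: (uid, proximity domain). */
	const struct { unsigned int uid; int pxm; } srat[] = {
		{ 10, 0 }, { 11, 0 }, { 20, 1 }, { 21, 1 }, { 99, 1 } /* 99: no such cpu */
	};

	for (unsigned int i = 0; i < sizeof(srat) / sizeof(srat[0]); i++) {
		int cpu = cpu_for_uid(srat[i].uid);

		if (cpu < 0)
			continue; /* mirrors "skip the SRAT entry and keep parsing" */
		early_node_map[cpu] = srat[i].pxm;
	}

	for (int cpu = 0; cpu < NR_MOCK_CPUS; cpu++)
		printf("cpu%d -> node %d\n", cpu, early_node_map[cpu]);
	return 0;
}
```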
diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
index 319a1da0358b..7eb3cb1215c6 100644
--- a/arch/riscv/kernel/alternative.c
+++ b/arch/riscv/kernel/alternative.c
@@ -18,7 +18,7 @@
#include <asm/sbi.h>
#include <asm/csr.h>
#include <asm/insn.h>
-#include <asm/patch.h>
+#include <asm/text-patching.h>
struct cpu_manufacturer_info_t {
unsigned long vendor_id;
@@ -43,7 +43,7 @@ static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info
switch (cpu_mfr_info->vendor_id) {
#ifdef CONFIG_ERRATA_ANDES
- case ANDESTECH_VENDOR_ID:
+ case ANDES_VENDOR_ID:
cpu_mfr_info->patch_func = andes_errata_patch_func;
break;
#endif
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index a03129f40c46..6e8c0d6feae9 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -4,11 +4,10 @@
* Copyright (C) 2017 SiFive
*/
-#define GENERATING_ASM_OFFSETS
-
#include <linux/kbuild.h>
#include <linux/mm.h>
#include <linux/sched.h>
+#include <linux/ftrace.h>
#include <linux/suspend.h>
#include <asm/kvm_host.h>
#include <asm/thread_info.h>
@@ -35,13 +34,20 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]);
OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]);
OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]);
- OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags);
+ OFFSET(TASK_THREAD_SUM, task_struct, thread.sum);
+
+ OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
#ifdef CONFIG_SHADOW_CALL_STACK
OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp);
#endif
+#ifdef CONFIG_64BIT
+ OFFSET(TASK_TI_A0, task_struct, thread_info.a0);
+ OFFSET(TASK_TI_A1, task_struct, thread_info.a1);
+ OFFSET(TASK_TI_A2, task_struct, thread_info.a2);
+#endif
OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]);
@@ -341,6 +347,10 @@ void asm_offsets(void)
offsetof(struct task_struct, thread.s[11])
- offsetof(struct task_struct, thread.ra)
);
+ DEFINE(TASK_THREAD_SUM_RA,
+ offsetof(struct task_struct, thread.sum)
+ - offsetof(struct task_struct, thread.ra)
+ );
DEFINE(TASK_THREAD_F0_F0,
offsetof(struct task_struct, thread.fstate.f[0])
@@ -488,4 +498,34 @@ void asm_offsets(void)
DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN));
OFFSET(STACKFRAME_FP, stackframe, fp);
OFFSET(STACKFRAME_RA, stackframe, ra);
+#ifdef CONFIG_FUNCTION_TRACER
+ DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func));
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call));
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+ DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct __arch_ftrace_regs), STACK_ALIGN));
+ DEFINE(FREGS_EPC, offsetof(struct __arch_ftrace_regs, epc));
+ DEFINE(FREGS_RA, offsetof(struct __arch_ftrace_regs, ra));
+ DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp));
+ DEFINE(FREGS_S0, offsetof(struct __arch_ftrace_regs, s0));
+ DEFINE(FREGS_T1, offsetof(struct __arch_ftrace_regs, t1));
+#ifdef CONFIG_CC_IS_CLANG
+ DEFINE(FREGS_T2, offsetof(struct __arch_ftrace_regs, t2));
+ DEFINE(FREGS_T3, offsetof(struct __arch_ftrace_regs, t3));
+ DEFINE(FREGS_T4, offsetof(struct __arch_ftrace_regs, t4));
+ DEFINE(FREGS_T5, offsetof(struct __arch_ftrace_regs, t5));
+ DEFINE(FREGS_T6, offsetof(struct __arch_ftrace_regs, t6));
+#endif
+ DEFINE(FREGS_A0, offsetof(struct __arch_ftrace_regs, a0));
+ DEFINE(FREGS_A1, offsetof(struct __arch_ftrace_regs, a1));
+ DEFINE(FREGS_A2, offsetof(struct __arch_ftrace_regs, a2));
+ DEFINE(FREGS_A3, offsetof(struct __arch_ftrace_regs, a3));
+ DEFINE(FREGS_A4, offsetof(struct __arch_ftrace_regs, a4));
+ DEFINE(FREGS_A5, offsetof(struct __arch_ftrace_regs, a5));
+ DEFINE(FREGS_A6, offsetof(struct __arch_ftrace_regs, a6));
+ DEFINE(FREGS_A7, offsetof(struct __arch_ftrace_regs, a7));
+#endif
}
diff --git a/arch/riscv/kernel/bugs.c b/arch/riscv/kernel/bugs.c
new file mode 100644
index 000000000000..3655fe7d678c
--- /dev/null
+++ b/arch/riscv/kernel/bugs.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Rivos Inc.
+ */
+
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/sprintf.h>
+
+#include <asm/bugs.h>
+#include <asm/vendor_extensions/thead.h>
+
+static enum mitigation_state ghostwrite_state;
+
+void ghostwrite_set_vulnerable(void)
+{
+ ghostwrite_state = VULNERABLE;
+}
+
+/*
+ * Vendor extension alternatives will use the value set at the time of boot
+ * alternative patching, thus this must be called before boot alternatives are
+ * patched (and after extension probing) to be effective.
+ *
+ * Returns true if mitigated, false otherwise.
+ */
+bool ghostwrite_enable_mitigation(void)
+{
+ if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR) &&
+ ghostwrite_state == VULNERABLE && !cpu_mitigations_off()) {
+ disable_xtheadvector();
+ ghostwrite_state = MITIGATED;
+ return true;
+ }
+
+ return false;
+}
+
+enum mitigation_state ghostwrite_get_state(void)
+{
+ return ghostwrite_state;
+}
+
+ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) {
+ switch (ghostwrite_state) {
+ case UNAFFECTED:
+ return sprintf(buf, "Not affected\n");
+ case MITIGATED:
+ return sprintf(buf, "Mitigation: xtheadvector disabled\n");
+ case VULNERABLE:
+ fallthrough;
+ default:
+ return sprintf(buf, "Vulnerable\n");
+ }
+ } else {
+ return sprintf(buf, "Not affected\n");
+ }
+}
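The comment on ghostwrite_enable_mitigation() encodes an ordering constraint: extension probing must set the state and the mitigation must be enabled before boot alternatives are patched, because patching snapshots the state at that point. Here is a runnable toy model of that ordering; everything except the three-state machine is a hypothetical stand-in, not the real boot flow.

```c
#include <stdbool.h>
#include <stdio.h>

enum state { UNAFFECTED_M, VULNERABLE_M, MITIGATED_M };
static enum state gw = UNAFFECTED_M;

static void probe_extensions(void)  { gw = VULNERABLE_M; } /* like ghostwrite_set_vulnerable() */

static bool enable_mitigation(void)
{
	if (gw == VULNERABLE_M) {
		gw = MITIGATED_M; /* like disabling xtheadvector */
		return true;
	}
	return false;
}

static void patch_alternatives(void)
{
	/* patching bakes in the current state; later changes have no effect */
	printf("patched with state=%d\n", gw);
}

int main(void)
{
	probe_extensions();
	if (enable_mitigation())
		printf("mitigation applied\n");
	patch_alternatives(); /* must come after enable_mitigation() */
	return 0;
}
```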
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index 09e9b88110d1..26b085dbdd07 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -3,6 +3,7 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <asm/cacheinfo.h>
@@ -64,27 +65,55 @@ uintptr_t get_cache_geometry(u32 level, enum cache_type type)
}
static void ci_leaf_init(struct cacheinfo *this_leaf,
- struct device_node *node,
enum cache_type type, unsigned int level)
{
this_leaf->level = level;
this_leaf->type = type;
}
+int init_cache_level(unsigned int cpu)
+{
+ return init_of_cache_level(cpu);
+}
+
int populate_cache_leaves(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct cacheinfo *this_leaf = this_cpu_ci->info_list;
- struct device_node *np = of_cpu_device_node_get(cpu);
- struct device_node *prev = NULL;
+ struct device_node *np, *prev;
int levels = 1, level = 1;
- if (of_property_read_bool(np, "cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
- if (of_property_read_bool(np, "i-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
- if (of_property_read_bool(np, "d-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
+ if (!acpi_disabled) {
+ int ret, fw_levels, split_levels;
+
+ ret = acpi_get_cache_info(cpu, &fw_levels, &split_levels);
+ if (ret)
+ return ret;
+
+ BUG_ON((split_levels > fw_levels) ||
+ (split_levels + fw_levels > this_cpu_ci->num_leaves));
+
+ for (; level <= this_cpu_ci->num_levels; level++) {
+ if (level <= split_levels) {
+ ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
+ ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
+ } else {
+ ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level);
+ }
+ }
+ return 0;
+ }
+
+ np = of_cpu_device_node_get(cpu);
+ if (!np)
+ return -ENOENT;
+
+ if (of_property_present(np, "cache-size"))
+ ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level);
+ if (of_property_present(np, "i-cache-size"))
+ ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
+ if (of_property_present(np, "d-cache-size"))
+ ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
prev = np;
while ((np = of_find_next_cache_node(np))) {
@@ -96,12 +125,12 @@ int populate_cache_leaves(unsigned int cpu)
break;
if (level <= levels)
break;
- if (of_property_read_bool(np, "cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
- if (of_property_read_bool(np, "i-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
- if (of_property_read_bool(np, "d-cache-size"))
- ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
+ if (of_property_present(np, "cache-size"))
+ ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level);
+ if (of_property_present(np, "i-cache-size"))
+ ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
+ if (of_property_present(np, "d-cache-size"))
+ ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
levels = level;
}
of_node_put(np);
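In the new ACPI path above, acpi_get_cache_info() reports how many cache levels the firmware describes and how many of those are split into separate instruction/data caches; each level up to split_levels gets a data leaf plus an instruction leaf, and the remaining levels get a single unified leaf. A standalone illustration with made-up numbers (fw_levels = 3, split_levels = 1 yields L1D, L1I, unified L2 and unified L3, i.e. four leaves):

```c
#include <stdio.h>

int main(void)
{
	int fw_levels = 3, split_levels = 1; /* made-up firmware report */

	for (int level = 1; level <= fw_levels; level++) {
		if (level <= split_levels)
			printf("L%d: DATA leaf + INST leaf\n", level);
		else
			printf("L%d: UNIFIED leaf\n", level);
	}
	return 0;
}
```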
diff --git a/arch/riscv/kernel/compat_syscall_table.c b/arch/riscv/kernel/compat_syscall_table.c
index ad7f2d712f5f..e884c069e88f 100644
--- a/arch/riscv/kernel/compat_syscall_table.c
+++ b/arch/riscv/kernel/compat_syscall_table.c
@@ -8,9 +8,11 @@
#include <asm-generic/syscalls.h>
#include <asm/syscall.h>
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
+
#undef __SYSCALL
#define __SYSCALL(nr, call) asmlinkage long __riscv_##call(const struct pt_regs *);
-#include <asm/unistd.h>
+#include <asm/syscall_table_32.h>
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = __riscv_##call,
@@ -19,5 +21,5 @@ asmlinkage long compat_sys_rt_sigreturn(void);
void * const compat_sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = __riscv_sys_ni_syscall,
-#include <asm/unistd.h>
+#include <asm/syscall_table_32.h>
};
diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile
index 62fa393b2eb2..24e37d1ef7ec 100644
--- a/arch/riscv/kernel/compat_vdso/Makefile
+++ b/arch/riscv/kernel/compat_vdso/Makefile
@@ -34,12 +34,6 @@ obj-compat_vdso := $(addprefix $(obj)/, $(obj-compat_vdso))
obj-y += compat_vdso.o
CPPFLAGS_compat_vdso.lds += -P -C -DCOMPAT_VDSO -U$(ARCH)
-# Disable profiling and instrumentation for VDSO code
-GCOV_PROFILE := n
-KCOV_INSTRUMENT := n
-KASAN_SANITIZE := n
-UBSAN_SANITIZE := n
-
# Force dependency
$(obj)/compat_vdso.o: $(obj)/compat_vdso.so
@@ -58,7 +52,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
# Generate VDSO offsets using helper script
-gen-compat_vdsosym := $(srctree)/$(src)/gen_compat_vdso_offsets.sh
+gen-compat_vdsosym := $(src)/gen_compat_vdso_offsets.sh
quiet_cmd_compat_vdsosym = VDSOSYM $@
cmd_compat_vdsosym = $(NM) $< | $(gen-compat_vdsosym) | LC_ALL=C sort > $@
@@ -74,5 +68,5 @@ quiet_cmd_compat_vdsold = VDSOLD $@
rm $@.tmp
# actual build commands
-quiet_cmd_compat_vdsoas = VDSOAS $@
+quiet_cmd_compat_vdsoas = VDSOAS $@
cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $<
diff --git a/arch/riscv/kernel/copy-unaligned.h b/arch/riscv/kernel/copy-unaligned.h
index e3d70d35b708..85d4d11450cb 100644
--- a/arch/riscv/kernel/copy-unaligned.h
+++ b/arch/riscv/kernel/copy-unaligned.h
@@ -10,4 +10,9 @@
void __riscv_copy_words_unaligned(void *dst, const void *src, size_t size);
void __riscv_copy_bytes_unaligned(void *dst, const void *src, size_t size);
+#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+void __riscv_copy_vec_words_unaligned(void *dst, const void *src, size_t size);
+void __riscv_copy_vec_bytes_unaligned(void *dst, const void *src, size_t size);
+#endif
+
#endif /* __RISCV_KERNEL_COPY_UNALIGNED_H */
diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
index 28b58fc5ad19..a1e38ecfc8be 100644
--- a/arch/riscv/kernel/cpu-hotplug.c
+++ b/arch/riscv/kernel/cpu-hotplug.c
@@ -58,7 +58,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
if (cpu_ops->cpu_is_stopped)
ret = cpu_ops->cpu_is_stopped(cpu);
if (ret)
- pr_warn("CPU%d may not have stopped: %d\n", cpu, ret);
+ pr_warn("CPU%u may not have stopped: %d\n", cpu, ret);
}
/*
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index d11d6320fb0d..f6b13e9f5e6c 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -16,6 +16,7 @@
#include <asm/sbi.h>
#include <asm/smp.h>
#include <asm/pgtable.h>
+#include <asm/vendor_extensions.h>
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
@@ -139,6 +140,34 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid)
return -1;
}
+unsigned long __init riscv_get_marchid(void)
+{
+ struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo);
+
+#if IS_ENABLED(CONFIG_RISCV_SBI)
+ ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid();
+#elif IS_ENABLED(CONFIG_RISCV_M_MODE)
+ ci->marchid = csr_read(CSR_MARCHID);
+#else
+ ci->marchid = 0;
+#endif
+ return ci->marchid;
+}
+
+unsigned long __init riscv_get_mvendorid(void)
+{
+ struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo);
+
+#if IS_ENABLED(CONFIG_RISCV_SBI)
+ ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid();
+#elif IS_ENABLED(CONFIG_RISCV_M_MODE)
+ ci->mvendorid = csr_read(CSR_MVENDORID);
+#else
+ ci->mvendorid = 0;
+#endif
+ return ci->mvendorid;
+}
+
DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
unsigned long riscv_cached_mvendorid(unsigned int cpu_id)
@@ -170,12 +199,16 @@ static int riscv_cpuinfo_starting(unsigned int cpu)
struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo);
#if IS_ENABLED(CONFIG_RISCV_SBI)
- ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid();
- ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid();
+ if (!ci->mvendorid)
+ ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid();
+ if (!ci->marchid)
+ ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid();
ci->mimpid = sbi_spec_is_0_1() ? 0 : sbi_get_mimpid();
#elif IS_ENABLED(CONFIG_RISCV_M_MODE)
- ci->mvendorid = csr_read(CSR_MVENDORID);
- ci->marchid = csr_read(CSR_MARCHID);
+ if (!ci->mvendorid)
+ ci->mvendorid = csr_read(CSR_MVENDORID);
+ if (!ci->marchid)
+ ci->marchid = csr_read(CSR_MARCHID);
ci->mimpid = csr_read(CSR_MIMPID);
#else
ci->mvendorid = 0;
@@ -203,7 +236,33 @@ arch_initcall(riscv_cpuinfo_init);
#ifdef CONFIG_PROC_FS
-static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap)
+#define ALL_CPUS -1
+
+static void print_vendor_isa(struct seq_file *f, int cpu)
+{
+ struct riscv_isavendorinfo *vendor_bitmap;
+ struct riscv_isa_vendor_ext_data_list *ext_list;
+ const struct riscv_isa_ext_data *ext_data;
+
+ for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) {
+ ext_list = riscv_isa_vendor_ext_list[i];
+ ext_data = riscv_isa_vendor_ext_list[i]->ext_data;
+
+ if (cpu == ALL_CPUS)
+ vendor_bitmap = &ext_list->all_harts_isa_bitmap;
+ else
+ vendor_bitmap = &ext_list->per_hart_isa_bitmap[cpu];
+
+ for (int j = 0; j < ext_list->ext_data_count; j++) {
+ if (!__riscv_isa_extension_available(vendor_bitmap->isa, ext_data[j].id))
+ continue;
+
+ seq_printf(f, "_%s", ext_data[j].name);
+ }
+ }
+}
+
+static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap, int cpu)
{
if (IS_ENABLED(CONFIG_32BIT))
@@ -222,6 +281,8 @@ static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap)
seq_printf(f, "%s", riscv_isa_ext[i].name);
}
+ print_vendor_isa(f, cpu);
+
seq_puts(f, "\n");
}
@@ -284,7 +345,7 @@ static int c_show(struct seq_file *m, void *v)
* line.
*/
seq_puts(m, "isa\t\t: ");
- print_isa(m, NULL);
+ print_isa(m, NULL, ALL_CPUS);
print_mmu(m);
if (acpi_disabled) {
@@ -306,7 +367,7 @@ static int c_show(struct seq_file *m, void *v)
* additional extensions not present across all harts.
*/
seq_puts(m, "hart isa\t: ");
- print_isa(m, hart_isa[cpu_id].isa);
+ print_isa(m, hart_isa[cpu_id].isa, cpu_id);
seq_puts(m, "\n");
return 0;
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index 1cc7df740edd..e6fbaaf54956 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -72,7 +72,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
/* Make sure tidle is updated */
smp_mb();
bdata->task_ptr = tidle;
- bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
+ bdata->stack_ptr = task_pt_regs(tidle);
/* Make sure boot data is updated */
smp_mb();
hsm_data = __pa(bdata);
diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c
index 613872b0a21a..24869eb88908 100644
--- a/arch/riscv/kernel/cpu_ops_spinwait.c
+++ b/arch/riscv/kernel/cpu_ops_spinwait.c
@@ -34,8 +34,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid,
/* Make sure tidle is updated */
smp_mb();
- WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
- task_stack_page(tidle) + THREAD_SIZE);
+ WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], task_pt_regs(tidle));
WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
}
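Both the SBI HSM and spinwait bring-up paths now hand secondary harts task_pt_regs(tidle) instead of task_stack_page(tidle) + THREAD_SIZE: the initial stack pointer starts just below the pt_regs frame reserved at the top of the thread stack rather than at the very top of the stack area. A rough userspace sketch of that layout with made-up sizes (the real task_pt_regs() also accounts for alignment and per-arch offsets):

```c
#include <stdio.h>
#include <stdint.h>

#define THREAD_SIZE  (16 * 1024) /* made-up */
#define PT_REGS_SIZE 288         /* made-up stand-in for sizeof(struct pt_regs) */

int main(void)
{
	static unsigned char stack[THREAD_SIZE];
	uintptr_t top = (uintptr_t)stack + THREAD_SIZE; /* old stack_ptr */
	uintptr_t pt_regs = top - PT_REGS_SIZE;         /* new stack_ptr */

	printf("stack top:      %#lx\n", (unsigned long)top);
	printf("initial sp now: %#lx (top - sizeof(pt_regs))\n", (unsigned long)pt_regs);
	return 0;
}
```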
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 79a5a35fab96..743d53415572 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -11,30 +11,29 @@
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/ctype.h>
-#include <linux/jump_label.h>
#include <linux/log2.h>
#include <linux/memory.h>
#include <linux/module.h>
#include <linux/of.h>
#include <asm/acpi.h>
#include <asm/alternative.h>
+#include <asm/bugs.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/hwcap.h>
+#include <asm/text-patching.h>
#include <asm/hwprobe.h>
-#include <asm/patch.h>
#include <asm/processor.h>
#include <asm/sbi.h>
#include <asm/vector.h>
-
-#include "copy-unaligned.h"
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/thead.h>
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
-#define MISALIGNED_ACCESS_JIFFIES_LG2 1
-#define MISALIGNED_BUFFER_SIZE 0x4000
-#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
-#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
+static bool any_cpu_has_zicboz;
+static bool any_cpu_has_zicbop;
+static bool any_cpu_has_zicbom;
unsigned long elf_hwcap __read_mostly;
@@ -44,10 +43,7 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
/* Per-cpu ISA extensions. */
struct riscv_isainfo hart_isa[NR_CPUS];
-/* Performance information */
-DEFINE_PER_CPU(long, misaligned_access_speed);
-
-static cpumask_t fast_misaligned_access;
+u32 thead_vlenb_of;
/**
* riscv_isa_extension_base() - Get base extension word
@@ -59,9 +55,7 @@ static cpumask_t fast_misaligned_access;
*/
unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap)
{
- if (!isa_bitmap)
- return riscv_isa[0];
- return isa_bitmap[0];
+ return !isa_bitmap ? riscv_isa[0] : isa_bitmap[0];
}
EXPORT_SYMBOL_GPL(riscv_isa_extension_base);
@@ -82,55 +76,208 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned i
if (bit >= RISCV_ISA_EXT_MAX)
return false;
- return test_bit(bit, bmap) ? true : false;
+ return test_bit(bit, bmap);
}
EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
-static bool riscv_isa_extension_check(int id)
+static int riscv_ext_f_depends(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
{
- switch (id) {
- case RISCV_ISA_EXT_ZICBOM:
- if (!riscv_cbom_block_size) {
- pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n");
- return false;
- } else if (!is_power_of_2(riscv_cbom_block_size)) {
- pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n");
- return false;
- }
- return true;
- case RISCV_ISA_EXT_ZICBOZ:
- if (!riscv_cboz_block_size) {
- pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n");
- return false;
- } else if (!is_power_of_2(riscv_cboz_block_size)) {
- pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n");
- return false;
- }
- return true;
- case RISCV_ISA_EXT_INVALID:
- return false;
+ if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_f))
+ return 0;
+
+ return -EPROBE_DEFER;
+}
+
+static int riscv_ext_zicbom_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!riscv_cbom_block_size) {
+ pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n");
+ return -EINVAL;
+ }
+ if (!is_power_of_2(riscv_cbom_block_size)) {
+ pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n");
+ return -EINVAL;
+ }
+
+ any_cpu_has_zicbom = true;
+ return 0;
+}
+
+static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!riscv_cboz_block_size) {
+ pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n");
+ return -EINVAL;
}
+ if (!is_power_of_2(riscv_cboz_block_size)) {
+ pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n");
+ return -EINVAL;
+ }
+ any_cpu_has_zicboz = true;
+ return 0;
+}
- return true;
+static int riscv_ext_zicbop_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!riscv_cbop_block_size) {
+ pr_err("Zicbop detected in ISA string, disabling as no cbop-block-size found\n");
+ return -EINVAL;
+ }
+ if (!is_power_of_2(riscv_cbop_block_size)) {
+ pr_err("Zicbop disabled as cbop-block-size present, but is not a power-of-2\n");
+ return -EINVAL;
+ }
+ any_cpu_has_zicbop = true;
+ return 0;
}
-#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size) { \
- .name = #_name, \
- .property = #_name, \
- .id = _id, \
- .subset_ext_ids = _subset_exts, \
- .subset_ext_size = _subset_exts_size \
+static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!IS_ENABLED(CONFIG_FPU))
+ return -EINVAL;
+
+ /*
+ * Due to extension ordering, d is checked before f, so no deferral
+ * is required.
+ */
+ if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d)) {
+ pr_warn_once("This kernel does not support systems with F but not D\n");
+ return -EINVAL;
+ }
+
+ return 0;
}
-#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0)
+static int riscv_ext_d_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!IS_ENABLED(CONFIG_FPU))
+ return -EINVAL;
+
+ return 0;
+}
-/* Used to declare pure "lasso" extension (Zk for instance) */
-#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \
- _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, ARRAY_SIZE(_bundled_exts))
+static int riscv_ext_vector_x_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_V))
+ return -EINVAL;
-/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */
-#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \
- _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts))
+ return 0;
+}
+
+static int riscv_ext_vector_float_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_V))
+ return -EINVAL;
+
+ if (!IS_ENABLED(CONFIG_FPU))
+ return -EINVAL;
+
+ /*
+ * The kernel doesn't support systems that don't implement both of
+ * F and D, so if any of the vector extensions that do floating point
+ * are to be usable, both floating point extensions need to be usable.
+ *
+ * Since this function validates vector only, and v/Zve* are probed
+ * after f/d, there's no need for a deferral here.
+ */
+ if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int riscv_ext_vector_crypto_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_V))
+ return -EINVAL;
+
+ /*
+ * It isn't the kernel's job to check that the binding is correct, so
+ * it should be enough to check that any of the vector extensions are
+ * enabled, which in turn means that vector is usable in this kernel.
+ */
+ if (!__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVE32X))
+ return -EPROBE_DEFER;
+
+ return 0;
+}
+
+static int riscv_ext_zca_depends(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA))
+ return 0;
+
+ return -EPROBE_DEFER;
+}
+static int riscv_ext_zcd_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA) &&
+ __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_d))
+ return 0;
+
+ return -EPROBE_DEFER;
+}
+
+static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (IS_ENABLED(CONFIG_64BIT))
+ return -EINVAL;
+
+ if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZCA) &&
+ __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_f))
+ return 0;
+
+ return -EPROBE_DEFER;
+}
+
+static int riscv_vector_f_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_V))
+ return -EINVAL;
+
+ if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVE32F))
+ return 0;
+
+ return -EPROBE_DEFER;
+}
+
+static int riscv_ext_zvfbfwma_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZFBFMIN) &&
+ __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_ZVFBFMIN))
+ return 0;
+
+ return -EPROBE_DEFER;
+}
+
+static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data,
+ const unsigned long *isa_bitmap)
+{
+ /* SVADE has already been detected, use SVADE only */
+ if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_SVADE))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static const unsigned int riscv_a_exts[] = {
+ RISCV_ISA_EXT_ZAAMO,
+ RISCV_ISA_EXT_ZALRSC,
+};
static const unsigned int riscv_zk_bundled_exts[] = {
RISCV_ISA_EXT_ZBKB,
@@ -202,6 +349,40 @@ static const unsigned int riscv_zvbb_exts[] = {
RISCV_ISA_EXT_ZVKB
};
+#define RISCV_ISA_EXT_ZVE64F_IMPLY_LIST \
+ RISCV_ISA_EXT_ZVE64X, \
+ RISCV_ISA_EXT_ZVE32F, \
+ RISCV_ISA_EXT_ZVE32X
+
+#define RISCV_ISA_EXT_ZVE64D_IMPLY_LIST \
+ RISCV_ISA_EXT_ZVE64F, \
+ RISCV_ISA_EXT_ZVE64F_IMPLY_LIST
+
+#define RISCV_ISA_EXT_V_IMPLY_LIST \
+ RISCV_ISA_EXT_ZVE64D, \
+ RISCV_ISA_EXT_ZVE64D_IMPLY_LIST
+
+static const unsigned int riscv_zve32f_exts[] = {
+ RISCV_ISA_EXT_ZVE32X
+};
+
+static const unsigned int riscv_zve64f_exts[] = {
+ RISCV_ISA_EXT_ZVE64F_IMPLY_LIST
+};
+
+static const unsigned int riscv_zve64d_exts[] = {
+ RISCV_ISA_EXT_ZVE64D_IMPLY_LIST
+};
+
+static const unsigned int riscv_v_exts[] = {
+ RISCV_ISA_EXT_V_IMPLY_LIST
+};
+
+static const unsigned int riscv_zve64x_exts[] = {
+ RISCV_ISA_EXT_ZVE32X,
+ RISCV_ISA_EXT_ZVE64X
+};
+
/*
* While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V
* privileged ISA, the existence of the CSRs is implied by any extension which
@@ -213,6 +394,21 @@ static const unsigned int riscv_xlinuxenvcfg_exts[] = {
};
/*
+ * Zc* spec states that:
+ * - C always implies Zca
+ * - C+F implies Zcf (RV32 only)
+ * - C+D implies Zcd
+ *
+ * These extensions will be enabled and then validated depending on the
+ * availability of F/D RV32.
+ */
+static const unsigned int riscv_c_exts[] = {
+ RISCV_ISA_EXT_ZCA,
+ RISCV_ISA_EXT_ZCF,
+ RISCV_ISA_EXT_ZCD,
+};
+
+/*
* The canonical order of ISA extension names in the ISA string is defined in
* chapter 27 of the unprivileged specification.
*
@@ -254,15 +450,17 @@ static const unsigned int riscv_xlinuxenvcfg_exts[] = {
const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(i, RISCV_ISA_EXT_i),
__RISCV_ISA_EXT_DATA(m, RISCV_ISA_EXT_m),
- __RISCV_ISA_EXT_DATA(a, RISCV_ISA_EXT_a),
- __RISCV_ISA_EXT_DATA(f, RISCV_ISA_EXT_f),
- __RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d),
+ __RISCV_ISA_EXT_SUPERSET(a, RISCV_ISA_EXT_a, riscv_a_exts),
+ __RISCV_ISA_EXT_DATA_VALIDATE(f, RISCV_ISA_EXT_f, riscv_ext_f_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(d, RISCV_ISA_EXT_d, riscv_ext_d_validate),
__RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q),
- __RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c),
- __RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v),
+ __RISCV_ISA_EXT_SUPERSET(c, RISCV_ISA_EXT_c, riscv_c_exts),
+ __RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate),
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
- __RISCV_ISA_EXT_SUPERSET(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts),
- __RISCV_ISA_EXT_SUPERSET(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts),
+ __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zicbop, RISCV_ISA_EXT_ZICBOP, riscv_ext_zicbop_validate),
+ __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate),
+ __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE),
__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
__RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND),
__RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
@@ -270,10 +468,21 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(zihintntl, RISCV_ISA_EXT_ZIHINTNTL),
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
__RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
+ __RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP),
+ __RISCV_ISA_EXT_DATA(zaamo, RISCV_ISA_EXT_ZAAMO),
+ __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA),
__RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS),
+ __RISCV_ISA_EXT_DATA(zalrsc, RISCV_ISA_EXT_ZALRSC),
+ __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS),
__RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zfbfmin, RISCV_ISA_EXT_ZFBFMIN, riscv_ext_f_depends),
__RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH),
__RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN),
+ __RISCV_ISA_EXT_DATA(zca, RISCV_ISA_EXT_ZCA),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zcb, RISCV_ISA_EXT_ZCB, riscv_ext_zca_depends),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zcd, RISCV_ISA_EXT_ZCD, riscv_ext_zcd_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zcf, RISCV_ISA_EXT_ZCF, riscv_ext_zcf_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zcmop, RISCV_ISA_EXT_ZCMOP, riscv_ext_zca_depends),
__RISCV_ISA_EXT_DATA(zba, RISCV_ISA_EXT_ZBA),
__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
__RISCV_ISA_EXT_DATA(zbc, RISCV_ISA_EXT_ZBC),
@@ -292,63 +501,134 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(zksed, RISCV_ISA_EXT_ZKSED),
__RISCV_ISA_EXT_DATA(zksh, RISCV_ISA_EXT_ZKSH),
__RISCV_ISA_EXT_DATA(ztso, RISCV_ISA_EXT_ZTSO),
- __RISCV_ISA_EXT_SUPERSET(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts),
- __RISCV_ISA_EXT_DATA(zvbc, RISCV_ISA_EXT_ZVBC),
+ __RISCV_ISA_EXT_SUPERSET_VALIDATE(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zvbc, RISCV_ISA_EXT_ZVBC, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve32f, RISCV_ISA_EXT_ZVE32F, riscv_zve32f_exts, riscv_ext_vector_float_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zve32x, RISCV_ISA_EXT_ZVE32X, riscv_ext_vector_x_validate),
+ __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64d, RISCV_ISA_EXT_ZVE64D, riscv_zve64d_exts, riscv_ext_vector_float_validate),
+ __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64f, RISCV_ISA_EXT_ZVE64F, riscv_zve64f_exts, riscv_ext_vector_float_validate),
+ __RISCV_ISA_EXT_SUPERSET_VALIDATE(zve64x, RISCV_ISA_EXT_ZVE64X, riscv_zve64x_exts, riscv_ext_vector_x_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zvfbfmin, RISCV_ISA_EXT_ZVFBFMIN, riscv_vector_f_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zvfbfwma, RISCV_ISA_EXT_ZVFBFWMA, riscv_ext_zvfbfwma_validate),
__RISCV_ISA_EXT_DATA(zvfh, RISCV_ISA_EXT_ZVFH),
__RISCV_ISA_EXT_DATA(zvfhmin, RISCV_ISA_EXT_ZVFHMIN),
- __RISCV_ISA_EXT_DATA(zvkb, RISCV_ISA_EXT_ZVKB),
- __RISCV_ISA_EXT_DATA(zvkg, RISCV_ISA_EXT_ZVKG),
- __RISCV_ISA_EXT_BUNDLE(zvkn, riscv_zvkn_bundled_exts),
- __RISCV_ISA_EXT_BUNDLE(zvknc, riscv_zvknc_bundled_exts),
- __RISCV_ISA_EXT_DATA(zvkned, RISCV_ISA_EXT_ZVKNED),
- __RISCV_ISA_EXT_BUNDLE(zvkng, riscv_zvkng_bundled_exts),
- __RISCV_ISA_EXT_DATA(zvknha, RISCV_ISA_EXT_ZVKNHA),
- __RISCV_ISA_EXT_DATA(zvknhb, RISCV_ISA_EXT_ZVKNHB),
- __RISCV_ISA_EXT_BUNDLE(zvks, riscv_zvks_bundled_exts),
- __RISCV_ISA_EXT_BUNDLE(zvksc, riscv_zvksc_bundled_exts),
- __RISCV_ISA_EXT_DATA(zvksed, RISCV_ISA_EXT_ZVKSED),
- __RISCV_ISA_EXT_DATA(zvksh, RISCV_ISA_EXT_ZVKSH),
- __RISCV_ISA_EXT_BUNDLE(zvksg, riscv_zvksg_bundled_exts),
- __RISCV_ISA_EXT_DATA(zvkt, RISCV_ISA_EXT_ZVKT),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zvkb, RISCV_ISA_EXT_ZVKB, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zvkg, RISCV_ISA_EXT_ZVKG, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvkn, riscv_zvkn_bundled_exts, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvknc, riscv_zvknc_bundled_exts, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zvkned, RISCV_ISA_EXT_ZVKNED, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvkng, riscv_zvkng_bundled_exts, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zvknha, RISCV_ISA_EXT_ZVKNHA, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zvknhb, RISCV_ISA_EXT_ZVKNHB, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvks, riscv_zvks_bundled_exts, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvksc, riscv_zvksc_bundled_exts, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zvksed, RISCV_ISA_EXT_ZVKSED, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zvksh, RISCV_ISA_EXT_ZVKSH, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_BUNDLE_VALIDATE(zvksg, riscv_zvksg_bundled_exts, riscv_ext_vector_crypto_validate),
+ __RISCV_ISA_EXT_DATA_VALIDATE(zvkt, RISCV_ISA_EXT_ZVKT, riscv_ext_vector_crypto_validate),
__RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA),
+ __RISCV_ISA_EXT_DATA(smmpm, RISCV_ISA_EXT_SMMPM),
+ __RISCV_ISA_EXT_SUPERSET(smnpm, RISCV_ISA_EXT_SMNPM, riscv_xlinuxenvcfg_exts),
__RISCV_ISA_EXT_DATA(smstateen, RISCV_ISA_EXT_SMSTATEEN),
__RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA),
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
+ __RISCV_ISA_EXT_SUPERSET(ssnpm, RISCV_ISA_EXT_SSNPM, riscv_xlinuxenvcfg_exts),
__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
+ __RISCV_ISA_EXT_DATA(svade, RISCV_ISA_EXT_SVADE),
+ __RISCV_ISA_EXT_DATA_VALIDATE(svadu, RISCV_ISA_EXT_SVADU, riscv_ext_svadu_validate),
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
+ __RISCV_ISA_EXT_DATA(svvptc, RISCV_ISA_EXT_SVVPTC),
};
const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
-static void __init match_isa_ext(const struct riscv_isa_ext_data *ext, const char *name,
- const char *name_end, struct riscv_isainfo *isainfo)
+static void riscv_isa_set_ext(const struct riscv_isa_ext_data *ext, unsigned long *bitmap)
{
- if ((name_end - name == strlen(ext->name)) &&
- !strncasecmp(name, ext->name, name_end - name)) {
- /*
- * If this is a bundle, enable all the ISA extensions that
- * comprise the bundle.
- */
- if (ext->subset_ext_size) {
- for (int i = 0; i < ext->subset_ext_size; i++) {
- if (riscv_isa_extension_check(ext->subset_ext_ids[i]))
- set_bit(ext->subset_ext_ids[i], isainfo->isa);
+ if (ext->id != RISCV_ISA_EXT_INVALID)
+ set_bit(ext->id, bitmap);
+
+ for (int i = 0; i < ext->subset_ext_size; i++) {
+ if (ext->subset_ext_ids[i] != RISCV_ISA_EXT_INVALID)
+ set_bit(ext->subset_ext_ids[i], bitmap);
+ }
+}
+
+static const struct riscv_isa_ext_data *riscv_get_isa_ext_data(unsigned int ext_id)
+{
+ for (int i = 0; i < riscv_isa_ext_count; i++) {
+ if (riscv_isa_ext[i].id == ext_id)
+ return &riscv_isa_ext[i];
+ }
+
+ return NULL;
+}
+
+/*
+ * "Resolve" a source ISA bitmap into one that matches kernel configuration as
+ * well as correct extension dependencies. Some extensions depend on a
+ * specific kernel configuration to be usable (V needs CONFIG_RISCV_ISA_V,
+ * for instance), and this function validates all the extensions provided
+ * in source_isa into resolved_isa based on each extension's validate() callback.
+ */
+static void __init riscv_resolve_isa(unsigned long *source_isa,
+ unsigned long *resolved_isa, unsigned long *this_hwcap,
+ unsigned long *isa2hwcap)
+{
+ bool loop;
+ const struct riscv_isa_ext_data *ext;
+ DECLARE_BITMAP(prev_resolved_isa, RISCV_ISA_EXT_MAX);
+ int max_loop_count = riscv_isa_ext_count, ret;
+ unsigned int bit;
+
+ do {
+ loop = false;
+ if (max_loop_count-- < 0) {
+ pr_err("Failed to reach a stable ISA state\n");
+ return;
+ }
+ bitmap_copy(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX);
+ for_each_set_bit(bit, source_isa, RISCV_ISA_EXT_MAX) {
+ ext = riscv_get_isa_ext_data(bit);
+
+ if (ext && ext->validate) {
+ ret = ext->validate(ext, resolved_isa);
+ if (ret == -EPROBE_DEFER) {
+ loop = true;
+ continue;
+ } else if (ret) {
+ /* Disable the extension entirely */
+ clear_bit(bit, source_isa);
+ continue;
+ }
}
+
+ set_bit(bit, resolved_isa);
+ /* No need to keep it in source isa now that it is enabled */
+ clear_bit(bit, source_isa);
+
+ /* Single letter extensions get set in hwcap */
+ if (bit < RISCV_ISA_EXT_BASE)
+ *this_hwcap |= isa2hwcap[bit];
}
+ } while (loop && !bitmap_equal(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX));
+}
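riscv_resolve_isa() is a fixed-point loop: any extension whose validate() callback reports an unmet dependency (-EPROBE_DEFER) is retried on the next pass, until everything resolves or the resolved set stops changing within the pass budget. Below is a self-contained sketch of that pattern with a toy three-extension set; the names and the single dependency are illustrative only.

```c
#include <stdbool.h>
#include <stdio.h>

enum { EXT_ZFBFMIN, EXT_F, EXT_D, EXT_MAX }; /* zfbfmin deliberately first */
#define DEFER 1
#define OK    0

static bool resolved[EXT_MAX];

static int validate(int ext)
{
	if (ext == EXT_ZFBFMIN && !resolved[EXT_F])
		return DEFER; /* zfbfmin depends on f */
	return OK;
}

int main(void)
{
	bool source[EXT_MAX] = { [EXT_ZFBFMIN] = true, [EXT_F] = true, [EXT_D] = true };
	bool loop;
	int budget = EXT_MAX;

	do {
		loop = false;
		if (budget-- < 0)
			break; /* no stable state reached */
		for (int e = 0; e < EXT_MAX; e++) {
			if (!source[e])
				continue;
			if (validate(e) == DEFER) {
				loop = true; /* retry on the next pass */
				continue;
			}
			resolved[e] = true;
			source[e] = false; /* enabled, drop from source */
		}
	} while (loop);

	printf("zfbfmin resolved: %d\n", resolved[EXT_ZFBFMIN]);
	return 0;
}
```

On the first pass zfbfmin defers because f is not yet resolved; f and d resolve on that same pass, so the second pass accepts zfbfmin and the set reaches a fixed point.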
- /*
- * This is valid even for bundle extensions which uses the RISCV_ISA_EXT_INVALID id
- * (rejected by riscv_isa_extension_check()).
- */
- if (riscv_isa_extension_check(ext->id))
- set_bit(ext->id, isainfo->isa);
+static void __init match_isa_ext(const char *name, const char *name_end, unsigned long *bitmap)
+{
+ for (int i = 0; i < riscv_isa_ext_count; i++) {
+ const struct riscv_isa_ext_data *ext = &riscv_isa_ext[i];
+
+ if ((name_end - name == strlen(ext->name)) &&
+ !strncasecmp(name, ext->name, name_end - name)) {
+ riscv_isa_set_ext(ext, bitmap);
+ break;
+ }
}
}
-static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct riscv_isainfo *isainfo,
- unsigned long *isa2hwcap, const char *isa)
+static void __init riscv_parse_isa_string(const char *isa, unsigned long *bitmap)
{
/*
* For all possible cpus, we have already validated in
@@ -361,9 +641,24 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc
while (*isa) {
const char *ext = isa++;
const char *ext_end = isa;
- bool ext_long = false, ext_err = false;
+ bool ext_err = false;
switch (*ext) {
+ case 'x':
+ case 'X':
+ if (acpi_disabled)
+ pr_warn_once("Vendor extensions are ignored in riscv,isa. Use riscv,isa-extensions instead.");
+ /*
+ * To skip an extension, we find its end.
+ * As multi-letter extensions must be split from other multi-letter
+ * extensions with an "_", the end of a multi-letter extension will
+ * either be the null character or the "_" at the start of the next
+ * multi-letter extension.
+ */
+ for (; *isa && *isa != '_'; ++isa)
+ ;
+ ext_err = true;
+ break;
case 's':
/*
* Workaround for invalid single-letter 's' & 'u' (QEMU).
@@ -379,8 +674,6 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc
}
fallthrough;
case 'S':
- case 'x':
- case 'X':
case 'z':
case 'Z':
/*
@@ -401,7 +694,6 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc
* character itself while eliminating the extensions version number.
* A simple re-increment solves this problem.
*/
- ext_long = true;
for (; *isa && *isa != '_'; ++isa)
if (unlikely(!isalnum(*isa)))
ext_err = true;
@@ -481,17 +773,8 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc
if (unlikely(ext_err))
continue;
- if (!ext_long) {
- int nr = tolower(*ext) - 'a';
- if (riscv_isa_extension_check(nr)) {
- *this_hwcap |= isa2hwcap[nr];
- set_bit(nr, isainfo->isa);
- }
- } else {
- for (int i = 0; i < riscv_isa_ext_count; i++)
- match_isa_ext(&riscv_isa_ext[i], ext, ext_end, isainfo);
- }
+ match_isa_ext(ext, ext_end, bitmap);
}
}
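With vendor extensions now rejected from riscv,isa, the x/X case above has to skip the whole multi-letter token, which it does by scanning forward to the next '_' or the end of the string. Here is a toy tokenizer showing just that skip logic on an assumed input string; it is not the kernel parser, which also handles versions and single-letter extensions.

```c
#include <stdio.h>

int main(void)
{
	const char *isa = "zicbom_xtheadba_zicboz"; /* assumed example input */

	for (const char *p = isa; *p; ) {
		const char *start = p;

		while (*p && *p != '_') /* find the end of this multi-letter token */
			p++;
		if (*start == 'x' || *start == 'X')
			printf("skip   %.*s\n", (int)(p - start), start);
		else
			printf("accept %.*s\n", (int)(p - start), start);
		if (*p == '_')
			p++; /* step over the separator */
	}
	return 0;
}
```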
@@ -503,6 +786,8 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
struct acpi_table_header *rhct;
acpi_status status;
unsigned int cpu;
+ u64 boot_vendorid;
+ u64 boot_archid;
if (!acpi_disabled) {
status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct);
@@ -510,9 +795,13 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
return;
}
+ boot_vendorid = riscv_get_mvendorid();
+ boot_archid = riscv_get_marchid();
+
for_each_possible_cpu(cpu) {
struct riscv_isainfo *isainfo = &hart_isa[cpu];
unsigned long this_hwcap = 0;
+ DECLARE_BITMAP(source_isa, RISCV_ISA_EXT_MAX) = { 0 };
if (acpi_disabled) {
node = of_cpu_device_node_get(cpu);
@@ -535,7 +824,7 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
}
}
- riscv_parse_isa_string(&this_hwcap, isainfo, isa2hwcap, isa);
+ riscv_parse_isa_string(isa, source_isa);
/*
* These ones were as they were part of the base ISA when the
* port & dt-bindings were upstreamed, and so can be set
* unconditionally where `i` is in riscv,isa on DT systems.
* unconditionally where `i` is in riscv,isa on DT systems.
*/
if (acpi_disabled) {
- set_bit(RISCV_ISA_EXT_ZICSR, isainfo->isa);
- set_bit(RISCV_ISA_EXT_ZIFENCEI, isainfo->isa);
- set_bit(RISCV_ISA_EXT_ZICNTR, isainfo->isa);
- set_bit(RISCV_ISA_EXT_ZIHPM, isainfo->isa);
+ set_bit(RISCV_ISA_EXT_ZICSR, source_isa);
+ set_bit(RISCV_ISA_EXT_ZIFENCEI, source_isa);
+ set_bit(RISCV_ISA_EXT_ZICNTR, source_isa);
+ set_bit(RISCV_ISA_EXT_ZIHPM, source_isa);
}
/*
@@ -557,12 +846,13 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
* CPU cores with the ratified spec will contain non-zero
* marchid.
*/
- if (acpi_disabled && riscv_cached_mvendorid(cpu) == THEAD_VENDOR_ID &&
- riscv_cached_marchid(cpu) == 0x0) {
+ if (acpi_disabled && boot_vendorid == THEAD_VENDOR_ID && boot_archid == 0x0) {
this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v];
- clear_bit(RISCV_ISA_EXT_v, isainfo->isa);
+ clear_bit(RISCV_ISA_EXT_v, source_isa);
}
+ riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap);
+
/*
* All "okay" hart should have same isa. Set HWCAP based on
* common capabilities of every "okay" hart, in case they don't
@@ -583,14 +873,111 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap)
acpi_put_table((struct acpi_table_header *)rhct);
}
+static void __init riscv_fill_cpu_vendor_ext(struct device_node *cpu_node, int cpu)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT))
+ return;
+
+ for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) {
+ struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i];
+
+ for (int j = 0; j < ext_list->ext_data_count; j++) {
+ const struct riscv_isa_ext_data ext = ext_list->ext_data[j];
+ struct riscv_isavendorinfo *isavendorinfo = &ext_list->per_hart_isa_bitmap[cpu];
+
+ if (of_property_match_string(cpu_node, "riscv,isa-extensions",
+ ext.property) < 0)
+ continue;
+
+ /*
+ * Assume that subset extensions are all members of the
+ * same vendor.
+ */
+ if (ext.subset_ext_size)
+ for (int k = 0; k < ext.subset_ext_size; k++)
+ set_bit(ext.subset_ext_ids[k], isavendorinfo->isa);
+
+ set_bit(ext.id, isavendorinfo->isa);
+ }
+ }
+}
+
+/*
+ * Populate all_harts_isa_bitmap for each vendor with all of the extensions that
+ * are shared across CPUs for that vendor.
+ */
+static void __init riscv_fill_vendor_ext_list(int cpu)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_VENDOR_EXT))
+ return;
+
+ for (int i = 0; i < riscv_isa_vendor_ext_list_size; i++) {
+ struct riscv_isa_vendor_ext_data_list *ext_list = riscv_isa_vendor_ext_list[i];
+
+ if (!ext_list->is_initialized) {
+ bitmap_copy(ext_list->all_harts_isa_bitmap.isa,
+ ext_list->per_hart_isa_bitmap[cpu].isa,
+ RISCV_ISA_VENDOR_EXT_MAX);
+ ext_list->is_initialized = true;
+ } else {
+ bitmap_and(ext_list->all_harts_isa_bitmap.isa,
+ ext_list->all_harts_isa_bitmap.isa,
+ ext_list->per_hart_isa_bitmap[cpu].isa,
+ RISCV_ISA_VENDOR_EXT_MAX);
+ }
+ }
+}
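
The copy-then-AND pattern in riscv_fill_vendor_ext_list() computes, per vendor, the set of extensions present on every hart: the first hart seeds the bitmap, and each later hart intersects with it. A minimal standalone sketch of that accumulation, using a plain unsigned long in place of the kernel's bitmap API (the bit values are made up for illustration):

    #include <stdio.h>

    /* Stand-in for the kernel's bitmap_copy()/bitmap_and() accumulation:
     * the result keeps only extensions present on every hart. */
    int main(void)
    {
            unsigned long per_hart[3] = { 0xb, 0x9, 0xd }; /* extension bits per hart */
            unsigned long all_harts = 0;
            int initialized = 0;

            for (int cpu = 0; cpu < 3; cpu++) {
                    if (!initialized) {
                            all_harts = per_hart[cpu];  /* bitmap_copy() */
                            initialized = 1;
                    } else {
                            all_harts &= per_hart[cpu]; /* bitmap_and() */
                    }
            }
            printf("extensions common to all harts: %#lx\n", all_harts); /* 0x9 */
            return 0;
    }
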
+
+static int has_thead_homogeneous_vlenb(void)
+{
+ int cpu;
+ u32 prev_vlenb = 0;
+ u32 vlenb;
+
+ /* Ignore the thead,vlenb property if xtheadvector is not enabled in the kernel */
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR))
+ return 0;
+
+ for_each_possible_cpu(cpu) {
+ struct device_node *cpu_node;
+
+ cpu_node = of_cpu_device_node_get(cpu);
+ if (!cpu_node) {
+ pr_warn("Unable to find cpu node\n");
+ return -ENOENT;
+ }
+
+ if (of_property_read_u32(cpu_node, "thead,vlenb", &vlenb)) {
+ of_node_put(cpu_node);
+
+ if (prev_vlenb)
+ return -ENOENT;
+ continue;
+ }
+
+ if (prev_vlenb && vlenb != prev_vlenb) {
+ of_node_put(cpu_node);
+ return -ENOENT;
+ }
+
+ prev_vlenb = vlenb;
+ of_node_put(cpu_node);
+ }
+
+ thead_vlenb_of = vlenb;
+ return 0;
+}
+
static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
{
unsigned int cpu;
+ bool mitigated;
for_each_possible_cpu(cpu) {
unsigned long this_hwcap = 0;
struct device_node *cpu_node;
struct riscv_isainfo *isainfo = &hart_isa[cpu];
+ DECLARE_BITMAP(source_isa, RISCV_ISA_EXT_MAX) = { 0 };
cpu_node = of_cpu_device_node_get(cpu);
if (!cpu_node) {
@@ -610,22 +997,12 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
ext->property) < 0)
continue;
- if (ext->subset_ext_size) {
- for (int j = 0; j < ext->subset_ext_size; j++) {
- if (riscv_isa_extension_check(ext->subset_ext_ids[i]))
- set_bit(ext->subset_ext_ids[j], isainfo->isa);
- }
- }
-
- if (riscv_isa_extension_check(ext->id)) {
- set_bit(ext->id, isainfo->isa);
-
- /* Only single letter extensions get set in hwcap */
- if (strnlen(riscv_isa_ext[i].name, 2) == 1)
- this_hwcap |= isa2hwcap[riscv_isa_ext[i].id];
- }
+ riscv_isa_set_ext(ext, source_isa);
}
+ riscv_resolve_isa(source_isa, isainfo->isa, &this_hwcap, isa2hwcap);
+ riscv_fill_cpu_vendor_ext(cpu_node, cpu);
+
of_node_put(cpu_node);
/*
@@ -641,6 +1018,19 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
bitmap_copy(riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX);
else
bitmap_and(riscv_isa, riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX);
+
+ riscv_fill_vendor_ext_list(cpu);
+ }
+
+ /*
+ * Execute ghostwrite mitigation immediately after detecting extensions
+ * to disable xtheadvector if necessary.
+ */
+ mitigated = ghostwrite_enable_mitigation();
+
+ if (!mitigated && has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) {
+ pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n");
+ disable_xtheadvector();
}
if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX))
@@ -695,15 +1085,12 @@ void __init riscv_fill_hwcap(void)
elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
}
- if (elf_hwcap & COMPAT_HWCAP_ISA_V) {
- riscv_v_setup_vsize();
+ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) ||
+ has_xtheadvector_no_alternatives()) {
/*
- * ISA string in device tree might have 'v' flag, but
- * CONFIG_RISCV_ISA_V is disabled in kernel.
- * Clear V flag in elf_hwcap if CONFIG_RISCV_ISA_V is disabled.
+ * This cannot fail when called on the boot hart
*/
- if (!IS_ENABLED(CONFIG_RISCV_ISA_V))
- elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
+ riscv_v_setup_vsize();
}
memset(print_str, 0, sizeof(print_str));
@@ -731,251 +1118,21 @@ unsigned long riscv_get_elf_hwcap(void)
return hwcap;
}
-static int check_unaligned_access(void *param)
-{
- int cpu = smp_processor_id();
- u64 start_cycles, end_cycles;
- u64 word_cycles;
- u64 byte_cycles;
- int ratio;
- unsigned long start_jiffies, now;
- struct page *page = param;
- void *dst;
- void *src;
- long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
-
- if (check_unaligned_access_emulated(cpu))
- return 0;
-
- /* Make an unaligned destination buffer. */
- dst = (void *)((unsigned long)page_address(page) | 0x1);
- /* Unalign src as well, but differently (off by 1 + 2 = 3). */
- src = dst + (MISALIGNED_BUFFER_SIZE / 2);
- src += 2;
- word_cycles = -1ULL;
- /* Do a warmup. */
- __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
- preempt_disable();
- start_jiffies = jiffies;
- while ((now = jiffies) == start_jiffies)
- cpu_relax();
-
- /*
- * For a fixed amount of time, repeatedly try the function, and take
- * the best time in cycles as the measurement.
- */
- while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
- start_cycles = get_cycles64();
- /* Ensure the CSR read can't reorder WRT to the copy. */
- mb();
- __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
- /* Ensure the copy ends before the end time is snapped. */
- mb();
- end_cycles = get_cycles64();
- if ((end_cycles - start_cycles) < word_cycles)
- word_cycles = end_cycles - start_cycles;
- }
-
- byte_cycles = -1ULL;
- __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
- start_jiffies = jiffies;
- while ((now = jiffies) == start_jiffies)
- cpu_relax();
-
- while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
- start_cycles = get_cycles64();
- mb();
- __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
- mb();
- end_cycles = get_cycles64();
- if ((end_cycles - start_cycles) < byte_cycles)
- byte_cycles = end_cycles - start_cycles;
- }
-
- preempt_enable();
-
- /* Don't divide by zero. */
- if (!word_cycles || !byte_cycles) {
- pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
- cpu);
-
- return 0;
- }
-
- if (word_cycles < byte_cycles)
- speed = RISCV_HWPROBE_MISALIGNED_FAST;
-
- ratio = div_u64((byte_cycles * 100), word_cycles);
- pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
- cpu,
- ratio / 100,
- ratio % 100,
- (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
-
- per_cpu(misaligned_access_speed, cpu) = speed;
-
- /*
- * Set the value of fast_misaligned_access of a CPU. These operations
- * are atomic to avoid race conditions.
- */
- if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
- cpumask_set_cpu(cpu, &fast_misaligned_access);
- else
- cpumask_clear_cpu(cpu, &fast_misaligned_access);
-
- return 0;
-}
-
-static void check_unaligned_access_nonboot_cpu(void *param)
-{
- unsigned int cpu = smp_processor_id();
- struct page **pages = param;
-
- if (smp_processor_id() != 0)
- check_unaligned_access(pages[cpu]);
-}
-
-DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
-
-static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
-{
- if (cpumask_weight(mask) == weight)
- static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
- else
- static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
-}
-
-static void set_unaligned_access_static_branches_except_cpu(int cpu)
-{
- /*
- * Same as set_unaligned_access_static_branches, except excludes the
- * given CPU from the result. When a CPU is hotplugged into an offline
- * state, this function is called before the CPU is set to offline in
- * the cpumask, and thus the CPU needs to be explicitly excluded.
- */
-
- cpumask_t fast_except_me;
-
- cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
- cpumask_clear_cpu(cpu, &fast_except_me);
-
- modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
-}
-
-static void set_unaligned_access_static_branches(void)
-{
- /*
- * This will be called after check_unaligned_access_all_cpus so the
- * result of unaligned access speed for all CPUs will be available.
- *
- * To avoid the number of online cpus changing between reading
- * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
- * held before calling this function.
- */
-
- cpumask_t fast_and_online;
-
- cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
-
- modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
-}
-
-static int lock_and_set_unaligned_access_static_branch(void)
-{
- cpus_read_lock();
- set_unaligned_access_static_branches();
- cpus_read_unlock();
-
- return 0;
-}
-
-arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
-
-static int riscv_online_cpu(unsigned int cpu)
-{
- static struct page *buf;
-
- /* We are already set since the last check */
- if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
- goto exit;
-
- buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
- if (!buf) {
- pr_warn("Allocation failure, not measuring misaligned performance\n");
- return -ENOMEM;
- }
-
- check_unaligned_access(buf);
- __free_pages(buf, MISALIGNED_BUFFER_ORDER);
-
-exit:
- set_unaligned_access_static_branches();
-
- return 0;
-}
-
-static int riscv_offline_cpu(unsigned int cpu)
-{
- set_unaligned_access_static_branches_except_cpu(cpu);
-
- return 0;
-}
-
-/* Measure unaligned access on all CPUs present at boot in parallel. */
-static int check_unaligned_access_all_cpus(void)
+void __init riscv_user_isa_enable(void)
{
- unsigned int cpu;
- unsigned int cpu_count = num_possible_cpus();
- struct page **bufs = kzalloc(cpu_count * sizeof(struct page *),
- GFP_KERNEL);
-
- if (!bufs) {
- pr_warn("Allocation failure, not measuring misaligned performance\n");
- return 0;
- }
-
- /*
- * Allocate separate buffers for each CPU so there's no fighting over
- * cache lines.
- */
- for_each_cpu(cpu, cpu_online_mask) {
- bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
- if (!bufs[cpu]) {
- pr_warn("Allocation failure, not measuring misaligned performance\n");
- goto out;
- }
- }
-
- /* Check everybody except 0, who stays behind to tend jiffies. */
- on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
-
- /* Check core 0. */
- smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
-
- /*
- * Setup hotplug callbacks for any new CPUs that come online or go
- * offline.
- */
- cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
- riscv_online_cpu, riscv_offline_cpu);
-
-out:
- unaligned_emulation_finish();
- for_each_cpu(cpu, cpu_online_mask) {
- if (bufs[cpu])
- __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
- }
-
- kfree(bufs);
- return 0;
-}
-
-arch_initcall(check_unaligned_access_all_cpus);
-
-void riscv_user_isa_enable(void)
-{
- if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
- csr_set(CSR_ENVCFG, ENVCFG_CBZE);
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOZ))
+ current->thread.envcfg |= ENVCFG_CBZE;
+ else if (any_cpu_has_zicboz)
+ pr_warn("Zicboz disabled as it is unavailable on some harts\n");
+
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOM))
+ current->thread.envcfg |= ENVCFG_CBCFE;
+ else if (any_cpu_has_zicbom)
+ pr_warn("Zicbom disabled as it is unavailable on some harts\n");
+
+ if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOP) &&
+ any_cpu_has_zicbop)
+ pr_warn("Zicbop disabled as it is unavailable on some harts\n");
}
#ifdef CONFIG_RISCV_ALTERNATIVE
@@ -1018,28 +1175,45 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
{
struct alt_entry *alt;
void *oldptr, *altptr;
- u16 id, value;
+ u16 id, value, vendor;
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
return;
for (alt = begin; alt < end; alt++) {
- if (alt->vendor_id != 0)
- continue;
-
id = PATCH_ID_CPUFEATURE_ID(alt->patch_id);
+ vendor = PATCH_ID_CPUFEATURE_ID(alt->vendor_id);
- if (id >= RISCV_ISA_EXT_MAX) {
- WARN(1, "This extension id:%d is not in ISA extension list", id);
- continue;
- }
+ /*
+ * Any alternative with a patch_id that is less than
+ * RISCV_ISA_EXT_MAX is interpreted as a standard extension.
+ *
+ * Any alternative with patch_id that is greater than or equal
+ * to RISCV_VENDOR_EXT_ALTERNATIVES_BASE is interpreted as a
+ * vendor extension.
+ */
+ if (id < RISCV_ISA_EXT_MAX) {
+ /*
+ * This patch should be treated as errata so skip
+ * processing here.
+ */
+ if (alt->vendor_id != 0)
+ continue;
- if (!__riscv_isa_extension_available(NULL, id))
- continue;
+ if (!__riscv_isa_extension_available(NULL, id))
+ continue;
- value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id);
- if (!riscv_cpufeature_patch_check(id, value))
+ value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id);
+ if (!riscv_cpufeature_patch_check(id, value))
+ continue;
+ } else if (id >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE) {
+ if (!__riscv_isa_vendor_extension_available(VENDOR_EXT_ALL_CPUS, vendor,
+ id - RISCV_VENDOR_EXT_ALTERNATIVES_BASE))
+ continue;
+ } else {
+ WARN(1, "This extension id:%d is not in ISA extension list", id);
continue;
+ }
oldptr = ALT_OLD_PTR(alt);
altptr = ALT_ALT_PTR(alt);
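
The dispatch above splits the patch_id space into two ranges: IDs below RISCV_ISA_EXT_MAX are standard extensions, IDs at or above RISCV_VENDOR_EXT_ALTERNATIVES_BASE are vendor extensions, and anything in between warns. A small standalone sketch of that classification; the two constant values below are stand-ins for illustration, not the kernel's definitions:

    #include <stdio.h>

    /* Assumed values for illustration; the kernel defines the real ones. */
    #define RISCV_ISA_EXT_MAX                  128
    #define RISCV_VENDOR_EXT_ALTERNATIVES_BASE 0x8000

    /* Classify an alternative's patch_id the way the hunk above dispatches it. */
    static const char *classify(unsigned int id)
    {
            if (id < RISCV_ISA_EXT_MAX)
                    return "standard ISA extension";
            if (id >= RISCV_VENDOR_EXT_ALTERNATIVES_BASE)
                    return "vendor extension";
            return "invalid (triggers the WARN)";
    }

    int main(void)
    {
            unsigned int ids[] = { 5, 200, 0x8003 };

            for (int i = 0; i < 3; i++)
                    printf("patch_id %#x -> %s\n", ids[i], classify(ids[i]));
            return 0;
    }
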
diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S
index 515b2dfbca75..2efc3aaf4a8c 100644
--- a/arch/riscv/kernel/efi-header.S
+++ b/arch/riscv/kernel/efi-header.S
@@ -9,7 +9,7 @@
#include <asm/set_memory.h>
.macro __EFI_PE_HEADER
- .long PE_MAGIC
+ .long IMAGE_NT_SIGNATURE
coff_header:
#ifdef CONFIG_64BIT
.short IMAGE_FILE_MACHINE_RISCV64 // Machine
@@ -27,9 +27,9 @@ coff_header:
optional_header:
#ifdef CONFIG_64BIT
- .short PE_OPT_MAGIC_PE32PLUS // PE32+ format
+ .short IMAGE_NT_OPTIONAL_HDR64_MAGIC // PE32+ format
#else
- .short PE_OPT_MAGIC_PE32 // PE32 format
+ .short IMAGE_NT_OPTIONAL_HDR32_MAGIC // PE32 format
#endif
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
@@ -64,7 +64,7 @@ extra_header_fields:
.long efi_header_end - _start // SizeOfHeaders
.long 0 // CheckSum
.short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
- .short 0 // DllCharacteristics
+ .short IMAGE_DLLCHARACTERISTICS_NX_COMPAT // DllCharacteristics
.quad 0 // SizeOfStackReserve
.quad 0 // SizeOfStackCommit
.quad 0 // SizeOfHeapReserve
diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c
deleted file mode 100644
index 54260c16f991..000000000000
--- a/arch/riscv/kernel/elf_kexec.c
+++ /dev/null
@@ -1,475 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Load ELF vmlinux file for the kexec_file_load syscall.
- *
- * Copyright (C) 2021 Huawei Technologies Co, Ltd.
- *
- * Author: Liao Chang (liaochang1@huawei.com)
- *
- * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
- * for kernel.
- */
-
-#define pr_fmt(fmt) "kexec_image: " fmt
-
-#include <linux/elf.h>
-#include <linux/kexec.h>
-#include <linux/slab.h>
-#include <linux/of.h>
-#include <linux/libfdt.h>
-#include <linux/types.h>
-#include <linux/memblock.h>
-#include <asm/setup.h>
-
-int arch_kimage_file_post_load_cleanup(struct kimage *image)
-{
- kvfree(image->arch.fdt);
- image->arch.fdt = NULL;
-
- vfree(image->elf_headers);
- image->elf_headers = NULL;
- image->elf_headers_sz = 0;
-
- return kexec_image_post_load_cleanup_default(image);
-}
-
-static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
- struct kexec_elf_info *elf_info, unsigned long old_pbase,
- unsigned long new_pbase)
-{
- int i;
- int ret = 0;
- size_t size;
- struct kexec_buf kbuf;
- const struct elf_phdr *phdr;
-
- kbuf.image = image;
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- phdr = &elf_info->proghdrs[i];
- if (phdr->p_type != PT_LOAD)
- continue;
-
- size = phdr->p_filesz;
- if (size > phdr->p_memsz)
- size = phdr->p_memsz;
-
- kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
- kbuf.bufsz = size;
- kbuf.buf_align = phdr->p_align;
- kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
- kbuf.memsz = phdr->p_memsz;
- kbuf.top_down = false;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- break;
- }
-
- return ret;
-}
-
-/*
- * Go through the available physical memory regions and find one that can hold
- * an image of the specified size.
- */
-static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
- struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
- unsigned long *old_pbase, unsigned long *new_pbase)
-{
- int i;
- int ret;
- struct kexec_buf kbuf;
- const struct elf_phdr *phdr;
- unsigned long lowest_paddr = ULONG_MAX;
- unsigned long lowest_vaddr = ULONG_MAX;
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- phdr = &elf_info->proghdrs[i];
- if (phdr->p_type != PT_LOAD)
- continue;
-
- if (lowest_paddr > phdr->p_paddr)
- lowest_paddr = phdr->p_paddr;
-
- if (lowest_vaddr > phdr->p_vaddr)
- lowest_vaddr = phdr->p_vaddr;
- }
-
- kbuf.image = image;
- kbuf.buf_min = lowest_paddr;
- kbuf.buf_max = ULONG_MAX;
-
- /*
- * Current riscv boot protocol requires 2MB alignment for
- * RV64 and 4MB alignment for RV32
- *
- */
- kbuf.buf_align = PMD_SIZE;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
- kbuf.top_down = false;
- ret = arch_kexec_locate_mem_hole(&kbuf);
- if (!ret) {
- *old_pbase = lowest_paddr;
- *new_pbase = kbuf.mem;
- image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
- }
- return ret;
-}
-
-#ifdef CONFIG_CRASH_DUMP
-static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
-{
- unsigned int *nr_ranges = arg;
-
- (*nr_ranges)++;
- return 0;
-}
-
-static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
-{
- struct crash_mem *cmem = arg;
-
- cmem->ranges[cmem->nr_ranges].start = res->start;
- cmem->ranges[cmem->nr_ranges].end = res->end;
- cmem->nr_ranges++;
-
- return 0;
-}
-
-static int prepare_elf_headers(void **addr, unsigned long *sz)
-{
- struct crash_mem *cmem;
- unsigned int nr_ranges;
- int ret;
-
- nr_ranges = 1; /* For exclusion of crashkernel region */
- walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
-
- cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
- if (!cmem)
- return -ENOMEM;
-
- cmem->max_nr_ranges = nr_ranges;
- cmem->nr_ranges = 0;
- ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
- if (ret)
- goto out;
-
- /* Exclude crashkernel region */
- ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
- if (!ret)
- ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
-
-out:
- kfree(cmem);
- return ret;
-}
-
-static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
- unsigned long cmdline_len)
-{
- int elfcorehdr_strlen;
- char *cmdline_ptr;
-
- cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
- if (!cmdline_ptr)
- return NULL;
-
- elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
- image->elf_load_addr);
-
- if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
- pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
- kfree(cmdline_ptr);
- return NULL;
- }
-
- memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
- /* Ensure it's nul terminated */
- cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
- return cmdline_ptr;
-}
-#endif
-
-static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
- unsigned long kernel_len, char *initrd,
- unsigned long initrd_len, char *cmdline,
- unsigned long cmdline_len)
-{
- int ret;
- void *fdt;
- unsigned long old_kernel_pbase = ULONG_MAX;
- unsigned long new_kernel_pbase = 0UL;
- unsigned long initrd_pbase = 0UL;
- unsigned long kernel_start;
- struct elfhdr ehdr;
- struct kexec_buf kbuf;
- struct kexec_elf_info elf_info;
- char *modified_cmdline = NULL;
-
- ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
- if (ret)
- return ERR_PTR(ret);
-
- ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
- &old_kernel_pbase, &new_kernel_pbase);
- if (ret)
- goto out;
- kernel_start = image->start;
-
- /* Add the kernel binary to the image */
- ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
- old_kernel_pbase, new_kernel_pbase);
- if (ret)
- goto out;
-
- kbuf.image = image;
- kbuf.buf_min = new_kernel_pbase + kernel_len;
- kbuf.buf_max = ULONG_MAX;
-
-#ifdef CONFIG_CRASH_DUMP
- /* Add elfcorehdr */
- if (image->type == KEXEC_TYPE_CRASH) {
- void *headers;
- unsigned long headers_sz;
- ret = prepare_elf_headers(&headers, &headers_sz);
- if (ret) {
- pr_err("Preparing elf core header failed\n");
- goto out;
- }
-
- kbuf.buffer = headers;
- kbuf.bufsz = headers_sz;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf.memsz = headers_sz;
- kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
- kbuf.top_down = true;
-
- ret = kexec_add_buffer(&kbuf);
- if (ret) {
- vfree(headers);
- goto out;
- }
- image->elf_headers = headers;
- image->elf_load_addr = kbuf.mem;
- image->elf_headers_sz = headers_sz;
-
- kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
-
- /* Setup cmdline for kdump kernel case */
- modified_cmdline = setup_kdump_cmdline(image, cmdline,
- cmdline_len);
- if (!modified_cmdline) {
- pr_err("Setting up cmdline for kdump kernel failed\n");
- ret = -EINVAL;
- goto out;
- }
- cmdline = modified_cmdline;
- }
-#endif
-
-#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
- /* Add purgatory to the image */
- kbuf.top_down = true;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_load_purgatory(image, &kbuf);
- if (ret) {
- pr_err("Error loading purgatory ret=%d\n", ret);
- goto out;
- }
- kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem);
-
- ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry",
- &kernel_start,
- sizeof(kernel_start), 0);
- if (ret)
- pr_err("Error update purgatory ret=%d\n", ret);
-#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */
-
- /* Add the initrd to the image */
- if (initrd != NULL) {
- kbuf.buffer = initrd;
- kbuf.bufsz = kbuf.memsz = initrd_len;
- kbuf.buf_align = PAGE_SIZE;
- kbuf.top_down = true;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- initrd_pbase = kbuf.mem;
- kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase);
- }
-
- /* Add the DTB to the image */
- fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
- initrd_len, cmdline, 0);
- if (!fdt) {
- pr_err("Error setting up the new device tree.\n");
- ret = -EINVAL;
- goto out;
- }
-
- fdt_pack(fdt);
- kbuf.buffer = fdt;
- kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
- kbuf.buf_align = PAGE_SIZE;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf.top_down = true;
- ret = kexec_add_buffer(&kbuf);
- if (ret) {
- pr_err("Error add DTB kbuf ret=%d\n", ret);
- goto out_free_fdt;
- }
- /* Cache the fdt buffer address for memory cleanup */
- image->arch.fdt = fdt;
- kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem);
- goto out;
-
-out_free_fdt:
- kvfree(fdt);
-out:
- kfree(modified_cmdline);
- kexec_free_elf_info(&elf_info);
- return ret ? ERR_PTR(ret) : NULL;
-}
-
-#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
-#define RISCV_IMM_BITS 12
-#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)
-#define RISCV_CONST_HIGH_PART(x) \
- (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
-#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x))
-
-#define ENCODE_ITYPE_IMM(x) \
- (RV_X(x, 0, 12) << 20)
-#define ENCODE_BTYPE_IMM(x) \
- ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \
- (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31))
-#define ENCODE_UTYPE_IMM(x) \
- (RV_X(x, 12, 20) << 12)
-#define ENCODE_JTYPE_IMM(x) \
- ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \
- (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31))
-#define ENCODE_CBTYPE_IMM(x) \
- ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \
- (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12))
-#define ENCODE_CJTYPE_IMM(x) \
- ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \
- (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \
- (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12))
-#define ENCODE_UJTYPE_IMM(x) \
- (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \
- (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32))
-#define ENCODE_UITYPE_IMM(x) \
- (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32))
-
-#define CLEAN_IMM(type, x) \
- ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x))
-
-int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
- Elf_Shdr *section,
- const Elf_Shdr *relsec,
- const Elf_Shdr *symtab)
-{
- const char *strtab, *name, *shstrtab;
- const Elf_Shdr *sechdrs;
- Elf64_Rela *relas;
- int i, r_type;
-
- /* String & section header string table */
- sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
- strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
- shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
-
- relas = (void *)pi->ehdr + relsec->sh_offset;
-
- for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
- const Elf_Sym *sym; /* symbol to relocate */
- unsigned long addr; /* final location after relocation */
- unsigned long val; /* relocated symbol value */
- unsigned long sec_base; /* relocated symbol value */
- void *loc; /* tmp location to modify */
-
- sym = (void *)pi->ehdr + symtab->sh_offset;
- sym += ELF64_R_SYM(relas[i].r_info);
-
- if (sym->st_name)
- name = strtab + sym->st_name;
- else
- name = shstrtab + sechdrs[sym->st_shndx].sh_name;
-
- loc = pi->purgatory_buf;
- loc += section->sh_offset;
- loc += relas[i].r_offset;
-
- if (sym->st_shndx == SHN_ABS)
- sec_base = 0;
- else if (sym->st_shndx >= pi->ehdr->e_shnum) {
- pr_err("Invalid section %d for symbol %s\n",
- sym->st_shndx, name);
- return -ENOEXEC;
- } else
- sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
-
- val = sym->st_value;
- val += sec_base;
- val += relas[i].r_addend;
-
- addr = section->sh_addr + relas[i].r_offset;
-
- r_type = ELF64_R_TYPE(relas[i].r_info);
-
- switch (r_type) {
- case R_RISCV_BRANCH:
- *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
- ENCODE_BTYPE_IMM(val - addr);
- break;
- case R_RISCV_JAL:
- *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
- ENCODE_JTYPE_IMM(val - addr);
- break;
- /*
- * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I
- * sym is expected to be next to R_RISCV_PCREL_HI20
- * in purgatory relsec. Handle it like R_RISCV_CALL
- * sym, instead of searching the whole relsec.
- */
- case R_RISCV_PCREL_HI20:
- case R_RISCV_CALL_PLT:
- case R_RISCV_CALL:
- *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
- ENCODE_UJTYPE_IMM(val - addr);
- break;
- case R_RISCV_RVC_BRANCH:
- *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
- ENCODE_CBTYPE_IMM(val - addr);
- break;
- case R_RISCV_RVC_JUMP:
- *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
- ENCODE_CJTYPE_IMM(val - addr);
- break;
- case R_RISCV_ADD32:
- *(u32 *)loc += val;
- break;
- case R_RISCV_SUB32:
- *(u32 *)loc -= val;
- break;
- /* It has been applied by R_RISCV_PCREL_HI20 sym */
- case R_RISCV_PCREL_LO12_I:
- case R_RISCV_ALIGN:
- case R_RISCV_RELAX:
- break;
- default:
- pr_err("Unknown rela relocation: %d\n", r_type);
- return -ENOEXEC;
- }
- }
- return 0;
-}
-
-const struct kexec_file_ops elf_kexec_ops = {
- .probe = kexec_elf_probe,
- .load = elf_kexec_load,
-};
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 9d1a305d5508..75656afa2d6b 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -19,6 +19,79 @@
.section .irqentry.text, "ax"
+.macro new_vmalloc_check
+ REG_S a0, TASK_TI_A0(tp)
+ csrr a0, CSR_CAUSE
+ /* Exclude IRQs */
+ blt a0, zero, .Lnew_vmalloc_restore_context_a0
+
+ REG_S a1, TASK_TI_A1(tp)
+ /* Only check new_vmalloc if we are in page/protection fault */
+ li a1, EXC_LOAD_PAGE_FAULT
+ beq a0, a1, .Lnew_vmalloc_kernel_address
+ li a1, EXC_STORE_PAGE_FAULT
+ beq a0, a1, .Lnew_vmalloc_kernel_address
+ li a1, EXC_INST_PAGE_FAULT
+ bne a0, a1, .Lnew_vmalloc_restore_context_a1
+
+.Lnew_vmalloc_kernel_address:
+ /* Is it a kernel address? */
+ csrr a0, CSR_TVAL
+ bge a0, zero, .Lnew_vmalloc_restore_context_a1
+
+ /* Check if a new vmalloc mapping appeared that could explain the trap */
+ REG_S a2, TASK_TI_A2(tp)
+ /*
+ * Computes:
+ * a0 = &new_vmalloc[BIT_WORD(cpu)]
+ * a1 = BIT_MASK(cpu)
+ */
+ REG_L a2, TASK_TI_CPU(tp)
+ /*
+ * Compute the new_vmalloc element position:
+ * (cpu / 64) * 8 = (cpu >> 6) << 3
+ */
+ srli a1, a2, 6
+ slli a1, a1, 3
+ la a0, new_vmalloc
+ add a0, a0, a1
+ /*
+ * Compute the bit position in the new_vmalloc element:
+ * bit_pos = cpu % 64 = cpu - ((cpu / 64) * 64) = cpu - ((cpu >> 6) << 6)
+ * = cpu - (((cpu >> 6) << 3) << 3)
+ */
+ slli a1, a1, 3
+ sub a1, a2, a1
+ /* Compute the "get mask": 1 << bit_pos */
+ li a2, 1
+ sll a1, a2, a1
+
+ /* Check the value of new_vmalloc for this cpu */
+ REG_L a2, 0(a0)
+ and a2, a2, a1
+ beq a2, zero, .Lnew_vmalloc_restore_context
+
+ /* Atomically reset the current cpu bit in new_vmalloc */
+ amoxor.d a0, a1, (a0)
+
+ /* Only emit a sfence.vma if the uarch caches invalid entries */
+ ALTERNATIVE("sfence.vma", "nop", 0, RISCV_ISA_EXT_SVVPTC, 1)
+
+ REG_L a0, TASK_TI_A0(tp)
+ REG_L a1, TASK_TI_A1(tp)
+ REG_L a2, TASK_TI_A2(tp)
+ csrw CSR_SCRATCH, x0
+ sret
+
+.Lnew_vmalloc_restore_context:
+ REG_L a2, TASK_TI_A2(tp)
+.Lnew_vmalloc_restore_context_a1:
+ REG_L a1, TASK_TI_A1(tp)
+.Lnew_vmalloc_restore_context_a0:
+ REG_L a0, TASK_TI_A0(tp)
+.endm
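
In C terms, the shift sequence in new_vmalloc_check computes the same word index and bit mask that the kernel's BIT_WORD()/BIT_MASK() helpers would. A minimal sketch, assuming 64-bit unsigned longs as on riscv64 (the cpu number is an arbitrary example):

    #include <stdio.h>

    /* Mirror of the assembly above: locate this cpu's bit in new_vmalloc[]. */
    int main(void)
    {
            unsigned long cpu = 70;                     /* arbitrary example hart */

            unsigned long word = cpu >> 6;              /* cpu / 64 */
            unsigned long byte_off = word << 3;         /* * 8: offset added to &new_vmalloc */
            unsigned long bit = cpu - (byte_off << 3);  /* cpu % 64 */
            unsigned long mask = 1UL << bit;            /* the "get mask" */

            printf("cpu %lu -> word %lu (byte offset %lu), bit %lu, mask %#lx\n",
                   cpu, word, byte_off, bit, mask);
            return 0;
    }
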
+
+
SYM_CODE_START(handle_exception)
/*
* If coming from userspace, preserve the user thread pointer and load
@@ -30,6 +103,20 @@ SYM_CODE_START(handle_exception)
.Lrestore_kernel_tpsp:
csrr tp, CSR_SCRATCH
+
+#ifdef CONFIG_64BIT
+ /*
+ * The RISC-V kernel does not eagerly emit a sfence.vma after each
+ * new vmalloc mapping, which may result in exceptions:
+ * - if the uarch caches invalid entries, the new mapping would not be
+ * observed by the page table walker and an invalidation is needed.
+ * - if the uarch does not cache invalid entries, a reordered access
+ * could "miss" the new mapping and traps: in that case, we only need
+ * to retry the access, no sfence.vma is required.
+ */
+ new_vmalloc_check
+#endif
+
REG_S sp, TASK_TI_KERNEL_SP(tp)
#ifdef CONFIG_VMAP_STACK
@@ -88,7 +175,6 @@ SYM_CODE_START(handle_exception)
call riscv_v_context_nesting_start
#endif
move a0, sp /* pt_regs */
- la ra, ret_from_exception
/*
* MSB of cause differentiates between
@@ -97,7 +183,8 @@ SYM_CODE_START(handle_exception)
bge s4, zero, 1f
/* Handle interrupts */
- tail do_irq
+ call do_irq
+ j ret_from_exception
1:
/* Handle other exceptions */
slli t0, s4, RISCV_LGPTR
@@ -105,12 +192,16 @@ SYM_CODE_START(handle_exception)
la t2, excp_vect_table_end
add t0, t1, t0
/* Check if exception code lies within bounds */
- bgeu t0, t2, 1f
- REG_L t0, 0(t0)
- jr t0
-1:
- tail do_trap_unknown
+ bgeu t0, t2, 3f
+ REG_L t1, 0(t0)
+2: jalr t1
+ j ret_from_exception
+3:
+
+ la t1, do_trap_unknown
+ j 2b
SYM_CODE_END(handle_exception)
+ASM_NOKPROBE(handle_exception)
/*
* The ret_from_exception must be called with interrupt disabled. Here is the
@@ -129,6 +220,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
#endif
bnez s0, 1f
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ call stackleak_erase_on_task_stack
+#endif
+
/* Save unwound kernel stack pointer in thread_info */
addi s0, sp, PT_SIZE_ON_STACK
REG_S s0, TASK_TI_KERNEL_SP(tp)
@@ -183,7 +278,9 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
#else
sret
#endif
+SYM_INNER_LABEL(ret_from_exception_end, SYM_L_GLOBAL)
SYM_CODE_END(ret_from_exception)
+ASM_NOKPROBE(ret_from_exception)
#ifdef CONFIG_VMAP_STACK
SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
@@ -219,19 +316,24 @@ SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
move a0, sp
tail handle_bad_stack
SYM_CODE_END(handle_kernel_stack_overflow)
+ASM_NOKPROBE(handle_kernel_stack_overflow)
#endif
-SYM_CODE_START(ret_from_fork)
+SYM_CODE_START(ret_from_fork_kernel_asm)
+ call schedule_tail
+ move a0, s1 /* fn_arg */
+ move a1, s0 /* fn */
+ move a2, sp /* pt_regs */
+ call ret_from_fork_kernel
+ j ret_from_exception
+SYM_CODE_END(ret_from_fork_kernel_asm)
+
+SYM_CODE_START(ret_from_fork_user_asm)
call schedule_tail
- beqz s0, 1f /* not from kernel thread */
- /* Call fn(arg) */
- move a0, s1
- jalr s0
-1:
move a0, sp /* pt_regs */
- la ra, ret_from_exception
- tail syscall_exit_to_user_mode
-SYM_CODE_END(ret_from_fork)
+ call ret_from_fork_user
+ j ret_from_exception
+SYM_CODE_END(ret_from_fork_user_asm)
#ifdef CONFIG_IRQ_STACKS
/*
@@ -299,9 +401,18 @@ SYM_FUNC_START(__switch_to)
REG_S s9, TASK_THREAD_S9_RA(a3)
REG_S s10, TASK_THREAD_S10_RA(a3)
REG_S s11, TASK_THREAD_S11_RA(a3)
+
+ /* save the user space access flag */
+ csrr s0, CSR_STATUS
+ REG_S s0, TASK_THREAD_SUM_RA(a3)
+
/* Save the kernel shadow call stack pointer */
scs_save_current
/* Restore context from next->thread */
+ REG_L s0, TASK_THREAD_SUM_RA(a4)
+ li s1, SR_SUM
+ and s0, s0, s1
+ csrs CSR_STATUS, s0
REG_L ra, TASK_THREAD_RA_RA(a4)
REG_L sp, TASK_THREAD_SP_RA(a4)
REG_L s0, TASK_THREAD_S0_RA(a4)
diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S
index 2c543f130f93..f74f6b60e347 100644
--- a/arch/riscv/kernel/fpu.S
+++ b/arch/riscv/kernel/fpu.S
@@ -170,7 +170,7 @@ SYM_FUNC_END(__fstate_restore)
__access_func(f31)
-#ifdef CONFIG_RISCV_MISALIGNED
+#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
/*
* Disable compressed instructions set to keep a constant offset between FP
@@ -211,7 +211,7 @@ SYM_FUNC_START(put_f64_reg)
SYM_FUNC_END(put_f64_reg)
/*
- * put_f64_reg - Get a 64 bits FP register value and returned it or store it to
+ * get_f64_reg - Get a 64-bit FP register value and return it, or store it to
* a pointer.
* a0 = FP register index to be retrieved
* a1 = If xlen == 32, pointer which should be loaded with the FP register value
@@ -224,4 +224,4 @@ SYM_FUNC_START(get_f64_reg)
fp_access_epilogue
SYM_FUNC_END(get_f64_reg)
-#endif /* CONFIG_RISCV_MISALIGNED */
+#endif /* CONFIG_RISCV_SCALAR_MISALIGNED */
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index f5aa24d9e1c1..4c6c24380cfd 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -8,98 +8,129 @@
#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/memory.h>
+#include <linux/irqflags.h>
+#include <linux/stop_machine.h>
#include <asm/cacheflush.h>
-#include <asm/patch.h>
+#include <asm/text-patching.h>
#ifdef CONFIG_DYNAMIC_FTRACE
-void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
+unsigned long ftrace_call_adjust(unsigned long addr)
{
- mutex_lock(&text_mutex);
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return addr + 8 + MCOUNT_AUIPC_SIZE;
- /*
- * The code sequences we use for ftrace can't be patched while the
- * kernel is running, so we need to use stop_machine() to modify them
- * for now. This doesn't play nice with text_mutex, we use this flag
- * to elide the check.
- */
- riscv_patch_in_stop_machine = true;
+ return addr + MCOUNT_AUIPC_SIZE;
}
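
With CALL_OPS, the adjusted rec->ip lands on the jalr of the patch site's auipc/jalr pair, past the 8-byte ftrace_ops literal that precedes the function entry. A sketch of that arithmetic; the 4-byte instruction size and 8-byte literal size are assumptions read off the MCOUNT_* names:

    #include <stdio.h>

    /* Assumed sizes: auipc and jalr are 4 bytes each; the CALL_OPS
     * ftrace_ops literal in front of the entry is 8 bytes. */
    #define MCOUNT_AUIPC_SIZE 4
    #define OPS_LITERAL_SIZE  8

    /* Mirrors ftrace_call_adjust(): rec->ip ends up on the jalr slot. */
    static unsigned long call_adjust(unsigned long addr, int call_ops)
    {
            if (call_ops)
                    return addr + OPS_LITERAL_SIZE + MCOUNT_AUIPC_SIZE;
            return addr + MCOUNT_AUIPC_SIZE;
    }

    int main(void)
    {
            unsigned long sym = 0x80002000UL; /* hypothetical function entry */

            printf("rec->ip without CALL_OPS: %#lx\n", call_adjust(sym, 0));
            printf("rec->ip with CALL_OPS:    %#lx\n", call_adjust(sym, 1));
            return 0;
    }
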
-void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
+unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
{
- riscv_patch_in_stop_machine = false;
+ return fentry_ip - MCOUNT_AUIPC_SIZE;
+}
+
+void arch_ftrace_update_code(int command)
+{
+ mutex_lock(&text_mutex);
+ command |= FTRACE_MAY_SLEEP;
+ ftrace_modify_all_code(command);
mutex_unlock(&text_mutex);
+ flush_icache_all();
}
-static int ftrace_check_current_call(unsigned long hook_pos,
- unsigned int *expected)
+static int __ftrace_modify_call(unsigned long source, unsigned long target, bool validate)
{
+ unsigned int call[2], offset;
unsigned int replaced[2];
- unsigned int nops[2] = {NOP4, NOP4};
- /* we expect nops at the hook position */
- if (!expected)
- expected = nops;
+ offset = target - source;
+ call[1] = to_jalr_t0(offset);
+
+ if (validate) {
+ call[0] = to_auipc_t0(offset);
+ /*
+ * Read the text we want to modify;
+ * return must be -EFAULT on read error
+ */
+ if (copy_from_kernel_nofault(replaced, (void *)source, 2 * MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ if (replaced[0] != call[0]) {
+ pr_err("%p: expected (%08x) but got (%08x)\n",
+ (void *)source, call[0], replaced[0]);
+ return -EINVAL;
+ }
+ }
- /*
- * Read the text we want to modify;
- * return must be -EFAULT on read error
- */
- if (copy_from_kernel_nofault(replaced, (void *)hook_pos,
- MCOUNT_INSN_SIZE))
- return -EFAULT;
+ /* Replace the jalr at once. Return -EPERM on write error. */
+ if (patch_insn_write((void *)(source + MCOUNT_AUIPC_SIZE), call + 1, MCOUNT_JALR_SIZE))
+ return -EPERM;
- /*
- * Make sure it is what we expect it to be;
- * return must be -EINVAL on failed comparison
- */
- if (memcmp(expected, replaced, sizeof(replaced))) {
- pr_err("%p: expected (%08x %08x) but got (%08x %08x)\n",
- (void *)hook_pos, expected[0], expected[1], replaced[0],
- replaced[1]);
- return -EINVAL;
+ return 0;
+}
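
The to_auipc_t0()/to_jalr_t0() pair used above must split the PC-relative offset so that the auipc's upper 20 bits plus the jalr's sign-extended lower 12 bits recombine exactly. A standalone sketch of that split (the helper name is illustrative; the 0x800 rounding compensates for jalr's sign extension):

    #include <stdio.h>
    #include <stdint.h>

    /* Split a signed PC-relative offset into auipc hi20 / jalr lo12 parts,
     * as a to_auipc_t0()/to_jalr_t0() pair must (illustrative sketch). */
    static void split_offset(int32_t offset, int32_t *hi20, int32_t *lo12)
    {
            /* Adding 0x800 rounds hi20 up whenever lo12 would be negative,
             * since jalr sign-extends its 12-bit immediate. */
            *hi20 = (int32_t)((offset + 0x800) & 0xfffff000);
            *lo12 = offset - *hi20;
    }

    int main(void)
    {
            int32_t offsets[] = { 0x12345, 0xfff, -0x2345 };

            for (int i = 0; i < 3; i++) {
                    int32_t hi, lo;

                    split_offset(offsets[i], &hi, &lo);
                    printf("offset %d -> hi20 %#x, lo12 %d, recombined %s\n",
                           offsets[i], (unsigned int)hi, lo,
                           hi + lo == offsets[i] ? "ok" : "BUG");
            }
            return 0;
    }
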
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+static const struct ftrace_ops *riscv64_rec_get_ops(struct dyn_ftrace *rec)
+{
+ const struct ftrace_ops *ops = NULL;
+
+ if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+ ops = ftrace_find_unique_ops(rec);
+ WARN_ON_ONCE(!ops);
}
- return 0;
+ if (!ops)
+ ops = &ftrace_list_ops;
+
+ return ops;
}
-static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
- bool enable, bool ra)
+static int ftrace_rec_set_ops(const struct dyn_ftrace *rec, const struct ftrace_ops *ops)
{
- unsigned int call[2];
- unsigned int nops[2] = {NOP4, NOP4};
+ unsigned long literal = ALIGN_DOWN(rec->ip - 12, 8);
- if (ra)
- make_call_ra(hook_pos, target, call);
- else
- make_call_t0(hook_pos, target, call);
+ return patch_text_nosync((void *)literal, &ops, sizeof(ops));
+}
- /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
- if (patch_text_nosync
- ((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE))
- return -EPERM;
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, &ftrace_nop_ops);
+}
- return 0;
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, riscv64_rec_get_ops(rec));
}
+#else
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; }
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; }
+#endif
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned int call[2];
+ unsigned long distance, orig_addr, pc = rec->ip - MCOUNT_AUIPC_SIZE;
+ int ret;
- make_call_t0(rec->ip, addr, call);
+ ret = ftrace_rec_update_ops(rec);
+ if (ret)
+ return ret;
- if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE))
- return -EPERM;
+ orig_addr = (unsigned long)&ftrace_caller;
+ distance = addr > orig_addr ? addr - orig_addr : orig_addr - addr;
+ if (distance > JALR_RANGE)
+ addr = FTRACE_ADDR;
- return 0;
+ return __ftrace_modify_call(pc, addr, false);
}
-int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
- unsigned long addr)
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
- unsigned int nops[2] = {NOP4, NOP4};
+ u32 nop4 = RISCV_INSN_NOP4;
+ int ret;
+
+ ret = ftrace_rec_set_nop_ops(rec);
+ if (ret)
+ return ret;
- if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
+ if (patch_insn_write((void *)rec->ip, &nop4, MCOUNT_NOP4_SIZE))
return -EPERM;
return 0;
@@ -114,43 +145,71 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
*/
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
- int out;
+ unsigned long pc = rec->ip - MCOUNT_AUIPC_SIZE;
+ unsigned int nops[2], offset;
+ int ret;
+
+ ret = ftrace_rec_set_nop_ops(rec);
+ if (ret)
+ return ret;
+
+ offset = (unsigned long) &ftrace_caller - pc;
+ nops[0] = to_auipc_t0(offset);
+ nops[1] = RISCV_INSN_NOP4;
mutex_lock(&text_mutex);
- out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
+ ret = patch_insn_write((void *)pc, nops, 2 * MCOUNT_INSN_SIZE);
mutex_unlock(&text_mutex);
- return out;
+ return ret;
}
+ftrace_func_t ftrace_call_dest = ftrace_stub;
int ftrace_update_ftrace_func(ftrace_func_t func)
{
- int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
- (unsigned long)func, true, true);
- if (!ret) {
- ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call,
- (unsigned long)func, true, true);
- }
+ /*
+ * When using CALL_OPS, the function to call is associated with the
+ * call site, and we don't have a global function pointer to update.
+ */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return 0;
- return ret;
+ WRITE_ONCE(ftrace_call_dest, func);
+ /*
+ * The data fence ensures that the update to ftrace_call_dest happens
+ * before the write to function_trace_op later in the generic ftrace code.
+ * If that ordering is not enforced, an old ftrace_call_dest may race with
+ * loading a new function_trace_op set in ftrace_modify_all_code().
+ */
+ smp_wmb();
+ /*
+ * Updating ftrace does not take the stop_machine() path, so irqs should
+ * not be disabled.
+ */
+ WARN_ON(irqs_disabled());
+ smp_call_function(ftrace_sync_ipi, NULL, 1);
+ return 0;
}
-#endif
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#else /* CONFIG_DYNAMIC_FTRACE */
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
- unsigned int call[2];
- unsigned long caller = rec->ip;
+ unsigned long caller = rec->ip - MCOUNT_AUIPC_SIZE;
int ret;
- make_call_t0(caller, old_addr, call);
- ret = ftrace_check_current_call(caller, call);
-
+ ret = ftrace_rec_update_ops(rec);
if (ret)
return ret;
- return __ftrace_modify_call(caller, addr, true, false);
+ return __ftrace_modify_call(caller, FTRACE_ADDR, true);
}
#endif
@@ -178,28 +237,25 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
}
#ifdef CONFIG_DYNAMIC_FTRACE
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- struct pt_regs *regs = arch_ftrace_get_regs(fregs);
- unsigned long *parent = (unsigned long *)&regs->ra;
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+ unsigned long frame_pointer = arch_ftrace_regs(fregs)->s0;
+ unsigned long *parent = &arch_ftrace_regs(fregs)->ra;
+ unsigned long old;
- prepare_ftrace_return(parent, ip, frame_pointer(regs));
-}
-#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
-extern void ftrace_graph_call(void);
-int ftrace_enable_ftrace_graph_caller(void)
-{
- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
- (unsigned long)&prepare_ftrace_return, true, true);
-}
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
-int ftrace_disable_ftrace_graph_caller(void)
-{
- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
- (unsigned long)&prepare_ftrace_return, false, true);
+ /*
+ * We don't suffer access faults, so no extra fault-recovery assembly
+ * is needed here.
+ */
+ old = *parent;
+
+ if (!function_graph_enter_regs(old, ip, frame_pointer, parent, fregs))
+ *parent = return_hooker;
}
-#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 4236a69c35cb..bdf3352acf4c 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -131,6 +131,12 @@ secondary_start_sbi:
csrw CSR_IE, zero
csrw CSR_IP, zero
+#ifndef CONFIG_RISCV_M_MODE
+ /* Enable time CSR */
+ li t0, 0x2
+ csrw CSR_SCOUNTEREN, t0
+#endif
+
/* Load the global pointer */
load_global_pointer
@@ -165,10 +171,21 @@ secondary_start_sbi:
#endif
call .Lsetup_trap_vector
scs_load_current
- tail smp_callin
+ call smp_callin
#endif /* CONFIG_SMP */
.align 2
+.Lsecondary_park:
+ /*
+ * Park this hart if we:
+ * - have too many harts on CONFIG_RISCV_BOOT_SPINWAIT
+ * - receive an early trap, before setup_trap_vector has finished
+ * - fail in smp_callin(), as a successful one wouldn't return
+ */
+ wfi
+ j .Lsecondary_park
+
+.align 2
.Lsetup_trap_vector:
/* Set trap vector to exception handler */
la a0, handle_exception
@@ -181,12 +198,6 @@ secondary_start_sbi:
csrw CSR_SCRATCH, zero
ret
-.align 2
-.Lsecondary_park:
- /* We lack SMP support or have too many harts, so park this hart */
- wfi
- j .Lsecondary_park
-
SYM_CODE_END(_start)
SYM_CODE_START(_start_kernel)
@@ -221,6 +232,10 @@ SYM_CODE_START(_start_kernel)
* to hand it to us.
*/
csrr a0, CSR_MHARTID
+#else
+ /* Enable time CSR */
+ li t0, 0x2
+ csrw CSR_SCOUNTEREN, t0
#endif /* CONFIG_RISCV_M_MODE */
/* Load the global pointer */
@@ -300,6 +315,9 @@ SYM_CODE_START(_start_kernel)
#else
mv a0, a1
#endif /* CONFIG_BUILTIN_DTB */
+ /* Set trap vector to spin forever to help debug */
+ la a3, .Lsecondary_park
+ csrw CSR_TVEC, a3
call setup_vm
#ifdef CONFIG_MMU
la a0, early_pg_dir
diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c
index e6694759dbd0..b4c1a6a3fbd2 100644
--- a/arch/riscv/kernel/jump_label.c
+++ b/arch/riscv/kernel/jump_label.c
@@ -9,13 +9,14 @@
#include <linux/memory.h>
#include <linux/mutex.h>
#include <asm/bug.h>
-#include <asm/patch.h>
+#include <asm/cacheflush.h>
+#include <asm/text-patching.h>
+#include <asm/insn-def.h>
-#define RISCV_INSN_NOP 0x00000013U
#define RISCV_INSN_JAL 0x0000006fU
-void arch_jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type)
+bool arch_jump_label_transform_queue(struct jump_entry *entry,
+ enum jump_label_type type)
{
void *addr = (void *)jump_entry_code(entry);
u32 insn;
@@ -24,7 +25,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
long offset = jump_entry_target(entry) - jump_entry_code(entry);
if (WARN_ON(offset & 1 || offset < -524288 || offset >= 524288))
- return;
+ return true;
insn = RISCV_INSN_JAL |
(((u32)offset & GENMASK(19, 12)) << (12 - 12)) |
@@ -32,10 +33,23 @@ void arch_jump_label_transform(struct jump_entry *entry,
(((u32)offset & GENMASK(10, 1)) << (21 - 1)) |
(((u32)offset & GENMASK(20, 20)) << (31 - 20));
} else {
- insn = RISCV_INSN_NOP;
+ insn = RISCV_INSN_NOP4;
}
- mutex_lock(&text_mutex);
- patch_text_nosync(addr, &insn, sizeof(insn));
- mutex_unlock(&text_mutex);
+ if (early_boot_irqs_disabled) {
+ riscv_patch_in_stop_machine = 1;
+ patch_insn_write(addr, &insn, sizeof(insn));
+ riscv_patch_in_stop_machine = 0;
+ } else {
+ mutex_lock(&text_mutex);
+ patch_insn_write(addr, &insn, sizeof(insn));
+ mutex_unlock(&text_mutex);
+ }
+
+ return true;
+}
+
+void arch_jump_label_transform_apply(void)
+{
+ flush_icache_all();
}
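
The GENMASK packing above scatters the branch offset into the J-type immediate fields of a jal instruction (imm[20|10:1|11|19:12] in bits 31:12). The same packing with explicit masks, as a standalone sketch checked against a couple of known encodings:

    #include <stdio.h>
    #include <stdint.h>

    #define RISCV_INSN_JAL 0x0000006fU

    /* Pack a +/-1 MiB offset into a J-type jal (rd = x0), mirroring the
     * GENMASK packing above (sketch for illustration). */
    static uint32_t encode_jal(int32_t offset)
    {
            uint32_t off = (uint32_t)offset;

            return RISCV_INSN_JAL |
                   (off & 0x000ff000)              | /* imm[19:12] -> bits 19:12 */
                   ((off & 0x00000800) << (20 - 11)) | /* imm[11]    -> bit  20    */
                   ((off & 0x000007fe) << (21 - 1))  | /* imm[10:1]  -> bits 30:21 */
                   ((off & 0x00100000) << (31 - 20));  /* imm[20]    -> bit  31    */
    }

    int main(void)
    {
            /* "jal x0, +8" should encode as 0x0080006f,
             * "jal x0, -4" as 0xffdff06f. */
            printf("jal x0, +8 -> %#010x\n", encode_jal(8));
            printf("jal x0, -4 -> %#010x\n", encode_jal(-4));
            return 0;
    }
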
diff --git a/arch/riscv/kernel/kernel_mode_fpu.c b/arch/riscv/kernel/kernel_mode_fpu.c
new file mode 100644
index 000000000000..0ac8348876c4
--- /dev/null
+++ b/arch/riscv/kernel/kernel_mode_fpu.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#include <linux/export.h>
+#include <linux/preempt.h>
+
+#include <asm/csr.h>
+#include <asm/fpu.h>
+#include <asm/processor.h>
+#include <asm/switch_to.h>
+
+void kernel_fpu_begin(void)
+{
+ preempt_disable();
+ fstate_save(current, task_pt_regs(current));
+ csr_set(CSR_SSTATUS, SR_FS);
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+ csr_clear(CSR_SSTATUS, SR_FS);
+ fstate_restore(current, task_pt_regs(current));
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
index 6afe80c7f03a..99972a48e86b 100644
--- a/arch/riscv/kernel/kernel_mode_vector.c
+++ b/arch/riscv/kernel/kernel_mode_vector.c
@@ -143,7 +143,7 @@ static int riscv_v_start_kernel_context(bool *is_nested)
/* Transfer the ownership of V from user to kernel, then save */
riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY);
- if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) {
+ if (__riscv_v_vstate_check(task_pt_regs(current)->status, DIRTY)) {
uvstate = &current->thread.vstate;
__riscv_v_vstate_save(uvstate, uvstate->datap);
}
@@ -160,7 +160,7 @@ asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs)
return;
depth = riscv_v_ctx_get_depth();
- if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY)
+ if (depth == 0 && __riscv_v_vstate_check(regs->status, DIRTY))
riscv_preempt_v_set_dirty();
riscv_v_ctx_depth_inc();
@@ -208,7 +208,7 @@ void kernel_vector_begin(void)
{
bool nested = false;
- if (WARN_ON(!has_vector()))
+ if (WARN_ON(!(has_vector() || has_xtheadvector())))
return;
BUG_ON(!may_use_simd());
@@ -236,7 +236,7 @@ EXPORT_SYMBOL_GPL(kernel_vector_begin);
*/
void kernel_vector_end(void)
{
- if (WARN_ON(!has_vector()))
+ if (WARN_ON(!(has_vector() || has_xtheadvector())))
return;
riscv_v_disable();
diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c
new file mode 100644
index 000000000000..f4755d49b89e
--- /dev/null
+++ b/arch/riscv/kernel/kexec_elf.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2021 Huawei Technologies Co, Ltd.
+ *
+ * Author: Liao Chang (liaochang1@huawei.com)
+ *
+ * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
+ * for kernel.
+ */
+
+#define pr_fmt(fmt) "kexec_image: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/libfdt.h>
+#include <linux/types.h>
+#include <linux/memblock.h>
+#include <asm/setup.h>
+
+static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
+ struct kexec_elf_info *elf_info, unsigned long old_pbase,
+ unsigned long new_pbase)
+{
+ int i;
+ int ret = 0;
+ size_t size;
+ struct kexec_buf kbuf;
+ const struct elf_phdr *phdr;
+
+ kbuf.image = image;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ size = phdr->p_filesz;
+ if (size > phdr->p_memsz)
+ size = phdr->p_memsz;
+
+ kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
+ kbuf.bufsz = size;
+ kbuf.buf_align = phdr->p_align;
+ kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
+ kbuf.memsz = phdr->p_memsz;
+ kbuf.top_down = false;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Go through the available physical memory regions and find one that can hold
+ * an image of the specified size.
+ */
+static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
+ struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
+ unsigned long *old_pbase, unsigned long *new_pbase)
+{
+ int i;
+ int ret;
+ struct kexec_buf kbuf;
+ const struct elf_phdr *phdr;
+ unsigned long lowest_paddr = ULONG_MAX;
+ unsigned long lowest_vaddr = ULONG_MAX;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ if (lowest_paddr > phdr->p_paddr)
+ lowest_paddr = phdr->p_paddr;
+
+ if (lowest_vaddr > phdr->p_vaddr)
+ lowest_vaddr = phdr->p_vaddr;
+ }
+
+ kbuf.image = image;
+ kbuf.buf_min = lowest_paddr;
+ kbuf.buf_max = ULONG_MAX;
+
+ /*
+ * Current riscv boot protocol requires 2MB alignment for
+ * RV64 and 4MB alignment for RV32.
+ */
+ kbuf.buf_align = PMD_SIZE;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
+ kbuf.top_down = false;
+ ret = arch_kexec_locate_mem_hole(&kbuf);
+ if (!ret) {
+ *old_pbase = lowest_paddr;
+ *new_pbase = kbuf.mem;
+ image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
+ }
+ return ret;
+}
+
+static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ unsigned long old_kernel_pbase = ULONG_MAX;
+ unsigned long new_kernel_pbase = 0UL;
+ struct elfhdr ehdr;
+ struct kexec_elf_info elf_info;
+
+ ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ if (ret)
+ return ERR_PTR(ret);
+
+ ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
+ &old_kernel_pbase, &new_kernel_pbase);
+ if (ret)
+ goto out;
+
+ /* Add the kernel binary to the image */
+ ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
+ old_kernel_pbase, new_kernel_pbase);
+ if (ret)
+ goto out;
+
+ ret = load_extra_segments(image, image->start, kernel_len,
+ initrd, initrd_len, cmdline, cmdline_len);
+out:
+ kexec_free_elf_info(&elf_info);
+ return ret ? ERR_PTR(ret) : NULL;
+}
+
+const struct kexec_file_ops elf_kexec_ops = {
+ .probe = kexec_elf_probe,
+ .load = elf_kexec_load,
+};
diff --git a/arch/riscv/kernel/kexec_image.c b/arch/riscv/kernel/kexec_image.c
new file mode 100644
index 000000000000..26a81774a78a
--- /dev/null
+++ b/arch/riscv/kernel/kexec_image.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V Kexec image loader
+ *
+ */
+
+#define pr_fmt(fmt) "kexec_file(Image): " fmt
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/pe.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+#include <asm/image.h>
+
+static int image_probe(const char *kernel_buf, unsigned long kernel_len)
+{
+ const struct riscv_image_header *h = (const struct riscv_image_header *)kernel_buf;
+
+ if (!h || kernel_len < sizeof(*h))
+ return -EINVAL;
+
+ /*
+ * According to Documentation/riscv/boot-image-header.rst, use the
+ * "magic2" field for the check when version >= 0.2.
+ */
+
+ if (h->version >= RISCV_HEADER_VERSION &&
+ memcmp(&h->magic2, RISCV_IMAGE_MAGIC2, sizeof(h->magic2)))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void *image_load(struct kimage *image,
+ char *kernel, unsigned long kernel_len,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline, unsigned long cmdline_len)
+{
+ struct riscv_image_header *h;
+ u64 flags;
+ bool be_image, be_kernel;
+ struct kexec_buf kbuf;
+ int ret;
+
+ /* Check Image header */
+ h = (struct riscv_image_header *)kernel;
+ if (!h->image_size) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Check endianness */
+ flags = le64_to_cpu(h->flags);
+ be_image = riscv_image_flag_field(flags, RISCV_IMAGE_FLAG_BE);
+ be_kernel = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+ if (be_image != be_kernel) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Load the kernel image */
+ kbuf.image = image;
+ kbuf.buf_min = 0;
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.top_down = false;
+
+ kbuf.buffer = kernel;
+ kbuf.bufsz = kernel_len;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = le64_to_cpu(h->image_size);
+ kbuf.buf_align = le64_to_cpu(h->text_offset);
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ pr_err("Error add kernel image ret=%d\n", ret);
+ goto out;
+ }
+
+ image->start = kbuf.mem;
+
+ pr_info("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kbuf.mem, kbuf.bufsz, kbuf.memsz);
+
+ ret = load_extra_segments(image, kbuf.mem, kbuf.memsz,
+ initrd, initrd_len, cmdline, cmdline_len);
+
+out:
+ return ret ? ERR_PTR(ret) : NULL;
+}
+
+const struct kexec_file_ops image_kexec_ops = {
+ .probe = image_probe,
+ .load = image_load,
+};
diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c
index 2e0266ae6bd7..9f3db3503dab 100644
--- a/arch/riscv/kernel/kgdb.c
+++ b/arch/riscv/kernel/kgdb.c
@@ -254,6 +254,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
regs->epc = pc;
}
+noinline void arch_kgdb_breakpoint(void)
+{
+ asm(".global kgdb_compiled_break\n"
+ "kgdb_compiled_break: ebreak\n");
+}
+
void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer,
char *remcom_out_buffer)
{
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index ed9cad20c039..2306ce3e5f22 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -114,37 +114,6 @@ void machine_shutdown(void)
#endif
}
-static void machine_kexec_mask_interrupts(void)
-{
- unsigned int i;
- struct irq_desc *desc;
-
- for_each_irq_desc(i, desc) {
- struct irq_chip *chip;
- int ret;
-
- chip = irq_desc_get_chip(desc);
- if (!chip)
- continue;
-
- /*
- * First try to remove the active state. If this
- * fails, try to EOI the interrupt.
- */
- ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
-
- if (ret && irqd_irq_inprogress(&desc->irq_data) &&
- chip->irq_eoi)
- chip->irq_eoi(&desc->irq_data);
-
- if (chip->irq_mask)
- chip->irq_mask(&desc->irq_data);
-
- if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
- chip->irq_disable(&desc->irq_data);
- }
-}
-
/*
* machine_crash_shutdown - Prepare to kexec after a kernel crash
*
diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c
index b0bf8c1722c0..e36104af2e24 100644
--- a/arch/riscv/kernel/machine_kexec_file.c
+++ b/arch/riscv/kernel/machine_kexec_file.c
@@ -7,8 +7,369 @@
* Author: Liao Chang (liaochang1@huawei.com)
*/
#include <linux/kexec.h>
+#include <linux/elf.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/libfdt.h>
+#include <linux/types.h>
+#include <linux/memblock.h>
+#include <linux/vmalloc.h>
+#include <asm/setup.h>
const struct kexec_file_ops * const kexec_file_loaders[] = {
&elf_kexec_ops,
+ &image_kexec_ops,
NULL
};
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ kvfree(image->arch.fdt);
+ image->arch.fdt = NULL;
+
+ vfree(image->elf_headers);
+ image->elf_headers = NULL;
+ image->elf_headers_sz = 0;
+
+ return kexec_image_post_load_cleanup_default(image);
+}
+
+#ifdef CONFIG_CRASH_DUMP
+static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
+{
+ unsigned int *nr_ranges = arg;
+
+ (*nr_ranges)++;
+ return 0;
+}
+
+static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
+{
+ struct crash_mem *cmem = arg;
+
+ cmem->ranges[cmem->nr_ranges].start = res->start;
+ cmem->ranges[cmem->nr_ranges].end = res->end;
+ cmem->nr_ranges++;
+
+ return 0;
+}
+
+static int prepare_elf_headers(void **addr, unsigned long *sz)
+{
+ struct crash_mem *cmem;
+ unsigned int nr_ranges;
+ int ret;
+
+ nr_ranges = 1; /* For exclusion of crashkernel region */
+ walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
+
+ cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
+ if (!cmem)
+ return -ENOMEM;
+
+ cmem->max_nr_ranges = nr_ranges;
+ cmem->nr_ranges = 0;
+ ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
+ if (ret)
+ goto out;
+
+ /* Exclude crashkernel region */
+ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+ if (!ret)
+ ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
+
+out:
+ kfree(cmem);
+ return ret;
+}
+
+static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int elfcorehdr_strlen;
+ char *cmdline_ptr;
+
+ cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
+ if (!cmdline_ptr)
+ return NULL;
+
+ elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
+ image->elf_load_addr);
+
+ if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
+ pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
+ kfree(cmdline_ptr);
+ return NULL;
+ }
+
+ memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
+ /* Ensure it's nul terminated */
+ cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
+ return cmdline_ptr;
+}
+#endif
+
+#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
+#define RISCV_IMM_BITS 12
+#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)
+#define RISCV_CONST_HIGH_PART(x) \
+ (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
+#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x))
+
+#define ENCODE_ITYPE_IMM(x) \
+ (RV_X(x, 0, 12) << 20)
+#define ENCODE_BTYPE_IMM(x) \
+ ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \
+ (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31))
+#define ENCODE_UTYPE_IMM(x) \
+ (RV_X(x, 12, 20) << 12)
+#define ENCODE_JTYPE_IMM(x) \
+ ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \
+ (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31))
+#define ENCODE_CBTYPE_IMM(x) \
+ ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \
+ (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12))
+#define ENCODE_CJTYPE_IMM(x) \
+ ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \
+ (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \
+ (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12))
+#define ENCODE_UJTYPE_IMM(x) \
+ (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \
+ (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32))
+#define ENCODE_UITYPE_IMM(x) \
+ (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32))
+
+#define CLEAN_IMM(type, x) \
+ ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x))
+
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section,
+ const Elf_Shdr *relsec,
+ const Elf_Shdr *symtab)
+{
+ const char *strtab, *name, *shstrtab;
+ const Elf_Shdr *sechdrs;
+ Elf64_Rela *relas;
+ int i, r_type;
+
+ /* String & section header string table */
+ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
+ strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
+ shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
+
+ relas = (void *)pi->ehdr + relsec->sh_offset;
+
+ for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
+ const Elf_Sym *sym; /* symbol to relocate */
+ unsigned long addr; /* final location after relocation */
+ unsigned long val; /* relocated symbol value */
+ unsigned long sec_base; /* section base of the symbol */
+ void *loc; /* tmp location to modify */
+
+ sym = (void *)pi->ehdr + symtab->sh_offset;
+ sym += ELF64_R_SYM(relas[i].r_info);
+
+ if (sym->st_name)
+ name = strtab + sym->st_name;
+ else
+ name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+ loc = pi->purgatory_buf;
+ loc += section->sh_offset;
+ loc += relas[i].r_offset;
+
+ if (sym->st_shndx == SHN_ABS)
+ sec_base = 0;
+ else if (sym->st_shndx >= pi->ehdr->e_shnum) {
+ pr_err("Invalid section %d for symbol %s\n",
+ sym->st_shndx, name);
+ return -ENOEXEC;
+ } else
+ sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
+
+ val = sym->st_value;
+ val += sec_base;
+ val += relas[i].r_addend;
+
+ addr = section->sh_addr + relas[i].r_offset;
+
+ r_type = ELF64_R_TYPE(relas[i].r_info);
+
+ switch (r_type) {
+ case R_RISCV_BRANCH:
+ *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
+ ENCODE_BTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_JAL:
+ *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
+ ENCODE_JTYPE_IMM(val - addr);
+ break;
+ /*
+ * There is no R_RISCV_PCREL_LO12_S, and an R_RISCV_PCREL_LO12_I
+ * sym is expected to sit next to its R_RISCV_PCREL_HI20 in the
+ * purgatory relsec. Handle it like an R_RISCV_CALL sym instead
+ * of searching the whole relsec.
+ */
+ case R_RISCV_PCREL_HI20:
+ case R_RISCV_CALL_PLT:
+ case R_RISCV_CALL:
+ *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
+ ENCODE_UJTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_RVC_BRANCH:
+ *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
+ ENCODE_CBTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_RVC_JUMP:
+ *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
+ ENCODE_CJTYPE_IMM(val - addr);
+ break;
+ case R_RISCV_ADD16:
+ *(u16 *)loc += val;
+ break;
+ case R_RISCV_SUB16:
+ *(u16 *)loc -= val;
+ break;
+ case R_RISCV_ADD32:
+ *(u32 *)loc += val;
+ break;
+ case R_RISCV_SUB32:
+ *(u32 *)loc -= val;
+ break;
+ /* It has been applied by R_RISCV_PCREL_HI20 sym */
+ case R_RISCV_PCREL_LO12_I:
+ case R_RISCV_ALIGN:
+ case R_RISCV_RELAX:
+ break;
+ case R_RISCV_64:
+ *(u64 *)loc = val;
+ break;
+ default:
+ pr_err("Unknown rela relocation: %d\n", r_type);
+ return -ENOEXEC;
+ }
+ }
+ return 0;
+}
+
+int load_extra_segments(struct kimage *image, unsigned long kernel_start,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ void *fdt;
+ unsigned long initrd_pbase = 0UL;
+ struct kexec_buf kbuf;
+ char *modified_cmdline = NULL;
+
+ kbuf.image = image;
+ kbuf.buf_min = kernel_start + kernel_len;
+ kbuf.buf_max = ULONG_MAX;
+
+#ifdef CONFIG_CRASH_DUMP
+ /* Add elfcorehdr */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ void *headers;
+ unsigned long headers_sz;
+ ret = prepare_elf_headers(&headers, &headers_sz);
+ if (ret) {
+ pr_err("Preparing elf core header failed\n");
+ goto out;
+ }
+
+ kbuf.buffer = headers;
+ kbuf.bufsz = headers_sz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = headers_sz;
+ kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+ kbuf.top_down = true;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ vfree(headers);
+ goto out;
+ }
+ image->elf_headers = headers;
+ image->elf_load_addr = kbuf.mem;
+ image->elf_headers_sz = headers_sz;
+
+ kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
+
+ /* Setup cmdline for kdump kernel case */
+ modified_cmdline = setup_kdump_cmdline(image, cmdline,
+ cmdline_len);
+ if (!modified_cmdline) {
+ pr_err("Setting up cmdline for kdump kernel failed\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ cmdline = modified_cmdline;
+ }
+#endif
+
+#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
+ /* Add purgatory to the image */
+ kbuf.top_down = true;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_load_purgatory(image, &kbuf);
+ if (ret) {
+ pr_err("Error loading purgatory ret=%d\n", ret);
+ goto out;
+ }
+ kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem);
+
+ ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry",
+ &kernel_start,
+ sizeof(kernel_start), 0);
+ if (ret)
+ pr_err("Error update purgatory ret=%d\n", ret);
+#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */
+
+ /* Add the initrd to the image */
+ if (initrd != NULL) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = true;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ initrd_pbase = kbuf.mem;
+ kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase);
+ }
+
+ /* Add the DTB to the image */
+ fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
+ initrd_len, cmdline, 0);
+ if (!fdt) {
+ pr_err("Error setting up the new device tree.\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ fdt_pack(fdt);
+ kbuf.buffer = fdt;
+ kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.top_down = true;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ pr_err("Error add DTB kbuf ret=%d\n", ret);
+ goto out_free_fdt;
+ }
+ /* Cache the fdt buffer address for memory cleanup */
+ image->arch.fdt = fdt;
+ kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem);
+ goto out;
+
+out_free_fdt:
+ kvfree(fdt);
+out:
+ kfree(modified_cmdline);
+ return ret;
+}
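As a sanity check on the immediate-encoding macros above, here is a worked B-type example (illustrative arithmetic only):

/* ENCODE_BTYPE_IMM() for a +8 byte branch offset. B-type scatters
 * imm[12|10:5] into instruction bits 31:25 and imm[4:1|11] into
 * bits 11:7.
 *
 *   imm           = 8 (0b1000)
 *   RV_X(8, 1, 4) = 0b0100 -> shifted to bits 11:8 = 0x400
 *   all other fields are zero, so ENCODE_BTYPE_IMM(8) == 0x400
 *
 * OR-ing that into a branch whose immediate was cleared with
 * CLEAN_IMM(BTYPE, insn) retargets the branch to PC + 8, which is
 * exactly what the R_RISCV_BRANCH case does with val - addr.
 */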
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
index b7561288e8da..48f6c4f7dca0 100644
--- a/arch/riscv/kernel/mcount-dyn.S
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -13,7 +13,6 @@
.text
-#define FENTRY_RA_OFFSET 8
#define ABI_SIZE_ON_STACK 80
#define ABI_A0 0
#define ABI_A1 8
@@ -56,204 +55,153 @@
addi sp, sp, ABI_SIZE_ON_STACK
.endm
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-
/**
-* SAVE_ABI_REGS - save regs against the pt_regs struct
-*
-* @all: tell if saving all the regs
-*
-* If all is set, all the regs will be saved, otherwise only ABI
-* related regs (a0-a7,epc,ra and optional s0) will be saved.
+* SAVE_ABI_REGS - save regs against the ftrace_regs struct
*
* After the stack is established,
*
* 0(sp) stores the PC of the traced function which can be accessed
-* by &(fregs)->regs->epc in tracing function. Note that the real
-* function entry address should be computed with -FENTRY_RA_OFFSET.
+* by &(fregs)->epc in tracing function.
*
* 8(sp) stores the function return address (i.e. parent IP) that
-* can be accessed by &(fregs)->regs->ra in tracing function.
+* can be accessed by &(fregs)->ra in tracing function.
*
* The other regs are saved at their respective locations and accessed
-* by the respective pt_regs member.
+* by the respective ftrace_regs member.
*
* Here is the layout of stack for your reference.
*
* PT_SIZE_ON_STACK -> +++++++++
* + ..... +
-* + t3-t6 +
-* + s2-s11+
* + a0-a7 + --++++-> ftrace_caller saved
-* + s1 + +
-* + s0 + --+
-* + t0-t2 + +
-* + tp + +
-* + gp + +
+* + t1 + --++++-> direct tramp address
+* + s0 + --+ // frame pointer
* + sp + +
* + ra + --+ // parent IP
* sp -> + epc + --+ // PC
* +++++++++
**/
- .macro SAVE_ABI_REGS, all=0
- addi sp, sp, -PT_SIZE_ON_STACK
-
- REG_S t0, PT_EPC(sp)
- REG_S x1, PT_RA(sp)
-
- // save the ABI regs
-
- REG_S x10, PT_A0(sp)
- REG_S x11, PT_A1(sp)
- REG_S x12, PT_A2(sp)
- REG_S x13, PT_A3(sp)
- REG_S x14, PT_A4(sp)
- REG_S x15, PT_A5(sp)
- REG_S x16, PT_A6(sp)
- REG_S x17, PT_A7(sp)
-
- // save the leftover regs
-
- .if \all == 1
- REG_S x2, PT_SP(sp)
- REG_S x3, PT_GP(sp)
- REG_S x4, PT_TP(sp)
- REG_S x5, PT_T0(sp)
- REG_S x6, PT_T1(sp)
- REG_S x7, PT_T2(sp)
- REG_S x8, PT_S0(sp)
- REG_S x9, PT_S1(sp)
- REG_S x18, PT_S2(sp)
- REG_S x19, PT_S3(sp)
- REG_S x20, PT_S4(sp)
- REG_S x21, PT_S5(sp)
- REG_S x22, PT_S6(sp)
- REG_S x23, PT_S7(sp)
- REG_S x24, PT_S8(sp)
- REG_S x25, PT_S9(sp)
- REG_S x26, PT_S10(sp)
- REG_S x27, PT_S11(sp)
- REG_S x28, PT_T3(sp)
- REG_S x29, PT_T4(sp)
- REG_S x30, PT_T5(sp)
- REG_S x31, PT_T6(sp)
-
- // save s0 if FP_TEST defined
-
- .else
+ .macro SAVE_ABI_REGS
+ addi sp, sp, -FREGS_SIZE_ON_STACK
+ REG_S t0, FREGS_EPC(sp)
+ REG_S x1, FREGS_RA(sp)
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- REG_S x8, PT_S0(sp)
+ REG_S x8, FREGS_S0(sp)
#endif
- .endif
+ REG_S x6, FREGS_T1(sp)
+#ifdef CONFIG_CC_IS_CLANG
+ REG_S x7, FREGS_T2(sp)
+ REG_S x28, FREGS_T3(sp)
+ REG_S x29, FREGS_T4(sp)
+ REG_S x30, FREGS_T5(sp)
+ REG_S x31, FREGS_T6(sp)
+#endif
+ // save the arguments
+ REG_S x10, FREGS_A0(sp)
+ REG_S x11, FREGS_A1(sp)
+ REG_S x12, FREGS_A2(sp)
+ REG_S x13, FREGS_A3(sp)
+ REG_S x14, FREGS_A4(sp)
+ REG_S x15, FREGS_A5(sp)
+ REG_S x16, FREGS_A6(sp)
+ REG_S x17, FREGS_A7(sp)
+ mv a0, sp
+ addi a0, a0, FREGS_SIZE_ON_STACK
+ REG_S a0, FREGS_SP(sp) // Put original SP on stack
.endm
- .macro RESTORE_ABI_REGS, all=0
- REG_L t0, PT_EPC(sp)
- REG_L x1, PT_RA(sp)
- REG_L x10, PT_A0(sp)
- REG_L x11, PT_A1(sp)
- REG_L x12, PT_A2(sp)
- REG_L x13, PT_A3(sp)
- REG_L x14, PT_A4(sp)
- REG_L x15, PT_A5(sp)
- REG_L x16, PT_A6(sp)
- REG_L x17, PT_A7(sp)
-
- .if \all == 1
- REG_L x2, PT_SP(sp)
- REG_L x3, PT_GP(sp)
- REG_L x4, PT_TP(sp)
- REG_L x6, PT_T1(sp)
- REG_L x7, PT_T2(sp)
- REG_L x8, PT_S0(sp)
- REG_L x9, PT_S1(sp)
- REG_L x18, PT_S2(sp)
- REG_L x19, PT_S3(sp)
- REG_L x20, PT_S4(sp)
- REG_L x21, PT_S5(sp)
- REG_L x22, PT_S6(sp)
- REG_L x23, PT_S7(sp)
- REG_L x24, PT_S8(sp)
- REG_L x25, PT_S9(sp)
- REG_L x26, PT_S10(sp)
- REG_L x27, PT_S11(sp)
- REG_L x28, PT_T3(sp)
- REG_L x29, PT_T4(sp)
- REG_L x30, PT_T5(sp)
- REG_L x31, PT_T6(sp)
-
- .else
+ .macro RESTORE_ABI_REGS
+ REG_L t0, FREGS_EPC(sp)
+ REG_L x1, FREGS_RA(sp)
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- REG_L x8, PT_S0(sp)
+ REG_L x8, FREGS_S0(sp)
#endif
- .endif
- addi sp, sp, PT_SIZE_ON_STACK
+ REG_L x6, FREGS_T1(sp)
+#ifdef CONFIG_CC_IS_CLANG
+ REG_L x7, FREGS_T2(sp)
+ REG_L x28, FREGS_T3(sp)
+ REG_L x29, FREGS_T4(sp)
+ REG_L x30, FREGS_T5(sp)
+ REG_L x31, FREGS_T6(sp)
+#endif
+ // restore the arguments
+ REG_L x10, FREGS_A0(sp)
+ REG_L x11, FREGS_A1(sp)
+ REG_L x12, FREGS_A2(sp)
+ REG_L x13, FREGS_A3(sp)
+ REG_L x14, FREGS_A4(sp)
+ REG_L x15, FREGS_A5(sp)
+ REG_L x16, FREGS_A6(sp)
+ REG_L x17, FREGS_A7(sp)
+
+ addi sp, sp, FREGS_SIZE_ON_STACK
.endm
.macro PREPARE_ARGS
- addi a0, t0, -FENTRY_RA_OFFSET
+ addi a0, t0, -MCOUNT_JALR_SIZE // ip (callsite's jalr insn)
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ mv a1, ra // parent_ip
+ REG_L a2, -16(t0) // op
+ REG_L ra, FTRACE_OPS_FUNC(a2) // op->func
+#else
la a1, function_trace_op
- REG_L a2, 0(a1)
- mv a1, ra
- mv a3, sp
+ REG_L a2, 0(a1) // op
+ mv a1, ra // parent_ip
+#endif
+ mv a3, sp // regs
.endm
-#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
-
-#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
SYM_FUNC_START(ftrace_caller)
- SAVE_ABI
-
- addi a0, t0, -FENTRY_RA_OFFSET
- la a1, function_trace_op
- REG_L a2, 0(a1)
- mv a1, ra
- mv a3, sp
-
-SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
- call ftrace_stub
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ /*
+ * When CALL_OPS is enabled, two 4-byte (or four 2-byte) nops [8B in
+ * total] are placed before the function entry; these are later
+ * overwritten with the pointer to the associated struct ftrace_ops.
+ *
+ * -8: &ftrace_ops of the associated tracer function.
+ *<ftrace enable>:
+ * 0: auipc t0/ra, 0x?
+ * 4: jalr t0/ra, ?(t0/ra)
+ *
+ * -8: &ftrace_nop_ops
+ *<ftrace disable>:
+ * 0: nop
+ * 4: nop
+ *
+ * t0 is set to ip+8 after the jalr is executed at the callsite,
+ * so we find the associated op at t0-16.
+ */
+ REG_L t1, -16(t0) // op (NB: the hard-coded 16 should be derived from SZREG)
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- addi a0, sp, ABI_RA
- REG_L a1, ABI_T0(sp)
- addi a1, a1, -FENTRY_RA_OFFSET
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- mv a2, s0
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ /*
+ * If the op has a direct call, handle it immediately without
+ * saving/restoring registers.
+ */
+ REG_L t1, FTRACE_OPS_DIRECT_CALL(t1)
+ bnez t1, ftrace_caller_direct
#endif
-SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
- call ftrace_stub
#endif
- RESTORE_ABI
- jr t0
-SYM_FUNC_END(ftrace_caller)
-
-#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
-SYM_FUNC_START(ftrace_regs_caller)
- mv t1, zero
- SAVE_ABI_REGS 1
- PREPARE_ARGS
-
-SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
- call ftrace_stub
-
- RESTORE_ABI_REGS 1
- bnez t1, .Ldirect
- jr t0
-.Ldirect:
- jr t1
-SYM_FUNC_END(ftrace_regs_caller)
-
-SYM_FUNC_START(ftrace_caller)
- SAVE_ABI_REGS 0
+ SAVE_ABI_REGS
PREPARE_ARGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ jalr ra
+#else
SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
- call ftrace_stub
-
- RESTORE_ABI_REGS 0
+ REG_L ra, ftrace_call_dest
+ jalr ra, 0(ra)
+#endif
+ RESTORE_ABI_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ bnez t1, ftrace_caller_direct
+#endif
jr t0
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+SYM_INNER_LABEL(ftrace_caller_direct, SYM_L_LOCAL)
+ jr t1
+#endif
SYM_FUNC_END(ftrace_caller)
-#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
SYM_CODE_START(ftrace_stub_direct_tramp)
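In C terms, the CALL_OPS dispatch at the top of ftrace_caller amounts to the sketch below (illustrative only; the real code is the assembly above, and the -16 offset assumes the rv64 layout described in the comment):

/* Minimal C rendition of the lookup, assuming t0 = ip + 8 on entry
 * and the ops pointer stored in the 8 bytes before the function,
 * i.e. at ip - 8 == t0 - 16. */
static void call_ops_dispatch(unsigned long t0, unsigned long parent_ip,
			      struct ftrace_regs *fregs)
{
	unsigned long ip = t0 - MCOUNT_JALR_SIZE;	/* callsite's jalr insn */
	struct ftrace_ops *ops = *(struct ftrace_ops **)(t0 - 16);

	ops->func(ip, parent_ip, ops, fregs);	/* what "jalr ra" reaches */
}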
diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index 3a42f6287909..da4a4000e57e 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -26,12 +26,12 @@
* register if a0 was not saved.
*/
.macro SAVE_RET_ABI_STATE
- addi sp, sp, -4*SZREG
- REG_S s0, 2*SZREG(sp)
- REG_S ra, 3*SZREG(sp)
- REG_S a0, 1*SZREG(sp)
- REG_S a1, 0*SZREG(sp)
- addi s0, sp, 4*SZREG
+ addi sp, sp, -FREGS_SIZE_ON_STACK
+ REG_S ra, FREGS_RA(sp)
+ REG_S s0, FREGS_S0(sp)
+ REG_S a0, FREGS_A0(sp)
+ REG_S a1, FREGS_A1(sp)
+ addi s0, sp, FREGS_SIZE_ON_STACK
.endm
.macro RESTORE_ABI_STATE
@@ -41,11 +41,11 @@
.endm
.macro RESTORE_RET_ABI_STATE
- REG_L ra, 3*SZREG(sp)
- REG_L s0, 2*SZREG(sp)
- REG_L a0, 1*SZREG(sp)
- REG_L a1, 0*SZREG(sp)
- addi sp, sp, 4*SZREG
+ REG_L ra, FREGS_RA(sp)
+ REG_L s0, FREGS_S0(sp)
+ REG_L a0, FREGS_A0(sp)
+ REG_L a1, FREGS_A1(sp)
+ addi sp, sp, FREGS_SIZE_ON_STACK
.endm
SYM_TYPED_FUNC_START(ftrace_stub)
diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c
index e264e59e596e..75551ac6504c 100644
--- a/arch/riscv/kernel/module-sections.c
+++ b/arch/riscv/kernel/module-sections.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
+#include <linux/sort.h>
unsigned long module_emit_got_entry(struct module *mod, unsigned long val)
{
@@ -55,43 +56,70 @@ unsigned long module_emit_plt_entry(struct module *mod, unsigned long val)
return (unsigned long)&plt[i];
}
-static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y)
+#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rela(const void *a, const void *b)
{
- return x->r_info == y->r_info && x->r_addend == y->r_addend;
+ const Elf_Rela *x = a, *y = b;
+ int i;
+
+ /* sort by type, symbol index and addend */
+ i = cmp_3way(x->r_info, y->r_info);
+ if (i == 0)
+ i = cmp_3way(x->r_addend, y->r_addend);
+ return i;
}
static bool duplicate_rela(const Elf_Rela *rela, int idx)
{
- int i;
- for (i = 0; i < idx; i++) {
- if (is_rela_equal(&rela[i], &rela[idx]))
- return true;
- }
- return false;
+ /*
+ * Entries are sorted by type, symbol index and addend. That means
+ * that, if a duplicate entry exists, it must be in the preceding slot.
+ */
+ return idx > 0 && cmp_rela(rela + idx, rela + idx - 1) == 0;
}
-static void count_max_entries(Elf_Rela *relas, int num,
+static void count_max_entries(const Elf_Rela *relas, size_t num,
unsigned int *plts, unsigned int *gots)
{
- unsigned int type, i;
-
- for (i = 0; i < num; i++) {
- type = ELF_RISCV_R_TYPE(relas[i].r_info);
- if (type == R_RISCV_CALL_PLT) {
- if (!duplicate_rela(relas, i))
- (*plts)++;
- } else if (type == R_RISCV_GOT_HI20) {
- if (!duplicate_rela(relas, i))
- (*gots)++;
+ for (size_t i = 0; i < num; i++) {
+ if (duplicate_rela(relas, i))
+ continue;
+
+ switch (ELF_R_TYPE(relas[i].r_info)) {
+ case R_RISCV_CALL_PLT:
+ case R_RISCV_PLT32:
+ (*plts)++;
+ break;
+ case R_RISCV_GOT_HI20:
+ (*gots)++;
+ break;
+ default:
+ unreachable();
}
}
}
+static bool rela_needs_plt_got_entry(const Elf_Rela *rela)
+{
+ switch (ELF_R_TYPE(rela->r_info)) {
+ case R_RISCV_CALL_PLT:
+ case R_RISCV_GOT_HI20:
+ case R_RISCV_PLT32:
+ return true;
+ default:
+ return false;
+ }
+}
+
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod)
{
+ size_t num_scratch_relas = 0;
unsigned int num_plts = 0;
unsigned int num_gots = 0;
+ Elf_Rela *scratch = NULL;
+ size_t scratch_size = 0;
int i;
/*
@@ -121,9 +149,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
/* Calculate the maximum number of entries */
for (i = 0; i < ehdr->e_shnum; i++) {
+ size_t num_relas = sechdrs[i].sh_size / sizeof(Elf_Rela);
Elf_Rela *relas = (void *)ehdr + sechdrs[i].sh_offset;
- int num_rela = sechdrs[i].sh_size / sizeof(Elf_Rela);
Elf_Shdr *dst_sec = sechdrs + sechdrs[i].sh_info;
+ size_t scratch_size_needed;
if (sechdrs[i].sh_type != SHT_RELA)
continue;
@@ -132,7 +161,28 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
if (!(dst_sec->sh_flags & SHF_EXECINSTR))
continue;
- count_max_entries(relas, num_rela, &num_plts, &num_gots);
+ /*
+ * apply_relocate_add() relies on HI20 and LO12 relocation pairs being
+ * close together, so sort a copy of the section to avoid interfering.
+ */
+ scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch);
+ if (scratch_size_needed > scratch_size) {
+ scratch_size = scratch_size_needed;
+ scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL);
+ if (!scratch)
+ return -ENOMEM;
+ }
+
+ for (size_t j = 0; j < num_relas; j++)
+ if (rela_needs_plt_got_entry(&relas[j]))
+ scratch[num_scratch_relas++] = relas[j];
+ }
+
+ if (scratch) {
+ /* sort the accumulated PLT/GOT relocations so duplicates are adjacent */
+ sort(scratch, num_scratch_relas, sizeof(*scratch), cmp_rela, NULL);
+ count_max_entries(scratch, num_scratch_relas, &num_plts, &num_gots);
+ kvfree(scratch);
}
mod->arch.plt.shdr->sh_type = SHT_NOBITS;
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 5e5a82644451..7f6147c18033 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/moduleloader.h>
-#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/pgtable.h>
#include <asm/alternative.h>
@@ -24,7 +23,7 @@ struct used_bucket {
struct relocation_head {
struct hlist_node node;
- struct list_head *rel_entry;
+ struct list_head rel_entry;
void *location;
};
@@ -635,7 +634,7 @@ process_accumulated_relocations(struct module *me,
location = rel_head_iter->location;
list_for_each_entry_safe(rel_entry_iter,
rel_entry_iter_tmp,
- rel_head_iter->rel_entry,
+ &rel_head_iter->rel_entry,
head) {
curr_type = rel_entry_iter->type;
reloc_handlers[curr_type].reloc_handler(
@@ -649,7 +648,7 @@ process_accumulated_relocations(struct module *me,
kfree(bucket_iter);
}
- kfree(*relocation_hashtable);
+ kvfree(*relocation_hashtable);
}
static int add_relocation_to_accumulate(struct module *me, int type,
@@ -705,16 +704,7 @@ static int add_relocation_to_accumulate(struct module *me, int type,
return -ENOMEM;
}
- rel_head->rel_entry =
- kmalloc(sizeof(struct list_head), GFP_KERNEL);
-
- if (!rel_head->rel_entry) {
- kfree(entry);
- kfree(rel_head);
- return -ENOMEM;
- }
-
- INIT_LIST_HEAD(rel_head->rel_entry);
+ INIT_LIST_HEAD(&rel_head->rel_entry);
rel_head->location = location;
INIT_HLIST_NODE(&rel_head->node);
if (!current_head->first) {
@@ -723,7 +713,6 @@ static int add_relocation_to_accumulate(struct module *me, int type,
if (!bucket) {
kfree(entry);
- kfree(rel_head->rel_entry);
kfree(rel_head);
return -ENOMEM;
}
@@ -736,7 +725,7 @@ static int add_relocation_to_accumulate(struct module *me, int type,
}
/* Add relocation to head of discovered rel_head */
- list_add_tail(&entry->head, rel_head->rel_entry);
+ list_add_tail(&entry->head, &rel_head->rel_entry);
return 0;
}
@@ -763,9 +752,10 @@ initialize_relocation_hashtable(unsigned int num_relocations,
hashtable_size <<= should_double_size;
- *relocation_hashtable = kmalloc_array(hashtable_size,
- sizeof(**relocation_hashtable),
- GFP_KERNEL);
+ /* Number of relocations may be large, so kvmalloc it */
+ *relocation_hashtable = kvmalloc_array(hashtable_size,
+ sizeof(**relocation_hashtable),
+ GFP_KERNEL);
if (!*relocation_hashtable)
return 0;
@@ -788,8 +778,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
int res;
unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel);
struct hlist_head *relocation_hashtable;
- struct list_head used_buckets_list;
unsigned int hashtable_bits;
+ LIST_HEAD(used_buckets_list);
hashtable_bits = initialize_relocation_hashtable(num_relocations,
&relocation_hashtable);
@@ -797,8 +787,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
if (!relocation_hashtable)
return -ENOMEM;
- INIT_LIST_HEAD(&used_buckets_list);
-
pr_debug("Applying relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
@@ -872,7 +860,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
}
j++;
- if (j > sechdrs[relsec].sh_size / sizeof(*rel))
+ if (j == num_relocations)
j = 0;
} while (j_idx != j);
@@ -905,17 +893,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
return 0;
}
-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
-void *module_alloc(unsigned long size)
-{
- return __vmalloc_node_range(size, 1, MODULES_VADDR,
- MODULES_END, GFP_KERNEL,
- PAGE_KERNEL, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-#endif
-
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
index 0d6225fd3194..fa6b0339a65d 100644
--- a/arch/riscv/kernel/paravirt.c
+++ b/arch/riscv/kernel/paravirt.c
@@ -62,7 +62,7 @@ static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi,
ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM,
lo, hi, flags, 0, 0, 0);
if (ret.error) {
- if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE)
+ if (lo == SBI_SHMEM_DISABLE && hi == SBI_SHMEM_DISABLE)
pr_warn("Failed to disable steal-time shmem");
else
pr_warn("Failed to set steal-time shmem");
@@ -84,8 +84,8 @@ static int pv_time_cpu_online(unsigned int cpu)
static int pv_time_cpu_down_prepare(unsigned int cpu)
{
- return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE,
- SBI_STA_SHMEM_DISABLE, 0);
+ return sbi_sta_steal_time_set_shmem(SBI_SHMEM_DISABLE,
+ SBI_SHMEM_DISABLE, 0);
}
static u64 pv_time_steal_clock(int cpu)
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 37e87fdcf6a0..db13c9ddf9e3 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -13,13 +13,13 @@
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
#include <asm/ftrace.h>
-#include <asm/patch.h>
+#include <asm/text-patching.h>
#include <asm/sections.h>
struct patch_insn {
void *addr;
u32 *insns;
- int ninsns;
+ size_t len;
atomic_t cpu_count;
};
@@ -54,7 +54,7 @@ static __always_inline void *patch_map(void *addr, const unsigned int fixmap)
BUG_ON(!page);
return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
- (uintaddr & ~PAGE_MASK));
+ offset_in_page(addr));
}
static void patch_unmap(int fixmap)
@@ -65,8 +65,8 @@ NOKPROBE_SYMBOL(patch_unmap);
static int __patch_insn_set(void *addr, u8 c, size_t len)
{
+ bool across_pages = (offset_in_page(addr) + len) > PAGE_SIZE;
void *waddr = addr;
- bool across_pages = (((uintptr_t)addr & ~PAGE_MASK) + len) > PAGE_SIZE;
/*
* Only two pages can be mapped at a time for writing.
@@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
*/
lockdep_assert_held(&text_mutex);
+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
@@ -87,19 +89,29 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
memset(waddr, c, len);
+ /*
+ * We could have just patched a function that is about to be
+ * called, so make sure we don't execute partially patched
+ * instructions by flushing the icache as soon as possible.
+ */
+ local_flush_icache_range((unsigned long)waddr,
+ (unsigned long)waddr + len);
+
patch_unmap(FIX_TEXT_POKE0);
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);
+ preempt_enable();
+
return 0;
}
NOKPROBE_SYMBOL(__patch_insn_set);
static int __patch_insn_write(void *addr, const void *insn, size_t len)
{
+ bool across_pages = (offset_in_page(addr) + len) > PAGE_SIZE;
void *waddr = addr;
- bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
int ret;
/*
@@ -122,6 +134,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
if (!riscv_patch_in_stop_machine)
lockdep_assert_held(&text_mutex);
+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
@@ -129,11 +143,21 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
ret = copy_to_kernel_nofault(waddr, insn, len);
+ /*
+ * We could have just patched a function that is about to be
+ * called, so make sure we don't execute partially patched
+ * instructions by flushing the icache as soon as possible.
+ */
+ local_flush_icache_range((unsigned long)waddr,
+ (unsigned long)waddr + len);
+
patch_unmap(FIX_TEXT_POKE0);
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);
+ preempt_enable();
+
return ret;
}
NOKPROBE_SYMBOL(__patch_insn_write);
@@ -155,69 +179,70 @@ NOKPROBE_SYMBOL(__patch_insn_write);
static int patch_insn_set(void *addr, u8 c, size_t len)
{
- size_t patched = 0;
size_t size;
- int ret = 0;
+ int ret;
/*
* __patch_insn_set() can only work on 2 pages at a time so call it in a
* loop with len <= 2 * PAGE_SIZE.
*/
- while (patched < len && !ret) {
- size = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + patched), len - patched);
- ret = __patch_insn_set(addr + patched, c, size);
-
- patched += size;
+ while (len) {
+ size = min(len, PAGE_SIZE * 2 - offset_in_page(addr));
+ ret = __patch_insn_set(addr, c, size);
+ if (ret)
+ return ret;
+
+ addr += size;
+ len -= size;
}
- return ret;
+ return 0;
}
NOKPROBE_SYMBOL(patch_insn_set);
int patch_text_set_nosync(void *addr, u8 c, size_t len)
{
- u32 *tp = addr;
int ret;
- ret = patch_insn_set(tp, c, len);
-
+ ret = patch_insn_set(addr, c, len);
if (!ret)
- flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
+ flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len);
return ret;
}
NOKPROBE_SYMBOL(patch_text_set_nosync);
-static int patch_insn_write(void *addr, const void *insn, size_t len)
+int patch_insn_write(void *addr, const void *insn, size_t len)
{
- size_t patched = 0;
size_t size;
- int ret = 0;
+ int ret;
/*
* Copy the instructions to the destination address, two pages at a time
* because __patch_insn_write() can only handle len <= 2 * PAGE_SIZE.
*/
- while (patched < len && !ret) {
- size = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + patched), len - patched);
- ret = __patch_insn_write(addr + patched, insn + patched, size);
-
- patched += size;
+ while (len) {
+ size = min(len, PAGE_SIZE * 2 - offset_in_page(addr));
+ ret = __patch_insn_write(addr, insn, size);
+ if (ret)
+ return ret;
+
+ addr += size;
+ insn += size;
+ len -= size;
}
- return ret;
+ return 0;
}
NOKPROBE_SYMBOL(patch_insn_write);
int patch_text_nosync(void *addr, const void *insns, size_t len)
{
- u32 *tp = addr;
int ret;
- ret = patch_insn_write(tp, insns, len);
-
+ ret = patch_insn_write(addr, insns, len);
if (!ret)
- flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
+ flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len);
return ret;
}
@@ -226,33 +251,36 @@ NOKPROBE_SYMBOL(patch_text_nosync);
static int patch_text_cb(void *data)
{
struct patch_insn *patch = data;
- unsigned long len;
- int i, ret = 0;
+ int ret = 0;
if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
- for (i = 0; ret == 0 && i < patch->ninsns; i++) {
- len = GET_INSN_LENGTH(patch->insns[i]);
- ret = patch_text_nosync(patch->addr + i * len,
- &patch->insns[i], len);
- }
- atomic_inc(&patch->cpu_count);
+ ret = patch_insn_write(patch->addr, patch->insns, patch->len);
+ /*
+ * Increment the counter with the release variant of atomic
+ * increment so that the patching stores become visible *before*
+ * the counter update releases all waiting CPUs. The release
+ * pairs with the local_flush_icache_all() call on each waiting
+ * CPU.
+ */
+ atomic_inc_return_release(&patch->cpu_count);
} else {
while (atomic_read(&patch->cpu_count) <= num_online_cpus())
cpu_relax();
- smp_mb();
+
+ local_flush_icache_all();
}
return ret;
}
NOKPROBE_SYMBOL(patch_text_cb);
-int patch_text(void *addr, u32 *insns, int ninsns)
+int patch_text(void *addr, u32 *insns, size_t len)
{
int ret;
struct patch_insn patch = {
.addr = addr,
.insns = insns,
- .ninsns = ninsns,
+ .len = len,
.cpu_count = ATOMIC_INIT(0),
};
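The chunking in patch_insn_set()/patch_insn_write() above is easiest to see with numbers (hypothetical values, PAGE_SIZE = 0x1000):

/* Patching len = 0x3000 bytes at an address whose offset within its
 * page is 0xff8:
 *
 *   1st call: size = min(0x3000, 0x2000 - 0xff8) = 0x1008
 *   2nd call: addr is now page aligned,
 *             size = min(0x1ff8, 0x2000)         = 0x1ff8
 *
 * Each __patch_insn_*() call therefore never spans more than the two
 * fixmap windows (FIX_TEXT_POKE0/FIX_TEXT_POKE1).
 */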
diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c
index 3348a61de7d9..b465bc9eb870 100644
--- a/arch/riscv/kernel/perf_callchain.c
+++ b/arch/riscv/kernel/perf_callchain.c
@@ -6,37 +6,9 @@
#include <asm/stacktrace.h>
-/*
- * Get the return address for a single stackframe and return a pointer to the
- * next frame tail.
- */
-static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry,
- unsigned long fp, unsigned long reg_ra)
+static bool fill_callchain(void *entry, unsigned long pc)
{
- struct stackframe buftail;
- unsigned long ra = 0;
- unsigned long __user *user_frame_tail =
- (unsigned long __user *)(fp - sizeof(struct stackframe));
-
- /* Check accessibility of one struct frame_tail beyond */
- if (!access_ok(user_frame_tail, sizeof(buftail)))
- return 0;
- if (__copy_from_user_inatomic(&buftail, user_frame_tail,
- sizeof(buftail)))
- return 0;
-
- if (reg_ra != 0)
- ra = reg_ra;
- else
- ra = buftail.ra;
-
- fp = buftail.fp;
- if (ra != 0)
- perf_callchain_store(entry, ra);
- else
- return 0;
-
- return fp;
+ return perf_callchain_store(entry, pc) == 0;
}
/*
@@ -56,23 +28,21 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry,
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
- unsigned long fp = 0;
+ if (perf_guest_state()) {
+ /* TODO: We don't support guest OS callchains yet */
+ return;
+ }
- fp = regs->s0;
- perf_callchain_store(entry, regs->epc);
-
- fp = user_backtrace(entry, fp, regs->ra);
- while (fp && !(fp & 0x3) && entry->nr < entry->max_stack)
- fp = user_backtrace(entry, fp, 0);
-}
-
-static bool fill_callchain(void *entry, unsigned long pc)
-{
- return perf_callchain_store(entry, pc) == 0;
+ arch_stack_walk_user(fill_callchain, entry, regs);
}
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
+ if (perf_guest_state()) {
+ /* TODO: We don't support guest OS callchains yet */
+ return;
+ }
+
walk_stackframe(NULL, regs, fill_callchain, entry);
}
diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile
index 07915dc9279e..81d69d45c06c 100644
--- a/arch/riscv/kernel/pi/Makefile
+++ b/arch/riscv/kernel/pi/Makefile
@@ -5,20 +5,23 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
-Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
$(call cc-option,-mbranch-protection=none) \
-I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
+ -include $(srctree)/include/linux/hidden.h \
-D__DISABLE_EXPORTS -ffreestanding \
-fno-asynchronous-unwind-tables -fno-unwind-tables \
$(call cc-option,-fno-addrsig)
+# Disable LTO
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
+
KBUILD_CFLAGS += -mcmodel=medany
CFLAGS_cmdline_early.o += -D__NO_FORTIFY
+CFLAGS_fdt_early.o += -D__NO_FORTIFY
+# lib/string.c already defines __NO_FORTIFY
+CFLAGS_ctype.o += -D__NO_FORTIFY
+CFLAGS_lib-fdt.o += -D__NO_FORTIFY
CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY
-
-GCOV_PROFILE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-UBSAN_SANITIZE := n
-KCOV_INSTRUMENT := n
+CFLAGS_archrandom_early.o += -D__NO_FORTIFY
$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
--remove-section=.note.gnu.property \
@@ -35,5 +38,5 @@ $(obj)/string.o: $(srctree)/lib/string.c FORCE
$(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE
$(call if_changed_rule,cc_o_c)
-obj-y := cmdline_early.pi.o fdt_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+obj-y := cmdline_early.pi.o fdt_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o archrandom_early.pi.o
extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/riscv/kernel/pi/archrandom_early.c b/arch/riscv/kernel/pi/archrandom_early.c
new file mode 100644
index 000000000000..3f05d3cf3b7b
--- /dev/null
+++ b/arch/riscv/kernel/pi/archrandom_early.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/csr.h>
+#include <linux/processor.h>
+
+#include "pi.h"
+
+/*
+ * To avoid duplicating code, include asm/archrandom.h and use the
+ * macros below to stub out the helpers that are unavailable here.
+ */
+#undef riscv_has_extension_unlikely
+#define riscv_has_extension_likely(...) false
+#undef pr_err_once
+#define pr_err_once(...)
+
+#include <asm/archrandom.h>
+
+u64 get_kaslr_seed_zkr(const uintptr_t dtb_pa)
+{
+ unsigned long seed = 0;
+
+ if (!fdt_early_match_extension_isa((const void *)dtb_pa, "zkr"))
+ return 0;
+
+ if (!csr_seed_long(&seed))
+ return 0;
+
+ return seed;
+}
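csr_seed_long() (pulled in from asm/archrandom.h) polls the Zkr seed CSR until it has collected a full long. A single poll looks roughly like the sketch below; the field layout follows the scalar-crypto spec, and the constant names here are assumptions:

/* One poll of the seed CSR. The spec requires read-write access,
 * hence csr_swap() rather than a plain csr_read(). */
static bool poll_seed16(u16 *out)
{
	unsigned long v = csr_swap(CSR_SEED, 0);

	if ((v & SEED_OPST_MASK) != SEED_OPST_ES16)
		return false;		/* BIST, WAIT or DEAD: retry later */

	*out = v & SEED_ENTROPY_MASK;	/* 16 bits of entropy per poll */
	return true;
}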
diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c
index f6d4dedffb84..fbcdc9e4e143 100644
--- a/arch/riscv/kernel/pi/cmdline_early.c
+++ b/arch/riscv/kernel/pi/cmdline_early.c
@@ -6,15 +6,9 @@
#include <asm/pgtable.h>
#include <asm/setup.h>
-static char early_cmdline[COMMAND_LINE_SIZE];
+#include "pi.h"
-/*
- * Declare the functions that are exported (but prefixed) here so that LLVM
- * does not complain it lacks the 'static' keyword (which, if added, makes
- * LLVM complain because the function is actually unused in this file).
- */
-u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa);
-bool set_nokaslr_from_cmdline(uintptr_t dtb_pa);
+static char early_cmdline[COMMAND_LINE_SIZE];
static char *get_early_cmdline(uintptr_t dtb_pa)
{
diff --git a/arch/riscv/kernel/pi/fdt_early.c b/arch/riscv/kernel/pi/fdt_early.c
index 899610e042ab..9bdee2fafe47 100644
--- a/arch/riscv/kernel/pi/fdt_early.c
+++ b/arch/riscv/kernel/pi/fdt_early.c
@@ -2,13 +2,9 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/libfdt.h>
+#include <linux/ctype.h>
-/*
- * Declare the functions that are exported (but prefixed) here so that LLVM
- * does not complain it lacks the 'static' keyword (which, if added, makes
- * LLVM complain because the function is actually unused in this file).
- */
-u64 get_kaslr_seed(uintptr_t dtb_pa);
+#include "pi.h"
u64 get_kaslr_seed(uintptr_t dtb_pa)
{
@@ -28,3 +24,162 @@ u64 get_kaslr_seed(uintptr_t dtb_pa)
*prop = 0;
return ret;
}
+
+/**
+ * fdt_device_is_available - check if a device is available for use
+ *
+ * @fdt: pointer to the device tree blob
+ * @node: offset of the node whose property to find
+ *
+ * Returns true if the status property is absent or set to "okay" or "ok",
+ * false otherwise
+ */
+static bool fdt_device_is_available(const void *fdt, int node)
+{
+ const char *status;
+ int statlen;
+
+ status = fdt_getprop(fdt, node, "status", &statlen);
+ if (!status)
+ return true;
+
+ if (statlen > 0) {
+ if (!strcmp(status, "okay") || !strcmp(status, "ok"))
+ return true;
+ }
+
+ return false;
+}
+
+/* Copy of fdt_nodename_eq_ */
+static int fdt_node_name_eq(const void *fdt, int offset,
+ const char *s)
+{
+ int olen;
+ int len = strlen(s);
+ const char *p = fdt_get_name(fdt, offset, &olen);
+
+ if (!p || olen < len)
+ /* short match */
+ return 0;
+
+ if (memcmp(p, s, len) != 0)
+ return 0;
+
+ if (p[len] == '\0')
+ return 1;
+ else if (!memchr(s, '@', len) && (p[len] == '@'))
+ return 1;
+ else
+ return 0;
+}
+
+/**
+ * isa_string_contains - check if isa string contains an extension
+ *
+ * @isa_str: isa string to search
+ * @ext_name: the extension to search for
+ *
+ * Returns true if the extension is in the given isa string,
+ * false otherwise
+ */
+static bool isa_string_contains(const char *isa_str, const char *ext_name)
+{
+ size_t i, single_end, len = strlen(ext_name);
+ char ext_end;
+
+ /* A valid ISA string must contain at least "rv32"/"rv64" */
+ if (strlen(isa_str) < 4)
+ return false;
+
+ if (len == 1) {
+ single_end = strcspn(isa_str, "sSxXzZ");
+ /* Search for single chars between rv32/64 and multi-letter extensions */
+ for (i = 4; i < single_end; i++) {
+ if (tolower(isa_str[i]) == ext_name[0])
+ return true;
+ }
+ return false;
+ }
+
+ /* Skip to start of multi-letter extensions */
+ isa_str = strpbrk(isa_str, "sSxXzZ");
+ while (isa_str) {
+ if (strncasecmp(isa_str, ext_name, len) == 0) {
+ ext_end = isa_str[len];
+ /* Check if matches the whole extension. */
+ if (ext_end == '\0' || ext_end == '_')
+ return true;
+ }
+ /*
+ * Multi-letter extensions must be separated from other multi-letter
+ * extensions by an "_"; the end of a multi-letter extension is
+ * either the null character or the "_" at the start of the next
+ * multi-letter extension.
+ */
+ isa_str = strchr(isa_str, '_');
+ if (isa_str)
+ isa_str++;
+ }
+
+ return false;
+}
+
+/**
+ * early_cpu_isa_ext_available - check if cpu node has an extension
+ *
+ * @fdt: pointer to the device tree blob
+ * @node: offset of the cpu node
+ * @ext_name: the extension to search for
+ *
+ * Returns true if the cpu node has the extension,
+ * false otherwise
+ */
+static bool early_cpu_isa_ext_available(const void *fdt, int node, const char *ext_name)
+{
+ const void *prop;
+ int len;
+
+ prop = fdt_getprop(fdt, node, "riscv,isa-extensions", &len);
+ if (prop && fdt_stringlist_contains(prop, len, ext_name))
+ return true;
+
+ prop = fdt_getprop(fdt, node, "riscv,isa", &len);
+ if (prop && isa_string_contains(prop, ext_name))
+ return true;
+
+ return false;
+}
+
+/**
+ * fdt_early_match_extension_isa - check if all cpu nodes have an extension
+ *
+ * @fdt: pointer to the device tree blob
+ * @ext_name: the extension to search for
+ *
+ * Returns true if all available cpu nodes have the extension,
+ * false otherwise
+ */
+bool fdt_early_match_extension_isa(const void *fdt, const char *ext_name)
+{
+ int node, parent;
+ bool ret = false;
+
+ parent = fdt_path_offset(fdt, "/cpus");
+ if (parent < 0)
+ return false;
+
+ fdt_for_each_subnode(node, fdt, parent) {
+ if (!fdt_node_name_eq(fdt, node, "cpu"))
+ continue;
+
+ if (!fdt_device_is_available(fdt, node))
+ continue;
+
+ if (!early_cpu_isa_ext_available(fdt, node, ext_name))
+ return false;
+
+ ret = true;
+ }
+
+ return ret;
+}
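The matching rules in isa_string_contains() are easiest to check with examples (hypothetical, userspace-style assertions):

/* Single-letter extensions are only searched between "rv32"/"rv64"
 * and the first multi-letter group; multi-letter names must match a
 * whole "_"-separated token, case-insensitively. */
assert(isa_string_contains("rv64imac_zicsr_zkr", "zkr"));  /* whole token */
assert(isa_string_contains("rv64imac_zicsr_zkr", "c"));    /* single letter */
assert(!isa_string_contains("rv64imac_zicsr_zkr", "zk"));  /* no prefix match */
assert(isa_string_contains("rv64imac_ZKR", "zkr"));        /* case-insensitive */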
diff --git a/arch/riscv/kernel/pi/pi.h b/arch/riscv/kernel/pi/pi.h
new file mode 100644
index 000000000000..21141d84fea6
--- /dev/null
+++ b/arch/riscv/kernel/pi/pi.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _RISCV_PI_H_
+#define _RISCV_PI_H_
+
+#include <linux/types.h>
+
+/*
+ * The following functions are exported (but prefixed). Declare them here so
+ * that LLVM does not complain it lacks the 'static' keyword (which, if
+ * added, makes LLVM complain because the function is unused).
+ */
+
+u64 get_kaslr_seed(uintptr_t dtb_pa);
+u64 get_kaslr_seed_zkr(const uintptr_t dtb_pa);
+bool set_nokaslr_from_cmdline(uintptr_t dtb_pa);
+u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa);
+
+bool fdt_early_match_extension_isa(const void *fdt, const char *ext_name);
+
+#endif /* _RISCV_PI_H_ */
diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile
index 8265ff497977..d2129f2c61b8 100644
--- a/arch/riscv/kernel/probes/Makefile
+++ b/arch/riscv/kernel/probes/Makefile
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o
obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o
-obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o
CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c
deleted file mode 100644
index 7142ec42e889..000000000000
--- a/arch/riscv/kernel/probes/ftrace.c
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/kprobes.h>
-
-/* Ftrace callback handler for kprobes -- called under preepmt disabled */
-void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *fregs)
-{
- struct kprobe *p;
- struct pt_regs *regs;
- struct kprobe_ctlblk *kcb;
- int bit;
-
- bit = ftrace_test_recursion_trylock(ip, parent_ip);
- if (bit < 0)
- return;
-
- p = get_kprobe((kprobe_opcode_t *)ip);
- if (unlikely(!p) || kprobe_disabled(p))
- goto out;
-
- regs = ftrace_get_regs(fregs);
- kcb = get_kprobe_ctlblk();
- if (kprobe_running()) {
- kprobes_inc_nmissed_count(p);
- } else {
- unsigned long orig_ip = instruction_pointer(regs);
-
- instruction_pointer_set(regs, ip);
-
- __this_cpu_write(current_kprobe, p);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (!p->pre_handler || !p->pre_handler(p, regs)) {
- /*
- * Emulate singlestep (and also recover regs->pc)
- * as if there is a nop
- */
- instruction_pointer_set(regs,
- (unsigned long)p->addr + MCOUNT_INSN_SIZE);
- if (unlikely(p->post_handler)) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- p->post_handler(p, regs, 0);
- }
- instruction_pointer_set(regs, orig_ip);
- }
-
- /*
- * If pre_handler returns !0, it changes regs->pc. We have to
- * skip emulating post_handler.
- */
- __this_cpu_write(current_kprobe, NULL);
- }
-out:
- ftrace_test_recursion_unlock(bit);
-}
-NOKPROBE_SYMBOL(kprobe_ftrace_handler);
-
-int arch_prepare_kprobe_ftrace(struct kprobe *p)
-{
- p->ainsn.api.insn = NULL;
- return 0;
-}
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index 2f08c14a933d..c0738d6c6498 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -6,12 +6,13 @@
#include <linux/extable.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
+#include <linux/vmalloc.h>
#include <asm/ptrace.h>
#include <linux/uaccess.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
#include <asm/bug.h>
-#include <asm/patch.h>
+#include <asm/text-patching.h>
#include "decode-insn.h"
@@ -23,14 +24,13 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
+ size_t len = GET_INSN_LENGTH(p->opcode);
u32 insn = __BUG_INSN_32;
- unsigned long offset = GET_INSN_LENGTH(p->opcode);
- p->ainsn.api.restore = (unsigned long)p->addr + offset;
+ p->ainsn.api.restore = (unsigned long)p->addr + len;
- patch_text(p->ainsn.api.insn, &p->opcode, 1);
- patch_text((void *)((unsigned long)(p->ainsn.api.insn) + offset),
- &insn, 1);
+ patch_text_nosync(p->ainsn.api.insn, &p->opcode, len);
+ patch_text_nosync((void *)p->ainsn.api.insn + len, &insn, GET_INSN_LENGTH(insn));
}
static void __kprobes arch_prepare_simulate(struct kprobe *p)
@@ -104,29 +104,21 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
}
-#ifdef CONFIG_MMU
-void *alloc_insn_page(void)
-{
- return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_READ_EXEC,
- VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-#endif
-
/* install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
- u32 insn = (p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32 ?
- __BUG_INSN_32 : __BUG_INSN_16;
+ size_t len = GET_INSN_LENGTH(p->opcode);
+ u32 insn = len == 4 ? __BUG_INSN_32 : __BUG_INSN_16;
- patch_text(p->addr, &insn, 1);
+ patch_text(p->addr, &insn, len);
}
/* remove breakpoint from text */
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
- patch_text(p->addr, &p->opcode, 1);
+ size_t len = GET_INSN_LENGTH(p->opcode);
+
+ patch_text(p->addr, &p->opcode, len);
}
void __kprobes arch_remove_kprobe(struct kprobe *p)
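GET_INSN_LENGTH, now used consistently above, encodes the standard RVC length rule. In effect (a simplified sketch, valid for the 16/32-bit encodings Linux handles):

/* An opcode whose two lowest bits are 0b11 is a full 32-bit
 * encoding; anything else is a 16-bit compressed instruction. */
static inline size_t insn_length(u32 insn)
{
	return (insn & 0x3) == 0x3 ? 4 : 2;
}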
diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c
index 4b3dc8beaf77..cc15f7ca6cc1 100644
--- a/arch/riscv/kernel/probes/uprobes.c
+++ b/arch/riscv/kernel/probes/uprobes.c
@@ -167,6 +167,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
/* Initialize the slot */
void *kaddr = kmap_atomic(page);
void *dst = kaddr + (vaddr & ~PAGE_MASK);
+ unsigned long start = (unsigned long)dst;
memcpy(dst, src, len);
@@ -176,13 +177,6 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
*(uprobe_opcode_t *)dst = __BUG_INSN_32;
}
+ flush_icache_range(start, start + len);
kunmap_atomic(kaddr);
-
- /*
- * We probably need flush_icache_user_page() but it needs vma.
- * This should work on most of architectures by default. If
- * architecture needs to do something different it can define
- * its own version of the function.
- */
- flush_dcache_page(page);
}
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 92922dbd5b5c..a0a40889d79a 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -7,6 +7,7 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/bitfield.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -15,7 +16,10 @@
#include <linux/tick.h>
#include <linux/ptrace.h>
#include <linux/uaccess.h>
+#include <linux/personality.h>
+#include <linux/entry-common.h>
+#include <asm/asm-prototypes.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/csr.h>
@@ -26,8 +30,7 @@
#include <asm/cpuidle.h>
#include <asm/vector.h>
#include <asm/cpufeature.h>
-
-register unsigned long gp_in_global __asm__("gp");
+#include <asm/exec.h>
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
@@ -35,9 +38,10 @@ unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
-extern asmlinkage void ret_from_fork(void);
+extern asmlinkage void ret_from_fork_kernel_asm(void);
+extern asmlinkage void ret_from_fork_user_asm(void);
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
{
cpu_do_idle();
}
@@ -56,7 +60,7 @@ int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
if (!unaligned_ctl_available())
return -EINVAL;
- return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr);
+ return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr);
}
void __show_regs(struct pt_regs *regs)
@@ -101,6 +105,13 @@ void show_regs(struct pt_regs *regs)
dump_backtrace(regs, NULL, KERN_DEFAULT);
}
+unsigned long arch_align_stack(unsigned long sp)
+{
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ sp -= get_random_u32_below(PAGE_SIZE);
+ return sp & ~0xf;
+}
+
#ifdef CONFIG_COMPAT
static bool compat_mode_supported __read_mostly;
@@ -173,12 +184,16 @@ void flush_thread(void)
memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE);
#endif
+#ifdef CONFIG_RISCV_ISA_SUPM
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM))
+ envcfg_update_bits(current, ENVCFG_PMM, ENVCFG_PMM_PMLEN_0);
+#endif
}
void arch_release_task_struct(struct task_struct *tsk)
{
/* Free the vector context of datap. */
- if (has_vector())
+ if (has_vector() || has_xtheadvector())
riscv_v_thread_free(tsk);
}
@@ -194,6 +209,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
+asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs)
+{
+ fn(fn_arg);
+
+ syscall_exit_to_user_mode(regs);
+}
+
+asmlinkage void ret_from_fork_user(struct pt_regs *regs)
+{
+ syscall_exit_to_user_mode(regs);
+}
+
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
unsigned long clone_flags = args->flags;
@@ -201,18 +228,22 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
+ /* Ensure all threads in this mm have the same pointer masking mode. */
+ if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM) && p->mm && (clone_flags & CLONE_VM))
+ set_bit(MM_CONTEXT_LOCK_PMLEN, &p->mm->context.flags);
+
memset(&p->thread.s, 0, sizeof(p->thread.s));
/* p->thread holds context to be restored by __switch_to() */
if (unlikely(args->fn)) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->gp = gp_in_global;
/* Supervisor/Machine, irqs on: */
childregs->status = SR_PP | SR_PIE;
p->thread.s[0] = (unsigned long)args->fn;
p->thread.s[1] = (unsigned long)args->fn_arg;
+ p->thread.ra = (unsigned long)ret_from_fork_kernel_asm;
} else {
*childregs = *(current_pt_regs());
/* Turn off status.VS */
@@ -222,12 +253,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (clone_flags & CLONE_SETTLS)
childregs->tp = tls;
childregs->a0 = 0; /* Return value of fork() */
- p->thread.s[0] = 0;
+ p->thread.ra = (unsigned long)ret_from_fork_user_asm;
}
p->thread.riscv_v_flags = 0;
- if (has_vector())
+ if (has_vector() || has_xtheadvector())
riscv_v_thread_alloc(p);
- p->thread.ra = (unsigned long)ret_from_fork;
p->thread.sp = (unsigned long)childregs; /* kernel sp */
return 0;
}
@@ -236,3 +266,154 @@ void __init arch_task_cache_init(void)
{
riscv_v_setup_ctx_cache();
}
+
+#ifdef CONFIG_RISCV_ISA_SUPM
+enum {
+ PMLEN_0 = 0,
+ PMLEN_7 = 7,
+ PMLEN_16 = 16,
+};
+
+static bool have_user_pmlen_7;
+static bool have_user_pmlen_16;
+
+/*
+ * Control the relaxed ABI allowing tagged user addresses into the kernel.
+ */
+static unsigned int tagged_addr_disabled;
+
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
+{
+ unsigned long valid_mask = PR_PMLEN_MASK | PR_TAGGED_ADDR_ENABLE;
+ struct thread_info *ti = task_thread_info(task);
+ struct mm_struct *mm = task->mm;
+ unsigned long pmm;
+ u8 pmlen;
+
+ if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM))
+ return -EINVAL;
+
+ if (is_compat_thread(ti))
+ return -EINVAL;
+
+ if (arg & ~valid_mask)
+ return -EINVAL;
+
+ /*
+ * Prefer the smallest PMLEN that satisfies the user's request,
+ * in case choosing a larger PMLEN has a performance impact.
+ */
+ pmlen = FIELD_GET(PR_PMLEN_MASK, arg);
+ if (pmlen == PMLEN_0) {
+ pmm = ENVCFG_PMM_PMLEN_0;
+ } else if (pmlen <= PMLEN_7 && have_user_pmlen_7) {
+ pmlen = PMLEN_7;
+ pmm = ENVCFG_PMM_PMLEN_7;
+ } else if (pmlen <= PMLEN_16 && have_user_pmlen_16) {
+ pmlen = PMLEN_16;
+ pmm = ENVCFG_PMM_PMLEN_16;
+ } else {
+ return -EINVAL;
+ }
+
+ /*
+ * Do not allow enabling the tagged address ABI if it is globally
+ * disabled via the sysctl abi.tagged_addr_disabled, or if pointer
+ * masking is disabled for userspace.
+ */
+ if (arg & PR_TAGGED_ADDR_ENABLE && (tagged_addr_disabled || !pmlen))
+ return -EINVAL;
+
+ if (!(arg & PR_TAGGED_ADDR_ENABLE))
+ pmlen = PMLEN_0;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+ if (test_bit(MM_CONTEXT_LOCK_PMLEN, &mm->context.flags) && mm->context.pmlen != pmlen) {
+ mmap_write_unlock(mm);
+ return -EBUSY;
+ }
+
+ envcfg_update_bits(task, ENVCFG_PMM, pmm);
+ mm->context.pmlen = pmlen;
+
+ mmap_write_unlock(mm);
+
+ return 0;
+}
+
+long get_tagged_addr_ctrl(struct task_struct *task)
+{
+ struct thread_info *ti = task_thread_info(task);
+ long ret = 0;
+
+ if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM))
+ return -EINVAL;
+
+ if (is_compat_thread(ti))
+ return -EINVAL;
+
+ /*
+ * The mm context's pmlen is set only when the tagged address ABI is
+ * enabled, so the effective PMLEN must be extracted from envcfg.PMM.
+ */
+ switch (task->thread.envcfg & ENVCFG_PMM) {
+ case ENVCFG_PMM_PMLEN_7:
+ ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_7);
+ break;
+ case ENVCFG_PMM_PMLEN_16:
+ ret = FIELD_PREP(PR_PMLEN_MASK, PMLEN_16);
+ break;
+ }
+
+ if (task->mm->context.pmlen)
+ ret |= PR_TAGGED_ADDR_ENABLE;
+
+ return ret;
+}
+
+static bool try_to_set_pmm(unsigned long value)
+{
+ csr_set(CSR_ENVCFG, value);
+ return (csr_read_clear(CSR_ENVCFG, ENVCFG_PMM) & ENVCFG_PMM) == value;
+}
+
+/*
+ * Global sysctl to disable the tagged user addresses support. This control
+ * only prevents the tagged address ABI enabling via prctl() and does not
+ * disable it for tasks that already opted in to the relaxed ABI.
+ */
+
+static const struct ctl_table tagged_addr_sysctl_table[] = {
+ {
+ .procname = "tagged_addr_disabled",
+ .mode = 0644,
+ .data = &tagged_addr_disabled,
+ .maxlen = sizeof(int),
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+};
+
+static int __init tagged_addr_init(void)
+{
+ if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM))
+ return 0;
+
+ /*
+ * envcfg.PMM is a WARL field. Detect which values are supported.
+ * Assume the supported PMLEN values are the same on all harts.
+ */
+ csr_clear(CSR_ENVCFG, ENVCFG_PMM);
+ have_user_pmlen_7 = try_to_set_pmm(ENVCFG_PMM_PMLEN_7);
+ have_user_pmlen_16 = try_to_set_pmm(ENVCFG_PMM_PMLEN_16);
+
+ if (!register_sysctl("abi", tagged_addr_sysctl_table))
+ return -EINVAL;
+
+ return 0;
+}
+core_initcall(tagged_addr_init);
+#endif /* CONFIG_RISCV_ISA_SUPM */
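For reference, a minimal userspace sketch of the prctl() interface implemented above. This is a hypothetical example, assuming the PR_TAGGED_ADDR_ENABLE and PR_PMLEN_* definitions from <linux/prctl.h>; per set_tagged_addr_ctrl() above, the kernel rounds the requested PMLEN up to 7 or 16 depending on what the hart supports:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	/* Ask for the tagged address ABI with at least 7 masked bits. */
	unsigned long ctrl = PR_TAGGED_ADDR_ENABLE | (7UL << PR_PMLEN_SHIFT);

	if (prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0)) {
		perror("PR_SET_TAGGED_ADDR_CTRL");
		return 1;
	}

	/* Report the effective control word the kernel chose (PMLEN 7 or 16). */
	printf("ctrl: 0x%x\n", prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0));
	return 0;
}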
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index e8515aa9d80b..ea67e9fb7a58 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -28,6 +28,9 @@ enum riscv_regset {
#ifdef CONFIG_RISCV_ISA_V
REGSET_V,
#endif
+#ifdef CONFIG_RISCV_ISA_SUPM
+ REGSET_TAGGED_ADDR_CTRL,
+#endif
};
static int riscv_gpr_get(struct task_struct *target,
@@ -152,6 +155,35 @@ static int riscv_vr_set(struct task_struct *target,
}
#endif
+#ifdef CONFIG_RISCV_ISA_SUPM
+static int tagged_addr_ctrl_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ long ctrl = get_tagged_addr_ctrl(target);
+
+ if (IS_ERR_VALUE(ctrl))
+ return ctrl;
+
+ return membuf_write(&to, &ctrl, sizeof(ctrl));
+}
+
+static int tagged_addr_ctrl_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ long ctrl;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+ if (ret)
+ return ret;
+
+ return set_tagged_addr_ctrl(target, ctrl);
+}
+#endif
+
static const struct user_regset riscv_user_regset[] = {
[REGSET_X] = {
.core_note_type = NT_PRSTATUS,
@@ -182,6 +214,16 @@ static const struct user_regset riscv_user_regset[] = {
.set = riscv_vr_set,
},
#endif
+#ifdef CONFIG_RISCV_ISA_SUPM
+ [REGSET_TAGGED_ADDR_CTRL] = {
+ .core_note_type = NT_RISCV_TAGGED_ADDR_CTRL,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = tagged_addr_ctrl_get,
+ .set = tagged_addr_ctrl_set,
+ },
+#endif
};
static const struct user_regset_view riscv_user_native_view = {
@@ -377,14 +419,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
return ret;
}
+#else
+static const struct user_regset_view compat_riscv_user_native_view = {};
#endif /* CONFIG_COMPAT */
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
-#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_32BIT))
+ if (is_compat_thread(&task->thread_info))
return &compat_riscv_user_native_view;
else
-#endif
return &riscv_user_native_view;
}
diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c
index a72879b4249a..5ab1c7e1a6ed 100644
--- a/arch/riscv/kernel/riscv_ksyms.c
+++ b/arch/riscv/kernel/riscv_ksyms.c
@@ -12,9 +12,6 @@
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(strcmp);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strncmp);
EXPORT_SYMBOL(__memset);
EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(__memmove);
diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c
index a4559695ce62..0cc5559c08d8 100644
--- a/arch/riscv/kernel/sbi-ipi.c
+++ b/arch/riscv/kernel/sbi-ipi.c
@@ -13,6 +13,9 @@
#include <linux/irqdomain.h>
#include <asm/sbi.h>
+DEFINE_STATIC_KEY_FALSE(riscv_sbi_for_rfence);
+EXPORT_SYMBOL_GPL(riscv_sbi_for_rfence);
+
static int sbi_ipi_virq;
static void sbi_ipi_handle(struct irq_desc *desc)
@@ -68,10 +71,16 @@ void __init sbi_ipi_init(void)
* the masking/unmasking of virtual IPIs is done
* via generic IPI-Mux
*/
- cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING,
"irqchip/sbi-ipi:starting",
sbi_ipi_starting_cpu, NULL);
- riscv_ipi_set_virq_range(virq, BITS_PER_BYTE, false);
+ riscv_ipi_set_virq_range(virq, BITS_PER_BYTE);
pr_info("providing IPIs using SBI IPI extension\n");
+
+ /*
+ * Use the SBI remote fence extension to avoid
+ * the extra context switch needed to handle IPIs.
+ */
+ static_branch_enable(&riscv_sbi_for_rfence);
}
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index e66e0999a800..53836a9235e3 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -24,51 +24,6 @@ static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5) __ro_after_init;
-struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
- unsigned long arg1, unsigned long arg2,
- unsigned long arg3, unsigned long arg4,
- unsigned long arg5)
-{
- struct sbiret ret;
-
- register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
- register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
- register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
- register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
- register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
- register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
- register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
- register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
- asm volatile ("ecall"
- : "+r" (a0), "+r" (a1)
- : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
- : "memory");
- ret.error = a0;
- ret.value = a1;
-
- return ret;
-}
-EXPORT_SYMBOL(sbi_ecall);
-
-int sbi_err_map_linux_errno(int err)
-{
- switch (err) {
- case SBI_SUCCESS:
- return 0;
- case SBI_ERR_DENIED:
- return -EPERM;
- case SBI_ERR_INVALID_PARAM:
- return -EINVAL;
- case SBI_ERR_INVALID_ADDRESS:
- return -EFAULT;
- case SBI_ERR_NOT_SUPPORTED:
- case SBI_ERR_FAILURE:
- default:
- return -ENOTSUPP;
- };
-}
-EXPORT_SYMBOL(sbi_err_map_linux_errno);
-
#ifdef CONFIG_RISCV_SBI_V01
static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask)
{
@@ -344,6 +299,76 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask,
return 0;
}
+static bool sbi_fwft_supported;
+
+struct fwft_set_req {
+ u32 feature;
+ unsigned long value;
+ unsigned long flags;
+ atomic_t error;
+};
+
+static void cpu_sbi_fwft_set(void *arg)
+{
+ struct fwft_set_req *req = arg;
+ int ret;
+
+ ret = sbi_fwft_set(req->feature, req->value, req->flags);
+ if (ret)
+ atomic_set(&req->error, ret);
+}
+
+/**
+ * sbi_fwft_set() - Set a feature on the local hart
+ * @feature: The feature ID to be set
+ * @value: The feature value to be set
+ * @flags: FWFT feature set flags
+ *
+ * Return: 0 on success, appropriate linux error code otherwise.
+ */
+int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags)
+{
+ struct sbiret ret;
+
+ if (!sbi_fwft_supported)
+ return -EOPNOTSUPP;
+
+ ret = sbi_ecall(SBI_EXT_FWFT, SBI_EXT_FWFT_SET,
+ feature, value, flags, 0, 0, 0);
+
+ return sbi_err_map_linux_errno(ret.error);
+}
+
+/**
+ * sbi_fwft_set_cpumask() - Set a feature for the specified cpumask
+ * @mask: CPU mask of cpus that need the feature to be set
+ * @feature: The feature ID to be set
+ * @value: The feature value to be set
+ * @flags: FWFT feature set flags
+ *
+ * Return: 0 on success, appropriate linux error code otherwise.
+ */
+int sbi_fwft_set_cpumask(const cpumask_t *mask, u32 feature,
+ unsigned long value, unsigned long flags)
+{
+ struct fwft_set_req req = {
+ .feature = feature,
+ .value = value,
+ .flags = flags,
+ .error = ATOMIC_INIT(0),
+ };
+
+ if (!sbi_fwft_supported)
+ return -EOPNOTSUPP;
+
+ if (feature & SBI_FWFT_GLOBAL_FEATURE_BIT)
+ return -EINVAL;
+
+ on_each_cpu_mask(mask, cpu_sbi_fwft_set, &req, 1);
+
+ return atomic_read(&req.error);
+}
+
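A hedged usage sketch of the new cpumask helper; example_fwft_setup() is hypothetical, while SBI_FWFT_MISALIGNED_EXC_DELEG is the local feature ID used by unaligned_access_init() further down:

/* Hypothetical caller: delegate misaligned exceptions on all online harts. */
static int __init example_fwft_setup(void)
{
	return sbi_fwft_set_cpumask(cpu_online_mask,
				    SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0);
}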
/**
* sbi_set_timer() - Program the timer for next timer event.
* @stime_value: The value after which next timer event should fire.
@@ -528,17 +553,6 @@ long sbi_probe_extension(int extid)
}
EXPORT_SYMBOL(sbi_probe_extension);
-static long __sbi_base_ecall(int fid)
-{
- struct sbiret ret;
-
- ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
- if (!ret.error)
- return ret.value;
- else
- return sbi_err_map_linux_errno(ret.error);
-}
-
static inline long sbi_get_spec_version(void)
{
return __sbi_base_ecall(SBI_EXT_BASE_GET_SPEC_VERSION);
@@ -665,7 +679,7 @@ void __init sbi_init(void)
} else {
__sbi_rfence = __sbi_rfence_v01;
}
- if ((sbi_spec_version >= sbi_mk_version(0, 3)) &&
+ if (sbi_spec_version >= sbi_mk_version(0, 3) &&
sbi_probe_extension(SBI_EXT_SRST)) {
pr_info("SBI SRST extension detected\n");
pm_power_off = sbi_srst_power_off;
@@ -673,11 +687,16 @@ void __init sbi_init(void)
sbi_srst_reboot_nb.priority = 192;
register_restart_handler(&sbi_srst_reboot_nb);
}
- if ((sbi_spec_version >= sbi_mk_version(2, 0)) &&
- (sbi_probe_extension(SBI_EXT_DBCN) > 0)) {
+ if (sbi_spec_version >= sbi_mk_version(2, 0) &&
+ sbi_probe_extension(SBI_EXT_DBCN) > 0) {
pr_info("SBI DBCN extension detected\n");
sbi_debug_console_available = true;
}
+ if (sbi_spec_version >= sbi_mk_version(3, 0) &&
+ sbi_probe_extension(SBI_EXT_FWFT)) {
+ pr_info("SBI FWFT extension detected\n");
+ sbi_fwft_supported = true;
+ }
} else {
__sbi_set_timer = __sbi_set_timer_v01;
__sbi_send_ipi = __sbi_send_ipi_v01;
diff --git a/arch/riscv/kernel/sbi_ecall.c b/arch/riscv/kernel/sbi_ecall.c
new file mode 100644
index 000000000000..24aabb4fbde3
--- /dev/null
+++ b/arch/riscv/kernel/sbi_ecall.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Rivos Inc. */
+
+#include <asm/sbi.h>
+#define CREATE_TRACE_POINTS
+#include <asm/trace.h>
+
+long __sbi_base_ecall(int fid)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
+ if (!ret.error)
+ return ret.value;
+ else
+ return sbi_err_map_linux_errno(ret.error);
+}
+EXPORT_SYMBOL(__sbi_base_ecall);
+
+struct sbiret __sbi_ecall(unsigned long arg0, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5,
+ int fid, int ext)
+{
+ struct sbiret ret;
+
+ trace_sbi_call(ext, fid);
+
+ register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
+ register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
+ register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
+ register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
+ register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
+ register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
+ register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
+ register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
+ asm volatile ("ecall"
+ : "+r" (a0), "+r" (a1)
+ : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
+ : "memory");
+ ret.error = a0;
+ ret.value = a1;
+
+ trace_sbi_return(ext, ret.error, ret.value);
+
+ return ret;
+}
+EXPORT_SYMBOL(__sbi_ecall);
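A short usage sketch for this new file; example_report_sbi_impl() is hypothetical, and SBI_EXT_BASE_GET_IMP_ID comes from <asm/sbi.h>. Note that __sbi_ecall() takes ext/fid last so the data arguments already sit in a0-a5 at the call site:

/* Hypothetical probe: query the SBI implementation ID via the base extension. */
static void example_report_sbi_impl(void)
{
	long imp_id = __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_ID);

	if (imp_id >= 0)
		pr_info("SBI implementation ID: %ld\n", imp_id);
}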
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 4f73c0ae44b2..f7c9a1caa83e 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -66,6 +66,9 @@ static struct resource bss_res = { .name = "Kernel bss", };
static struct resource elfcorehdr_res = { .name = "ELF Core hdr", };
#endif
+static int num_standard_resources;
+static struct resource *standard_resources;
+
static int __init add_resource(struct resource *parent,
struct resource *res)
{
@@ -139,7 +142,7 @@ static void __init init_resources(void)
struct resource *res = NULL;
struct resource *mem_res = NULL;
size_t mem_res_sz = 0;
- int num_resources = 0, res_idx = 0;
+ int num_resources = 0, res_idx = 0, non_resv_res = 0;
int ret = 0;
/* + 1 as memblock_alloc() might increase memblock.reserved.cnt */
@@ -147,9 +150,7 @@ static void __init init_resources(void)
res_idx = num_resources - 1;
mem_res_sz = num_resources * sizeof(*mem_res);
- mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES);
- if (!mem_res)
- panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz);
+ mem_res = memblock_alloc_or_panic(mem_res_sz, SMP_CACHE_BYTES);
/*
* Start by adding the reserved regions, if they overlap
@@ -195,6 +196,7 @@ static void __init init_resources(void)
/* Add /memory regions to the resource tree */
for_each_mem_region(region) {
res = &mem_res[res_idx--];
+ non_resv_res++;
if (unlikely(memblock_is_nomap(region))) {
res->name = "Reserved";
@@ -212,6 +214,9 @@ static void __init init_resources(void)
goto error;
}
+ num_standard_resources = non_resv_res;
+ standard_resources = &mem_res[res_idx + 1];
+
/* Clean-up any unused pre-allocated resources */
if (res_idx >= 0)
memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res));
@@ -223,11 +228,38 @@ static void __init init_resources(void)
memblock_free(mem_res, mem_res_sz);
}
+static int __init reserve_memblock_reserved_regions(void)
+{
+ u64 i, j;
+
+ for (i = 0; i < num_standard_resources; i++) {
+ struct resource *mem = &standard_resources[i];
+ phys_addr_t r_start, r_end, mem_size = resource_size(mem);
+
+ if (!memblock_is_region_reserved(mem->start, mem_size))
+ continue;
+
+ for_each_reserved_mem_range(j, &r_start, &r_end) {
+ resource_size_t start, end;
+
+ start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start);
+ end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end);
+
+ if (start > mem->end || end < mem->start)
+ continue;
+
+ reserve_region_with_split(mem, start, end, "Reserved");
+ }
+ }
+
+ return 0;
+}
+arch_initcall(reserve_memblock_reserved_regions);
static void __init parse_dtb(void)
{
/* Early scan of device tree from init memory */
- if (early_init_dt_scan(dtb_early_va)) {
+ if (early_init_dt_scan(dtb_early_va, dtb_early_pa)) {
const char *name = of_flat_dt_get_machine_name();
if (name) {
@@ -237,11 +269,42 @@ static void __init parse_dtb(void)
} else {
pr_err("No DTB passed to the kernel\n");
}
+}
+
+#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
+DEFINE_STATIC_KEY_TRUE(qspinlock_key);
+EXPORT_SYMBOL(qspinlock_key);
+#endif
+
+static void __init riscv_spinlock_init(void)
+{
+ char *using_ext = NULL;
+
+ if (IS_ENABLED(CONFIG_RISCV_TICKET_SPINLOCKS)) {
+ pr_info("Ticket spinlock: enabled\n");
+ return;
+ }
-#ifdef CONFIG_CMDLINE_FORCE
- strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
- pr_info("Forcing kernel command line to: %s\n", boot_command_line);
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
+ IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
+ riscv_isa_extension_available(NULL, ZABHA) &&
+ riscv_isa_extension_available(NULL, ZACAS)) {
+ using_ext = "using Zabha";
+ } else if (riscv_isa_extension_available(NULL, ZICCRSE)) {
+ using_ext = "using Ziccrse";
+ }
+#if defined(CONFIG_RISCV_COMBO_SPINLOCKS)
+ else {
+ static_branch_disable(&qspinlock_key);
+ pr_info("Ticket spinlock: enabled\n");
+ return;
+ }
#endif
+
+ if (!using_ext)
+ pr_err("Queued spinlock without Zabha or Ziccrse");
+ else
+ pr_info("Queued spinlock %s: enabled\n", using_ext);
}
extern void __init init_rt_signal_env(void);
@@ -281,13 +344,15 @@ void __init setup_arch(char **cmdline_p)
setup_smp();
#endif
- if (!acpi_disabled)
+ if (!acpi_disabled) {
acpi_init_rintc_map();
+ acpi_map_cpus_to_nodes();
+ }
riscv_init_cbo_blocksizes();
riscv_fill_hwcap();
- init_rt_signal_env();
apply_boot_alternatives();
+ init_rt_signal_env();
if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) &&
riscv_isa_extension_available(NULL, ZICBOM))
@@ -295,6 +360,7 @@ void __init setup_arch(char **cmdline_p)
riscv_set_dma_cache_alignment();
riscv_user_isa_enable();
+ riscv_spinlock_init();
}
bool arch_cpu_is_hotpluggable(int cpu)
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 501e66debf69..08378fea3a11 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -84,7 +84,7 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
datap = state + 1;
/* datap is designed to be 16 byte aligned for better performance */
- WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16)));
+ WARN_ON(!IS_ALIGNED((unsigned long)datap, 16));
get_cpu_vector_context();
riscv_v_vstate_save(&current->thread.vstate, regs);
@@ -119,6 +119,13 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
struct __sc_riscv_v_state __user *state = sc_vec;
void __user *datap;
+ /*
+ * Mark the vstate as clean prior to performing the actual copy,
+ * to avoid having the vstate incorrectly clobbered by the
+ * discarded vector state.
+ */
+ riscv_v_vstate_set_restore(current, regs);
+
/* Copy everything of __sc_riscv_v_state except datap. */
err = __copy_from_user(&current->thread.vstate, &state->v_state,
offsetof(struct __riscv_v_ext_state, datap));
@@ -133,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
* Copy the whole vector content from user space datap. Use
* copy_from_user to prevent information leak.
*/
- err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
- if (unlikely(err))
- return err;
-
- riscv_v_vstate_set_restore(current, regs);
-
- return err;
+ return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
}
#else
#define save_v_state(task, regs) (0)
@@ -188,7 +189,7 @@ static long restore_sigcontext(struct pt_regs *regs,
return 0;
case RISCV_V_MAGIC:
- if (!has_vector() || !riscv_v_vstate_query(regs) ||
+ if (!(has_vector() || has_xtheadvector()) || !riscv_v_vstate_query(regs) ||
size != riscv_v_sc_size)
return -EINVAL;
@@ -210,16 +211,10 @@ static size_t get_rt_frame_size(bool cal_all)
frame_size = sizeof(*frame);
- if (has_vector()) {
+ if (has_vector() || has_xtheadvector()) {
if (cal_all || riscv_v_vstate_query(task_pt_regs(current)))
total_context_size += riscv_v_sc_size;
}
- /*
- * Preserved a __riscv_ctx_hdr for END signal context header if an
- * extension uses __riscv_extra_ext_header
- */
- if (total_context_size)
- total_context_size += sizeof(struct __riscv_ctx_hdr);
frame_size += total_context_size;
@@ -283,7 +278,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
if (has_fpu())
err |= save_fp_state(regs, &sc->sc_fpregs);
/* Save the vector state. */
- if (has_vector() && riscv_v_vstate_query(regs))
+ if ((has_vector() || has_xtheadvector()) && riscv_v_vstate_query(regs))
err |= save_v_state(regs, (void __user **)&sc_ext_ptr);
/* Write zero to fp-reserved space and check it on restore_sigcontext */
err |= __put_user(0, &sc->sc_extdesc.reserved);
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 45dd4035416e..e650dec44817 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -13,6 +13,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/kexec.h>
+#include <linux/kgdb.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/smp.h>
@@ -21,6 +22,7 @@
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/irq_work.h>
+#include <linux/nmi.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
@@ -33,16 +35,21 @@ enum ipi_message_type {
IPI_CPU_CRASH_STOP,
IPI_IRQ_WORK,
IPI_TIMER,
+ IPI_CPU_BACKTRACE,
+ IPI_KGDB_ROUNDUP,
IPI_MAX
};
unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = {
[0 ... NR_CPUS-1] = INVALID_HARTID
};
+EXPORT_SYMBOL_GPL(__cpuid_to_hartid_map);
void __init smp_setup_processor_id(void)
{
cpuid_to_hartid_map(0) = boot_cpu_hartid;
+
+ pr_info("Booting Linux on hartid %lu\n", boot_cpu_hartid);
}
static DEFINE_PER_CPU_READ_MOSTLY(int, ipi_dummy_dev);
@@ -113,6 +120,7 @@ void arch_irq_work_raise(void)
static irqreturn_t handle_IPI(int irq, void *data)
{
+ unsigned int cpu = smp_processor_id();
int ipi = irq - ipi_virq_base;
switch (ipi) {
@@ -126,7 +134,7 @@ static irqreturn_t handle_IPI(int irq, void *data)
ipi_stop();
break;
case IPI_CPU_CRASH_STOP:
- ipi_cpu_crash_stop(smp_processor_id(), get_irq_regs());
+ ipi_cpu_crash_stop(cpu, get_irq_regs());
break;
case IPI_IRQ_WORK:
irq_work_run();
@@ -136,8 +144,14 @@ static irqreturn_t handle_IPI(int irq, void *data)
tick_receive_broadcast();
break;
#endif
+ case IPI_CPU_BACKTRACE:
+ nmi_cpu_backtrace(get_irq_regs());
+ break;
+ case IPI_KGDB_ROUNDUP:
+ kgdb_nmicallback(cpu, get_irq_regs());
+ break;
default:
- pr_warn("CPU%d: unhandled IPI%d\n", smp_processor_id(), ipi);
+ pr_warn("CPU%d: unhandled IPI%d\n", cpu, ipi);
break;
}
@@ -171,10 +185,7 @@ bool riscv_ipi_have_virq_range(void)
return (ipi_virq_base) ? true : false;
}
-DEFINE_STATIC_KEY_FALSE(riscv_ipi_for_rfence);
-EXPORT_SYMBOL_GPL(riscv_ipi_for_rfence);
-
-void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence)
+void riscv_ipi_set_virq_range(int virq, int nr)
{
int i, err;
@@ -197,12 +208,6 @@ void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence)
/* Enabled IPIs for boot CPU immediately */
riscv_ipi_enable();
-
- /* Update RFENCE static key */
- if (use_for_rfence)
- static_branch_enable(&riscv_ipi_for_rfence);
- else
- static_branch_disable(&riscv_ipi_for_rfence);
}
static const char * const ipi_names[] = {
@@ -212,6 +217,8 @@ static const char * const ipi_names[] = {
[IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
[IPI_TIMER] = "Timer broadcast interrupts",
+ [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts",
+ [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts",
};
void show_ipi_stats(struct seq_file *p, int prec)
@@ -332,3 +339,29 @@ void arch_smp_send_reschedule(int cpu)
send_ipi_single(cpu, IPI_RESCHEDULE);
}
EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
+
+static void riscv_backtrace_ipi(cpumask_t *mask)
+{
+ send_ipi_mask(mask, IPI_CPU_BACKTRACE);
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
+{
+ nmi_trigger_cpumask_backtrace(mask, exclude_cpu, riscv_backtrace_ipi);
+}
+
+#ifdef CONFIG_KGDB
+void kgdb_roundup_cpus(void)
+{
+ int this_cpu = raw_smp_processor_id();
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ /* No need to round up ourselves */
+ if (cpu == this_cpu)
+ continue;
+
+ send_ipi_single(cpu, IPI_KGDB_ROUNDUP);
+ }
+}
+#endif
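With IPI_CPU_BACKTRACE and IPI_KGDB_ROUNDUP wired up above, generic facilities (the RCU stall detector, kgdb) can now reach remote harts; a hypothetical debug caller would simply go through the generic <linux/nmi.h> helper:

/* Hypothetical helper: dump every hart's stack via IPI_CPU_BACKTRACE. */
static void example_dump_all_stacks(void)
{
	if (!trigger_all_cpu_backtrace())
		pr_warn("CPU backtrace not supported\n");
}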
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index c4ed7d977f57..601a321e0f17 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -26,9 +26,8 @@
#include <linux/sched/task_stack.h>
#include <linux/sched/mm.h>
-#include <asm/cpufeature.h>
+#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
-#include <asm/cpufeature.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
@@ -97,7 +96,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un
if (hart == cpuid_to_hartid_map(0)) {
BUG_ON(found_boot_cpu);
found_boot_cpu = true;
- early_map_cpu_to_node(0, acpi_numa_get_nid(cpu_count));
return 0;
}
@@ -107,7 +105,6 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un
}
cpuid_to_hartid_map(cpu_count) = hart;
- early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count));
cpu_count++;
return 0;
@@ -215,6 +212,15 @@ asmlinkage __visible void smp_callin(void)
struct mm_struct *mm = &init_mm;
unsigned int curr_cpuid = smp_processor_id();
+ if (has_vector()) {
+ /*
+ * Return as early as possible so that a hart with a mismatched
+ * VLEN doesn't boot.
+ */
+ if (riscv_v_setup_vsize())
+ return;
+ }
+
/* All kernel threads share the same mm context. */
mmgrab(mm);
current->active_mm = mm;
@@ -225,19 +231,17 @@ asmlinkage __visible void smp_callin(void)
riscv_ipi_enable();
numa_add_cpu(curr_cpuid);
- set_cpu_online(curr_cpuid, 1);
- if (has_vector()) {
- if (riscv_v_setup_vsize())
- elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
- }
+ pr_debug("CPU%u: Booted secondary hartid %lu\n", curr_cpuid,
+ cpuid_to_hartid_map(curr_cpuid));
- riscv_user_isa_enable();
+ set_cpu_online(curr_cpuid, true);
/*
- * Remote TLB flushes are ignored while the CPU is offline, so emit
- * a local TLB flush right now just in case.
+ * Remote cache and TLB flushes are ignored while the CPU is offline,
+ * so flush them both right now just in case.
*/
+ local_flush_icache_all();
local_flush_tlb_all();
complete(&cpu_running);
/*
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 64a9c093aef9..3fe9e6edef8f 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -16,12 +16,24 @@
#ifdef CONFIG_FRAME_POINTER
-extern asmlinkage void ret_from_exception(void);
+extern asmlinkage void handle_exception(void);
+extern unsigned long ret_from_exception_end;
+
+static inline int fp_is_valid(unsigned long fp, unsigned long sp)
+{
+ unsigned long low, high;
+
+ low = sp + sizeof(struct stackframe);
+ high = ALIGN(sp, THREAD_SIZE);
+
+ return !(fp < low || fp > high || fp & 0x07);
+}
void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
bool (*fn)(void *, unsigned long), void *arg)
{
unsigned long fp, sp, pc;
+ int graph_idx = 0;
int level = 0;
if (regs) {
@@ -41,29 +53,28 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
}
for (;;) {
- unsigned long low, high;
struct stackframe *frame;
if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc))))
break;
- /* Validate frame pointer */
- low = sp + sizeof(struct stackframe);
- high = ALIGN(sp, THREAD_SIZE);
- if (unlikely(fp < low || fp > high || fp & 0x7))
+ if (unlikely(!fp_is_valid(fp, sp)))
break;
+
/* Unwind stack frame */
frame = (struct stackframe *)fp - 1;
sp = fp;
- if (regs && (regs->epc == pc) && (frame->fp & 0x7)) {
+ if (regs && (regs->epc == pc) && fp_is_valid(frame->ra, sp)) {
+ /* We hit a function where ra is not saved on the stack */
fp = frame->ra;
pc = regs->ra;
} else {
fp = frame->fp;
- pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+ pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra,
&frame->ra);
- if (pc == (unsigned long)ret_from_exception) {
- if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
+ if (pc >= (unsigned long)handle_exception &&
+ pc < (unsigned long)&ret_from_exception_end) {
+ if (unlikely(!fn(arg, pc)))
break;
pc = ((struct pt_regs *)sp)->epc;
@@ -148,8 +159,51 @@ unsigned long __get_wchan(struct task_struct *task)
return pc;
}
-noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
{
walk_stackframe(task, regs, consume_entry, cookie);
}
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static unsigned long unwind_user_frame(stack_trace_consume_fn consume_entry,
+ void *cookie, unsigned long fp,
+ unsigned long reg_ra)
+{
+ struct stackframe buftail;
+ unsigned long ra = 0;
+ unsigned long __user *user_frame_tail =
+ (unsigned long __user *)(fp - sizeof(struct stackframe));
+
+ /* Check that the struct stackframe below fp is accessible */
+ if (!access_ok(user_frame_tail, sizeof(buftail)))
+ return 0;
+ if (__copy_from_user_inatomic(&buftail, user_frame_tail,
+ sizeof(buftail)))
+ return 0;
+
+ ra = reg_ra ? : buftail.ra;
+
+ fp = buftail.fp;
+ if (!ra || !consume_entry(cookie, ra))
+ return 0;
+
+ return fp;
+}
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
+{
+ unsigned long fp = 0;
+
+ fp = regs->s0;
+ if (!consume_entry(cookie, regs->epc))
+ return;
+
+ fp = unwind_user_frame(consume_entry, cookie, fp, regs->ra);
+ while (fp && !(fp & 0x7))
+ fp = unwind_user_frame(consume_entry, cookie, fp, 0);
+}
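For reference, both unwinders above rely on the standard RISC-V frame record (struct stackframe from <asm/stacktrace.h>): a two-word tail stored immediately below the frame pointer.

struct stackframe {
	unsigned long fp;	/* caller's frame pointer */
	unsigned long ra;	/* return address into the caller */
};

/*
 * Given a valid fp, walk_stackframe() and unwind_user_frame() locate the
 * record just below it: (struct stackframe *)fp - 1.
 */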
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
index 299795341e8a..24b3f57d467f 100644
--- a/arch/riscv/kernel/suspend.c
+++ b/arch/riscv/kernel/suspend.c
@@ -14,8 +14,7 @@
void suspend_save_csrs(struct suspend_context *context)
{
- context->scratch = csr_read(CSR_SCRATCH);
- if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG))
context->envcfg = csr_read(CSR_ENVCFG);
context->tvec = csr_read(CSR_TVEC);
context->ie = csr_read(CSR_IE);
@@ -31,19 +30,33 @@ void suspend_save_csrs(struct suspend_context *context)
*/
#ifdef CONFIG_MMU
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SSTC)) {
+ context->stimecmp = csr_read(CSR_STIMECMP);
+#if __riscv_xlen < 64
+ context->stimecmph = csr_read(CSR_STIMECMPH);
+#endif
+ }
+
context->satp = csr_read(CSR_SATP);
#endif
}
void suspend_restore_csrs(struct suspend_context *context)
{
- csr_write(CSR_SCRATCH, context->scratch);
- if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG))
+ csr_write(CSR_SCRATCH, 0);
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_XLINUXENVCFG))
csr_write(CSR_ENVCFG, context->envcfg);
csr_write(CSR_TVEC, context->tvec);
csr_write(CSR_IE, context->ie);
#ifdef CONFIG_MMU
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SSTC)) {
+ csr_write(CSR_STIMECMP, context->stimecmp);
+#if __riscv_xlen < 64
+ csr_write(CSR_STIMECMPH, context->stimecmph);
+#endif
+ }
+
csr_write(CSR_SATP, context->satp);
#endif
}
@@ -132,4 +145,53 @@ static int __init sbi_system_suspend_init(void)
}
arch_initcall(sbi_system_suspend_init);
+
+static int sbi_suspend_finisher(unsigned long suspend_type,
+ unsigned long resume_addr,
+ unsigned long opaque)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_SUSPEND,
+ suspend_type, resume_addr, opaque, 0, 0, 0);
+
+ return (ret.error) ? sbi_err_map_linux_errno(ret.error) : 0;
+}
+
+int riscv_sbi_hart_suspend(u32 state)
+{
+ if (state & SBI_HSM_SUSP_NON_RET_BIT)
+ return cpu_suspend(state, sbi_suspend_finisher);
+ else
+ return sbi_suspend_finisher(state, 0, 0);
+}
+
+bool riscv_sbi_suspend_state_is_valid(u32 state)
+{
+ if (state > SBI_HSM_SUSPEND_RET_DEFAULT &&
+ state < SBI_HSM_SUSPEND_RET_PLATFORM)
+ return false;
+
+ if (state > SBI_HSM_SUSPEND_NON_RET_DEFAULT &&
+ state < SBI_HSM_SUSPEND_NON_RET_PLATFORM)
+ return false;
+
+ return true;
+}
+
+bool riscv_sbi_hsm_is_supported(void)
+{
+ /*
+ * The SBI HSM suspend function is only available when:
+ * 1) SBI version is 0.3 or higher
+ * 2) SBI HSM extension is available
+ */
+ if (sbi_spec_version < sbi_mk_version(0, 3) ||
+ !sbi_probe_extension(SBI_EXT_HSM)) {
+ pr_info("HSM suspend not available\n");
+ return false;
+ }
+
+ return true;
+}
#endif /* CONFIG_RISCV_SBI */
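A hedged sketch of how a cpuidle-style caller might consume the new HSM helpers; example_enter_retentive() is hypothetical, and SBI_HSM_SUSPEND_RET_DEFAULT is the standard retentive suspend type from <asm/sbi.h>:

/* Hypothetical idle hook built on the helpers above. */
static int example_enter_retentive(void)
{
	u32 state = SBI_HSM_SUSPEND_RET_DEFAULT;

	if (!riscv_sbi_hsm_is_supported() ||
	    !riscv_sbi_suspend_state_is_valid(state))
		return -EOPNOTSUPP;

	/* Retentive suspend: a plain ecall, no cpu_suspend() round trip. */
	return riscv_sbi_hart_suspend(state);
}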
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index a7c56b41efd2..0b170e18a2be 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -8,11 +8,15 @@
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
#include <asm/sbi.h>
#include <asm/switch_to.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/vector.h>
+#include <asm/vendor_extensions/sifive_hwprobe.h>
+#include <asm/vendor_extensions/thead_hwprobe.h>
#include <vdso/vsyscall.h>
@@ -69,7 +73,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
if (riscv_isa_extension_available(NULL, c))
pair->value |= RISCV_HWPROBE_IMA_C;
- if (has_vector())
+ if (has_vector() && riscv_isa_extension_available(NULL, v))
pair->value |= RISCV_HWPROBE_IMA_V;
/*
@@ -92,29 +96,53 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
* regardless of the kernel's configuration, as no other checks, besides
* presence in the hart_isa bitmap, are made.
*/
+ EXT_KEY(ZAAMO);
+ EXT_KEY(ZABHA);
+ EXT_KEY(ZACAS);
+ EXT_KEY(ZALRSC);
+ EXT_KEY(ZAWRS);
EXT_KEY(ZBA);
EXT_KEY(ZBB);
- EXT_KEY(ZBS);
- EXT_KEY(ZICBOZ);
EXT_KEY(ZBC);
-
EXT_KEY(ZBKB);
EXT_KEY(ZBKC);
EXT_KEY(ZBKX);
+ EXT_KEY(ZBS);
+ EXT_KEY(ZCA);
+ EXT_KEY(ZCB);
+ EXT_KEY(ZCMOP);
+ EXT_KEY(ZICBOM);
+ EXT_KEY(ZICBOZ);
+ EXT_KEY(ZICNTR);
+ EXT_KEY(ZICOND);
+ EXT_KEY(ZIHINTNTL);
+ EXT_KEY(ZIHINTPAUSE);
+ EXT_KEY(ZIHPM);
+ EXT_KEY(ZIMOP);
EXT_KEY(ZKND);
EXT_KEY(ZKNE);
EXT_KEY(ZKNH);
EXT_KEY(ZKSED);
EXT_KEY(ZKSH);
EXT_KEY(ZKT);
- EXT_KEY(ZIHINTNTL);
EXT_KEY(ZTSO);
- EXT_KEY(ZACAS);
- EXT_KEY(ZICOND);
+ /*
+ * All of the following extensions depend on the kernel's
+ * support for V.
+ */
if (has_vector()) {
EXT_KEY(ZVBB);
EXT_KEY(ZVBC);
+ EXT_KEY(ZVE32F);
+ EXT_KEY(ZVE32X);
+ EXT_KEY(ZVE64D);
+ EXT_KEY(ZVE64F);
+ EXT_KEY(ZVE64X);
+ EXT_KEY(ZVFBFMIN);
+ EXT_KEY(ZVFBFWMA);
+ EXT_KEY(ZVFH);
+ EXT_KEY(ZVFHMIN);
EXT_KEY(ZVKB);
EXT_KEY(ZVKG);
EXT_KEY(ZVKNED);
@@ -123,15 +151,19 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
EXT_KEY(ZVKSED);
EXT_KEY(ZVKSH);
EXT_KEY(ZVKT);
- EXT_KEY(ZVFH);
- EXT_KEY(ZVFHMIN);
}
if (has_fpu()) {
+ EXT_KEY(ZCD);
+ EXT_KEY(ZCF);
+ EXT_KEY(ZFA);
+ EXT_KEY(ZFBFMIN);
EXT_KEY(ZFH);
EXT_KEY(ZFHMIN);
- EXT_KEY(ZFA);
}
+
+ if (IS_ENABLED(CONFIG_RISCV_ISA_SUPM))
+ EXT_KEY(SUPM);
#undef EXT_KEY
}
@@ -139,7 +171,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
pair->value &= ~missing;
}
-static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
+static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext)
{
struct riscv_hwprobe pair;
@@ -147,6 +179,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
return (pair.value & ext);
}
+#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
static u64 hwprobe_misaligned(const struct cpumask *cpus)
{
int cpu;
@@ -159,16 +192,65 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
perf = this_perf;
if (perf != this_perf) {
- perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+ perf = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
break;
}
}
if (perf == -1ULL)
- return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+ return RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
return perf;
}
+#else
+static u64 hwprobe_misaligned(const struct cpumask *cpus)
+{
+ if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS))
+ return RISCV_HWPROBE_MISALIGNED_SCALAR_FAST;
+
+ if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available())
+ return RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED;
+
+ return RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW;
+}
+#endif
+
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+ int cpu;
+ u64 perf = -1ULL;
+
+ /* Report whether the accesses are supported even if their speed wasn't probed */
+ for_each_cpu(cpu, cpus) {
+ int this_perf = per_cpu(vector_misaligned_access, cpu);
+
+ if (perf == -1ULL)
+ perf = this_perf;
+
+ if (perf != this_perf) {
+ perf = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+ break;
+ }
+ }
+
+ if (perf == -1ULL)
+ return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
+ return perf;
+}
+#else
+static u64 hwprobe_vec_misaligned(const struct cpumask *cpus)
+{
+ if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_VECTOR_UNALIGNED_ACCESS))
+ return RISCV_HWPROBE_MISALIGNED_VECTOR_FAST;
+
+ if (IS_ENABLED(CONFIG_RISCV_SLOW_VECTOR_UNALIGNED_ACCESS))
+ return RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW;
+
+ return RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+}
+#endif
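From userspace, both misaligned-performance keys are served by the code above through the hwprobe syscall; a hypothetical query, assuming a riscv system with matching kernel headers (syscall signature per Documentation/arch/riscv/hwprobe.rst):

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
	struct riscv_hwprobe pair = {
		.key = RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF,
	};

	/* No cpuset: ask for a value that is valid across all harts. */
	if (!syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) &&
	    pair.value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST)
		puts("fast misaligned scalar accesses");
	return 0;
}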
static void hwprobe_one_pair(struct riscv_hwprobe *pair,
const struct cpumask *cpus)
@@ -194,14 +276,39 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
break;
case RISCV_HWPROBE_KEY_CPUPERF_0:
+ case RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF:
pair->value = hwprobe_misaligned(cpus);
break;
+ case RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF:
+ pair->value = hwprobe_vec_misaligned(cpus);
+ break;
+
case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
pair->value = 0;
if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
pair->value = riscv_cboz_block_size;
break;
+ case RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE:
+ pair->value = 0;
+ if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOM))
+ pair->value = riscv_cbom_block_size;
+ break;
+ case RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS:
+ pair->value = user_max_virt_addr();
+ break;
+
+ case RISCV_HWPROBE_KEY_TIME_CSR_FREQ:
+ pair->value = riscv_timebase;
+ break;
+
+ case RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0:
+ hwprobe_isa_vendor_ext_sifive_0(pair, cpus);
+ break;
+
+ case RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0:
+ hwprobe_isa_vendor_ext_thead_0(pair, cpus);
+ break;
/*
* For forward compatibility, unknown keys don't fail the whole
@@ -362,8 +469,7 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
static int __init init_hwprobe_vdso_data(void)
{
- struct vdso_data *vd = __arch_get_k_vdso_data();
- struct arch_vdso_data *avd = &vd->arch_data;
+ struct vdso_arch_data *avd = vdso_k_arch_data;
u64 id_bitsmash = 0;
struct riscv_hwprobe pair;
int key;
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index f1c1416a9f1e..d77afe05578f 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -7,7 +7,6 @@
#include <linux/syscalls.h>
#include <asm/cacheflush.h>
-#include <asm-generic/mman-common.h>
static long riscv_sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
@@ -24,7 +23,7 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
#ifdef CONFIG_64BIT
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
- unsigned long, fd, off_t, offset)
+ unsigned long, fd, unsigned long, offset)
{
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0);
}
@@ -33,7 +32,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
- unsigned long, fd, off_t, offset)
+ unsigned long, fd, unsigned long, offset)
{
/*
* Note that the shift for mmap2 is constant (12),
diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c
index dda913764903..6f1a36cb0f3f 100644
--- a/arch/riscv/kernel/syscall_table.c
+++ b/arch/riscv/kernel/syscall_table.c
@@ -9,14 +9,16 @@
#include <asm-generic/syscalls.h>
#include <asm/syscall.h>
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
+
#undef __SYSCALL
#define __SYSCALL(nr, call) asmlinkage long __riscv_##call(const struct pt_regs *);
-#include <asm/unistd.h>
+#include <asm/syscall_table.h>
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = __riscv_##call,
void * const sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = __riscv_sys_ni_syscall,
-#include <asm/unistd.h>
+#include <asm/syscall_table.h>
};
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index a1b9be3c4332..9c83848797a7 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -6,6 +6,7 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/randomize_kstack.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/signal.h>
@@ -34,7 +35,7 @@
int show_unhandled_signals = 1;
-static DEFINE_SPINLOCK(die_lock);
+static DEFINE_RAW_SPINLOCK(die_lock);
static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns)
{
@@ -80,7 +81,7 @@ void die(struct pt_regs *regs, const char *str)
oops_enter();
- spin_lock_irqsave(&die_lock, flags);
+ raw_spin_lock_irqsave(&die_lock, flags);
console_verbose();
bust_spinlocks(1);
@@ -99,7 +100,7 @@ void die(struct pt_regs *regs, const char *str)
bust_spinlocks(0);
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
- spin_unlock_irqrestore(&die_lock, flags);
+ raw_spin_unlock_irqrestore(&die_lock, flags);
oops_exit();
if (in_interrupt())
@@ -121,7 +122,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
pr_cont("\n");
__show_regs(regs);
- dump_instr(KERN_EMERG, regs);
+ dump_instr(KERN_INFO, regs);
}
force_sig_fault(signo, code, (void __user *)addr);
@@ -197,47 +198,57 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re
DO_ERROR_INFO(do_trap_load_fault,
SIGSEGV, SEGV_ACCERR, "load access fault");
-asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs)
+enum misaligned_access_type {
+ MISALIGNED_STORE,
+ MISALIGNED_LOAD,
+};
+static const struct {
+ const char *type_str;
+ int (*handler)(struct pt_regs *regs);
+} misaligned_handler[] = {
+ [MISALIGNED_STORE] = {
+ .type_str = "Oops - store (or AMO) address misaligned",
+ .handler = handle_misaligned_store,
+ },
+ [MISALIGNED_LOAD] = {
+ .type_str = "Oops - load address misaligned",
+ .handler = handle_misaligned_load,
+ },
+};
+
+static void do_trap_misaligned(struct pt_regs *regs, enum misaligned_access_type type)
{
+ irqentry_state_t state;
+
if (user_mode(regs)) {
irqentry_enter_from_user_mode(regs);
+ local_irq_enable();
+ } else {
+ state = irqentry_nmi_enter(regs);
+ }
- if (handle_misaligned_load(regs))
- do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
- "Oops - load address misaligned");
+ if (misaligned_handler[type].handler(regs))
+ do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
+ misaligned_handler[type].type_str);
+ if (user_mode(regs)) {
+ local_irq_disable();
irqentry_exit_to_user_mode(regs);
} else {
- irqentry_state_t state = irqentry_nmi_enter(regs);
-
- if (handle_misaligned_load(regs))
- do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
- "Oops - load address misaligned");
-
irqentry_nmi_exit(regs, state);
}
}
-asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs)
+asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs)
{
- if (user_mode(regs)) {
- irqentry_enter_from_user_mode(regs);
-
- if (handle_misaligned_store(regs))
- do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
- "Oops - store (or AMO) address misaligned");
-
- irqentry_exit_to_user_mode(regs);
- } else {
- irqentry_state_t state = irqentry_nmi_enter(regs);
-
- if (handle_misaligned_store(regs))
- do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
- "Oops - store (or AMO) address misaligned");
+ do_trap_misaligned(regs, MISALIGNED_LOAD);
+}
- irqentry_nmi_exit(regs, state);
- }
+asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs)
+{
+ do_trap_misaligned(regs, MISALIGNED_STORE);
}
+
DO_ERROR_INFO(do_trap_store_fault,
SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault");
DO_ERROR_INFO(do_trap_ecall_s,
@@ -310,22 +321,36 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
}
}
-asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
+asmlinkage __visible __trap_section __no_stack_protector
+void do_trap_ecall_u(struct pt_regs *regs)
{
if (user_mode(regs)) {
long syscall = regs->a7;
regs->epc += 4;
regs->orig_a0 = regs->a0;
+ regs->a0 = -ENOSYS;
riscv_v_vstate_discard(regs);
syscall = syscall_enter_from_user_mode(regs, syscall);
+ add_random_kstack_offset();
+
if (syscall >= 0 && syscall < NR_syscalls)
syscall_handler(regs, syscall);
- else if (syscall != -1)
- regs->a0 = -ENOSYS;
+
+ /*
+ * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+ * so the maximum stack offset is 1k bytes (10 bits).
+ *
+ * The actual entropy will be further reduced by the compiler when
+ * applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned
+ * for RV32I or RV64I.
+ *
+ * The resulting 6 bits of entropy is seen in SP[9:4].
+ */
+ choose_random_kstack_offset(get_random_u16());
syscall_exit_to_user_mode(regs);
} else {
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 8ded225e8c5b..dd8e4af6583f 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -16,6 +16,8 @@
#include <asm/entry-common.h>
#include <asm/hwprobe.h>
#include <asm/cpufeature.h>
+#include <asm/sbi.h>
+#include <asm/vector.h>
#define INSN_MATCH_LB 0x3
#define INSN_MASK_LB 0x707f
@@ -87,6 +89,13 @@
#define INSN_MATCH_C_FSWSP 0xe002
#define INSN_MASK_C_FSWSP 0xe003
+#define INSN_MATCH_C_LHU 0x8400
+#define INSN_MASK_C_LHU 0xfc43
+#define INSN_MATCH_C_LH 0x8440
+#define INSN_MASK_C_LH 0xfc43
+#define INSN_MATCH_C_SH 0x8c00
+#define INSN_MASK_C_SH 0xfc43
+
#define INSN_LEN(insn) ((((insn) & 0x3) < 0x3) ? 2 : 4)
#if defined(CONFIG_64BIT)
@@ -136,8 +145,6 @@
#define REG_PTR(insn, pos, regs) \
(ulong *)((ulong)(regs) + REG_OFFSET(insn, pos))
-#define GET_RM(insn) (((insn) >> 12) & 7)
-
#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs))
#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs))
#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs))
@@ -264,86 +271,14 @@ static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
#define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs))
#define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs))
-#ifdef CONFIG_RISCV_M_MODE
-static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val)
-{
- u8 val;
-
- asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr));
- *r_val = val;
-
- return 0;
-}
-
-static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val)
-{
- asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr));
-
- return 0;
-}
-
-static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn)
-{
- register ulong __mepc asm ("a2") = mepc;
- ulong val, rvc_mask = 3, tmp;
-
- asm ("and %[tmp], %[addr], 2\n"
- "bnez %[tmp], 1f\n"
-#if defined(CONFIG_64BIT)
- __stringify(LWU) " %[insn], (%[addr])\n"
-#else
- __stringify(LW) " %[insn], (%[addr])\n"
-#endif
- "and %[tmp], %[insn], %[rvc_mask]\n"
- "beq %[tmp], %[rvc_mask], 2f\n"
- "sll %[insn], %[insn], %[xlen_minus_16]\n"
- "srl %[insn], %[insn], %[xlen_minus_16]\n"
- "j 2f\n"
- "1:\n"
- "lhu %[insn], (%[addr])\n"
- "and %[tmp], %[insn], %[rvc_mask]\n"
- "bne %[tmp], %[rvc_mask], 2f\n"
- "lhu %[tmp], 2(%[addr])\n"
- "sll %[tmp], %[tmp], 16\n"
- "add %[insn], %[insn], %[tmp]\n"
- "2:"
- : [insn] "=&r" (val), [tmp] "=&r" (tmp)
- : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask),
- [xlen_minus_16] "i" (XLEN_MINUS_16));
-
- *r_insn = val;
-
- return 0;
-}
-#else
-static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val)
-{
- if (user_mode(regs)) {
- return __get_user(*r_val, (u8 __user *)addr);
- } else {
- *r_val = *addr;
- return 0;
- }
-}
-
-static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val)
-{
- if (user_mode(regs)) {
- return __put_user(val, (u8 __user *)addr);
- } else {
- *addr = val;
- return 0;
- }
-}
-
-#define __read_insn(regs, insn, insn_addr) \
+#define __read_insn(regs, insn, insn_addr, type) \
({ \
int __ret; \
\
if (user_mode(regs)) { \
- __ret = __get_user(insn, insn_addr); \
+ __ret = get_user(insn, (type __user *) insn_addr); \
} else { \
- insn = *(__force u16 *)insn_addr; \
+ insn = *(type *)insn_addr; \
__ret = 0; \
} \
\
@@ -356,9 +291,8 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn)
if (epc & 0x2) {
ulong tmp = 0;
- u16 __user *insn_addr = (u16 __user *)epc;
- if (__read_insn(regs, insn, insn_addr))
+ if (__read_insn(regs, insn, epc, u16))
return -EFAULT;
/* __get_user() uses a regular "lw", which sign extends the loaded
 * value; make sure to clear the higher-order bits in case we "or" it
@@ -369,16 +303,14 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn)
*r_insn = insn;
return 0;
}
- insn_addr++;
- if (__read_insn(regs, tmp, insn_addr))
+ epc += sizeof(u16);
+ if (__read_insn(regs, tmp, epc, u16))
return -EFAULT;
*r_insn = (tmp << 16) | insn;
return 0;
} else {
- u32 __user *insn_addr = (u32 __user *)epc;
-
- if (__read_insn(regs, insn, insn_addr))
+ if (__read_insn(regs, insn, epc, u32))
return -EFAULT;
if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) {
*r_insn = insn;
@@ -390,7 +322,6 @@ static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn)
return 0;
}
}
-#endif
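The rework above keeps the RVC length rule intact: only a 32-bit instruction has both low bits of its first 16-bit parcel set, which is exactly what INSN_LEN() encodes. As a standalone sketch:

/* 0b11 in the two low bits marks a 32-bit instruction; anything else is RVC. */
static inline int insn_len(unsigned long insn)
{
	return (insn & 0x3) == 0x3 ? 4 : 2;
}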
union reg_data {
u8 data_bytes[8];
@@ -398,22 +329,47 @@ union reg_data {
u64 data_u64;
};
-static bool unaligned_ctl __read_mostly;
-
/* sysctl hooks */
int unaligned_enabled __read_mostly = 1; /* Enabled by default */
-int handle_misaligned_load(struct pt_regs *regs)
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+static int handle_vector_misaligned_load(struct pt_regs *regs)
+{
+ unsigned long epc = regs->epc;
+ unsigned long insn;
+
+ if (get_insn(regs, epc, &insn))
+ return -1;
+
+ /* Only return 0 when called from check_vector_unaligned_access_emulated() */
+ if (*this_cpu_ptr(&vector_misaligned_access) == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
+ *this_cpu_ptr(&vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+ regs->epc = epc + INSN_LEN(insn);
+ return 0;
+ }
+
+ /* We don't emulate vector instructions yet */
+ regs->epc = epc;
+ return -1;
+}
+#else
+static int handle_vector_misaligned_load(struct pt_regs *regs)
+{
+ return -1;
+}
+#endif
+
+static int handle_scalar_misaligned_load(struct pt_regs *regs)
{
union reg_data val;
unsigned long epc = regs->epc;
unsigned long insn;
unsigned long addr = regs->badaddr;
- int i, fp = 0, shift = 0, len = 0;
+ int fp = 0, shift = 0, len = 0;
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
- *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
+ *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED;
if (!unaligned_enabled)
return -1;
@@ -481,6 +437,13 @@ int handle_misaligned_load(struct pt_regs *regs)
fp = 1;
len = 4;
#endif
+ } else if ((insn & INSN_MASK_C_LHU) == INSN_MATCH_C_LHU) {
+ len = 2;
+ insn = RVC_RS2S(insn) << SH_RD;
+ } else if ((insn & INSN_MASK_C_LH) == INSN_MATCH_C_LH) {
+ len = 2;
+ shift = 8 * (sizeof(ulong) - len);
+ insn = RVC_RS2S(insn) << SH_RD;
} else {
regs->epc = epc;
return -1;
@@ -490,9 +453,11 @@ int handle_misaligned_load(struct pt_regs *regs)
return -EOPNOTSUPP;
val.data_u64 = 0;
- for (i = 0; i < len; i++) {
- if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i]))
+ if (user_mode(regs)) {
+ if (copy_from_user_nofault(&val, (u8 __user *)addr, len))
return -1;
+ } else {
+ memcpy(&val, (u8 *)addr, len);
}
if (!fp)
@@ -507,13 +472,13 @@ int handle_misaligned_load(struct pt_regs *regs)
return 0;
}
-int handle_misaligned_store(struct pt_regs *regs)
+static int handle_scalar_misaligned_store(struct pt_regs *regs)
{
union reg_data val;
unsigned long epc = regs->epc;
unsigned long insn;
unsigned long addr = regs->badaddr;
- int i, len = 0, fp = 0;
+ int len = 0, fp = 0;
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
@@ -578,6 +543,9 @@ int handle_misaligned_store(struct pt_regs *regs)
len = 4;
val.data_ulong = GET_F32_RS2C(insn, regs);
#endif
+ } else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) {
+ len = 2;
+ val.data_ulong = GET_RS2S(insn, regs);
} else {
regs->epc = epc;
return -1;
@@ -586,9 +554,11 @@ int handle_misaligned_store(struct pt_regs *regs)
if (!IS_ENABLED(CONFIG_FPU) && fp)
return -EOPNOTSUPP;
- for (i = 0; i < len; i++) {
- if (store_u8(regs, (void *)(addr + i), val.data_bytes[i]))
+ if (user_mode(regs)) {
+ if (copy_to_user_nofault((u8 __user *)addr, &val, len))
return -1;
+ } else {
+ memcpy((u8 *)addr, &val, len);
}
regs->epc = epc + INSN_LEN(insn);
@@ -596,52 +566,225 @@ int handle_misaligned_store(struct pt_regs *regs)
return 0;
}
-bool check_unaligned_access_emulated(int cpu)
+int handle_misaligned_load(struct pt_regs *regs)
+{
+ unsigned long epc = regs->epc;
+ unsigned long insn;
+
+ if (IS_ENABLED(CONFIG_RISCV_VECTOR_MISALIGNED)) {
+ if (get_insn(regs, epc, &insn))
+ return -1;
+
+ if (insn_is_vector(insn))
+ return handle_vector_misaligned_load(regs);
+ }
+
+ if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
+ return handle_scalar_misaligned_load(regs);
+
+ return -1;
+}
+
+int handle_misaligned_store(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_RISCV_SCALAR_MISALIGNED))
+ return handle_scalar_misaligned_store(regs);
+
+ return -1;
+}
+
+#ifdef CONFIG_RISCV_VECTOR_MISALIGNED
+void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused)
+{
+ long *mas_ptr = this_cpu_ptr(&vector_misaligned_access);
+ unsigned long tmp_var;
+
+ *mas_ptr = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
+ kernel_vector_begin();
+ /*
+ * In pre-13.0.0 versions of GCC, vector registers cannot appear in
+ * the clobber list. This inline asm clobbers v0, but since we do not
+ * currently build the kernel with V enabled, the v0 clobber arg is not
+ * needed (as the compiler will not emit vector code itself). If the kernel
+ * is changed to build with V enabled, the clobber arg will need to be
+ * added here.
+ */
+ __asm__ __volatile__ (
+ ".balign 4\n\t"
+ ".option push\n\t"
+ ".option arch, +zve32x\n\t"
+ " vsetivli zero, 1, e16, m1, ta, ma\n\t" // Vectors of 16b
+ " vle16.v v0, (%[ptr])\n\t" // Load bytes
+ ".option pop\n\t"
+ : : [ptr] "r" ((u8 *)&tmp_var + 1));
+ kernel_vector_end();
+}
+
+bool __init check_vector_unaligned_access_emulated_all_cpus(void)
+{
+ int cpu;
+
+ /*
+ * Although documented as very slow, schedule_on_each_cpu() is used here
+ * because kernel_vector_begin() expects irqs to be enabled and panics otherwise.
+ */
+ schedule_on_each_cpu(check_vector_unaligned_access_emulated);
+
+ for_each_online_cpu(cpu)
+ if (per_cpu(vector_misaligned_access, cpu)
+ == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
+ return false;
+
+ return true;
+}
+#else
+bool __init check_vector_unaligned_access_emulated_all_cpus(void)
+{
+ return false;
+}
+#endif
+
+static bool all_cpus_unaligned_scalar_access_emulated(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ if (per_cpu(misaligned_access_speed, cpu) !=
+ RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED)
+ return false;
+
+ return true;
+}
+
+#ifdef CONFIG_RISCV_SCALAR_MISALIGNED
+
+static bool unaligned_ctl __read_mostly;
+
+static void check_unaligned_access_emulated(void *arg __always_unused)
{
+ int cpu = smp_processor_id();
long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
unsigned long tmp_var, tmp_val;
- bool misaligned_emu_detected;
- *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+ *mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
__asm__ __volatile__ (
" "REG_L" %[tmp], 1(%[ptr])\n"
: [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory");
+}
+
+static int cpu_online_check_unaligned_access_emulated(unsigned int cpu)
+{
+ long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
+
+ check_unaligned_access_emulated(NULL);
- misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED);
/*
* If unaligned_ctl is already set, this means that we detected that all
* CPUs use emulated misaligned accesses at boot time. If that changed
* when hotplugging a new CPU, this is something we don't handle.
*/
- if (unlikely(unaligned_ctl && !misaligned_emu_detected)) {
+ if (unlikely(unaligned_ctl && (*mas_ptr != RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED))) {
pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n");
- while (true)
- cpu_relax();
+ return -EINVAL;
}
- return misaligned_emu_detected;
+ return 0;
}
-void unaligned_emulation_finish(void)
+bool __init check_unaligned_access_emulated_all_cpus(void)
{
- int cpu;
-
/*
* We can only support PR_UNALIGN controls if all CPUs have misaligned
* accesses emulated since tasks requesting such control can run on any
* CPU.
*/
- for_each_present_cpu(cpu) {
- if (per_cpu(misaligned_access_speed, cpu) !=
- RISCV_HWPROBE_MISALIGNED_EMULATED) {
- return;
- }
- }
+ on_each_cpu(check_unaligned_access_emulated, NULL, 1);
+
+ if (!all_cpus_unaligned_scalar_access_emulated())
+ return false;
+
unaligned_ctl = true;
+ return true;
}
bool unaligned_ctl_available(void)
{
return unaligned_ctl;
}
+#else
+bool __init check_unaligned_access_emulated_all_cpus(void)
+{
+ return false;
+}
+static int cpu_online_check_unaligned_access_emulated(unsigned int cpu)
+{
+ return 0;
+}
+#endif
+
+static bool misaligned_traps_delegated;
+
+#ifdef CONFIG_RISCV_SBI
+
+static int cpu_online_sbi_unaligned_setup(unsigned int cpu)
+{
+ if (sbi_fwft_set(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0) &&
+ misaligned_traps_delegated) {
+ pr_crit("Misaligned trap delegation non homogeneous (expected delegated)");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void __init unaligned_access_init(void)
+{
+ int ret;
+
+ ret = sbi_fwft_set_cpumask(cpu_online_mask, SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0);
+ if (ret)
+ return;
+
+ misaligned_traps_delegated = true;
+ pr_info("SBI misaligned access exception delegation ok\n");
+ /*
+ * Note that we don't have to take any specific action here: if
+ * the delegation is successful, check_unaligned_access_emulated()
+ * will verify that the platform indeed traps on misaligned
+ * accesses.
+ */
+}
+#else
+void __init unaligned_access_init(void) {}
+
+static int cpu_online_sbi_unaligned_setup(unsigned int cpu __always_unused)
+{
+ return 0;
+}
+
+#endif
+
+int cpu_online_unaligned_access_init(unsigned int cpu)
+{
+ int ret;
+
+ ret = cpu_online_sbi_unaligned_setup(cpu);
+ if (ret)
+ return ret;
+
+ return cpu_online_check_unaligned_access_emulated(cpu);
+}
+
+bool misaligned_traps_can_delegate(void)
+{
+ /*
+ * Either we successfully requested misaligned trap delegation for all
+ * CPUs, or the SBI does not implement the FWFT extension but delegated
+ * the exception by default.
+ */
+ return misaligned_traps_delegated ||
+ all_cpus_unaligned_scalar_access_emulated();
+}
+EXPORT_SYMBOL_GPL(misaligned_traps_can_delegate);
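
The PR_UNALIGN controls gated by unaligned_ctl_available() above are the generic prctl(2) knobs. A minimal userspace sketch (not part of the patch), assuming a kernel where every CPU emulates misaligned scalar accesses so the control is available:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	int ctl;

	/* Request SIGBUS on misaligned accesses instead of silent fixups.
	 * On RISC-V this is expected to fail unless all CPUs emulate
	 * misaligned accesses (unaligned_ctl_available() above). */
	if (prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS) != 0) {
		perror("PR_SET_UNALIGN");
		return 1;
	}

	if (prctl(PR_GET_UNALIGN, &ctl) == 0)
		printf("unalign control: %#x\n", ctl);

	return 0;
}
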
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
new file mode 100644
index 000000000000..ae2068425fbc
--- /dev/null
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -0,0 +1,492 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2024 Rivos Inc.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/jump_label.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <asm/cpufeature.h>
+#include <asm/hwprobe.h>
+#include <asm/vector.h>
+
+#include "copy-unaligned.h"
+
+#define MISALIGNED_ACCESS_JIFFIES_LG2 1
+#define MISALIGNED_BUFFER_SIZE 0x4000
+#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
+#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
+
+DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
+DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+
+static long unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
+static long unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
+static cpumask_t fast_misaligned_access;
+
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
+static int check_unaligned_access(void *param)
+{
+ int cpu = smp_processor_id();
+ u64 start_cycles, end_cycles;
+ u64 word_cycles;
+ u64 byte_cycles;
+ int ratio;
+ unsigned long start_jiffies, now;
+ struct page *page = param;
+ void *dst;
+ void *src;
+ long speed = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW;
+
+ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN)
+ return 0;
+
+ /* Make an unaligned destination buffer. */
+ dst = (void *)((unsigned long)page_address(page) | 0x1);
+ /* Unalign src as well, but differently (off by 1 + 2 = 3). */
+ src = dst + (MISALIGNED_BUFFER_SIZE / 2);
+ src += 2;
+ word_cycles = -1ULL;
+ /* Do a warmup. */
+ __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ preempt_disable();
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ /*
+ * For a fixed amount of time, repeatedly try the function, and take
+ * the best time in cycles as the measurement.
+ */
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+ /* Ensure the CSR read can't reorder with respect to the copy. */
+ mb();
+ __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ /* Ensure the copy ends before the end time is snapped. */
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < word_cycles)
+ word_cycles = end_cycles - start_cycles;
+ }
+
+ byte_cycles = -1ULL;
+ __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+ mb();
+ __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < byte_cycles)
+ byte_cycles = end_cycles - start_cycles;
+ }
+
+ preempt_enable();
+
+ /* Don't divide by zero. */
+ if (!word_cycles || !byte_cycles) {
+ pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
+ cpu);
+
+ return 0;
+ }
+
+ if (word_cycles < byte_cycles)
+ speed = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST;
+
+ ratio = div_u64((byte_cycles * 100), word_cycles);
+ pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
+ cpu,
+ ratio / 100,
+ ratio % 100,
+ (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) ? "fast" : "slow");
+
+ per_cpu(misaligned_access_speed, cpu) = speed;
+
+ /*
+ * Set this CPU's bit in fast_misaligned_access. The cpumask operations
+ * are atomic to avoid race conditions.
+ */
+ if (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST)
+ cpumask_set_cpu(cpu, &fast_misaligned_access);
+ else
+ cpumask_clear_cpu(cpu, &fast_misaligned_access);
+
+ return 0;
+}
+
+static void __init check_unaligned_access_nonboot_cpu(void *param)
+{
+ unsigned int cpu = smp_processor_id();
+ struct page **pages = param;
+
+ if (cpu != 0)
+ check_unaligned_access(pages[cpu]);
+}
+
+/* Measure unaligned access speed on all CPUs present at boot in parallel. */
+static void __init check_unaligned_access_speed_all_cpus(void)
+{
+ unsigned int cpu;
+ unsigned int cpu_count = num_possible_cpus();
+ struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL);
+
+ if (!bufs) {
+ pr_warn("Allocation failure, not measuring misaligned performance\n");
+ return;
+ }
+
+ /*
+ * Allocate separate buffers for each CPU so there's no fighting over
+ * cache lines.
+ */
+ for_each_cpu(cpu, cpu_online_mask) {
+ bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+ if (!bufs[cpu]) {
+ pr_warn("Allocation failure, not measuring misaligned performance\n");
+ goto out;
+ }
+ }
+
+ /* Check everybody except 0, who stays behind to tend jiffies. */
+ on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
+
+ /* Check core 0. */
+ smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
+
+out:
+ for_each_cpu(cpu, cpu_online_mask) {
+ if (bufs[cpu])
+ __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
+ }
+
+ kfree(bufs);
+}
+#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
+static void __init check_unaligned_access_speed_all_cpus(void)
+{
+}
+#endif
+
+DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
+
+static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
+{
+ if (cpumask_weight(mask) == weight)
+ static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key);
+ else
+ static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key);
+}
+
+static void set_unaligned_access_static_branches_except_cpu(int cpu)
+{
+ /*
+ * Same as set_unaligned_access_static_branches, except excludes the
+ * given CPU from the result. When a CPU is hotplugged into an offline
+ * state, this function is called before the CPU is set to offline in
+ * the cpumask, and thus the CPU needs to be explicitly excluded.
+ */
+
+ cpumask_t fast_except_me;
+
+ cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
+ cpumask_clear_cpu(cpu, &fast_except_me);
+
+ modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
+}
+
+static void set_unaligned_access_static_branches(void)
+{
+ /*
+ * This will be called after check_unaligned_access_all_cpus so the
+ * result of unaligned access speed for all CPUs will be available.
+ *
+ * To avoid the number of online cpus changing between reading
+ * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
+ * held before calling this function.
+ */
+
+ cpumask_t fast_and_online;
+
+ cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
+
+ modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
+}
+
+static int __init lock_and_set_unaligned_access_static_branch(void)
+{
+ cpus_read_lock();
+ set_unaligned_access_static_branches();
+ cpus_read_unlock();
+
+ return 0;
+}
+
+arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
+
+static int riscv_online_cpu(unsigned int cpu)
+{
+ int ret = cpu_online_unaligned_access_init(cpu);
+
+ if (ret)
+ return ret;
+
+ /* This CPU's speed was already set since the last check */
+ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
+ goto exit;
+ } else if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
+ per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param;
+ goto exit;
+ }
+
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
+ {
+ static struct page *buf;
+
+ buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+ if (!buf) {
+ pr_warn("Allocation failure, not measuring misaligned performance\n");
+ return -ENOMEM;
+ }
+
+ check_unaligned_access(buf);
+ __free_pages(buf, MISALIGNED_BUFFER_ORDER);
+ }
+#endif
+
+exit:
+ set_unaligned_access_static_branches();
+
+ return 0;
+}
+
+static int riscv_offline_cpu(unsigned int cpu)
+{
+ set_unaligned_access_static_branches_except_cpu(cpu);
+
+ return 0;
+}
+
+#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+static void check_vector_unaligned_access(struct work_struct *work __always_unused)
+{
+ int cpu = smp_processor_id();
+ u64 start_cycles, end_cycles;
+ u64 word_cycles;
+ u64 byte_cycles;
+ int ratio;
+ unsigned long start_jiffies, now;
+ struct page *page;
+ void *dst;
+ void *src;
+ long speed = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW;
+
+ if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
+ return;
+
+ page = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+ if (!page) {
+ pr_warn("Allocation failure, not measuring vector misaligned performance\n");
+ return;
+ }
+
+ /* Make an unaligned destination buffer. */
+ dst = (void *)((unsigned long)page_address(page) | 0x1);
+ /* Unalign src as well, but differently (off by 1 + 2 = 3). */
+ src = dst + (MISALIGNED_BUFFER_SIZE / 2);
+ src += 2;
+ word_cycles = -1ULL;
+
+ /* Do a warmup. */
+ kernel_vector_begin();
+ __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ /*
+ * For a fixed amount of time, repeatedly try the function, and take
+ * the best time in cycles as the measurement.
+ */
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+ /* Ensure the CSR read can't reorder with respect to the copy. */
+ mb();
+ __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ /* Ensure the copy ends before the end time is snapped. */
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < word_cycles)
+ word_cycles = end_cycles - start_cycles;
+ }
+
+ byte_cycles = -1ULL;
+ __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+ /* Ensure the CSR read can't reorder with respect to the copy. */
+ mb();
+ __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ /* Ensure the copy ends before the end time is snapped. */
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < byte_cycles)
+ byte_cycles = end_cycles - start_cycles;
+ }
+
+ kernel_vector_end();
+
+ /* Don't divide by zero. */
+ if (!word_cycles || !byte_cycles) {
+ pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n",
+ cpu);
+
+ goto free;
+ }
+
+ if (word_cycles < byte_cycles)
+ speed = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST;
+
+ ratio = div_u64((byte_cycles * 100), word_cycles);
+ pr_info("cpu%d: Ratio of vector byte access time to vector unaligned word access is %d.%02d, unaligned accesses are %s\n",
+ cpu,
+ ratio / 100,
+ ratio % 100,
+ (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow");
+
+ per_cpu(vector_misaligned_access, cpu) = speed;
+
+free:
+ __free_pages(page, MISALIGNED_BUFFER_ORDER);
+}
+
+/* Measure unaligned access speed on all CPUs present at boot in parallel. */
+static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+{
+ schedule_on_each_cpu(check_vector_unaligned_access);
+
+ return 0;
+}
+#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
+static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+{
+ return 0;
+}
+#endif
+
+static int riscv_online_cpu_vec(unsigned int cpu)
+{
+ if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
+ per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
+ return 0;
+ }
+
+#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+ if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
+ return 0;
+
+ check_vector_unaligned_access_emulated(NULL);
+ check_vector_unaligned_access(NULL);
+#endif
+
+ return 0;
+}
+
+static const char * const speed_str[] __initconst = { NULL, NULL, "slow", "fast", "unsupported" };
+
+static int __init set_unaligned_scalar_speed_param(char *str)
+{
+ if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW]))
+ unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW;
+ else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_FAST]))
+ unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST;
+ else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED]))
+ unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED;
+ else
+ return -EINVAL;
+
+ return 1;
+}
+__setup("unaligned_scalar_speed=", set_unaligned_scalar_speed_param);
+
+static int __init set_unaligned_vector_speed_param(char *str)
+{
+ if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW]))
+ unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW;
+ else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_FAST]))
+ unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST;
+ else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED]))
+ unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+ else
+ return -EINVAL;
+
+ return 1;
+}
+__setup("unaligned_vector_speed=", set_unaligned_vector_speed_param);
+
+static int __init check_unaligned_access_all_cpus(void)
+{
+ int cpu;
+
+ unaligned_access_init();
+
+ if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
+ pr_info("scalar unaligned access speed set to '%s' (%lu) by command line\n",
+ speed_str[unaligned_scalar_speed_param], unaligned_scalar_speed_param);
+ for_each_online_cpu(cpu)
+ per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param;
+ } else if (!check_unaligned_access_emulated_all_cpus()) {
+ check_unaligned_access_speed_all_cpus();
+ }
+
+ if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
+ if (!has_vector() &&
+ unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) {
+ pr_warn("vector support is not available, ignoring unaligned_vector_speed=%s\n",
+ speed_str[unaligned_vector_speed_param]);
+ } else {
+ pr_info("vector unaligned access speed set to '%s' (%lu) by command line\n",
+ speed_str[unaligned_vector_speed_param], unaligned_vector_speed_param);
+ }
+ }
+
+ if (!has_vector())
+ unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+
+ if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
+ for_each_online_cpu(cpu)
+ per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
+ } else if (!check_vector_unaligned_access_emulated_all_cpus() &&
+ IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
+ kthread_run(vec_check_unaligned_access_speed_all_cpus,
+ NULL, "vec_check_unaligned_access_speed_all_cpus");
+ }
+
+ /*
+ * Set up hotplug callbacks for any new CPUs that come online or go
+ * offline.
+ */
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+ riscv_online_cpu, riscv_offline_cpu);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+ riscv_online_cpu_vec, NULL);
+
+ return 0;
+}
+
+arch_initcall(check_unaligned_access_all_cpus);
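
The per-CPU speeds populated by this file are what the riscv_hwprobe(2) syscall reports back to userspace. A hedged consumer sketch, assuming uapi headers recent enough to define RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
	struct riscv_hwprobe pair = {
		.key = RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF,
	};

	/* cpusetsize == 0 and cpus == NULL: query across all online CPUs. */
	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0) {
		perror("riscv_hwprobe");
		return 1;
	}

	printf("scalar misaligned accesses: %s\n",
	       pair.value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST ? "fast" :
	       pair.value == RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW ? "slow" :
	       "emulated/unknown/unsupported");
	return 0;
}
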
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index 98315b98256d..3a8e038b10a2 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -13,33 +13,17 @@
#include <linux/err.h>
#include <asm/page.h>
#include <asm/vdso.h>
-#include <linux/time_namespace.h>
+#include <linux/vdso_datastore.h>
#include <vdso/datapage.h>
#include <vdso/vsyscall.h>
-enum vvar_pages {
- VVAR_DATA_PAGE_OFFSET,
- VVAR_TIMENS_PAGE_OFFSET,
- VVAR_NR_PAGES,
-};
-
-enum rv_vdso_map {
- RV_VDSO_MAP_VVAR,
- RV_VDSO_MAP_VDSO,
-};
-
-#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT)
-
-static union vdso_data_store vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = vdso_data_store.data;
+#define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT)
struct __vdso_info {
const char *name;
const char *vdso_code_start;
const char *vdso_code_end;
unsigned long vdso_pages;
- /* Data Mapping */
- struct vm_special_mapping *dm;
/* Code Mapping */
struct vm_special_mapping *cm;
};
@@ -86,112 +70,29 @@ static void __init __vdso_init(struct __vdso_info *vdso_info)
vdso_info->cm->pages = vdso_pagelist;
}
-#ifdef CONFIG_TIME_NS
-struct vdso_data *arch_get_vdso_data(void *vvar_page)
-{
- return (struct vdso_data *)(vvar_page);
-}
-
-/*
- * The vvar mapping contains data for a specific time namespace, so when a task
- * changes namespace we must unmap its vvar data for the old namespace.
- * Subsequent faults will map in data for the new namespace.
- *
- * For more details see timens_setup_vdso_data().
- */
-int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
-{
- struct mm_struct *mm = task->mm;
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
-
- mmap_read_lock(mm);
-
- for_each_vma(vmi, vma) {
- if (vma_is_special_mapping(vma, vdso_info.dm))
- zap_vma_pages(vma);
-#ifdef CONFIG_COMPAT
- if (vma_is_special_mapping(vma, compat_vdso_info.dm))
- zap_vma_pages(vma);
-#endif
- }
-
- mmap_read_unlock(mm);
- return 0;
-}
-#endif
-
-static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
- struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *timens_page = find_timens_vvar_page(vma);
- unsigned long pfn;
-
- switch (vmf->pgoff) {
- case VVAR_DATA_PAGE_OFFSET:
- if (timens_page)
- pfn = page_to_pfn(timens_page);
- else
- pfn = sym_to_pfn(vdso_data);
- break;
-#ifdef CONFIG_TIME_NS
- case VVAR_TIMENS_PAGE_OFFSET:
- /*
- * If a task belongs to a time namespace then a namespace
- * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
- * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
- * offset.
- * See also the comment near timens_setup_vdso_data().
- */
- if (!timens_page)
- return VM_FAULT_SIGBUS;
- pfn = sym_to_pfn(vdso_data);
- break;
-#endif /* CONFIG_TIME_NS */
- default:
- return VM_FAULT_SIGBUS;
- }
-
- return vmf_insert_pfn(vma, vmf->address, pfn);
-}
-
-static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
- [RV_VDSO_MAP_VVAR] = {
- .name = "[vvar]",
- .fault = vvar_fault,
- },
- [RV_VDSO_MAP_VDSO] = {
- .name = "[vdso]",
- .mremap = vdso_mremap,
- },
+static struct vm_special_mapping rv_vdso_map __ro_after_init = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
};
static struct __vdso_info vdso_info __ro_after_init = {
.name = "vdso",
.vdso_code_start = vdso_start,
.vdso_code_end = vdso_end,
- .dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR],
- .cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO],
+ .cm = &rv_vdso_map,
};
#ifdef CONFIG_COMPAT
-static struct vm_special_mapping rv_compat_vdso_maps[] __ro_after_init = {
- [RV_VDSO_MAP_VVAR] = {
- .name = "[vvar]",
- .fault = vvar_fault,
- },
- [RV_VDSO_MAP_VDSO] = {
- .name = "[vdso]",
- .mremap = vdso_mremap,
- },
+static struct vm_special_mapping rv_compat_vdso_map __ro_after_init = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
};
static struct __vdso_info compat_vdso_info __ro_after_init = {
.name = "compat_vdso",
.vdso_code_start = compat_vdso_start,
.vdso_code_end = compat_vdso_end,
- .dm = &rv_compat_vdso_maps[RV_VDSO_MAP_VVAR],
- .cm = &rv_compat_vdso_maps[RV_VDSO_MAP_VDSO],
+ .cm = &rv_compat_vdso_map,
};
#endif
@@ -214,7 +115,7 @@ static int __setup_additional_pages(struct mm_struct *mm,
unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
void *ret;
- BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+ BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES);
vdso_text_len = vdso_info->vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
@@ -226,8 +127,7 @@ static int __setup_additional_pages(struct mm_struct *mm,
goto up_fail;
}
- ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
- (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info->dm);
+ ret = vdso_install_vvar_mapping(mm, vdso_base);
if (IS_ERR(ret))
goto up_fail;
@@ -236,7 +136,7 @@ static int __setup_additional_pages(struct mm_struct *mm,
ret =
_install_special_mapping(mm, vdso_base, vdso_text_len,
- (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
+ (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_SEALED_SYSMAP),
vdso_info->cm);
if (IS_ERR(ret))
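
The rework above is transparent to userspace: processes still see the vDSO text and the (now generically managed) vvar data pages in their address space. A small sketch to observe the mappings; note that the exact vvar mapping names printed by the generic datastore may differ from the old "[vvar]":

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/self/maps", "r");

	if (!f)
		return 1;

	while (fgets(line, sizeof(line), f))
		if (strstr(line, "[vdso]") || strstr(line, "[vvar"))
			fputs(line, stdout);

	fclose(f);
	return 0;
}
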
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 9b517fe1b8a8..9ebb5e590f93 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -2,7 +2,7 @@
# Copied from arch/tile/kernel/vdso/Makefile
# Include the generic Makefile to check the built vdso.
-include $(srctree)/lib/vdso/Makefile
+include $(srctree)/lib/vdso/Makefile.include
# Symbols present in the vdso
vdso-syms = rt_sigreturn
ifdef CONFIG_64BIT
@@ -13,16 +13,29 @@ vdso-syms += flush_icache
vdso-syms += hwprobe
vdso-syms += sys_hwprobe
+ifdef CONFIG_VDSO_GETRANDOM
+vdso-syms += getrandom
+endif
+
# Files to link into the vdso
obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
+ifdef CONFIG_VDSO_GETRANDOM
+obj-vdso += vgetrandom-chacha.o
+endif
+
ccflags-y := -fno-stack-protector
ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -fno-builtin
ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
endif
+ifneq ($(c-getrandom-y),)
+ CFLAGS_getrandom.o += -fPIC -include $(c-getrandom-y)
+endif
+
CFLAGS_hwprobe.o += -fPIC
# Build rules
@@ -37,21 +50,17 @@ endif
# Disable -pg to prevent insert call site
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
-
-# Disable profiling and instrumentation for VDSO code
-GCOV_PROFILE := n
-KCOV_INSTRUMENT := n
-KASAN_SANITIZE := n
-UBSAN_SANITIZE := n
+CFLAGS_REMOVE_getrandom.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
+CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
# Force dependency
$(obj)/vdso.o: $(obj)/vdso.so
# link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
- $(call if_changed,vdsold)
-LDFLAGS_vdso.so.dbg = -shared -S -soname=linux-vdso.so.1 \
- --build-id=sha1 --hash-style=both --eh-frame-hdr
+ $(call if_changed,vdsold_and_check)
+LDFLAGS_vdso.so.dbg = -shared -soname=linux-vdso.so.1 \
+ --build-id=sha1 --eh-frame-hdr
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -59,7 +68,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
# Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
@@ -69,7 +78,8 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
# actual build commands
# The DSO images are built using a special linker script
# Make sure only to export the intended __vdso_xxx symbol offsets.
-quiet_cmd_vdsold = VDSOLD $@
- cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \
+quiet_cmd_vdsold_and_check = VDSOLD $@
+ cmd_vdsold_and_check = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \
$(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
- rm $@.tmp
+ rm $@.tmp && \
+ $(cmd_vdso_check)
diff --git a/arch/riscv/kernel/vdso/getrandom.c b/arch/riscv/kernel/vdso/getrandom.c
new file mode 100644
index 000000000000..f21922e8cebd
--- /dev/null
+++ b/arch/riscv/kernel/vdso/getrandom.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ */
+#include <linux/types.h>
+
+ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+}
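
Userspace normally reaches this entry point through the libc getrandom() wrapper; a libc that knows about __vdso_getrandom takes the vDSO fast path transparently and falls back to the syscall otherwise, so a consumer needs no vDSO-specific code:

#include <stdio.h>
#include <sys/random.h>

int main(void)
{
	unsigned char buf[32];

	/* With a vDSO-aware libc this usually completes without a syscall. */
	if (getrandom(buf, sizeof(buf), 0) != (ssize_t)sizeof(buf)) {
		perror("getrandom");
		return 1;
	}

	for (size_t i = 0; i < sizeof(buf); i++)
		printf("%02x", buf[i]);
	printf("\n");
	return 0;
}
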
diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c
index 1e926e4b5881..2ddeba6c68dd 100644
--- a/arch/riscv/kernel/vdso/hwprobe.c
+++ b/arch/riscv/kernel/vdso/hwprobe.c
@@ -16,8 +16,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count,
size_t cpusetsize, unsigned long *cpus,
unsigned int flags)
{
- const struct vdso_data *vd = __arch_get_vdso_data();
- const struct arch_vdso_data *avd = &vd->arch_data;
+ const struct vdso_arch_data *avd = &vdso_u_arch_data;
bool all_cpus = !cpusetsize && !cpus;
struct riscv_hwprobe *p = pairs;
struct riscv_hwprobe *end = pairs + pair_count;
@@ -51,8 +50,7 @@ static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count,
size_t cpusetsize, unsigned long *cpus,
unsigned int flags)
{
- const struct vdso_data *vd = __arch_get_vdso_data();
- const struct arch_vdso_data *avd = &vd->arch_data;
+ const struct vdso_arch_data *avd = &vdso_u_arch_data;
struct riscv_hwprobe *p = pairs;
struct riscv_hwprobe *end = pairs + pair_count;
unsigned char *c = (unsigned char *)cpus;
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index cbe2a179331d..7c15b0f4ee3b 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -4,15 +4,14 @@
*/
#include <asm/page.h>
#include <asm/vdso.h>
+#include <vdso/datapage.h>
OUTPUT_ARCH(riscv)
SECTIONS
{
- PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
- PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
+ VDSO_VVAR_SYMS
+
. = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
@@ -81,6 +80,9 @@ VERSION
#ifndef COMPAT_VDSO
__vdso_riscv_hwprobe;
#endif
+#if defined(CONFIG_VDSO_GETRANDOM) && !defined(COMPAT_VDSO)
+ __vdso_getrandom;
+#endif
local: *;
};
}
diff --git a/arch/riscv/kernel/vdso/vgetrandom-chacha.S b/arch/riscv/kernel/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..5f0dad8f2373
--- /dev/null
+++ b/arch/riscv/kernel/vdso/vgetrandom-chacha.S
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ *
+ * Based on arch/loongarch/vdso/vgetrandom-chacha.S.
+ */
+
+#include <asm/asm.h>
+#include <linux/linkage.h>
+
+.text
+
+.macro ROTRI rd rs imm
+ slliw t0, \rs, 32 - \imm
+ srliw \rd, \rs, \imm
+ or \rd, \rd, t0
+.endm
+
+.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3
+ \op \d0, \d0, \s0
+ \op \d1, \d1, \s1
+ \op \d2, \d2, \s2
+ \op \d3, \d3, \s3
+.endm
+
+/*
+ * a0: output bytes
+ * a1: 32-byte key input
+ * a2: 8-byte counter input/output
+ * a3: number of 64-byte blocks to write to output
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+#define output a0
+#define key a1
+#define counter a2
+#define nblocks a3
+#define i a4
+#define state0 s0
+#define state1 s1
+#define state2 s2
+#define state3 s3
+#define state4 s4
+#define state5 s5
+#define state6 s6
+#define state7 s7
+#define state8 s8
+#define state9 s9
+#define state10 s10
+#define state11 s11
+#define state12 a5
+#define state13 a6
+#define state14 a7
+#define state15 t1
+#define cnt t2
+#define copy0 t3
+#define copy1 t4
+#define copy2 t5
+#define copy3 t6
+
+/* Packs to be used with OP_4REG */
+#define line0 state0, state1, state2, state3
+#define line1 state4, state5, state6, state7
+#define line2 state8, state9, state10, state11
+#define line3 state12, state13, state14, state15
+
+#define line1_perm state5, state6, state7, state4
+#define line2_perm state10, state11, state8, state9
+#define line3_perm state15, state12, state13, state14
+
+#define copy copy0, copy1, copy2, copy3
+
+#define _16 16, 16, 16, 16
+#define _20 20, 20, 20, 20
+#define _24 24, 24, 24, 24
+#define _25 25, 25, 25, 25
+
+ /*
+ * The ABI requires s0-s11 saved.
+ * This does not violate the stack-less requirement: no sensitive data
+ * is spilled onto the stack.
+ */
+ addi sp, sp, -12*SZREG
+ REG_S s0, (sp)
+ REG_S s1, SZREG(sp)
+ REG_S s2, 2*SZREG(sp)
+ REG_S s3, 3*SZREG(sp)
+ REG_S s4, 4*SZREG(sp)
+ REG_S s5, 5*SZREG(sp)
+ REG_S s6, 6*SZREG(sp)
+ REG_S s7, 7*SZREG(sp)
+ REG_S s8, 8*SZREG(sp)
+ REG_S s9, 9*SZREG(sp)
+ REG_S s10, 10*SZREG(sp)
+ REG_S s11, 11*SZREG(sp)
+
+ ld cnt, (counter)
+
+ li copy0, 0x61707865
+ li copy1, 0x3320646e
+ li copy2, 0x79622d32
+ li copy3, 0x6b206574
+
+.Lblock:
+ /* state[0,1,2,3] = "expand 32-byte k" */
+ mv state0, copy0
+ mv state1, copy1
+ mv state2, copy2
+ mv state3, copy3
+
+ /* state[4,5,..,11] = key */
+ lw state4, (key)
+ lw state5, 4(key)
+ lw state6, 8(key)
+ lw state7, 12(key)
+ lw state8, 16(key)
+ lw state9, 20(key)
+ lw state10, 24(key)
+ lw state11, 28(key)
+
+ /* state[12,13] = counter */
+ mv state12, cnt
+ srli state13, cnt, 32
+
+ /* state[14,15] = 0 */
+ mv state14, zero
+ mv state15, zero
+
+ li i, 10
+.Lpermute:
+ /* odd round */
+ OP_4REG addw line0, line1
+ OP_4REG xor line3, line0
+ OP_4REG ROTRI line3, _16
+
+ OP_4REG addw line2, line3
+ OP_4REG xor line1, line2
+ OP_4REG ROTRI line1, _20
+
+ OP_4REG addw line0, line1
+ OP_4REG xor line3, line0
+ OP_4REG ROTRI line3, _24
+
+ OP_4REG addw line2, line3
+ OP_4REG xor line1, line2
+ OP_4REG ROTRI line1, _25
+
+ /* even round */
+ OP_4REG addw line0, line1_perm
+ OP_4REG xor line3_perm, line0
+ OP_4REG ROTRI line3_perm, _16
+
+ OP_4REG addw line2_perm, line3_perm
+ OP_4REG xor line1_perm, line2_perm
+ OP_4REG ROTRI line1_perm, _20
+
+ OP_4REG addw line0, line1_perm
+ OP_4REG xor line3_perm, line0
+ OP_4REG ROTRI line3_perm, _24
+
+ OP_4REG addw line2_perm, line3_perm
+ OP_4REG xor line1_perm, line2_perm
+ OP_4REG ROTRI line1_perm, _25
+
+ addi i, i, -1
+ bnez i, .Lpermute
+
+ /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
+ OP_4REG addw line0, copy
+ sw state0, (output)
+ sw state1, 4(output)
+ sw state2, 8(output)
+ sw state3, 12(output)
+
+ /* from now on state[0,1,2,3] are scratch registers */
+
+ /* state[0,1,2,3] = lo(key) */
+ lw state0, (key)
+ lw state1, 4(key)
+ lw state2, 8(key)
+ lw state3, 12(key)
+
+ /* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
+ OP_4REG addw line1, line0
+ sw state4, 16(output)
+ sw state5, 20(output)
+ sw state6, 24(output)
+ sw state7, 28(output)
+
+ /* state[0,1,2,3] = hi(key) */
+ lw state0, 16(key)
+ lw state1, 20(key)
+ lw state2, 24(key)
+ lw state3, 28(key)
+
+ /* output[8,9,10,11] = tmp[0,1,2,3] + state[8,9,10,11] */
+ OP_4REG addw line2, line0
+ sw state8, 32(output)
+ sw state9, 36(output)
+ sw state10, 40(output)
+ sw state11, 44(output)
+
+ /* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
+ addw state12, state12, cnt
+ srli state0, cnt, 32
+ addw state13, state13, state0
+ sw state12, 48(output)
+ sw state13, 52(output)
+ sw state14, 56(output)
+ sw state15, 60(output)
+
+ /* ++counter */
+ addi cnt, cnt, 1
+
+ /* output += 64 */
+ addi output, output, 64
+ /* --nblocks */
+ addi nblocks, nblocks, -1
+ bnez nblocks, .Lblock
+
+ /* counter = [cnt_lo, cnt_hi] */
+ sd cnt, (counter)
+
+ /* Zero out the potentially sensitive regs, in case nothing uses these
+ * again. As of now copy[0,1,2,3] just contains "expand 32-byte k" and
+ * state[0,...,11] are s0-s11, which we'll restore in the epilogue
+ * anyway, so we only need to zero state[12,...,15].
+ */
+ mv state12, zero
+ mv state13, zero
+ mv state14, zero
+ mv state15, zero
+
+ REG_L s0, (sp)
+ REG_L s1, SZREG(sp)
+ REG_L s2, 2*SZREG(sp)
+ REG_L s3, 3*SZREG(sp)
+ REG_L s4, 4*SZREG(sp)
+ REG_L s5, 5*SZREG(sp)
+ REG_L s6, 6*SZREG(sp)
+ REG_L s7, 7*SZREG(sp)
+ REG_L s8, 8*SZREG(sp)
+ REG_L s9, 9*SZREG(sp)
+ REG_L s10, 10*SZREG(sp)
+ REG_L s11, 11*SZREG(sp)
+ addi sp, sp, 12*SZREG
+
+ ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
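
For readers less fluent in RISC-V assembly, the routine above computes plain ChaCha20 blocks with the state layout the vDSO getrandom uses: words 0-3 are the "expand 32-byte k" constants, 4-11 the key, 12-13 a 64-bit block counter, and 14-15 zero (unlike RFC 8439's 32-bit counter plus nonce). A C sketch of the same computation, assuming a little-endian host and caller-provided key words; it does not replicate the register zeroing the assembly performs:

#include <stdint.h>
#include <string.h>

static inline uint32_t rotl32(uint32_t x, int n)
{
	return (x << n) | (x >> (32 - n));
}

/* Rotates match the asm: ROTRI by 16/20/24/25 == rotl by 16/12/8/7. */
#define QR(a, b, c, d) do {				\
	a += b; d ^= a; d = rotl32(d, 16);		\
	c += d; b ^= c; b = rotl32(b, 12);		\
	a += b; d ^= a; d = rotl32(d, 8);		\
	c += d; b ^= c; b = rotl32(b, 7);		\
} while (0)

void chacha20_blocks(uint8_t *output, const uint32_t key[8],
		     uint64_t *counter, size_t nblocks)
{
	while (nblocks--) {
		uint32_t s[16] = {
			0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
			key[0], key[1], key[2], key[3],
			key[4], key[5], key[6], key[7],
			(uint32_t)*counter, (uint32_t)(*counter >> 32), 0, 0,
		};
		uint32_t x[16];

		memcpy(x, s, sizeof(x));
		for (int i = 0; i < 10; i++) {
			/* odd round: columns */
			QR(x[0], x[4], x[8],  x[12]);
			QR(x[1], x[5], x[9],  x[13]);
			QR(x[2], x[6], x[10], x[14]);
			QR(x[3], x[7], x[11], x[15]);
			/* even round: diagonals */
			QR(x[0], x[5], x[10], x[15]);
			QR(x[1], x[6], x[11], x[12]);
			QR(x[2], x[7], x[8],  x[13]);
			QR(x[3], x[4], x[9],  x[14]);
		}

		for (int i = 0; i < 16; i++) {
			uint32_t w = x[i] + s[i];

			memcpy(output + 4 * i, &w, 4); /* LE store */
		}

		output += 64;
		(*counter)++;
	}
}
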
diff --git a/arch/riscv/kernel/vec-copy-unaligned.S b/arch/riscv/kernel/vec-copy-unaligned.S
new file mode 100644
index 000000000000..7ce4de6f6e69
--- /dev/null
+++ b/arch/riscv/kernel/vec-copy-unaligned.S
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2024 Rivos Inc. */
+
+#include <linux/args.h>
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+ .text
+
+#define WORD_EEW 32
+
+#define WORD_SEW CONCATENATE(e, WORD_EEW)
+#define VEC_L CONCATENATE(vle, WORD_EEW).v
+#define VEC_S CONCATENATE(vse, WORD_EEW).v
+
+/* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */
+/* Performs a memcpy without aligning buffers, using word loads and stores. */
+/* Note: The size is truncated to a multiple of WORD_EEW */
+SYM_FUNC_START(__riscv_copy_vec_words_unaligned)
+ andi a4, a2, ~(WORD_EEW-1)
+ beqz a4, 2f
+ add a3, a1, a4
+ .option push
+ .option arch, +zve32x
+1:
+ vsetivli t0, 8, WORD_SEW, m8, ta, ma
+ VEC_L v0, (a1)
+ VEC_S v0, (a0)
+ addi a0, a0, WORD_EEW
+ addi a1, a1, WORD_EEW
+ bltu a1, a3, 1b
+
+2:
+ .option pop
+ ret
+SYM_FUNC_END(__riscv_copy_vec_words_unaligned)
+
+/* void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t) */
+/* Performs a memcpy without aligning buffers, using only byte accesses. */
+/* Note: The size is truncated to a multiple of 8 */
+SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned)
+ andi a4, a2, ~(8-1)
+ beqz a4, 2f
+ add a3, a1, a4
+ .option push
+ .option arch, +zve32x
+1:
+ vsetivli t0, 8, e8, m8, ta, ma
+ vle8.v v0, (a1)
+ vse8.v v0, (a0)
+ addi a0, a0, 8
+ addi a1, a1, 8
+ bltu a1, a3, 1b
+
+2:
+ .option pop
+ ret
+SYM_FUNC_END(__riscv_copy_vec_bytes_unaligned)
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 6727d1d3b8f2..184f780c932d 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -33,7 +33,17 @@ int riscv_v_setup_vsize(void)
{
unsigned long this_vsize;
- /* There are 32 vector registers with vlenb length. */
+ /*
+ * There are 32 vector registers with vlenb length.
+ *
+ * If the thead,vlenb property was provided by the firmware, use that
+ * instead of probing the CSRs.
+ */
+ if (thead_vlenb_of) {
+ riscv_v_vsize = thead_vlenb_of * 32;
+ return 0;
+ }
+
riscv_v_enable();
this_vsize = csr_read(CSR_VLENB) * 32;
riscv_v_disable();
@@ -53,7 +63,7 @@ int riscv_v_setup_vsize(void)
void __init riscv_v_setup_ctx_cache(void)
{
- if (!has_vector())
+ if (!(has_vector() || has_xtheadvector()))
return;
riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx",
@@ -66,7 +76,7 @@ void __init riscv_v_setup_ctx_cache(void)
#endif
}
-static bool insn_is_vector(u32 insn_buf)
+bool insn_is_vector(u32 insn_buf)
{
u32 opcode = insn_buf & __INSN_OPCODE_MASK;
u32 width, csr;
@@ -173,8 +183,11 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
u32 __user *epc = (u32 __user *)regs->epc;
u32 insn = (u32)regs->badaddr;
+ if (!(has_vector() || has_xtheadvector()))
+ return false;
+
/* Do not handle if V is not supported, or disabled */
- if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
+ if (!riscv_v_vstate_ctrl_user_allowed())
return false;
/* If V has been enabled then it is not the first-use trap */
@@ -213,7 +226,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk)
bool inherit;
int cur, next;
- if (!has_vector())
+ if (!(has_vector() || has_xtheadvector()))
return;
next = riscv_v_ctrl_get_next(tsk);
@@ -235,7 +248,7 @@ void riscv_v_vstate_ctrl_init(struct task_struct *tsk)
long riscv_v_vstate_ctrl_get_current(void)
{
- if (!has_vector())
+ if (!(has_vector() || has_xtheadvector()))
return -EINVAL;
return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK;
@@ -246,7 +259,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg)
bool inherit;
int cur, next;
- if (!has_vector())
+ if (!(has_vector() || has_xtheadvector()))
return -EINVAL;
if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK)
@@ -284,7 +297,7 @@ long riscv_v_vstate_ctrl_set_current(unsigned long arg)
#ifdef CONFIG_SYSCTL
-static struct ctl_table riscv_v_default_vstate_table[] = {
+static const struct ctl_table riscv_v_default_vstate_table[] = {
{
.procname = "riscv_v_default_allow",
.data = &riscv_v_implicit_uacc,
@@ -296,7 +309,7 @@ static struct ctl_table riscv_v_default_vstate_table[] = {
static int __init riscv_v_sysctl_init(void)
{
- if (has_vector())
+ if (has_vector() || has_xtheadvector())
if (!register_sysctl("abi", riscv_v_default_vstate_table))
return -EINVAL;
return 0;
@@ -306,7 +319,7 @@ static int __init riscv_v_sysctl_init(void)
static int __init riscv_v_sysctl_init(void) { return 0; }
#endif /* ! CONFIG_SYSCTL */
-static int riscv_v_init(void)
+static int __init riscv_v_init(void)
{
return riscv_v_sysctl_init();
}
diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c
new file mode 100644
index 000000000000..92d8ff81f42c
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2024 Rivos, Inc
+ */
+
+#include <asm/vendorid_list.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/andes.h>
+#include <asm/vendor_extensions/sifive.h>
+#include <asm/vendor_extensions/thead.h>
+
+#include <linux/array_size.h>
+#include <linux/types.h>
+
+struct riscv_isa_vendor_ext_data_list *riscv_isa_vendor_ext_list[] = {
+#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES
+ &riscv_isa_vendor_ext_list_andes,
+#endif
+#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE
+ &riscv_isa_vendor_ext_list_sifive,
+#endif
+#ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD
+ &riscv_isa_vendor_ext_list_thead,
+#endif
+};
+
+const size_t riscv_isa_vendor_ext_list_size = ARRAY_SIZE(riscv_isa_vendor_ext_list);
+
+/**
+ * __riscv_isa_vendor_extension_available() - Check whether the given vendor
+ * extension is available.
+ *
+ * @cpu: check if extension is available on this cpu
+ * @vendor: vendor that the extension is a member of
+ * @bit: bit position of the desired extension
+ * Return: true or false
+ *
+ * NOTE: When cpu is -1, this checks whether the extension is available on all CPUs
+ */
+bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsigned int bit)
+{
+ struct riscv_isavendorinfo *bmap;
+ struct riscv_isavendorinfo *cpu_bmap;
+
+ switch (vendor) {
+ #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES
+ case ANDES_VENDOR_ID:
+ bmap = &riscv_isa_vendor_ext_list_andes.all_harts_isa_bitmap;
+ cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap;
+ break;
+ #endif
+ #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE
+ case SIFIVE_VENDOR_ID:
+ bmap = &riscv_isa_vendor_ext_list_sifive.all_harts_isa_bitmap;
+ cpu_bmap = riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap;
+ break;
+ #endif
+ #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_THEAD
+ case THEAD_VENDOR_ID:
+ bmap = &riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap;
+ cpu_bmap = riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap;
+ break;
+ #endif
+ default:
+ return false;
+ }
+
+ if (cpu != -1)
+ bmap = &cpu_bmap[cpu];
+
+ if (bit >= RISCV_ISA_VENDOR_EXT_MAX)
+ return false;
+
+ return test_bit(bit, bmap->isa);
+}
+EXPORT_SYMBOL_GPL(__riscv_isa_vendor_extension_available);
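
Kernel code can gate vendor-specific fast paths on these bitmaps with the exported helper above. A hedged in-kernel sketch (THEAD_VENDOR_ID comes from asm/vendorid_list.h, the extension bit from asm/vendor_extensions/thead.h; cpu == -1 checks every hart, per the NOTE in the kernel-doc):

#include <asm/vendorid_list.h>
#include <asm/vendor_extensions.h>
#include <asm/vendor_extensions/thead.h>

static bool xtheadvector_on_all_harts(void)
{
	/* cpu == -1: the extension must be present on every hart */
	return __riscv_isa_vendor_extension_available(-1, THEAD_VENDOR_ID,
						      RISCV_ISA_VENDOR_EXT_XTHEADVECTOR);
}
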
diff --git a/arch/riscv/kernel/vendor_extensions/Makefile b/arch/riscv/kernel/vendor_extensions/Makefile
new file mode 100644
index 000000000000..a4eca96d1c8a
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_ANDES) += andes.o
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive.o
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_SIFIVE) += sifive_hwprobe.o
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead.o
+obj-$(CONFIG_RISCV_ISA_VENDOR_EXT_THEAD) += thead_hwprobe.o
diff --git a/arch/riscv/kernel/vendor_extensions/andes.c b/arch/riscv/kernel/vendor_extensions/andes.c
new file mode 100644
index 000000000000..51f302b6d503
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/andes.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/cpufeature.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/andes.h>
+
+#include <linux/array_size.h>
+#include <linux/types.h>
+
+/* All Andes vendor extensions supported in Linux */
+static const struct riscv_isa_ext_data riscv_isa_vendor_ext_andes[] = {
+ __RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_VENDOR_EXT_XANDESPMU),
+};
+
+struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_andes = {
+ .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_andes),
+ .ext_data = riscv_isa_vendor_ext_andes,
+};
diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c
new file mode 100644
index 000000000000..1411337dc1e6
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/sifive.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/cpufeature.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/sifive.h>
+
+#include <linux/array_size.h>
+#include <linux/types.h>
+
+/* All SiFive vendor extensions supported in Linux */
+const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = {
+ __RISCV_ISA_EXT_DATA(xsfvfnrclipxfqf, RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF),
+ __RISCV_ISA_EXT_DATA(xsfvfwmaccqqq, RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ),
+ __RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD),
+ __RISCV_ISA_EXT_DATA(xsfvqmaccqoq, RISCV_ISA_VENDOR_EXT_XSFVQMACCQOQ),
+};
+
+struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_sifive = {
+ .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_sifive),
+ .ext_data = riscv_isa_vendor_ext_sifive,
+};
diff --git a/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c
new file mode 100644
index 000000000000..1f77f6309763
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/sifive_hwprobe.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/vendor_extensions/sifive.h>
+#include <asm/vendor_extensions/sifive_hwprobe.h>
+#include <asm/vendor_extensions/vendor_hwprobe.h>
+
+#include <linux/cpumask.h>
+#include <linux/types.h>
+
+#include <uapi/asm/hwprobe.h>
+#include <uapi/asm/vendor/sifive.h>
+
+void hwprobe_isa_vendor_ext_sifive_0(struct riscv_hwprobe *pair, const struct cpumask *cpus)
+{
+ VENDOR_EXTENSION_SUPPORTED(pair, cpus,
+ riscv_isa_vendor_ext_list_sifive.per_hart_isa_bitmap, {
+ VENDOR_EXT_KEY(XSFVQMACCDOD);
+ VENDOR_EXT_KEY(XSFVQMACCQOQ);
+ VENDOR_EXT_KEY(XSFVFNRCLIPXFQF);
+ VENDOR_EXT_KEY(XSFVFWMACCQQQ);
+ });
+}
diff --git a/arch/riscv/kernel/vendor_extensions/thead.c b/arch/riscv/kernel/vendor_extensions/thead.c
new file mode 100644
index 000000000000..519dbf70710a
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/thead.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/cpufeature.h>
+#include <asm/vendor_extensions.h>
+#include <asm/vendor_extensions/thead.h>
+
+#include <linux/array_size.h>
+#include <linux/cpumask.h>
+#include <linux/types.h>
+
+/* All T-Head vendor extensions supported in Linux */
+static const struct riscv_isa_ext_data riscv_isa_vendor_ext_thead[] = {
+ __RISCV_ISA_EXT_DATA(xtheadvector, RISCV_ISA_VENDOR_EXT_XTHEADVECTOR),
+};
+
+struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_thead = {
+ .ext_data_count = ARRAY_SIZE(riscv_isa_vendor_ext_thead),
+ .ext_data = riscv_isa_vendor_ext_thead,
+};
+
+void disable_xtheadvector(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap[cpu].isa);
+
+ clear_bit(RISCV_ISA_VENDOR_EXT_XTHEADVECTOR, riscv_isa_vendor_ext_list_thead.all_harts_isa_bitmap.isa);
+}
diff --git a/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c
new file mode 100644
index 000000000000..2eba34011786
--- /dev/null
+++ b/arch/riscv/kernel/vendor_extensions/thead_hwprobe.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/vendor_extensions/thead.h>
+#include <asm/vendor_extensions/thead_hwprobe.h>
+#include <asm/vendor_extensions/vendor_hwprobe.h>
+
+#include <linux/cpumask.h>
+#include <linux/types.h>
+
+#include <uapi/asm/hwprobe.h>
+#include <uapi/asm/vendor/thead.h>
+
+void hwprobe_isa_vendor_ext_thead_0(struct riscv_hwprobe *pair, const struct cpumask *cpus)
+{
+ VENDOR_EXTENSION_SUPPORTED(pair, cpus,
+ riscv_isa_vendor_ext_list_thead.per_hart_isa_bitmap, {
+ VENDOR_EXT_KEY(XTHEADVECTOR);
+ });
+}
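
From userspace, the same bit surfaces through the vendor hwprobe key this function serves. A sketch, assuming the uapi names RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0 and RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR from the <asm/vendor/thead.h> header added alongside this code:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>
#include <asm/vendor/thead.h>

int main(void)
{
	struct riscv_hwprobe pair = {
		.key = RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0,
	};

	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0) {
		perror("riscv_hwprobe");
		return 1;
	}

	printf("xtheadvector: %ssupported on all queried CPUs\n",
	       (pair.value & RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR) ? "" : "not ");
	return 0;
}
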
diff --git a/arch/riscv/kernel/vmcore_info.c b/arch/riscv/kernel/vmcore_info.c
index 6d7a22522d63..d5e448aa90e7 100644
--- a/arch/riscv/kernel/vmcore_info.c
+++ b/arch/riscv/kernel/vmcore_info.c
@@ -19,6 +19,13 @@ void arch_crash_save_vmcoreinfo(void)
#endif
#endif
vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR);
+#ifdef CONFIG_XIP_KERNEL
+ /*
+ * TODO: Communicate with crash-utility developers on the information to
+ * export. The XIP case is more complicated, because the virtual-physical
+ * address offset depends on whether the address is in ROM or in RAM.
+ */
+#else
vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n",
kernel_map.va_kernel_pa_offset);
+#endif
}
diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S
index 8c3daa1b0531..a7611789bad5 100644
--- a/arch/riscv/kernel/vmlinux-xip.lds.S
+++ b/arch/riscv/kernel/vmlinux-xip.lds.S
@@ -14,6 +14,7 @@
#include <asm/page.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
+#include <asm/set_memory.h>
OUTPUT_ARCH(riscv)
ENTRY(_start)
@@ -65,10 +66,10 @@ SECTIONS
* From this point, stuff is considered writable and will be copied to RAM
*/
__data_loc = ALIGN(PAGE_SIZE); /* location in file */
- . = KERNEL_LINK_ADDR + XIP_OFFSET; /* location in memory */
+ . = ALIGN(SECTION_ALIGN); /* location in memory */
#undef LOAD_OFFSET
-#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK))
+#define LOAD_OFFSET (KERNEL_LINK_ADDR + _sdata - __data_loc)
_sdata = .; /* Start of data section */
_data = .;
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 002ca58dd998..61bd5ba6680a 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -97,6 +97,9 @@ SECTIONS
{
EXIT_DATA
}
+
+ RUNTIME_CONST_VARIABLES
+
PERCPU_SECTION(L1_CACHE_BYTES)
.rel.dyn : {