aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/riscv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv')
-rw-r--r--arch/riscv/Kconfig91
-rw-r--r--arch/riscv/Kconfig.socs7
-rw-r--r--arch/riscv/Makefile6
-rw-r--r--arch/riscv/boot/dts/Makefile5
-rw-r--r--arch/riscv/boot/dts/allwinner/sunxi-d1s-t113.dtsi37
-rw-r--r--arch/riscv/boot/dts/starfive/jh7100.dtsi10
-rw-r--r--arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi33
-rw-r--r--arch/riscv/boot/dts/starfive/jh7110.dtsi50
-rw-r--r--arch/riscv/boot/dts/thead/Makefile2
-rw-r--r--arch/riscv/boot/dts/thead/th1520-lichee-module-4a.dtsi38
-rw-r--r--arch/riscv/boot/dts/thead/th1520-lichee-pi-4a.dts32
-rw-r--r--arch/riscv/boot/dts/thead/th1520.dtsi422
-rw-r--r--arch/riscv/configs/defconfig2
-rw-r--r--arch/riscv/errata/Makefile4
-rw-r--r--arch/riscv/errata/thead/errata.c7
-rw-r--r--arch/riscv/include/asm/acenv.h11
-rw-r--r--arch/riscv/include/asm/acpi.h84
-rw-r--r--arch/riscv/include/asm/asm-extable.h6
-rw-r--r--arch/riscv/include/asm/atomic.h72
-rw-r--r--arch/riscv/include/asm/cpu.h8
-rw-r--r--arch/riscv/include/asm/cpufeature.h10
-rw-r--r--arch/riscv/include/asm/csr.h20
-rw-r--r--arch/riscv/include/asm/elf.h11
-rw-r--r--arch/riscv/include/asm/extable.h4
-rw-r--r--arch/riscv/include/asm/ftrace.h21
-rw-r--r--arch/riscv/include/asm/hugetlb.h3
-rw-r--r--arch/riscv/include/asm/hwcap.h9
-rw-r--r--arch/riscv/include/asm/insn.h29
-rw-r--r--arch/riscv/include/asm/irq.h2
-rw-r--r--arch/riscv/include/asm/irq_stack.h30
-rw-r--r--arch/riscv/include/asm/kfence.h33
-rw-r--r--arch/riscv/include/asm/kvm_aia.h107
-rw-r--r--arch/riscv/include/asm/kvm_aia_aplic.h58
-rw-r--r--arch/riscv/include/asm/kvm_aia_imsic.h38
-rw-r--r--arch/riscv/include/asm/kvm_host.h6
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_sbi.h11
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_vector.h82
-rw-r--r--arch/riscv/include/asm/perf_event.h7
-rw-r--r--arch/riscv/include/asm/pgtable.h3
-rw-r--r--arch/riscv/include/asm/processor.h14
-rw-r--r--arch/riscv/include/asm/smp.h2
-rw-r--r--arch/riscv/include/asm/switch_to.h9
-rw-r--r--arch/riscv/include/asm/thread_info.h17
-rw-r--r--arch/riscv/include/asm/timex.h2
-rw-r--r--arch/riscv/include/asm/vector.h218
-rw-r--r--arch/riscv/include/uapi/asm/auxvec.h1
-rw-r--r--arch/riscv/include/uapi/asm/bitsperlong.h14
-rw-r--r--arch/riscv/include/uapi/asm/hwcap.h1
-rw-r--r--arch/riscv/include/uapi/asm/hwprobe.h4
-rw-r--r--arch/riscv/include/uapi/asm/kvm.h81
-rw-r--r--arch/riscv/include/uapi/asm/ptrace.h39
-rw-r--r--arch/riscv/include/uapi/asm/sigcontext.h20
-rw-r--r--arch/riscv/kernel/Makefile6
-rw-r--r--arch/riscv/kernel/acpi.c251
-rw-r--r--arch/riscv/kernel/cpu-hotplug.c14
-rw-r--r--arch/riscv/kernel/cpu.c67
-rw-r--r--arch/riscv/kernel/cpufeature.c217
-rw-r--r--arch/riscv/kernel/entry.S8
-rw-r--r--arch/riscv/kernel/head.S41
-rw-r--r--arch/riscv/kernel/hibernate-asm.S5
-rw-r--r--arch/riscv/kernel/hibernate.c1
-rw-r--r--arch/riscv/kernel/irq.c68
-rw-r--r--arch/riscv/kernel/mcount.S7
-rw-r--r--arch/riscv/kernel/probes/uprobes.c2
-rw-r--r--arch/riscv/kernel/process.c20
-rw-r--r--arch/riscv/kernel/ptrace.c70
-rw-r--r--arch/riscv/kernel/setup.c14
-rw-r--r--arch/riscv/kernel/signal.c220
-rw-r--r--arch/riscv/kernel/smp.c1
-rw-r--r--arch/riscv/kernel/smpboot.c91
-rw-r--r--arch/riscv/kernel/sys_riscv.c52
-rw-r--r--arch/riscv/kernel/time.c25
-rw-r--r--arch/riscv/kernel/traps.c69
-rw-r--r--arch/riscv/kernel/vdso.c1
-rw-r--r--arch/riscv/kernel/vdso/rt_sigreturn.S2
-rw-r--r--arch/riscv/kernel/vector.c277
-rw-r--r--arch/riscv/kernel/vmlinux-xip.lds.S6
-rw-r--r--arch/riscv/kernel/vmlinux.lds.S6
-rw-r--r--arch/riscv/kvm/Kconfig4
-rw-r--r--arch/riscv/kvm/Makefile4
-rw-r--r--arch/riscv/kvm/aia.c274
-rw-r--r--arch/riscv/kvm/aia_aplic.c619
-rw-r--r--arch/riscv/kvm/aia_device.c673
-rw-r--r--arch/riscv/kvm/aia_imsic.c1084
-rw-r--r--arch/riscv/kvm/main.c3
-rw-r--r--arch/riscv/kvm/tlb.c2
-rw-r--r--arch/riscv/kvm/vcpu.c29
-rw-r--r--arch/riscv/kvm/vcpu_exit.c2
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c80
-rw-r--r--arch/riscv/kvm/vcpu_vector.c186
-rw-r--r--arch/riscv/kvm/vm.c118
-rw-r--r--arch/riscv/mm/Makefile3
-rw-r--r--arch/riscv/mm/cacheflush.c8
-rw-r--r--arch/riscv/mm/dma-noncoherent.c2
-rw-r--r--arch/riscv/mm/fault.c78
-rw-r--r--arch/riscv/mm/hugetlbpage.c40
-rw-r--r--arch/riscv/mm/init.c118
-rw-r--r--arch/riscv/net/bpf_jit.h6
-rw-r--r--arch/riscv/net/bpf_jit_core.c19
-rw-r--r--arch/riscv/purgatory/Makefile7
100 files changed, 6292 insertions, 441 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 348c0fa1fc8c..4c07b9189c86 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -12,6 +12,8 @@ config 32BIT
config RISCV
def_bool y
+ select ACPI_GENERIC_GSI if ACPI
+ select ACPI_REDUCED_HARDWARE_ONLY if ACPI
select ARCH_DMA_DEFAULT_COHERENT
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
@@ -26,6 +28,7 @@ config RISCV
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
select ARCH_HAS_MMIOWB
+ select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SET_DIRECT_MAP if MMU
@@ -42,6 +45,7 @@ config RISCV
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGETLBFS if MMU
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
+ select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
@@ -96,11 +100,18 @@ config RISCV
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
+ select HAVE_ARCH_USERFAULTFD_MINOR if 64BIT && USERFAULTFD
select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
select HAVE_ASM_MODVERSIONS
select HAVE_CONTEXT_TRACKING_USER
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS if MMU
+ select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
+ select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
select HAVE_EBPF_JIT if MMU
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
@@ -110,7 +121,8 @@ config RISCV
select HAVE_KPROBES if !XIP_KERNEL
select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
select HAVE_KRETPROBES if !XIP_KERNEL
- select HAVE_RETHOOK if !XIP_KERNEL
+ # https://github.com/ClangBuiltLinux/linux/issues/1881
+ select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
select HAVE_MOVE_PMD
select HAVE_MOVE_PUD
select HAVE_PCI
@@ -119,12 +131,15 @@ config RISCV
select HAVE_PERF_USER_STACK_DUMP
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_RETHOOK if !XIP_KERNEL
select HAVE_RSEQ
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
+ select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA if MODULES
select MODULE_SECTIONS if MODULES
select OF
@@ -142,11 +157,6 @@ config RISCV
select TRACE_IRQFLAGS_SUPPORT
select UACCESS_MEMCPY if !MMU
select ZONE_DMA32 if 64BIT
- select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
- select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
- select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
- select HAVE_FUNCTION_GRAPH_TRACER
- select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
config CLANG_SUPPORTS_DYNAMIC_FTRACE
def_bool CC_IS_CLANG
@@ -262,6 +272,12 @@ config RISCV_DMA_NONCOHERENT
config AS_HAS_INSN
def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero)
+config AS_HAS_OPTION_ARCH
+ # https://reviews.llvm.org/D123515
+ def_bool y
+ depends on $(as-instr, .option arch$(comma) +m)
+ depends on !$(as-instr, .option arch$(comma) -i)
+
source "arch/riscv/Kconfig.socs"
source "arch/riscv/Kconfig.errata"
@@ -460,13 +476,44 @@ config RISCV_ISA_SVPBMT
If you don't know what to do here, say Y.
+config TOOLCHAIN_HAS_V
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64iv)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32iv)
+ depends on LLD_VERSION >= 140000 || LD_VERSION >= 23800
+ depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_V
+ bool "VECTOR extension support"
+ depends on TOOLCHAIN_HAS_V
+ depends on FPU
+ select DYNAMIC_SIGFRAME
+ default y
+ help
+ Say N here if you want to disable all vector related procedure
+ in the kernel.
+
+ If you don't know what to do here, say Y.
+
+config RISCV_ISA_V_DEFAULT_ENABLE
+ bool "Enable userspace Vector by default"
+ depends on RISCV_ISA_V
+ default y
+ help
+ Say Y here if you want to enable Vector in userspace by default.
+ Otherwise, userspace has to make explicit prctl() call to enable
+ Vector, or enable it via the sysctl interface.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_HAS_ZBB
bool
default y
depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbb)
depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbb)
depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
- depends on AS_IS_GNU
+ depends on AS_HAS_OPTION_ARCH
config RISCV_ISA_ZBB
bool "Zbb extension support for bit manipulation instructions"
@@ -551,6 +598,25 @@ config FPU
If you don't know what to do here, say Y.
+config IRQ_STACKS
+ bool "Independent irq & softirq stacks" if EXPERT
+ default y
+ select HAVE_IRQ_EXIT_ON_IRQ_STACK
+ select HAVE_SOFTIRQ_ON_OWN_STACK
+ help
+ Add independent irq & softirq stacks for percpu to prevent kernel stack
+ overflows. We may save some memory footprint by disabling IRQ_STACKS.
+
+config THREAD_SIZE_ORDER
+ int "Kernel stack size (in power-of-two numbers of page size)" if VMAP_STACK && EXPERT
+ range 0 4
+ default 1 if 32BIT && !KASAN
+ default 3 if 64BIT && KASAN
+ default 2
+ help
+ Specify the Pages of thread stack size (from 4KB to 64KB), which also
+ affects irq stack size, which is equal to thread stack size.
+
endmenu # "Platform type"
menu "Kernel features"
@@ -707,6 +773,7 @@ config EFI
depends on OF && !XIP_KERNEL
depends on MMU
default y
+ select ARCH_SUPPORTS_ACPI if 64BIT
select EFI_GENERIC_STUB
select EFI_PARAMS_FROM_FDT
select EFI_RUNTIME_WRAPPERS
@@ -799,12 +866,18 @@ menu "Power management options"
source "kernel/power/Kconfig"
+# Hibernation is only possible on systems where the SBI implementation has
+# marked its reserved memory as not accessible from, or does not run
+# from the same memory as, Linux
config ARCH_HIBERNATION_POSSIBLE
- def_bool y
+ def_bool NONPORTABLE
config ARCH_HIBERNATION_HEADER
def_bool HIBERNATION
+config ARCH_SUSPEND_POSSIBLE
+ def_bool y
+
endmenu # "Power management options"
menu "CPU Power Management"
@@ -816,3 +889,5 @@ source "drivers/cpufreq/Kconfig"
endmenu # "CPU Power Management"
source "arch/riscv/kvm/Kconfig"
+
+source "drivers/acpi/Kconfig"
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 1cf69f958f10..6833d01e2e70 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -41,6 +41,13 @@ config ARCH_SUNXI
This enables support for Allwinner sun20i platform hardware,
including boards based on the D1 and D1s SoCs.
+config ARCH_THEAD
+ bool "T-HEAD RISC-V SoCs"
+ depends on MMU && !XIP_KERNEL
+ select ERRATA_THEAD
+ help
+ This enables support for the RISC-V based T-HEAD SoCs.
+
config ARCH_VIRT
def_bool SOC_VIRT
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 0fb256bf8270..6ec6d52a4180 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -60,6 +60,7 @@ riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima
riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima
riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd
riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
+riscv-march-$(CONFIG_RISCV_ISA_V) := $(riscv-march-y)v
ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC
KBUILD_CFLAGS += -Wa,-misa-spec=2.2
@@ -71,7 +72,10 @@ endif
# Check if the toolchain supports Zihintpause extension
riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
-KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
+# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
+# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
+KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
+
KBUILD_AFLAGS += -march=$(riscv-march-y)
KBUILD_CFLAGS += -mno-save-restore
diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile
index f0d9f89054f8..f60a280abb15 100644
--- a/arch/riscv/boot/dts/Makefile
+++ b/arch/riscv/boot/dts/Makefile
@@ -1,9 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
subdir-y += allwinner
-subdir-y += sifive
-subdir-y += starfive
subdir-y += canaan
subdir-y += microchip
subdir-y += renesas
+subdir-y += sifive
+subdir-y += starfive
+subdir-y += thead
obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y))
diff --git a/arch/riscv/boot/dts/allwinner/sunxi-d1s-t113.dtsi b/arch/riscv/boot/dts/allwinner/sunxi-d1s-t113.dtsi
index 922e8e0e2c09..1bb1e5cae602 100644
--- a/arch/riscv/boot/dts/allwinner/sunxi-d1s-t113.dtsi
+++ b/arch/riscv/boot/dts/allwinner/sunxi-d1s-t113.dtsi
@@ -109,6 +109,12 @@
};
/omit-if-no-ref/
+ spi0_pins: spi0-pins {
+ pins = "PC2", "PC3", "PC4", "PC5";
+ function = "spi0";
+ };
+
+ /omit-if-no-ref/
uart1_pg6_pins: uart1-pg6-pins {
pins = "PG6", "PG7";
function = "uart1";
@@ -447,6 +453,37 @@
#size-cells = <0>;
};
+ spi0: spi@4025000 {
+ compatible = "allwinner,sun20i-d1-spi",
+ "allwinner,sun50i-r329-spi";
+ reg = <0x04025000 0x1000>;
+ interrupts = <SOC_PERIPHERAL_IRQ(15) IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SPI0>, <&ccu CLK_SPI0>;
+ clock-names = "ahb", "mod";
+ dmas = <&dma 22>, <&dma 22>;
+ dma-names = "rx", "tx";
+ resets = <&ccu RST_BUS_SPI0>;
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ spi1: spi@4026000 {
+ compatible = "allwinner,sun20i-d1-spi-dbi",
+ "allwinner,sun50i-r329-spi-dbi",
+ "allwinner,sun50i-r329-spi";
+ reg = <0x04026000 0x1000>;
+ interrupts = <SOC_PERIPHERAL_IRQ(16) IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&ccu CLK_BUS_SPI1>, <&ccu CLK_SPI1>;
+ clock-names = "ahb", "mod";
+ dmas = <&dma 23>, <&dma 23>;
+ dma-names = "rx", "tx";
+ resets = <&ccu RST_BUS_SPI1>;
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
usb_otg: usb@4100000 {
compatible = "allwinner,sun20i-d1-musb",
"allwinner,sun8i-a33-musb";
diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi
index 000447482aca..4218621ea3b9 100644
--- a/arch/riscv/boot/dts/starfive/jh7100.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi
@@ -238,5 +238,15 @@
#size-cells = <0>;
status = "disabled";
};
+
+ watchdog@12480000 {
+ compatible = "starfive,jh7100-wdt";
+ reg = <0x0 0x12480000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_WDTIMER_APB>,
+ <&clkgen JH7100_CLK_WDT_CORE>;
+ clock-names = "apb", "core";
+ resets = <&rstgen JH7100_RSTN_WDTIMER_APB>,
+ <&rstgen JH7100_RSTN_WDT>;
+ };
};
};
diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
index 2a6d81609284..fa0061eb33a7 100644
--- a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
@@ -114,6 +114,23 @@
pinctrl-names = "default";
pinctrl-0 = <&i2c5_pins>;
status = "okay";
+
+ axp15060: pmic@36 {
+ compatible = "x-powers,axp15060";
+ reg = <0x36>;
+ interrupts = <0>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ regulators {
+ vdd_cpu: dcdc2 {
+ regulator-always-on;
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <1540000>;
+ regulator-name = "vdd-cpu";
+ };
+ };
+ };
};
&i2c6 {
@@ -213,3 +230,19 @@
pinctrl-0 = <&uart0_pins>;
status = "okay";
};
+
+&U74_1 {
+ cpu-supply = <&vdd_cpu>;
+};
+
+&U74_2 {
+ cpu-supply = <&vdd_cpu>;
+};
+
+&U74_3 {
+ cpu-supply = <&vdd_cpu>;
+};
+
+&U74_4 {
+ cpu-supply = <&vdd_cpu>;
+};
diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi
index 4c5fdb905da8..ec2e70011a73 100644
--- a/arch/riscv/boot/dts/starfive/jh7110.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi
@@ -53,6 +53,9 @@
next-level-cache = <&ccache>;
riscv,isa = "rv64imafdc_zba_zbb";
tlb-split;
+ operating-points-v2 = <&cpu_opp>;
+ clocks = <&syscrg JH7110_SYSCLK_CPU_CORE>;
+ clock-names = "cpu";
cpu1_intc: interrupt-controller {
compatible = "riscv,cpu-intc";
@@ -79,6 +82,9 @@
next-level-cache = <&ccache>;
riscv,isa = "rv64imafdc_zba_zbb";
tlb-split;
+ operating-points-v2 = <&cpu_opp>;
+ clocks = <&syscrg JH7110_SYSCLK_CPU_CORE>;
+ clock-names = "cpu";
cpu2_intc: interrupt-controller {
compatible = "riscv,cpu-intc";
@@ -105,6 +111,9 @@
next-level-cache = <&ccache>;
riscv,isa = "rv64imafdc_zba_zbb";
tlb-split;
+ operating-points-v2 = <&cpu_opp>;
+ clocks = <&syscrg JH7110_SYSCLK_CPU_CORE>;
+ clock-names = "cpu";
cpu3_intc: interrupt-controller {
compatible = "riscv,cpu-intc";
@@ -131,6 +140,9 @@
next-level-cache = <&ccache>;
riscv,isa = "rv64imafdc_zba_zbb";
tlb-split;
+ operating-points-v2 = <&cpu_opp>;
+ clocks = <&syscrg JH7110_SYSCLK_CPU_CORE>;
+ clock-names = "cpu";
cpu4_intc: interrupt-controller {
compatible = "riscv,cpu-intc";
@@ -164,6 +176,27 @@
};
};
+ cpu_opp: opp-table-0 {
+ compatible = "operating-points-v2";
+ opp-shared;
+ opp-375000000 {
+ opp-hz = /bits/ 64 <375000000>;
+ opp-microvolt = <800000>;
+ };
+ opp-500000000 {
+ opp-hz = /bits/ 64 <500000000>;
+ opp-microvolt = <800000>;
+ };
+ opp-750000000 {
+ opp-hz = /bits/ 64 <750000000>;
+ opp-microvolt = <800000>;
+ };
+ opp-1500000000 {
+ opp-hz = /bits/ 64 <1500000000>;
+ opp-microvolt = <1040000>;
+ };
+ };
+
gmac0_rgmii_rxin: gmac0-rgmii-rxin-clock {
compatible = "fixed-clock";
clock-output-names = "gmac0_rgmii_rxin";
@@ -469,6 +502,16 @@
#gpio-cells = <2>;
};
+ watchdog@13070000 {
+ compatible = "starfive,jh7110-wdt";
+ reg = <0x0 0x13070000 0x0 0x10000>;
+ clocks = <&syscrg JH7110_SYSCLK_WDT_APB>,
+ <&syscrg JH7110_SYSCLK_WDT_CORE>;
+ clock-names = "apb", "core";
+ resets = <&syscrg JH7110_SYSRST_WDT_APB>,
+ <&syscrg JH7110_SYSRST_WDT_CORE>;
+ };
+
aoncrg: clock-controller@17000000 {
compatible = "starfive,jh7110-aoncrg";
reg = <0x0 0x17000000 0x0 0x10000>;
@@ -496,5 +539,12 @@
gpio-controller;
#gpio-cells = <2>;
};
+
+ pwrc: power-controller@17030000 {
+ compatible = "starfive,jh7110-pmu";
+ reg = <0x0 0x17030000 0x0 0x10000>;
+ interrupts = <111>;
+ #power-domain-cells = <1>;
+ };
};
};
diff --git a/arch/riscv/boot/dts/thead/Makefile b/arch/riscv/boot/dts/thead/Makefile
new file mode 100644
index 000000000000..e311fc9a5939
--- /dev/null
+++ b/arch/riscv/boot/dts/thead/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_ARCH_THEAD) += th1520-lichee-pi-4a.dtb
diff --git a/arch/riscv/boot/dts/thead/th1520-lichee-module-4a.dtsi b/arch/riscv/boot/dts/thead/th1520-lichee-module-4a.dtsi
new file mode 100644
index 000000000000..4b0249ac710f
--- /dev/null
+++ b/arch/riscv/boot/dts/thead/th1520-lichee-module-4a.dtsi
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Jisheng Zhang <jszhang@kernel.org>
+ */
+
+/dts-v1/;
+
+#include "th1520.dtsi"
+
+/ {
+ model = "Sipeed Lichee Module 4A";
+ compatible = "sipeed,lichee-module-4a", "thead,th1520";
+
+ memory@0 {
+ device_type = "memory";
+ reg = <0x0 0x00000000 0x2 0x00000000>;
+ };
+};
+
+&osc {
+ clock-frequency = <24000000>;
+};
+
+&osc_32k {
+ clock-frequency = <32768>;
+};
+
+&apb_clk {
+ clock-frequency = <62500000>;
+};
+
+&uart_sclk {
+ clock-frequency = <100000000>;
+};
+
+&dmac0 {
+ status = "okay";
+};
diff --git a/arch/riscv/boot/dts/thead/th1520-lichee-pi-4a.dts b/arch/riscv/boot/dts/thead/th1520-lichee-pi-4a.dts
new file mode 100644
index 000000000000..a1248b2ee3a3
--- /dev/null
+++ b/arch/riscv/boot/dts/thead/th1520-lichee-pi-4a.dts
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Jisheng Zhang <jszhang@kernel.org>
+ */
+
+#include "th1520-lichee-module-4a.dtsi"
+
+/ {
+ model = "Sipeed Lichee Pi 4A";
+ compatible = "sipeed,lichee-pi-4a", "sipeed,lichee-module-4a", "thead,th1520";
+
+ aliases {
+ gpio0 = &gpio0;
+ gpio1 = &gpio1;
+ gpio2 = &gpio2;
+ gpio3 = &gpio3;
+ serial0 = &uart0;
+ serial1 = &uart1;
+ serial2 = &uart2;
+ serial3 = &uart3;
+ serial4 = &uart4;
+ serial5 = &uart5;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+};
+
+&uart0 {
+ status = "okay";
+};
diff --git a/arch/riscv/boot/dts/thead/th1520.dtsi b/arch/riscv/boot/dts/thead/th1520.dtsi
new file mode 100644
index 000000000000..56a73134b49e
--- /dev/null
+++ b/arch/riscv/boot/dts/thead/th1520.dtsi
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Alibaba Group Holding Limited.
+ * Copyright (C) 2023 Jisheng Zhang <jszhang@kernel.org>
+ */
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+ compatible = "thead,th1520";
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus: cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ timebase-frequency = <3000000>;
+
+ c910_0: cpu@0 {
+ compatible = "thead,c910", "riscv";
+ device_type = "cpu";
+ riscv,isa = "rv64imafdc";
+ reg = <0>;
+ i-cache-block-size = <64>;
+ i-cache-size = <65536>;
+ i-cache-sets = <512>;
+ d-cache-block-size = <64>;
+ d-cache-size = <65536>;
+ d-cache-sets = <512>;
+ next-level-cache = <&l2_cache>;
+ mmu-type = "riscv,sv39";
+
+ cpu0_intc: interrupt-controller {
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+ };
+
+ c910_1: cpu@1 {
+ compatible = "thead,c910", "riscv";
+ device_type = "cpu";
+ riscv,isa = "rv64imafdc";
+ reg = <1>;
+ i-cache-block-size = <64>;
+ i-cache-size = <65536>;
+ i-cache-sets = <512>;
+ d-cache-block-size = <64>;
+ d-cache-size = <65536>;
+ d-cache-sets = <512>;
+ next-level-cache = <&l2_cache>;
+ mmu-type = "riscv,sv39";
+
+ cpu1_intc: interrupt-controller {
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+ };
+
+ c910_2: cpu@2 {
+ compatible = "thead,c910", "riscv";
+ device_type = "cpu";
+ riscv,isa = "rv64imafdc";
+ reg = <2>;
+ i-cache-block-size = <64>;
+ i-cache-size = <65536>;
+ i-cache-sets = <512>;
+ d-cache-block-size = <64>;
+ d-cache-size = <65536>;
+ d-cache-sets = <512>;
+ next-level-cache = <&l2_cache>;
+ mmu-type = "riscv,sv39";
+
+ cpu2_intc: interrupt-controller {
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+ };
+
+ c910_3: cpu@3 {
+ compatible = "thead,c910", "riscv";
+ device_type = "cpu";
+ riscv,isa = "rv64imafdc";
+ reg = <3>;
+ i-cache-block-size = <64>;
+ i-cache-size = <65536>;
+ i-cache-sets = <512>;
+ d-cache-block-size = <64>;
+ d-cache-size = <65536>;
+ d-cache-sets = <512>;
+ next-level-cache = <&l2_cache>;
+ mmu-type = "riscv,sv39";
+
+ cpu3_intc: interrupt-controller {
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+ };
+
+ l2_cache: l2-cache {
+ compatible = "cache";
+ cache-block-size = <64>;
+ cache-level = <2>;
+ cache-size = <1048576>;
+ cache-sets = <1024>;
+ cache-unified;
+ };
+ };
+
+ osc: oscillator {
+ compatible = "fixed-clock";
+ clock-output-names = "osc_24m";
+ #clock-cells = <0>;
+ };
+
+ osc_32k: 32k-oscillator {
+ compatible = "fixed-clock";
+ clock-output-names = "osc_32k";
+ #clock-cells = <0>;
+ };
+
+ apb_clk: apb-clk-clock {
+ compatible = "fixed-clock";
+ clock-output-names = "apb_clk";
+ #clock-cells = <0>;
+ };
+
+ uart_sclk: uart-sclk-clock {
+ compatible = "fixed-clock";
+ clock-output-names = "uart_sclk";
+ #clock-cells = <0>;
+ };
+
+ soc {
+ compatible = "simple-bus";
+ interrupt-parent = <&plic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ plic: interrupt-controller@ffd8000000 {
+ compatible = "thead,th1520-plic", "thead,c900-plic";
+ reg = <0xff 0xd8000000 0x0 0x01000000>;
+ interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 9>,
+ <&cpu1_intc 11>, <&cpu1_intc 9>,
+ <&cpu2_intc 11>, <&cpu2_intc 9>,
+ <&cpu3_intc 11>, <&cpu3_intc 9>;
+ interrupt-controller;
+ #address-cells = <0>;
+ #interrupt-cells = <2>;
+ riscv,ndev = <240>;
+ };
+
+ clint: timer@ffdc000000 {
+ compatible = "thead,th1520-clint", "thead,c900-clint";
+ reg = <0xff 0xdc000000 0x0 0x00010000>;
+ interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>,
+ <&cpu1_intc 3>, <&cpu1_intc 7>,
+ <&cpu2_intc 3>, <&cpu2_intc 7>,
+ <&cpu3_intc 3>, <&cpu3_intc 7>;
+ };
+
+ uart0: serial@ffe7014000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0xff 0xe7014000 0x0 0x100>;
+ interrupts = <36 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&uart_sclk>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ status = "disabled";
+ };
+
+ uart1: serial@ffe7f00000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0xff 0xe7f00000 0x0 0x100>;
+ interrupts = <37 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&uart_sclk>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ status = "disabled";
+ };
+
+ uart3: serial@ffe7f04000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0xff 0xe7f04000 0x0 0x100>;
+ interrupts = <39 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&uart_sclk>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ status = "disabled";
+ };
+
+ gpio2: gpio@ffe7f34000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = <0xff 0xe7f34000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ portc: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpios = <32>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <58 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
+
+ gpio3: gpio@ffe7f38000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = <0xff 0xe7f38000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ portd: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpios = <32>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <59 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
+
+ gpio0: gpio@ffec005000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = <0xff 0xec005000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ porta: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpios = <32>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <56 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
+
+ gpio1: gpio@ffec006000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = <0xff 0xec006000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ portb: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpios = <32>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <57 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
+
+ uart2: serial@ffec010000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0xff 0xec010000 0x0 0x4000>;
+ interrupts = <38 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&uart_sclk>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ status = "disabled";
+ };
+
+ dmac0: dma-controller@ffefc00000 {
+ compatible = "snps,axi-dma-1.01a";
+ reg = <0xff 0xefc00000 0x0 0x1000>;
+ interrupts = <27 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&apb_clk>, <&apb_clk>;
+ clock-names = "core-clk", "cfgr-clk";
+ #dma-cells = <1>;
+ dma-channels = <4>;
+ snps,block-size = <65536 65536 65536 65536>;
+ snps,priority = <0 1 2 3>;
+ snps,dma-masters = <1>;
+ snps,data-width = <4>;
+ snps,axi-max-burst-len = <16>;
+ status = "disabled";
+ };
+
+ timer0: timer@ffefc32000 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0xff 0xefc32000 0x0 0x14>;
+ clocks = <&apb_clk>;
+ clock-names = "timer";
+ interrupts = <16 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ timer1: timer@ffefc32014 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0xff 0xefc32014 0x0 0x14>;
+ clocks = <&apb_clk>;
+ clock-names = "timer";
+ interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ timer2: timer@ffefc32028 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0xff 0xefc32028 0x0 0x14>;
+ clocks = <&apb_clk>;
+ clock-names = "timer";
+ interrupts = <18 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ timer3: timer@ffefc3203c {
+ compatible = "snps,dw-apb-timer";
+ reg = <0xff 0xefc3203c 0x0 0x14>;
+ clocks = <&apb_clk>;
+ clock-names = "timer";
+ interrupts = <19 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ uart4: serial@fff7f08000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0xff 0xf7f08000 0x0 0x4000>;
+ interrupts = <40 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&uart_sclk>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ status = "disabled";
+ };
+
+ uart5: serial@fff7f0c000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0xff 0xf7f0c000 0x0 0x4000>;
+ interrupts = <41 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&uart_sclk>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ status = "disabled";
+ };
+
+ timer4: timer@ffffc33000 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0xff 0xffc33000 0x0 0x14>;
+ clocks = <&apb_clk>;
+ clock-names = "timer";
+ interrupts = <20 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ timer5: timer@ffffc33014 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0xff 0xffc33014 0x0 0x14>;
+ clocks = <&apb_clk>;
+ clock-names = "timer";
+ interrupts = <21 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ timer6: timer@ffffc33028 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0xff 0xffc33028 0x0 0x14>;
+ clocks = <&apb_clk>;
+ clock-names = "timer";
+ interrupts = <22 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ timer7: timer@ffffc3303c {
+ compatible = "snps,dw-apb-timer";
+ reg = <0xff 0xffc3303c 0x0 0x14>;
+ clocks = <&apb_clk>;
+ clock-names = "timer";
+ interrupts = <23 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ ao_gpio0: gpio@fffff41000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = <0xff 0xfff41000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ porte: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpios = <32>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <76 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
+
+ ao_gpio1: gpio@fffff52000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = <0xff 0xfff52000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ portf: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpios = <32>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <55 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
+ };
+};
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index d98d6e90b2b8..0a0107460a5c 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -27,6 +27,7 @@ CONFIG_EXPERT=y
CONFIG_PROFILING=y
CONFIG_SOC_MICROCHIP_POLARFIRE=y
CONFIG_ARCH_RENESAS=y
+CONFIG_ARCH_THEAD=y
CONFIG_SOC_SIFIVE=y
CONFIG_SOC_STARFIVE=y
CONFIG_ARCH_SUNXI=y
@@ -37,6 +38,7 @@ CONFIG_PM=y
CONFIG_CPU_IDLE=y
CONFIG_VIRTUALIZATION=y
CONFIG_KVM=m
+CONFIG_ACPI=y
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile
index a1055965fbee..7b2637c8c332 100644
--- a/arch/riscv/errata/Makefile
+++ b/arch/riscv/errata/Makefile
@@ -1,2 +1,6 @@
+ifdef CONFIG_RELOCATABLE
+KBUILD_CFLAGS += -fno-pie
+endif
+
obj-$(CONFIG_ERRATA_SIFIVE) += sifive/
obj-$(CONFIG_ERRATA_THEAD) += thead/
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index c259dc925ec1..be84b14f0118 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -45,8 +45,11 @@ static bool errata_probe_cmo(unsigned int stage,
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
return false;
- riscv_cbom_block_size = L1_CACHE_BYTES;
- riscv_noncoherent_supported();
+ if (stage == RISCV_ALTERNATIVES_BOOT) {
+ riscv_cbom_block_size = L1_CACHE_BYTES;
+ riscv_noncoherent_supported();
+ }
+
return true;
}
diff --git a/arch/riscv/include/asm/acenv.h b/arch/riscv/include/asm/acenv.h
new file mode 100644
index 000000000000..43ae2e32c779
--- /dev/null
+++ b/arch/riscv/include/asm/acenv.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * RISC-V specific ACPICA environments and implementation
+ */
+
+#ifndef _ASM_ACENV_H
+#define _ASM_ACENV_H
+
+/* This header is required unconditionally by the ACPI core */
+
+#endif /* _ASM_ACENV_H */
diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h
new file mode 100644
index 000000000000..f71ce21ff684
--- /dev/null
+++ b/arch/riscv/include/asm/acpi.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013-2014, Linaro Ltd.
+ * Author: Al Stone <al.stone@linaro.org>
+ * Author: Graeme Gregory <graeme.gregory@linaro.org>
+ * Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *
+ * Copyright (C) 2021-2023, Ventana Micro Systems Inc.
+ * Author: Sunil V L <sunilvl@ventanamicro.com>
+ */
+
+#ifndef _ASM_ACPI_H
+#define _ASM_ACPI_H
+
+/* Basic configuration for ACPI */
+#ifdef CONFIG_ACPI
+
+typedef u64 phys_cpuid_t;
+#define PHYS_CPUID_INVALID INVALID_HARTID
+
+/* ACPI table mapping after acpi_permanent_mmap is set */
+void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
+#define acpi_os_ioremap acpi_os_ioremap
+
+#define acpi_strict 1 /* No out-of-spec workarounds on RISC-V */
+extern int acpi_disabled;
+extern int acpi_noirq;
+extern int acpi_pci_disabled;
+
+static inline void disable_acpi(void)
+{
+ acpi_disabled = 1;
+ acpi_pci_disabled = 1;
+ acpi_noirq = 1;
+}
+
+static inline void enable_acpi(void)
+{
+ acpi_disabled = 0;
+ acpi_pci_disabled = 0;
+ acpi_noirq = 0;
+}
+
+/*
+ * The ACPI processor driver for ACPI core code needs this macro
+ * to find out whether this cpu was already mapped (mapping from CPU hardware
+ * ID to CPU logical ID) or not.
+ */
+#define cpu_physical_id(cpu) cpuid_to_hartid_map(cpu)
+
+/*
+ * Since MADT must provide at least one RINTC structure, the
+ * CPU will be always available in MADT on RISC-V.
+ */
+static inline bool acpi_has_cpu_in_madt(void)
+{
+ return true;
+}
+
+static inline void arch_fix_phys_package_id(int num, u32 slot) { }
+
+void acpi_init_rintc_map(void);
+struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu);
+u32 get_acpi_id_for_cpu(int cpu);
+int acpi_get_riscv_isa(struct acpi_table_header *table,
+ unsigned int cpu, const char **isa);
+
+static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; }
+#else
+static inline void acpi_init_rintc_map(void) { }
+static inline struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu)
+{
+ return NULL;
+}
+
+static inline int acpi_get_riscv_isa(struct acpi_table_header *table,
+ unsigned int cpu, const char **isa)
+{
+ return -EINVAL;
+}
+
+#endif /* CONFIG_ACPI */
+
+#endif /*_ASM_ACPI_H*/
diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h
index 14be0673f5b5..00a96e7a9664 100644
--- a/arch/riscv/include/asm/asm-extable.h
+++ b/arch/riscv/include/asm/asm-extable.h
@@ -7,6 +7,8 @@
#define EX_TYPE_BPF 2
#define EX_TYPE_UACCESS_ERR_ZERO 3
+#ifdef CONFIG_MMU
+
#ifdef __ASSEMBLY__
#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
@@ -62,4 +64,8 @@
#endif /* __ASSEMBLY__ */
+#else /* CONFIG_MMU */
+ #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err)
+#endif /* CONFIG_MMU */
+
#endif /* __ASM_ASM_EXTABLE_H */
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index bba472928b53..f5dfef6c2153 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -238,78 +238,6 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a,
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
#endif
-/*
- * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
- * {cmp,}xchg and the operations that return, so they need a full barrier.
- */
-#define ATOMIC_OP(c_t, prefix, size) \
-static __always_inline \
-c_t arch_atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n) \
-{ \
- return __xchg_relaxed(&(v->counter), n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n) \
-{ \
- return __xchg_acquire(&(v->counter), n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n) \
-{ \
- return __xchg_release(&(v->counter), n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n) \
-{ \
- return __arch_xchg(&(v->counter), n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v, \
- c_t o, c_t n) \
-{ \
- return __cmpxchg_relaxed(&(v->counter), o, n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v, \
- c_t o, c_t n) \
-{ \
- return __cmpxchg_acquire(&(v->counter), o, n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v, \
- c_t o, c_t n) \
-{ \
- return __cmpxchg_release(&(v->counter), o, n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \
-{ \
- return __cmpxchg(&(v->counter), o, n, size); \
-}
-
-#ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS() \
- ATOMIC_OP(int, , 4)
-#else
-#define ATOMIC_OPS() \
- ATOMIC_OP(int, , 4) \
- ATOMIC_OP(s64, 64, 8)
-#endif
-
-ATOMIC_OPS()
-
-#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed
-#define arch_atomic_xchg_acquire arch_atomic_xchg_acquire
-#define arch_atomic_xchg_release arch_atomic_xchg_release
-#define arch_atomic_xchg arch_atomic_xchg
-#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
-#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire
-#define arch_atomic_cmpxchg_release arch_atomic_cmpxchg_release
-#define arch_atomic_cmpxchg arch_atomic_cmpxchg
-
-#undef ATOMIC_OPS
-#undef ATOMIC_OP
-
static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v)
{
int prev, rc;
diff --git a/arch/riscv/include/asm/cpu.h b/arch/riscv/include/asm/cpu.h
new file mode 100644
index 000000000000..28d45a6678ce
--- /dev/null
+++ b/arch/riscv/include/asm/cpu.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_CPU_H
+#define _ASM_CPU_H
+
+/* This header is required unconditionally by the ACPI core */
+
+#endif /* _ASM_CPU_H */
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 808d5403f2ac..23fed53b8815 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -6,6 +6,9 @@
#ifndef _ASM_CPUFEATURE_H
#define _ASM_CPUFEATURE_H
+#include <linux/bitmap.h>
+#include <asm/hwcap.h>
+
/*
* These are probed via a device_initcall(), via either the SBI or directly
* from the corresponding CSRs.
@@ -16,8 +19,15 @@ struct riscv_cpuinfo {
unsigned long mimpid;
};
+struct riscv_isainfo {
+ DECLARE_BITMAP(isa, RISCV_ISA_EXT_MAX);
+};
+
DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
DECLARE_PER_CPU(long, misaligned_access_speed);
+/* Per-cpu ISA extensions. */
+extern struct riscv_isainfo hart_isa[NR_CPUS];
+
#endif
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index b6acb7ed115f..7bac43a3176e 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -24,16 +24,24 @@
#define SR_FS_CLEAN _AC(0x00004000, UL)
#define SR_FS_DIRTY _AC(0x00006000, UL)
+#define SR_VS _AC(0x00000600, UL) /* Vector Status */
+#define SR_VS_OFF _AC(0x00000000, UL)
+#define SR_VS_INITIAL _AC(0x00000200, UL)
+#define SR_VS_CLEAN _AC(0x00000400, UL)
+#define SR_VS_DIRTY _AC(0x00000600, UL)
+
#define SR_XS _AC(0x00018000, UL) /* Extension Status */
#define SR_XS_OFF _AC(0x00000000, UL)
#define SR_XS_INITIAL _AC(0x00008000, UL)
#define SR_XS_CLEAN _AC(0x00010000, UL)
#define SR_XS_DIRTY _AC(0x00018000, UL)
+#define SR_FS_VS (SR_FS | SR_VS) /* Vector and Floating-Point Unit */
+
#ifndef CONFIG_64BIT
-#define SR_SD _AC(0x80000000, UL) /* FS/XS dirty */
+#define SR_SD _AC(0x80000000, UL) /* FS/VS/XS dirty */
#else
-#define SR_SD _AC(0x8000000000000000, UL) /* FS/XS dirty */
+#define SR_SD _AC(0x8000000000000000, UL) /* FS/VS/XS dirty */
#endif
#ifdef CONFIG_64BIT
@@ -82,7 +90,9 @@
#define EXC_INST_ACCESS 1
#define EXC_INST_ILLEGAL 2
#define EXC_BREAKPOINT 3
+#define EXC_LOAD_MISALIGNED 4
#define EXC_LOAD_ACCESS 5
+#define EXC_STORE_MISALIGNED 6
#define EXC_STORE_ACCESS 7
#define EXC_SYSCALL 8
#define EXC_HYPERVISOR_SYSCALL 9
@@ -375,6 +385,12 @@
#define CSR_MVIPH 0x319
#define CSR_MIPH 0x354
+#define CSR_VSTART 0x8
+#define CSR_VCSR 0xf
+#define CSR_VL 0xc20
+#define CSR_VTYPE 0xc21
+#define CSR_VLENB 0xc22
+
#ifdef CONFIG_RISCV_M_MODE
# define CSR_STATUS CSR_MSTATUS
# define CSR_IE CSR_MIE
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index 30e7d2455960..c24280774caf 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -66,7 +66,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
* via a bitmap that coorespends to each single-letter ISA extension. This is
* essentially defunct, but will remain for compatibility with userspace.
*/
-#define ELF_HWCAP (elf_hwcap & ((1UL << RISCV_ISA_EXT_BASE) - 1))
+#define ELF_HWCAP riscv_get_elf_hwcap()
extern unsigned long elf_hwcap;
/*
@@ -105,6 +105,15 @@ do { \
get_cache_size(3, CACHE_TYPE_UNIFIED)); \
NEW_AUX_ENT(AT_L3_CACHEGEOMETRY, \
get_cache_geometry(3, CACHE_TYPE_UNIFIED)); \
+ /* \
+ * Should always be nonzero unless there's a kernel bug. \
+ * If we haven't determined a sensible value to give to \
+ * userspace, omit the entry: \
+ */ \
+ if (likely(signal_minsigstksz)) \
+ NEW_AUX_ENT(AT_MINSIGSTKSZ, signal_minsigstksz); \
+ else \
+ NEW_AUX_ENT(AT_IGNORE, 0); \
} while (0)
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
struct linux_binprm;
diff --git a/arch/riscv/include/asm/extable.h b/arch/riscv/include/asm/extable.h
index 512012d193dc..3eb5c1f7bf34 100644
--- a/arch/riscv/include/asm/extable.h
+++ b/arch/riscv/include/asm/extable.h
@@ -32,7 +32,11 @@ do { \
(b)->data = (tmp).data; \
} while (0)
+#ifdef CONFIG_MMU
bool fixup_exception(struct pt_regs *regs);
+#else
+static inline bool fixup_exception(struct pt_regs *regs) { return false; }
+#endif
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I)
bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs);
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index d47d87c2d7e3..740a979171e5 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -111,4 +111,25 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
#endif /* CONFIG_DYNAMIC_FTRACE */
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+struct fgraph_ret_regs {
+ unsigned long a1;
+ unsigned long a0;
+ unsigned long s0;
+ unsigned long ra;
+};
+
+static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
+{
+ return ret_regs->a0;
+}
+
+static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
+{
+ return ret_regs->s0;
+}
+#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */
+#endif
+
#endif /* _ASM_RISCV_FTRACE_H */
diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
index fe6f23006641..ce1ebda1a49a 100644
--- a/arch/riscv/include/asm/hugetlb.h
+++ b/arch/riscv/include/asm/hugetlb.h
@@ -36,6 +36,9 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty);
+#define __HAVE_ARCH_HUGE_PTEP_GET
+pte_t huge_ptep_get(pte_t *ptep);
+
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
#define arch_make_huge_pte arch_make_huge_pte
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index e0c40a4c63d5..f041bfa7f6a0 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -22,6 +22,7 @@
#define RISCV_ISA_EXT_m ('m' - 'a')
#define RISCV_ISA_EXT_s ('s' - 'a')
#define RISCV_ISA_EXT_u ('u' - 'a')
+#define RISCV_ISA_EXT_v ('v' - 'a')
/*
* These macros represent the logical IDs of each multi-letter RISC-V ISA
@@ -46,6 +47,12 @@
#define RISCV_ISA_EXT_ZICBOZ 34
#define RISCV_ISA_EXT_SMAIA 35
#define RISCV_ISA_EXT_SSAIA 36
+#define RISCV_ISA_EXT_ZBA 37
+#define RISCV_ISA_EXT_ZBS 38
+#define RISCV_ISA_EXT_ZICNTR 39
+#define RISCV_ISA_EXT_ZICSR 40
+#define RISCV_ISA_EXT_ZIFENCEI 41
+#define RISCV_ISA_EXT_ZIHPM 42
#define RISCV_ISA_EXT_MAX 64
#define RISCV_ISA_EXT_NAME_LEN_MAX 32
@@ -60,6 +67,8 @@
#include <linux/jump_label.h>
+unsigned long riscv_get_elf_hwcap(void);
+
struct riscv_isa_ext_data {
/* Name of the extension displayed to userspace via /proc/cpuinfo */
char uprop[RISCV_ISA_EXT_NAME_LEN_MAX];
diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h
index 8d5c84f2d5ef..4e1505cef8aa 100644
--- a/arch/riscv/include/asm/insn.h
+++ b/arch/riscv/include/asm/insn.h
@@ -137,6 +137,26 @@
#define RVG_OPCODE_JALR 0x67
#define RVG_OPCODE_JAL 0x6f
#define RVG_OPCODE_SYSTEM 0x73
+#define RVG_SYSTEM_CSR_OFF 20
+#define RVG_SYSTEM_CSR_MASK GENMASK(12, 0)
+
+/* parts of opcode for RVF, RVD and RVQ */
+#define RVFDQ_FL_FS_WIDTH_OFF 12
+#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(3, 0)
+#define RVFDQ_FL_FS_WIDTH_W 2
+#define RVFDQ_FL_FS_WIDTH_D 3
+#define RVFDQ_LS_FS_WIDTH_Q 4
+#define RVFDQ_OPCODE_FL 0x07
+#define RVFDQ_OPCODE_FS 0x27
+
+/* parts of opcode for RVV */
+#define RVV_OPCODE_VECTOR 0x57
+#define RVV_VL_VS_WIDTH_8 0
+#define RVV_VL_VS_WIDTH_16 5
+#define RVV_VL_VS_WIDTH_32 6
+#define RVV_VL_VS_WIDTH_64 7
+#define RVV_OPCODE_VL RVFDQ_OPCODE_FL
+#define RVV_OPCODE_VS RVFDQ_OPCODE_FS
/* parts of opcode for RVC*/
#define RVC_OPCODE_C0 0x0
@@ -304,6 +324,15 @@ static __always_inline bool riscv_insn_is_branch(u32 code)
(RVC_X(x_, RVC_B_IMM_7_6_OPOFF, RVC_B_IMM_7_6_MASK) << RVC_B_IMM_7_6_OFF) | \
(RVC_IMM_SIGN(x_) << RVC_B_IMM_SIGN_OFF); })
+#define RVG_EXTRACT_SYSTEM_CSR(x) \
+ ({typeof(x) x_ = (x); RV_X(x_, RVG_SYSTEM_CSR_OFF, RVG_SYSTEM_CSR_MASK); })
+
+#define RVFDQ_EXTRACT_FL_FS_WIDTH(x) \
+ ({typeof(x) x_ = (x); RV_X(x_, RVFDQ_FL_FS_WIDTH_OFF, \
+ RVFDQ_FL_FS_WIDTH_MASK); })
+
+#define RVV_EXRACT_VL_VS_WIDTH(x) RVFDQ_EXTRACT_FL_FS_WIDTH(x)
+
/*
* Get the immediate from a J-type instruction.
*
diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h
index 43b9ebfbd943..8e10a94430a2 100644
--- a/arch/riscv/include/asm/irq.h
+++ b/arch/riscv/include/asm/irq.h
@@ -16,6 +16,4 @@ void riscv_set_intc_hwnode_fn(struct fwnode_handle *(*fn)(void));
struct fwnode_handle *riscv_get_intc_hwnode(void);
-extern void __init init_IRQ(void);
-
#endif /* _ASM_RISCV_IRQ_H */
diff --git a/arch/riscv/include/asm/irq_stack.h b/arch/riscv/include/asm/irq_stack.h
new file mode 100644
index 000000000000..e4042d297580
--- /dev/null
+++ b/arch/riscv/include/asm/irq_stack.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_IRQ_STACK_H
+#define _ASM_RISCV_IRQ_STACK_H
+
+#include <linux/bug.h>
+#include <linux/gfp.h>
+#include <linux/kconfig.h>
+#include <linux/vmalloc.h>
+#include <linux/pgtable.h>
+#include <asm/thread_info.h>
+
+DECLARE_PER_CPU(ulong *, irq_stack_ptr);
+
+#ifdef CONFIG_VMAP_STACK
+/*
+ * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd
+ * stacks need to have the same alignment.
+ */
+static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
+{
+ void *p;
+
+ p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+ __builtin_return_address(0));
+ return kasan_reset_tag(p);
+}
+#endif /* CONFIG_VMAP_STACK */
+
+#endif /* _ASM_RISCV_IRQ_STACK_H */
diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h
index d887a54042aa..0bbffd528096 100644
--- a/arch/riscv/include/asm/kfence.h
+++ b/arch/riscv/include/asm/kfence.h
@@ -8,41 +8,8 @@
#include <asm-generic/pgalloc.h>
#include <asm/pgtable.h>
-static inline int split_pmd_page(unsigned long addr)
-{
- int i;
- unsigned long pfn = PFN_DOWN(__pa((addr & PMD_MASK)));
- pmd_t *pmd = pmd_off_k(addr);
- pte_t *pte = pte_alloc_one_kernel(&init_mm);
-
- if (!pte)
- return -ENOMEM;
-
- for (i = 0; i < PTRS_PER_PTE; i++)
- set_pte(pte + i, pfn_pte(pfn + i, PAGE_KERNEL));
- set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(pte)), PAGE_TABLE));
-
- flush_tlb_kernel_range(addr, addr + PMD_SIZE);
- return 0;
-}
-
static inline bool arch_kfence_init_pool(void)
{
- int ret;
- unsigned long addr;
- pmd_t *pmd;
-
- for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
- addr += PAGE_SIZE) {
- pmd = pmd_off_k(addr);
-
- if (pmd_leaf(*pmd)) {
- ret = split_pmd_page(addr);
- if (ret)
- return false;
- }
- }
-
return true;
}
diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h
index 1de0717112e5..1f37b600ca47 100644
--- a/arch/riscv/include/asm/kvm_aia.h
+++ b/arch/riscv/include/asm/kvm_aia.h
@@ -20,6 +20,33 @@ struct kvm_aia {
/* In-kernel irqchip initialized */
bool initialized;
+
+ /* Virtualization mode (Emulation, HW Accelerated, or Auto) */
+ u32 mode;
+
+ /* Number of MSIs */
+ u32 nr_ids;
+
+ /* Number of wired IRQs */
+ u32 nr_sources;
+
+ /* Number of group bits in IMSIC address */
+ u32 nr_group_bits;
+
+ /* Position of group bits in IMSIC address */
+ u32 nr_group_shift;
+
+ /* Number of hart bits in IMSIC address */
+ u32 nr_hart_bits;
+
+ /* Number of guest bits in IMSIC address */
+ u32 nr_guest_bits;
+
+ /* Guest physical address of APLIC */
+ gpa_t aplic_addr;
+
+ /* Internal state of APLIC */
+ void *aplic_state;
};
struct kvm_vcpu_aia_csr {
@@ -38,25 +65,53 @@ struct kvm_vcpu_aia {
/* CPU AIA CSR context upon Guest VCPU reset */
struct kvm_vcpu_aia_csr guest_reset_csr;
+
+ /* Guest physical address of IMSIC for this VCPU */
+ gpa_t imsic_addr;
+
+ /* HART index of IMSIC extacted from guest physical address */
+ u32 hart_index;
+
+ /* Internal state of IMSIC for this VCPU */
+ void *imsic_state;
};
+#define KVM_RISCV_AIA_UNDEF_ADDR (-1)
+
#define kvm_riscv_aia_initialized(k) ((k)->arch.aia.initialized)
#define irqchip_in_kernel(k) ((k)->arch.aia.in_kernel)
+extern unsigned int kvm_riscv_aia_nr_hgei;
+extern unsigned int kvm_riscv_aia_max_ids;
DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
#define kvm_riscv_aia_available() \
static_branch_unlikely(&kvm_riscv_aia_available)
+extern struct kvm_device_ops kvm_riscv_aia_device_ops;
+
+void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu);
+
#define KVM_RISCV_AIA_IMSIC_TOPEI (ISELECT_MASK + 1)
-static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu,
- unsigned long isel,
- unsigned long *val,
- unsigned long new_val,
- unsigned long wr_mask)
-{
- return 0;
-}
+int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
+ unsigned long *val, unsigned long new_val,
+ unsigned long wr_mask);
+int kvm_riscv_aia_imsic_rw_attr(struct kvm *kvm, unsigned long type,
+ bool write, unsigned long *val);
+int kvm_riscv_aia_imsic_has_attr(struct kvm *kvm, unsigned long type);
+void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
+ u32 guest_index, u32 offset, u32 iid);
+int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu);
+
+int kvm_riscv_aia_aplic_set_attr(struct kvm *kvm, unsigned long type, u32 v);
+int kvm_riscv_aia_aplic_get_attr(struct kvm *kvm, unsigned long type, u32 *v);
+int kvm_riscv_aia_aplic_has_attr(struct kvm *kvm, unsigned long type);
+int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level);
+int kvm_riscv_aia_aplic_init(struct kvm *kvm);
+void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm);
#ifdef CONFIG_32BIT
void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu);
@@ -93,31 +148,23 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
{ .base = CSR_SIREG, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_ireg }, \
{ .base = CSR_STOPEI, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_topei },
-static inline int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu)
-{
- return 1;
-}
-
-static inline void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
+int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu);
-static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
-{
-}
+int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index,
+ u32 guest_index, u32 iid);
+int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
+int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level);
-static inline void kvm_riscv_aia_init_vm(struct kvm *kvm)
-{
-}
+void kvm_riscv_aia_init_vm(struct kvm *kvm);
+void kvm_riscv_aia_destroy_vm(struct kvm *kvm);
-static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
-{
-}
+int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
+ void __iomem **hgei_va, phys_addr_t *hgei_pa);
+void kvm_riscv_aia_free_hgei(int cpu, int hgei);
+void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable);
void kvm_riscv_aia_enable(void);
void kvm_riscv_aia_disable(void);
diff --git a/arch/riscv/include/asm/kvm_aia_aplic.h b/arch/riscv/include/asm/kvm_aia_aplic.h
new file mode 100644
index 000000000000..6dd1a4809ec1
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_aia_aplic.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ */
+#ifndef __KVM_RISCV_AIA_IMSIC_H
+#define __KVM_RISCV_AIA_IMSIC_H
+
+#include <linux/bitops.h>
+
+#define APLIC_MAX_IDC BIT(14)
+#define APLIC_MAX_SOURCE 1024
+
+#define APLIC_DOMAINCFG 0x0000
+#define APLIC_DOMAINCFG_RDONLY 0x80000000
+#define APLIC_DOMAINCFG_IE BIT(8)
+#define APLIC_DOMAINCFG_DM BIT(2)
+#define APLIC_DOMAINCFG_BE BIT(0)
+
+#define APLIC_SOURCECFG_BASE 0x0004
+#define APLIC_SOURCECFG_D BIT(10)
+#define APLIC_SOURCECFG_CHILDIDX_MASK 0x000003ff
+#define APLIC_SOURCECFG_SM_MASK 0x00000007
+#define APLIC_SOURCECFG_SM_INACTIVE 0x0
+#define APLIC_SOURCECFG_SM_DETACH 0x1
+#define APLIC_SOURCECFG_SM_EDGE_RISE 0x4
+#define APLIC_SOURCECFG_SM_EDGE_FALL 0x5
+#define APLIC_SOURCECFG_SM_LEVEL_HIGH 0x6
+#define APLIC_SOURCECFG_SM_LEVEL_LOW 0x7
+
+#define APLIC_IRQBITS_PER_REG 32
+
+#define APLIC_SETIP_BASE 0x1c00
+#define APLIC_SETIPNUM 0x1cdc
+
+#define APLIC_CLRIP_BASE 0x1d00
+#define APLIC_CLRIPNUM 0x1ddc
+
+#define APLIC_SETIE_BASE 0x1e00
+#define APLIC_SETIENUM 0x1edc
+
+#define APLIC_CLRIE_BASE 0x1f00
+#define APLIC_CLRIENUM 0x1fdc
+
+#define APLIC_SETIPNUM_LE 0x2000
+#define APLIC_SETIPNUM_BE 0x2004
+
+#define APLIC_GENMSI 0x3000
+
+#define APLIC_TARGET_BASE 0x3004
+#define APLIC_TARGET_HART_IDX_SHIFT 18
+#define APLIC_TARGET_HART_IDX_MASK 0x3fff
+#define APLIC_TARGET_GUEST_IDX_SHIFT 12
+#define APLIC_TARGET_GUEST_IDX_MASK 0x3f
+#define APLIC_TARGET_IPRIO_MASK 0xff
+#define APLIC_TARGET_EIID_MASK 0x7ff
+
+#endif
diff --git a/arch/riscv/include/asm/kvm_aia_imsic.h b/arch/riscv/include/asm/kvm_aia_imsic.h
new file mode 100644
index 000000000000..da5881d2bde0
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_aia_imsic.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ */
+#ifndef __KVM_RISCV_AIA_IMSIC_H
+#define __KVM_RISCV_AIA_IMSIC_H
+
+#include <linux/types.h>
+#include <asm/csr.h>
+
+#define IMSIC_MMIO_PAGE_SHIFT 12
+#define IMSIC_MMIO_PAGE_SZ (1UL << IMSIC_MMIO_PAGE_SHIFT)
+#define IMSIC_MMIO_PAGE_LE 0x00
+#define IMSIC_MMIO_PAGE_BE 0x04
+
+#define IMSIC_MIN_ID 63
+#define IMSIC_MAX_ID 2048
+
+#define IMSIC_EIDELIVERY 0x70
+
+#define IMSIC_EITHRESHOLD 0x72
+
+#define IMSIC_EIP0 0x80
+#define IMSIC_EIP63 0xbf
+#define IMSIC_EIPx_BITS 32
+
+#define IMSIC_EIE0 0xc0
+#define IMSIC_EIE63 0xff
+#define IMSIC_EIEx_BITS 32
+
+#define IMSIC_FIRST IMSIC_EIDELIVERY
+#define IMSIC_LAST IMSIC_EIE63
+
+#define IMSIC_MMIO_SETIPNUM_LE 0x00
+#define IMSIC_MMIO_SETIPNUM_BE 0x04
+
+#endif
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index ee0acccb1d3b..2d8ee53b66c7 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <asm/hwcap.h>
#include <asm/kvm_aia.h>
+#include <asm/ptrace.h>
#include <asm/kvm_vcpu_fp.h>
#include <asm/kvm_vcpu_insn.h>
#include <asm/kvm_vcpu_sbi.h>
@@ -27,6 +28,8 @@
#define KVM_VCPU_MAX_FEATURES 0
+#define KVM_IRQCHIP_NUM_PINS 1024
+
#define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
@@ -145,6 +148,7 @@ struct kvm_cpu_context {
unsigned long sstatus;
unsigned long hstatus;
union __riscv_fp_state fp;
+ struct __riscv_v_ext_state vector;
};
struct kvm_vcpu_csr {
@@ -318,6 +322,8 @@ int kvm_riscv_gstage_vmid_init(struct kvm *kvm);
bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid);
void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu);
+int kvm_riscv_setup_default_irq_routing(struct kvm *kvm, u32 lines);
+
void __kvm_riscv_unpriv_trap(void);
unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
index 4278125a38a5..cdcf0ff07be7 100644
--- a/arch/riscv/include/asm/kvm_vcpu_sbi.h
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -14,9 +14,15 @@
#define KVM_SBI_VERSION_MAJOR 1
#define KVM_SBI_VERSION_MINOR 0
+enum kvm_riscv_sbi_ext_status {
+ KVM_RISCV_SBI_EXT_UNINITIALIZED,
+ KVM_RISCV_SBI_EXT_AVAILABLE,
+ KVM_RISCV_SBI_EXT_UNAVAILABLE,
+};
+
struct kvm_vcpu_sbi_context {
int return_handled;
- bool extension_disabled[KVM_RISCV_SBI_EXT_MAX];
+ enum kvm_riscv_sbi_ext_status ext_status[KVM_RISCV_SBI_EXT_MAX];
};
struct kvm_vcpu_sbi_return {
@@ -66,4 +72,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
+#ifdef CONFIG_RISCV_PMU_SBI
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu;
+#endif
#endif /* __RISCV_KVM_VCPU_SBI_H__ */
diff --git a/arch/riscv/include/asm/kvm_vcpu_vector.h b/arch/riscv/include/asm/kvm_vcpu_vector.h
new file mode 100644
index 000000000000..ff994fdd6d0d
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_vector.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 SiFive
+ *
+ * Authors:
+ * Vincent Chen <vincent.chen@sifive.com>
+ * Greentime Hu <greentime.hu@sifive.com>
+ */
+
+#ifndef __KVM_VCPU_RISCV_VECTOR_H
+#define __KVM_VCPU_RISCV_VECTOR_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_RISCV_ISA_V
+#include <asm/vector.h>
+#include <asm/kvm_host.h>
+
+static __always_inline void __kvm_riscv_vector_save(struct kvm_cpu_context *context)
+{
+ __riscv_v_vstate_save(&context->vector, context->vector.datap);
+}
+
+static __always_inline void __kvm_riscv_vector_restore(struct kvm_cpu_context *context)
+{
+ __riscv_v_vstate_restore(&context->vector, context->vector.datap);
+}
+
+void kvm_riscv_vcpu_vector_reset(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_guest_vector_save(struct kvm_cpu_context *cntx,
+ unsigned long *isa);
+void kvm_riscv_vcpu_guest_vector_restore(struct kvm_cpu_context *cntx,
+ unsigned long *isa);
+void kvm_riscv_vcpu_host_vector_save(struct kvm_cpu_context *cntx);
+void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx);
+int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu,
+ struct kvm_cpu_context *cntx);
+void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu);
+#else
+
+struct kvm_cpu_context;
+
+static inline void kvm_riscv_vcpu_vector_reset(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline void kvm_riscv_vcpu_guest_vector_save(struct kvm_cpu_context *cntx,
+ unsigned long *isa)
+{
+}
+
+static inline void kvm_riscv_vcpu_guest_vector_restore(struct kvm_cpu_context *cntx,
+ unsigned long *isa)
+{
+}
+
+static inline void kvm_riscv_vcpu_host_vector_save(struct kvm_cpu_context *cntx)
+{
+}
+
+static inline void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx)
+{
+}
+
+static inline int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu,
+ struct kvm_cpu_context *cntx)
+{
+ return 0;
+}
+
+static inline void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu)
+{
+}
+#endif
+
+int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype);
+int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype);
+#endif
diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
index d42c901f9a97..665bbc9b2f84 100644
--- a/arch/riscv/include/asm/perf_event.h
+++ b/arch/riscv/include/asm/perf_event.h
@@ -10,4 +10,11 @@
#include <linux/perf_event.h>
#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
+
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->epc = (__ip); \
+ (regs)->s0 = (unsigned long) __builtin_frame_address(0); \
+ (regs)->sp = current_stack_pointer; \
+ (regs)->status = SR_PP; \
+}
#endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 2258b27173b0..75970ee2bda2 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -165,8 +165,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
_PAGE_EXEC | _PAGE_WRITE)
#define PAGE_COPY PAGE_READ
-#define PAGE_COPY_EXEC PAGE_EXEC
-#define PAGE_COPY_READ_EXEC PAGE_READ_EXEC
+#define PAGE_COPY_EXEC PAGE_READ_EXEC
#define PAGE_SHARED PAGE_WRITE
#define PAGE_SHARED_EXEC PAGE_WRITE_EXEC
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 94a0590c6971..c950a8d9edef 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -7,6 +7,7 @@
#define _ASM_RISCV_PROCESSOR_H
#include <linux/const.h>
+#include <linux/cache.h>
#include <vdso/processor.h>
@@ -39,6 +40,8 @@ struct thread_struct {
unsigned long s[12]; /* s[0]: frame pointer */
struct __riscv_d_ext_state fstate;
unsigned long bad_cause;
+ unsigned long vstate_ctrl;
+ struct __riscv_v_ext_state vstate;
};
/* Whitelist the fstate from the task_struct for hardened usercopy */
@@ -75,11 +78,22 @@ static inline void wait_for_interrupt(void)
struct device_node;
int riscv_of_processor_hartid(struct device_node *node, unsigned long *hartid);
+int riscv_early_of_processor_hartid(struct device_node *node, unsigned long *hartid);
int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid);
extern void riscv_fill_hwcap(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+extern unsigned long signal_minsigstksz __ro_after_init;
+
+#ifdef CONFIG_RISCV_ISA_V
+/* Userspace interface for PR_RISCV_V_{SET,GET}_VS prctl()s: */
+#define RISCV_V_SET_CONTROL(arg) riscv_v_vstate_ctrl_set_current(arg)
+#define RISCV_V_GET_CONTROL() riscv_v_vstate_ctrl_get_current()
+extern long riscv_v_vstate_ctrl_set_current(unsigned long arg);
+extern long riscv_v_vstate_ctrl_get_current(void);
+#endif /* CONFIG_RISCV_ISA_V */
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_PROCESSOR_H */
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index c4b77017ec58..0d555847cde6 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -70,7 +70,7 @@ asmlinkage void smp_callin(void);
#if defined CONFIG_HOTPLUG_CPU
int __cpu_disable(void);
-void __cpu_die(unsigned int cpu);
+static inline void __cpu_die(unsigned int cpu) { }
#endif /* CONFIG_HOTPLUG_CPU */
#else
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 60f8ca01d36e..a727be723c56 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -8,6 +8,7 @@
#include <linux/jump_label.h>
#include <linux/sched/task_stack.h>
+#include <asm/vector.h>
#include <asm/hwcap.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
@@ -46,7 +47,7 @@ static inline void fstate_restore(struct task_struct *task,
}
}
-static inline void __switch_to_aux(struct task_struct *prev,
+static inline void __switch_to_fpu(struct task_struct *prev,
struct task_struct *next)
{
struct pt_regs *regs;
@@ -66,7 +67,7 @@ static __always_inline bool has_fpu(void)
static __always_inline bool has_fpu(void) { return false; }
#define fstate_save(task, regs) do { } while (0)
#define fstate_restore(task, regs) do { } while (0)
-#define __switch_to_aux(__prev, __next) do { } while (0)
+#define __switch_to_fpu(__prev, __next) do { } while (0)
#endif
extern struct task_struct *__switch_to(struct task_struct *,
@@ -77,7 +78,9 @@ do { \
struct task_struct *__prev = (prev); \
struct task_struct *__next = (next); \
if (has_fpu()) \
- __switch_to_aux(__prev, __next); \
+ __switch_to_fpu(__prev, __next); \
+ if (has_vector()) \
+ __switch_to_vector(__prev, __next); \
((last) = __switch_to(__prev, __next)); \
} while (0)
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index e0d202134b44..1833beb00489 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -11,18 +11,8 @@
#include <asm/page.h>
#include <linux/const.h>
-#ifdef CONFIG_KASAN
-#define KASAN_STACK_ORDER 1
-#else
-#define KASAN_STACK_ORDER 0
-#endif
-
/* thread information allocation */
-#ifdef CONFIG_64BIT
-#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
-#else
-#define THREAD_SIZE_ORDER (1 + KASAN_STACK_ORDER)
-#endif
+#define THREAD_SIZE_ORDER CONFIG_THREAD_SIZE_ORDER
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
/*
@@ -40,6 +30,8 @@
#define OVERFLOW_STACK_SIZE SZ_4K
#define SHADOW_OVERFLOW_STACK_SIZE (1024)
+#define IRQ_STACK_SIZE THREAD_SIZE
+
#ifndef __ASSEMBLY__
extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
@@ -81,6 +73,9 @@ struct thread_info {
.preempt_count = INIT_PREEMPT_COUNT, \
}
+void arch_release_task_struct(struct task_struct *tsk);
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+
#endif /* !__ASSEMBLY__ */
/*
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index d6a7428f6248..a06697846e69 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -88,6 +88,4 @@ static inline int read_current_timer(unsigned long *timer_val)
return 0;
}
-extern void time_init(void);
-
#endif /* _ASM_RISCV_TIMEX_H */
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
new file mode 100644
index 000000000000..3d78930cab51
--- /dev/null
+++ b/arch/riscv/include/asm/vector.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 SiFive
+ */
+
+#ifndef __ASM_RISCV_VECTOR_H
+#define __ASM_RISCV_VECTOR_H
+
+#include <linux/types.h>
+#include <uapi/asm-generic/errno.h>
+
+#ifdef CONFIG_RISCV_ISA_V
+
+#include <linux/stringify.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <asm/ptrace.h>
+#include <asm/hwcap.h>
+#include <asm/csr.h>
+#include <asm/asm.h>
+
+extern unsigned long riscv_v_vsize;
+int riscv_v_setup_vsize(void);
+bool riscv_v_first_use_handler(struct pt_regs *regs);
+
+static __always_inline bool has_vector(void)
+{
+ return riscv_has_extension_unlikely(RISCV_ISA_EXT_v);
+}
+
+static inline void __riscv_v_vstate_clean(struct pt_regs *regs)
+{
+ regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN;
+}
+
+static inline void __riscv_v_vstate_dirty(struct pt_regs *regs)
+{
+ regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY;
+}
+
+static inline void riscv_v_vstate_off(struct pt_regs *regs)
+{
+ regs->status = (regs->status & ~SR_VS) | SR_VS_OFF;
+}
+
+static inline void riscv_v_vstate_on(struct pt_regs *regs)
+{
+ regs->status = (regs->status & ~SR_VS) | SR_VS_INITIAL;
+}
+
+static inline bool riscv_v_vstate_query(struct pt_regs *regs)
+{
+ return (regs->status & SR_VS) != 0;
+}
+
+static __always_inline void riscv_v_enable(void)
+{
+ csr_set(CSR_SSTATUS, SR_VS);
+}
+
+static __always_inline void riscv_v_disable(void)
+{
+ csr_clear(CSR_SSTATUS, SR_VS);
+}
+
+static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
+{
+ asm volatile (
+ "csrr %0, " __stringify(CSR_VSTART) "\n\t"
+ "csrr %1, " __stringify(CSR_VTYPE) "\n\t"
+ "csrr %2, " __stringify(CSR_VL) "\n\t"
+ "csrr %3, " __stringify(CSR_VCSR) "\n\t"
+ : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
+ "=r" (dest->vcsr) : :);
+}
+
+static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
+{
+ asm volatile (
+ ".option push\n\t"
+ ".option arch, +v\n\t"
+ "vsetvl x0, %2, %1\n\t"
+ ".option pop\n\t"
+ "csrw " __stringify(CSR_VSTART) ", %0\n\t"
+ "csrw " __stringify(CSR_VCSR) ", %3\n\t"
+ : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
+ "r" (src->vcsr) :);
+}
+
+static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
+ void *datap)
+{
+ unsigned long vl;
+
+ riscv_v_enable();
+ __vstate_csr_save(save_to);
+ asm volatile (
+ ".option push\n\t"
+ ".option arch, +v\n\t"
+ "vsetvli %0, x0, e8, m8, ta, ma\n\t"
+ "vse8.v v0, (%1)\n\t"
+ "add %1, %1, %0\n\t"
+ "vse8.v v8, (%1)\n\t"
+ "add %1, %1, %0\n\t"
+ "vse8.v v16, (%1)\n\t"
+ "add %1, %1, %0\n\t"
+ "vse8.v v24, (%1)\n\t"
+ ".option pop\n\t"
+ : "=&r" (vl) : "r" (datap) : "memory");
+ riscv_v_disable();
+}
+
+static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_from,
+ void *datap)
+{
+ unsigned long vl;
+
+ riscv_v_enable();
+ asm volatile (
+ ".option push\n\t"
+ ".option arch, +v\n\t"
+ "vsetvli %0, x0, e8, m8, ta, ma\n\t"
+ "vle8.v v0, (%1)\n\t"
+ "add %1, %1, %0\n\t"
+ "vle8.v v8, (%1)\n\t"
+ "add %1, %1, %0\n\t"
+ "vle8.v v16, (%1)\n\t"
+ "add %1, %1, %0\n\t"
+ "vle8.v v24, (%1)\n\t"
+ ".option pop\n\t"
+ : "=&r" (vl) : "r" (datap) : "memory");
+ __vstate_csr_restore(restore_from);
+ riscv_v_disable();
+}
+
+static inline void __riscv_v_vstate_discard(void)
+{
+ unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1);
+
+ riscv_v_enable();
+ asm volatile (
+ ".option push\n\t"
+ ".option arch, +v\n\t"
+ "vsetvli %0, x0, e8, m8, ta, ma\n\t"
+ "vmv.v.i v0, -1\n\t"
+ "vmv.v.i v8, -1\n\t"
+ "vmv.v.i v16, -1\n\t"
+ "vmv.v.i v24, -1\n\t"
+ "vsetvl %0, x0, %1\n\t"
+ ".option pop\n\t"
+ : "=&r" (vl) : "r" (vtype_inval) : "memory");
+ riscv_v_disable();
+}
+
+static inline void riscv_v_vstate_discard(struct pt_regs *regs)
+{
+ if ((regs->status & SR_VS) == SR_VS_OFF)
+ return;
+
+ __riscv_v_vstate_discard();
+ __riscv_v_vstate_dirty(regs);
+}
+
+static inline void riscv_v_vstate_save(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ if ((regs->status & SR_VS) == SR_VS_DIRTY) {
+ struct __riscv_v_ext_state *vstate = &task->thread.vstate;
+
+ __riscv_v_vstate_save(vstate, vstate->datap);
+ __riscv_v_vstate_clean(regs);
+ }
+}
+
+static inline void riscv_v_vstate_restore(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ if ((regs->status & SR_VS) != SR_VS_OFF) {
+ struct __riscv_v_ext_state *vstate = &task->thread.vstate;
+
+ __riscv_v_vstate_restore(vstate, vstate->datap);
+ __riscv_v_vstate_clean(regs);
+ }
+}
+
+static inline void __switch_to_vector(struct task_struct *prev,
+ struct task_struct *next)
+{
+ struct pt_regs *regs;
+
+ regs = task_pt_regs(prev);
+ riscv_v_vstate_save(prev, regs);
+ riscv_v_vstate_restore(next, task_pt_regs(next));
+}
+
+void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
+bool riscv_v_vstate_ctrl_user_allowed(void);
+
+#else /* ! CONFIG_RISCV_ISA_V */
+
+struct pt_regs;
+
+static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; }
+static __always_inline bool has_vector(void) { return false; }
+static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; }
+static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
+static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
+#define riscv_v_vsize (0)
+#define riscv_v_vstate_discard(regs) do {} while (0)
+#define riscv_v_vstate_save(task, regs) do {} while (0)
+#define riscv_v_vstate_restore(task, regs) do {} while (0)
+#define __switch_to_vector(__prev, __next) do {} while (0)
+#define riscv_v_vstate_off(regs) do {} while (0)
+#define riscv_v_vstate_on(regs) do {} while (0)
+
+#endif /* CONFIG_RISCV_ISA_V */
+
+#endif /* ! __ASM_RISCV_VECTOR_H */
diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h
index fb187a33ce58..10aaa83db89e 100644
--- a/arch/riscv/include/uapi/asm/auxvec.h
+++ b/arch/riscv/include/uapi/asm/auxvec.h
@@ -35,5 +35,6 @@
/* entries in ARCH_DLINFO */
#define AT_VECTOR_SIZE_ARCH 9
+#define AT_MINSIGSTKSZ 51
#endif /* _UAPI_ASM_RISCV_AUXVEC_H */
diff --git a/arch/riscv/include/uapi/asm/bitsperlong.h b/arch/riscv/include/uapi/asm/bitsperlong.h
deleted file mode 100644
index 7d0b32e3b701..000000000000
--- a/arch/riscv/include/uapi/asm/bitsperlong.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Copyright (C) 2015 Regents of the University of California
- */
-
-#ifndef _UAPI_ASM_RISCV_BITSPERLONG_H
-#define _UAPI_ASM_RISCV_BITSPERLONG_H
-
-#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
-
-#include <asm-generic/bitsperlong.h>
-
-#endif /* _UAPI_ASM_RISCV_BITSPERLONG_H */
diff --git a/arch/riscv/include/uapi/asm/hwcap.h b/arch/riscv/include/uapi/asm/hwcap.h
index 46dc3f5ee99f..c52bb7bbbabe 100644
--- a/arch/riscv/include/uapi/asm/hwcap.h
+++ b/arch/riscv/include/uapi/asm/hwcap.h
@@ -21,5 +21,6 @@
#define COMPAT_HWCAP_ISA_F (1 << ('F' - 'A'))
#define COMPAT_HWCAP_ISA_D (1 << ('D' - 'A'))
#define COMPAT_HWCAP_ISA_C (1 << ('C' - 'A'))
+#define COMPAT_HWCAP_ISA_V (1 << ('V' - 'A'))
#endif /* _UAPI_ASM_RISCV_HWCAP_H */
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 8d745a4ad8a2..006bfb48343d 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -25,6 +25,10 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
#define RISCV_HWPROBE_IMA_FD (1 << 0)
#define RISCV_HWPROBE_IMA_C (1 << 1)
+#define RISCV_HWPROBE_IMA_V (1 << 2)
+#define RISCV_HWPROBE_EXT_ZBA (1 << 3)
+#define RISCV_HWPROBE_EXT_ZBB (1 << 4)
+#define RISCV_HWPROBE_EXT_ZBS (1 << 5)
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index f92790c9481a..930fdc4101cd 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -15,6 +15,7 @@
#include <asm/bitsperlong.h>
#include <asm/ptrace.h>
+#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_READONLY_MEM
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -121,6 +122,8 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZICBOZ,
KVM_RISCV_ISA_EXT_ZBB,
KVM_RISCV_ISA_EXT_SSAIA,
+ KVM_RISCV_ISA_EXT_V,
+ KVM_RISCV_ISA_EXT_SVNAPOT,
KVM_RISCV_ISA_EXT_MAX,
};
@@ -203,6 +206,84 @@ enum KVM_RISCV_SBI_EXT_ID {
#define KVM_REG_RISCV_SBI_MULTI_REG_LAST \
KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1)
+/* V extension registers are mapped as type 9 */
+#define KVM_REG_RISCV_VECTOR (0x09 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_VECTOR_CSR_REG(name) \
+ (offsetof(struct __riscv_v_ext_state, name) / sizeof(unsigned long))
+#define KVM_REG_RISCV_VECTOR_REG(n) \
+ ((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long))
+
+/* Device Control API: RISC-V AIA */
+#define KVM_DEV_RISCV_APLIC_ALIGN 0x1000
+#define KVM_DEV_RISCV_APLIC_SIZE 0x4000
+#define KVM_DEV_RISCV_APLIC_MAX_HARTS 0x4000
+#define KVM_DEV_RISCV_IMSIC_ALIGN 0x1000
+#define KVM_DEV_RISCV_IMSIC_SIZE 0x1000
+
+#define KVM_DEV_RISCV_AIA_GRP_CONFIG 0
+#define KVM_DEV_RISCV_AIA_CONFIG_MODE 0
+#define KVM_DEV_RISCV_AIA_CONFIG_IDS 1
+#define KVM_DEV_RISCV_AIA_CONFIG_SRCS 2
+#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS 3
+#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT 4
+#define KVM_DEV_RISCV_AIA_CONFIG_HART_BITS 5
+#define KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS 6
+
+/*
+ * Modes of RISC-V AIA device:
+ * 1) EMUL (aka Emulation): Trap-n-emulate IMSIC
+ * 2) HWACCEL (aka HW Acceleration): Virtualize IMSIC using IMSIC guest files
+ * 3) AUTO (aka Automatic): Virtualize IMSIC using IMSIC guest files whenever
+ * available otherwise fallback to trap-n-emulation
+ */
+#define KVM_DEV_RISCV_AIA_MODE_EMUL 0
+#define KVM_DEV_RISCV_AIA_MODE_HWACCEL 1
+#define KVM_DEV_RISCV_AIA_MODE_AUTO 2
+
+#define KVM_DEV_RISCV_AIA_IDS_MIN 63
+#define KVM_DEV_RISCV_AIA_IDS_MAX 2048
+#define KVM_DEV_RISCV_AIA_SRCS_MAX 1024
+#define KVM_DEV_RISCV_AIA_GROUP_BITS_MAX 8
+#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN 24
+#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX 56
+#define KVM_DEV_RISCV_AIA_HART_BITS_MAX 16
+#define KVM_DEV_RISCV_AIA_GUEST_BITS_MAX 8
+
+#define KVM_DEV_RISCV_AIA_GRP_ADDR 1
+#define KVM_DEV_RISCV_AIA_ADDR_APLIC 0
+#define KVM_DEV_RISCV_AIA_ADDR_IMSIC(__vcpu) (1 + (__vcpu))
+#define KVM_DEV_RISCV_AIA_ADDR_MAX \
+ (1 + KVM_DEV_RISCV_APLIC_MAX_HARTS)
+
+#define KVM_DEV_RISCV_AIA_GRP_CTRL 2
+#define KVM_DEV_RISCV_AIA_CTRL_INIT 0
+
+/*
+ * The device attribute type contains the memory mapped offset of the
+ * APLIC register (range 0x0000-0x3FFF) and it must be 4-byte aligned.
+ */
+#define KVM_DEV_RISCV_AIA_GRP_APLIC 3
+
+/*
+ * The lower 12-bits of the device attribute type contains the iselect
+ * value of the IMSIC register (range 0x70-0xFF) whereas the higher order
+ * bits contains the VCPU id.
+ */
+#define KVM_DEV_RISCV_AIA_GRP_IMSIC 4
+#define KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS 12
+#define KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK \
+ ((1U << KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS) - 1)
+#define KVM_DEV_RISCV_AIA_IMSIC_MKATTR(__vcpu, __isel) \
+ (((__vcpu) << KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS) | \
+ ((__isel) & KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK))
+#define KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(__attr) \
+ ((__attr) & KVM_DEV_RISCV_AIA_IMSIC_ISEL_MASK)
+#define KVM_DEV_RISCV_AIA_IMSIC_GET_VCPU(__attr) \
+ ((__attr) >> KVM_DEV_RISCV_AIA_IMSIC_ISEL_BITS)
+
+/* One single KVM irqchip, ie. the AIA */
+#define KVM_NR_IRQCHIPS 1
+
#endif
#endif /* __LINUX_KVM_RISCV_H */
diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h
index 882547f6bd5c..e17c550986a6 100644
--- a/arch/riscv/include/uapi/asm/ptrace.h
+++ b/arch/riscv/include/uapi/asm/ptrace.h
@@ -71,12 +71,51 @@ struct __riscv_q_ext_state {
__u32 reserved[3];
};
+struct __riscv_ctx_hdr {
+ __u32 magic;
+ __u32 size;
+};
+
+struct __riscv_extra_ext_header {
+ __u32 __padding[129] __attribute__((aligned(16)));
+ /*
+ * Reserved for expansion of sigcontext structure. Currently zeroed
+ * upon signal, and must be zero upon sigreturn.
+ */
+ __u32 reserved;
+ struct __riscv_ctx_hdr hdr;
+};
+
union __riscv_fp_state {
struct __riscv_f_ext_state f;
struct __riscv_d_ext_state d;
struct __riscv_q_ext_state q;
};
+struct __riscv_v_ext_state {
+ unsigned long vstart;
+ unsigned long vl;
+ unsigned long vtype;
+ unsigned long vcsr;
+ void *datap;
+ /*
+ * In signal handler, datap will be set a correct user stack offset
+ * and vector registers will be copied to the address of datap
+ * pointer.
+ *
+ * In ptrace syscall, datap will be set to zero and the vector
+ * registers will be copied to the address right after this
+ * structure.
+ */
+};
+
+/*
+ * According to spec: The number of bits in a single vector register,
+ * VLEN >= ELEN, which must be a power of 2, and must be no greater than
+ * 2^16 = 65536bits = 8192bytes
+ */
+#define RISCV_MAX_VLENB (8192)
+
#endif /* __ASSEMBLY__ */
#endif /* _UAPI_ASM_RISCV_PTRACE_H */
diff --git a/arch/riscv/include/uapi/asm/sigcontext.h b/arch/riscv/include/uapi/asm/sigcontext.h
index 84f2dfcfdbce..8c8712aa9551 100644
--- a/arch/riscv/include/uapi/asm/sigcontext.h
+++ b/arch/riscv/include/uapi/asm/sigcontext.h
@@ -8,6 +8,19 @@
#include <asm/ptrace.h>
+/* The Magic number for signal context frame header. */
+#define RISCV_V_MAGIC 0x53465457
+#define END_MAGIC 0x0
+
+/* The size of END signal context header. */
+#define END_HDR_SIZE 0x0
+
+#ifndef __ASSEMBLY__
+
+struct __sc_riscv_v_state {
+ struct __riscv_v_ext_state v_state;
+} __attribute__((aligned(16)));
+
/*
* Signal context structure
*
@@ -16,7 +29,12 @@
*/
struct sigcontext {
struct user_regs_struct sc_regs;
- union __riscv_fp_state sc_fpregs;
+ union {
+ union __riscv_fp_state sc_fpregs;
+ struct __riscv_extra_ext_header sc_extdesc;
+ };
};
+#endif /*!__ASSEMBLY__*/
+
#endif /* _UAPI_ASM_RISCV_SIGCONTEXT_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index fbdccc21418a..506cc4a9a45a 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -23,6 +23,10 @@ ifdef CONFIG_FTRACE
CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE)
endif
+ifdef CONFIG_RELOCATABLE
+CFLAGS_alternative.o += -fno-pie
+CFLAGS_cpufeature.o += -fno-pie
+endif
ifdef CONFIG_KASAN
KASAN_SANITIZE_alternative.o := n
KASAN_SANITIZE_cpufeature.o := n
@@ -56,6 +60,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o
obj-$(CONFIG_FPU) += fpu.o
+obj-$(CONFIG_RISCV_ISA_V) += vector.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += cpu_ops.o
@@ -92,3 +97,4 @@ obj-$(CONFIG_COMPAT) += compat_signal.o
obj-$(CONFIG_COMPAT) += compat_vdso/
obj-$(CONFIG_64BIT) += pi/
+obj-$(CONFIG_ACPI) += acpi.o
diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c
new file mode 100644
index 000000000000..5ee03ebab80e
--- /dev/null
+++ b/arch/riscv/kernel/acpi.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RISC-V Specific Low-Level ACPI Boot Support
+ *
+ * Copyright (C) 2013-2014, Linaro Ltd.
+ * Author: Al Stone <al.stone@linaro.org>
+ * Author: Graeme Gregory <graeme.gregory@linaro.org>
+ * Author: Hanjun Guo <hanjun.guo@linaro.org>
+ * Author: Tomasz Nowicki <tomasz.nowicki@linaro.org>
+ * Author: Naresh Bhat <naresh.bhat@linaro.org>
+ *
+ * Copyright (C) 2021-2023, Ventana Micro Systems Inc.
+ * Author: Sunil V L <sunilvl@ventanamicro.com>
+ */
+
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/efi.h>
+
+int acpi_noirq = 1; /* skip ACPI IRQ initialization */
+int acpi_disabled = 1;
+EXPORT_SYMBOL(acpi_disabled);
+
+int acpi_pci_disabled = 1; /* skip ACPI PCI scan and IRQ initialization */
+EXPORT_SYMBOL(acpi_pci_disabled);
+
+static bool param_acpi_off __initdata;
+static bool param_acpi_on __initdata;
+static bool param_acpi_force __initdata;
+
+static struct acpi_madt_rintc cpu_madt_rintc[NR_CPUS];
+
+static int __init parse_acpi(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ /* "acpi=off" disables both ACPI table parsing and interpreter */
+ if (strcmp(arg, "off") == 0)
+ param_acpi_off = true;
+ else if (strcmp(arg, "on") == 0) /* prefer ACPI over DT */
+ param_acpi_on = true;
+ else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
+ param_acpi_force = true;
+ else
+ return -EINVAL; /* Core will print when we return error */
+
+ return 0;
+}
+early_param("acpi", parse_acpi);
+
+/*
+ * acpi_fadt_sanity_check() - Check FADT presence and carry out sanity
+ * checks on it
+ *
+ * Return 0 on success, <0 on failure
+ */
+static int __init acpi_fadt_sanity_check(void)
+{
+ struct acpi_table_header *table;
+ struct acpi_table_fadt *fadt;
+ acpi_status status;
+ int ret = 0;
+
+ /*
+ * FADT is required on riscv; retrieve it to check its presence
+ * and carry out revision and ACPI HW reduced compliancy tests
+ */
+ status = acpi_get_table(ACPI_SIG_FADT, 0, &table);
+ if (ACPI_FAILURE(status)) {
+ const char *msg = acpi_format_exception(status);
+
+ pr_err("Failed to get FADT table, %s\n", msg);
+ return -ENODEV;
+ }
+
+ fadt = (struct acpi_table_fadt *)table;
+
+ /*
+ * The revision in the table header is the FADT's Major revision. The
+ * FADT also has a minor revision, which is stored in the FADT itself.
+ *
+ * TODO: Currently, we check for 6.5 as the minimum version to check
+ * for HW_REDUCED flag. However, once RISC-V updates are released in
+ * the ACPI spec, we need to update this check for exact minor revision
+ */
+ if (table->revision < 6 || (table->revision == 6 && fadt->minor_revision < 5))
+ pr_err(FW_BUG "Unsupported FADT revision %d.%d, should be 6.5+\n",
+ table->revision, fadt->minor_revision);
+
+ if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) {
+ pr_err("FADT not ACPI hardware reduced compliant\n");
+ ret = -EINVAL;
+ }
+
+ /*
+ * acpi_get_table() creates FADT table mapping that
+ * should be released after parsing and before resuming boot
+ */
+ acpi_put_table(table);
+ return ret;
+}
+
+/*
+ * acpi_boot_table_init() called from setup_arch(), always.
+ * 1. find RSDP and get its address, and then find XSDT
+ * 2. extract all tables and checksums them all
+ * 3. check ACPI FADT HW reduced flag
+ *
+ * We can parse ACPI boot-time tables such as MADT after
+ * this function is called.
+ *
+ * On return ACPI is enabled if either:
+ *
+ * - ACPI tables are initialized and sanity checks passed
+ * - acpi=force was passed in the command line and ACPI was not disabled
+ * explicitly through acpi=off command line parameter
+ *
+ * ACPI is disabled on function return otherwise
+ */
+void __init acpi_boot_table_init(void)
+{
+ /*
+ * Enable ACPI instead of device tree unless
+ * - ACPI has been disabled explicitly (acpi=off), or
+ * - firmware has not populated ACPI ptr in EFI system table
+ * and ACPI has not been [force] enabled (acpi=on|force)
+ */
+ if (param_acpi_off ||
+ (!param_acpi_on && !param_acpi_force &&
+ efi.acpi20 == EFI_INVALID_TABLE_ADDR))
+ return;
+
+ /*
+ * ACPI is disabled at this point. Enable it in order to parse
+ * the ACPI tables and carry out sanity checks
+ */
+ enable_acpi();
+
+ /*
+ * If ACPI tables are initialized and FADT sanity checks passed,
+ * leave ACPI enabled and carry on booting; otherwise disable ACPI
+ * on initialization error.
+ * If acpi=force was passed on the command line it forces ACPI
+ * to be enabled even if its initialization failed.
+ */
+ if (acpi_table_init() || acpi_fadt_sanity_check()) {
+ pr_err("Failed to init ACPI tables\n");
+ if (!param_acpi_force)
+ disable_acpi();
+ }
+}
+
+static int acpi_parse_madt_rintc(union acpi_subtable_headers *header, const unsigned long end)
+{
+ struct acpi_madt_rintc *rintc = (struct acpi_madt_rintc *)header;
+ int cpuid;
+
+ if (!(rintc->flags & ACPI_MADT_ENABLED))
+ return 0;
+
+ cpuid = riscv_hartid_to_cpuid(rintc->hart_id);
+ /*
+ * When CONFIG_SMP is disabled, mapping won't be created for
+ * all cpus.
+ * CPUs more than num_possible_cpus, will be ignored.
+ */
+ if (cpuid >= 0 && cpuid < num_possible_cpus())
+ cpu_madt_rintc[cpuid] = *rintc;
+
+ return 0;
+}
+
+/*
+ * Instead of parsing (and freeing) the ACPI table, cache
+ * the RINTC structures since they are frequently used
+ * like in cpuinfo.
+ */
+void __init acpi_init_rintc_map(void)
+{
+ if (acpi_table_parse_madt(ACPI_MADT_TYPE_RINTC, acpi_parse_madt_rintc, 0) <= 0) {
+ pr_err("No valid RINTC entries exist\n");
+ BUG();
+ }
+}
+
+struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu)
+{
+ return &cpu_madt_rintc[cpu];
+}
+
+u32 get_acpi_id_for_cpu(int cpu)
+{
+ return acpi_cpu_get_madt_rintc(cpu)->uid;
+}
+
+/*
+ * __acpi_map_table() will be called before paging_init(), so early_ioremap()
+ * or early_memremap() should be called here to for ACPI table mapping.
+ */
+void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size)
+{
+ if (!size)
+ return NULL;
+
+ return early_ioremap(phys, size);
+}
+
+void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
+{
+ if (!map || !size)
+ return;
+
+ early_iounmap(map, size);
+}
+
+void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
+{
+ return memremap(phys, size, MEMREMAP_WB);
+}
+
+#ifdef CONFIG_PCI
+
+/*
+ * These interfaces are defined just to enable building ACPI core.
+ * TODO: Update it with actual implementation when external interrupt
+ * controller support is added in RISC-V ACPI.
+ */
+int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 *val)
+{
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 val)
+{
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
+{
+ return -1;
+}
+
+struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
+{
+ return NULL;
+}
+#endif /* CONFIG_PCI */
diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
index a941adc7cbf2..457a18efcb11 100644
--- a/arch/riscv/kernel/cpu-hotplug.c
+++ b/arch/riscv/kernel/cpu-hotplug.c
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/irq.h>
+#include <linux/cpuhotplug.h>
#include <linux/cpu.h>
#include <linux/sched/hotplug.h>
#include <asm/irq.h>
@@ -49,17 +50,15 @@ int __cpu_disable(void)
return ret;
}
+#ifdef CONFIG_HOTPLUG_CPU
/*
- * Called on the thread which is asking for a CPU to be shutdown.
+ * Called on the thread which is asking for a CPU to be shutdown, if the
+ * CPU reported dead to the hotplug core.
*/
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
int ret = 0;
- if (!cpu_wait_death(cpu, 5)) {
- pr_err("CPU %u: didn't die\n", cpu);
- return;
- }
pr_notice("CPU%u: off\n", cpu);
/* Verify from the firmware if the cpu is really stopped*/
@@ -76,9 +75,10 @@ void __noreturn arch_cpu_idle_dead(void)
{
idle_task_exit();
- (void)cpu_report_death();
+ cpuhp_ap_report_dead();
cpu_ops[smp_processor_id()]->cpu_stop();
/* It should never reach here */
BUG();
}
+#endif
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index c96aa56cf1c7..a2fc952318e9 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -3,10 +3,13 @@
* Copyright (C) 2012 Regents of the University of California
*/
+#include <linux/acpi.h>
#include <linux/cpu.h>
+#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/of.h>
+#include <asm/acpi.h>
#include <asm/cpufeature.h>
#include <asm/csr.h>
#include <asm/hwcap.h>
@@ -20,6 +23,26 @@
*/
int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart)
{
+ int cpu;
+
+ *hart = (unsigned long)of_get_cpu_hwid(node, 0);
+ if (*hart == ~0UL) {
+ pr_warn("Found CPU without hart ID\n");
+ return -ENODEV;
+ }
+
+ cpu = riscv_hartid_to_cpuid(*hart);
+ if (cpu < 0)
+ return cpu;
+
+ if (!cpu_possible(cpu))
+ return -ENODEV;
+
+ return 0;
+}
+
+int riscv_early_of_processor_hartid(struct device_node *node, unsigned long *hart)
+{
const char *isa;
if (!of_device_is_compatible(node, "riscv")) {
@@ -27,7 +50,7 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart)
return -ENODEV;
}
- *hart = (unsigned long) of_get_cpu_hwid(node, 0);
+ *hart = (unsigned long)of_get_cpu_hwid(node, 0);
if (*hart == ~0UL) {
pr_warn("Found CPU without hart ID\n");
return -ENODEV;
@@ -42,10 +65,12 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart)
pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart);
return -ENODEV;
}
- if (isa[0] != 'r' || isa[1] != 'v') {
- pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa);
+
+ if (IS_ENABLED(CONFIG_32BIT) && strncasecmp(isa, "rv32ima", 7))
+ return -ENODEV;
+
+ if (IS_ENABLED(CONFIG_64BIT) && strncasecmp(isa, "rv64ima", 7))
return -ENODEV;
- }
return 0;
}
@@ -183,8 +208,14 @@ arch_initcall(riscv_cpuinfo_init);
static struct riscv_isa_ext_data isa_ext_arr[] = {
__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
__RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
+ __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
+ __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
+ __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI),
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
+ __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
+ __RISCV_ISA_EXT_DATA(zba, RISCV_ISA_EXT_ZBA),
__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
+ __RISCV_ISA_EXT_DATA(zbs, RISCV_ISA_EXT_ZBS),
__RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA),
__RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA),
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
@@ -283,23 +314,35 @@ static void c_stop(struct seq_file *m, void *v)
static int c_show(struct seq_file *m, void *v)
{
unsigned long cpu_id = (unsigned long)v - 1;
- struct device_node *node = of_get_cpu_node(cpu_id, NULL);
struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id);
+ struct device_node *node;
const char *compat, *isa;
seq_printf(m, "processor\t: %lu\n", cpu_id);
seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
- if (!of_property_read_string(node, "riscv,isa", &isa))
- print_isa(m, isa);
- print_mmu(m);
- if (!of_property_read_string(node, "compatible", &compat)
- && strcmp(compat, "riscv"))
- seq_printf(m, "uarch\t\t: %s\n", compat);
+
+ if (acpi_disabled) {
+ node = of_get_cpu_node(cpu_id, NULL);
+ if (!of_property_read_string(node, "riscv,isa", &isa))
+ print_isa(m, isa);
+
+ print_mmu(m);
+ if (!of_property_read_string(node, "compatible", &compat) &&
+ strcmp(compat, "riscv"))
+ seq_printf(m, "uarch\t\t: %s\n", compat);
+
+ of_node_put(node);
+ } else {
+ if (!acpi_get_riscv_isa(NULL, cpu_id, &isa))
+ print_isa(m, isa);
+
+ print_mmu(m);
+ }
+
seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid);
seq_printf(m, "marchid\t\t: 0x%lx\n", ci->marchid);
seq_printf(m, "mimpid\t\t: 0x%lx\n", ci->mimpid);
seq_puts(m, "\n");
- of_node_put(node);
return 0;
}
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index b1d6b7e4b829..a8f66c015229 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -6,18 +6,22 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/acpi.h>
#include <linux/bitmap.h>
#include <linux/ctype.h>
#include <linux/log2.h>
#include <linux/memory.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_device.h>
+#include <asm/acpi.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/hwcap.h>
#include <asm/patch.h>
#include <asm/processor.h>
+#include <asm/vector.h>
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
@@ -26,6 +30,9 @@ unsigned long elf_hwcap __read_mostly;
/* Host ISA bitmap */
static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
+/* Per-cpu ISA extensions. */
+struct riscv_isainfo hart_isa[NR_CPUS];
+
/* Performance information */
DEFINE_PER_CPU(long, misaligned_access_speed);
@@ -71,10 +78,10 @@ static bool riscv_isa_extension_check(int id)
switch (id) {
case RISCV_ISA_EXT_ZICBOM:
if (!riscv_cbom_block_size) {
- pr_err("Zicbom detected in ISA string, but no cbom-block-size found\n");
+ pr_err("Zicbom detected in ISA string, disabling as no cbom-block-size found\n");
return false;
} else if (!is_power_of_2(riscv_cbom_block_size)) {
- pr_err("cbom-block-size present, but is not a power-of-2\n");
+ pr_err("Zicbom disabled as cbom-block-size present, but is not a power-of-2\n");
return false;
}
return true;
@@ -99,7 +106,9 @@ void __init riscv_fill_hwcap(void)
char print_str[NUM_ALPHA_EXTS + 1];
int i, j, rc;
unsigned long isa2hwcap[26] = {0};
- unsigned long hartid;
+ struct acpi_table_header *rhct;
+ acpi_status status;
+ unsigned int cpu;
isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
@@ -107,45 +116,59 @@ void __init riscv_fill_hwcap(void)
isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F;
isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D;
isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C;
+ isa2hwcap['v' - 'a'] = COMPAT_HWCAP_ISA_V;
elf_hwcap = 0;
bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX);
- for_each_of_cpu_node(node) {
+ if (!acpi_disabled) {
+ status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct);
+ if (ACPI_FAILURE(status))
+ return;
+ }
+
+ for_each_possible_cpu(cpu) {
+ struct riscv_isainfo *isainfo = &hart_isa[cpu];
unsigned long this_hwcap = 0;
- DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX);
- const char *temp;
- rc = riscv_of_processor_hartid(node, &hartid);
- if (rc < 0)
- continue;
+ if (acpi_disabled) {
+ node = of_cpu_device_node_get(cpu);
+ if (!node) {
+ pr_warn("Unable to find cpu node\n");
+ continue;
+ }
- if (of_property_read_string(node, "riscv,isa", &isa)) {
- pr_warn("Unable to find \"riscv,isa\" devicetree entry\n");
- continue;
+ rc = of_property_read_string(node, "riscv,isa", &isa);
+ of_node_put(node);
+ if (rc) {
+ pr_warn("Unable to find \"riscv,isa\" devicetree entry\n");
+ continue;
+ }
+ } else {
+ rc = acpi_get_riscv_isa(rhct, cpu, &isa);
+ if (rc < 0) {
+ pr_warn("Unable to get ISA for the hart - %d\n", cpu);
+ continue;
+ }
}
- temp = isa;
-#if IS_ENABLED(CONFIG_32BIT)
- if (!strncmp(isa, "rv32", 4))
- isa += 4;
-#elif IS_ENABLED(CONFIG_64BIT)
- if (!strncmp(isa, "rv64", 4))
- isa += 4;
-#endif
- /* The riscv,isa DT property must start with rv64 or rv32 */
- if (temp == isa)
- continue;
- bitmap_zero(this_isa, RISCV_ISA_EXT_MAX);
- for (; *isa; ++isa) {
+ /*
+ * For all possible cpus, we have already validated in
+ * the boot process that they at least contain "rv" and
+ * whichever of "32"/"64" this kernel supports, and so this
+ * section can be skipped.
+ */
+ isa += 4;
+
+ while (*isa) {
const char *ext = isa++;
const char *ext_end = isa;
bool ext_long = false, ext_err = false;
switch (*ext) {
case 's':
- /**
+ /*
* Workaround for invalid single-letter 's' & 'u'(QEMU).
* No need to set the bit in riscv_isa as 's' & 'u' are
* not valid ISA extensions. It works until multi-letter
@@ -157,75 +180,123 @@ void __init riscv_fill_hwcap(void)
break;
}
fallthrough;
+ case 'S':
case 'x':
+ case 'X':
case 'z':
+ case 'Z':
+ /*
+ * Before attempting to parse the extension itself, we find its end.
+ * As multi-letter extensions must be split from other multi-letter
+ * extensions with an "_", the end of a multi-letter extension will
+ * either be the null character or the "_" at the start of the next
+ * multi-letter extension.
+ *
+ * Next, as the extensions version is currently ignored, we
+ * eliminate that portion. This is done by parsing backwards from
+ * the end of the extension, removing any numbers. This may be a
+ * major or minor number however, so the process is repeated if a
+ * minor number was found.
+ *
+ * ext_end is intended to represent the first character *after* the
+ * name portion of an extension, but will be decremented to the last
+ * character itself while eliminating the extensions version number.
+ * A simple re-increment solves this problem.
+ */
ext_long = true;
- /* Multi-letter extension must be delimited */
for (; *isa && *isa != '_'; ++isa)
- if (unlikely(!islower(*isa)
- && !isdigit(*isa)))
+ if (unlikely(!isalnum(*isa)))
ext_err = true;
- /* Parse backwards */
+
ext_end = isa;
if (unlikely(ext_err))
break;
+
if (!isdigit(ext_end[-1]))
break;
- /* Skip the minor version */
+
while (isdigit(*--ext_end))
;
- if (ext_end[0] != 'p'
- || !isdigit(ext_end[-1])) {
- /* Advance it to offset the pre-decrement */
+
+ if (tolower(ext_end[0]) != 'p' || !isdigit(ext_end[-1])) {
++ext_end;
break;
}
- /* Skip the major version */
+
while (isdigit(*--ext_end))
;
+
++ext_end;
break;
default:
- if (unlikely(!islower(*ext))) {
+ /*
+ * Things are a little easier for single-letter extensions, as they
+ * are parsed forwards.
+ *
+ * After checking that our starting position is valid, we need to
+ * ensure that, when isa was incremented at the start of the loop,
+ * that it arrived at the start of the next extension.
+ *
+ * If we are already on a non-digit, there is nothing to do. Either
+ * we have a multi-letter extension's _, or the start of an
+ * extension.
+ *
+ * Otherwise we have found the current extension's major version
+ * number. Parse past it, and a subsequent p/minor version number
+ * if present. The `p` extension must not appear immediately after
+ * a number, so there is no fear of missing it.
+ *
+ */
+ if (unlikely(!isalpha(*ext))) {
ext_err = true;
break;
}
- /* Find next extension */
+
if (!isdigit(*isa))
break;
- /* Skip the minor version */
+
while (isdigit(*++isa))
;
- if (*isa != 'p')
+
+ if (tolower(*isa) != 'p')
break;
+
if (!isdigit(*++isa)) {
--isa;
break;
}
- /* Skip the major version */
+
while (isdigit(*++isa))
;
+
break;
}
- if (*isa != '_')
- --isa;
-#define SET_ISA_EXT_MAP(name, bit) \
- do { \
- if ((ext_end - ext == sizeof(name) - 1) && \
- !memcmp(ext, name, sizeof(name) - 1) && \
- riscv_isa_extension_check(bit)) \
- set_bit(bit, this_isa); \
- } while (false) \
+ /*
+ * The parser expects that at the start of an iteration isa points to the
+ * first character of the next extension. As we stop parsing an extension
+ * on meeting a non-alphanumeric character, an extra increment is needed
+ * where the succeeding extension is a multi-letter prefixed with an "_".
+ */
+ if (*isa == '_')
+ ++isa;
+
+#define SET_ISA_EXT_MAP(name, bit) \
+ do { \
+ if ((ext_end - ext == sizeof(name) - 1) && \
+ !strncasecmp(ext, name, sizeof(name) - 1) && \
+ riscv_isa_extension_check(bit)) \
+ set_bit(bit, isainfo->isa); \
+ } while (false) \
if (unlikely(ext_err))
continue;
if (!ext_long) {
- int nr = *ext - 'a';
+ int nr = tolower(*ext) - 'a';
if (riscv_isa_extension_check(nr)) {
this_hwcap |= isa2hwcap[nr];
- set_bit(nr, this_isa);
+ set_bit(nr, isainfo->isa);
}
} else {
/* sorted alphabetically */
@@ -236,7 +307,9 @@ void __init riscv_fill_hwcap(void)
SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
SET_ISA_EXT_MAP("svnapot", RISCV_ISA_EXT_SVNAPOT);
SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
+ SET_ISA_EXT_MAP("zba", RISCV_ISA_EXT_ZBA);
SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
+ SET_ISA_EXT_MAP("zbs", RISCV_ISA_EXT_ZBS);
SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
SET_ISA_EXT_MAP("zicboz", RISCV_ISA_EXT_ZICBOZ);
SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
@@ -245,6 +318,18 @@ void __init riscv_fill_hwcap(void)
}
/*
+ * These ones were as they were part of the base ISA when the
+ * port & dt-bindings were upstreamed, and so can be set
+ * unconditionally where `i` is in riscv,isa on DT systems.
+ */
+ if (acpi_disabled) {
+ set_bit(RISCV_ISA_EXT_ZICSR, isainfo->isa);
+ set_bit(RISCV_ISA_EXT_ZIFENCEI, isainfo->isa);
+ set_bit(RISCV_ISA_EXT_ZICNTR, isainfo->isa);
+ set_bit(RISCV_ISA_EXT_ZIHPM, isainfo->isa);
+ }
+
+ /*
* All "okay" hart should have same isa. Set HWCAP based on
* common capabilities of every "okay" hart, in case they don't
* have.
@@ -255,11 +340,14 @@ void __init riscv_fill_hwcap(void)
elf_hwcap = this_hwcap;
if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX))
- bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
+ bitmap_copy(riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX);
else
- bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
+ bitmap_and(riscv_isa, riscv_isa, isainfo->isa, RISCV_ISA_EXT_MAX);
}
+ if (!acpi_disabled && rhct)
+ acpi_put_table((struct acpi_table_header *)rhct);
+
/* We don't support systems with F but without D, so mask those out
* here. */
if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) {
@@ -267,6 +355,17 @@ void __init riscv_fill_hwcap(void)
elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
}
+ if (elf_hwcap & COMPAT_HWCAP_ISA_V) {
+ riscv_v_setup_vsize();
+ /*
+ * ISA string in device tree might have 'v' flag, but
+ * CONFIG_RISCV_ISA_V is disabled in kernel.
+ * Clear V flag in elf_hwcap if CONFIG_RISCV_ISA_V is disabled.
+ */
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_V))
+ elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
+ }
+
memset(print_str, 0, sizeof(print_str));
for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
if (riscv_isa[0] & BIT_MASK(i))
@@ -280,6 +379,18 @@ void __init riscv_fill_hwcap(void)
pr_info("riscv: ELF capabilities %s\n", print_str);
}
+unsigned long riscv_get_elf_hwcap(void)
+{
+ unsigned long hwcap;
+
+ hwcap = (elf_hwcap & ((1UL << RISCV_ISA_EXT_BASE) - 1));
+
+ if (!riscv_v_vstate_ctrl_user_allowed())
+ hwcap &= ~COMPAT_HWCAP_ISA_V;
+
+ return hwcap;
+}
+
#ifdef CONFIG_RISCV_ALTERNATIVE
/*
* Alternative patch sites consider 48 bits when determining when to patch
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 3fbb100bc9e4..143a2bb3e697 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -48,10 +48,10 @@ _save_context:
* Disable user-mode memory access as it should only be set in the
* actual user copy routines.
*
- * Disable the FPU to detect illegal usage of floating point in kernel
- * space.
+ * Disable the FPU/Vector to detect illegal usage of floating point
+ * or vector in kernel space.
*/
- li t0, SR_SUM | SR_FS
+ li t0, SR_SUM | SR_FS_VS
REG_L s0, TASK_TI_USER_SP(tp)
csrrc s1, CSR_STATUS, t0
@@ -348,6 +348,6 @@ SYM_CODE_END(excp_vect_table)
#ifndef CONFIG_MMU
SYM_CODE_START(__user_rt_sigreturn)
li a7, __NR_rt_sigreturn
- scall
+ ecall
SYM_CODE_END(__user_rt_sigreturn)
#endif
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 4bf6c449d78b..11c3b94c4534 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -140,10 +140,10 @@ secondary_start_sbi:
.option pop
/*
- * Disable FPU to detect illegal usage of
- * floating point in kernel space
+ * Disable FPU & VECTOR to detect illegal usage of
+ * floating point or vector in kernel space
*/
- li t0, SR_FS
+ li t0, SR_FS_VS
csrc CSR_STATUS, t0
/* Set trap vector to spin forever to help debug */
@@ -234,10 +234,10 @@ pmp_done:
.option pop
/*
- * Disable FPU to detect illegal usage of
- * floating point in kernel space
+ * Disable FPU & VECTOR to detect illegal usage of
+ * floating point or vector in kernel space
*/
- li t0, SR_FS
+ li t0, SR_FS_VS
csrc CSR_STATUS, t0
#ifdef CONFIG_RISCV_BOOT_SPINWAIT
@@ -301,6 +301,7 @@ clear_bss_done:
la tp, init_task
la sp, init_thread_union + THREAD_SIZE
XIP_FIXUP_OFFSET sp
+ addi sp, sp, -PT_SIZE_ON_STACK
#ifdef CONFIG_BUILTIN_DTB
la a0, __dtb_start
XIP_FIXUP_OFFSET a0
@@ -318,6 +319,7 @@ clear_bss_done:
/* Restore C environment */
la tp, init_task
la sp, init_thread_union + THREAD_SIZE
+ addi sp, sp, -PT_SIZE_ON_STACK
#ifdef CONFIG_KASAN
call kasan_early_init
@@ -392,7 +394,7 @@ ENTRY(reset_regs)
#ifdef CONFIG_FPU
csrr t0, CSR_MISA
andi t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)
- beqz t0, .Lreset_regs_done
+ beqz t0, .Lreset_regs_done_fpu
li t1, SR_FS
csrs CSR_STATUS, t1
@@ -430,8 +432,31 @@ ENTRY(reset_regs)
fmv.s.x f31, zero
csrw fcsr, 0
/* note that the caller must clear SR_FS */
+.Lreset_regs_done_fpu:
#endif /* CONFIG_FPU */
-.Lreset_regs_done:
+
+#ifdef CONFIG_RISCV_ISA_V
+ csrr t0, CSR_MISA
+ li t1, COMPAT_HWCAP_ISA_V
+ and t0, t0, t1
+ beqz t0, .Lreset_regs_done_vector
+
+ /*
+ * Clear vector registers and reset vcsr
+ * VLMAX has a defined value, VLEN is a constant,
+ * and this form of vsetvli is defined to set vl to VLMAX.
+ */
+ li t1, SR_VS
+ csrs CSR_STATUS, t1
+ csrs CSR_VCSR, x0
+ vsetvli t1, x0, e8, m8, ta, ma
+ vmv.v.i v0, 0
+ vmv.v.i v8, 0
+ vmv.v.i v16, 0
+ vmv.v.i v24, 0
+ /* note that the caller must clear SR_VS */
+.Lreset_regs_done_vector:
+#endif /* CONFIG_RISCV_ISA_V */
ret
END(reset_regs)
#endif /* CONFIG_RISCV_M_MODE */
diff --git a/arch/riscv/kernel/hibernate-asm.S b/arch/riscv/kernel/hibernate-asm.S
index effaf5ca5da0..d698dd7df637 100644
--- a/arch/riscv/kernel/hibernate-asm.S
+++ b/arch/riscv/kernel/hibernate-asm.S
@@ -28,7 +28,6 @@ ENTRY(__hibernate_cpu_resume)
REG_L a0, hibernate_cpu_context
- suspend_restore_csrs
suspend_restore_regs
/* Return zero value. */
@@ -50,7 +49,7 @@ ENTRY(hibernate_restore_image)
REG_L s4, restore_pblist
REG_L a1, relocated_restore_code
- jalr a1
+ jr a1
END(hibernate_restore_image)
/*
@@ -73,5 +72,5 @@ ENTRY(hibernate_core_restore_code)
REG_L s4, HIBERN_PBE_NEXT(s4)
bnez s4, .Lcopy
- jalr s2
+ jr s2
END(hibernate_core_restore_code)
diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
index 264b2dcdd67e..671b686c0158 100644
--- a/arch/riscv/kernel/hibernate.c
+++ b/arch/riscv/kernel/hibernate.c
@@ -80,7 +80,6 @@ int pfn_is_nosave(unsigned long pfn)
void notrace save_processor_state(void)
{
- WARN_ON(num_online_cpus() != 1);
}
void notrace restore_processor_state(void)
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index eb9a68a539e6..d0577cc6a081 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -11,6 +11,9 @@
#include <linux/module.h>
#include <linux/seq_file.h>
#include <asm/sbi.h>
+#include <asm/smp.h>
+#include <asm/softirq_stack.h>
+#include <asm/stacktrace.h>
static struct fwnode_handle *(*__get_intc_node)(void);
@@ -28,6 +31,70 @@ struct fwnode_handle *riscv_get_intc_hwnode(void)
}
EXPORT_SYMBOL_GPL(riscv_get_intc_hwnode);
+#ifdef CONFIG_IRQ_STACKS
+#include <asm/irq_stack.h>
+
+DEFINE_PER_CPU(ulong *, irq_stack_ptr);
+
+#ifdef CONFIG_VMAP_STACK
+static void init_irq_stacks(void)
+{
+ int cpu;
+ ulong *p;
+
+ for_each_possible_cpu(cpu) {
+ p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
+ per_cpu(irq_stack_ptr, cpu) = p;
+ }
+}
+#else
+/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
+DEFINE_PER_CPU_ALIGNED(ulong [IRQ_STACK_SIZE/sizeof(ulong)], irq_stack);
+
+static void init_irq_stacks(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu);
+}
+#endif /* CONFIG_VMAP_STACK */
+
+#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK
+void do_softirq_own_stack(void)
+{
+#ifdef CONFIG_IRQ_STACKS
+ if (on_thread_stack()) {
+ ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id())
+ + IRQ_STACK_SIZE/sizeof(ulong);
+ __asm__ __volatile(
+ "addi sp, sp, -"RISCV_SZPTR "\n"
+ REG_S" ra, (sp) \n"
+ "addi sp, sp, -"RISCV_SZPTR "\n"
+ REG_S" s0, (sp) \n"
+ "addi s0, sp, 2*"RISCV_SZPTR "\n"
+ "move sp, %[sp] \n"
+ "call __do_softirq \n"
+ "addi sp, s0, -2*"RISCV_SZPTR"\n"
+ REG_L" s0, (sp) \n"
+ "addi sp, sp, "RISCV_SZPTR "\n"
+ REG_L" ra, (sp) \n"
+ "addi sp, sp, "RISCV_SZPTR "\n"
+ :
+ : [sp] "r" (sp)
+ : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6",
+ "memory");
+ } else
+#endif
+ __do_softirq();
+}
+#endif /* CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK */
+
+#else
+static void init_irq_stacks(void) {}
+#endif /* CONFIG_IRQ_STACKS */
+
int arch_show_interrupts(struct seq_file *p, int prec)
{
show_ipi_stats(p, prec);
@@ -36,6 +103,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
void __init init_IRQ(void)
{
+ init_irq_stacks();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index 30102aadc4d7..8a6e5a9e842a 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -65,13 +65,8 @@ ENTRY(return_to_handler)
* So alternatively we check the *old* frame pointer position, that is, the
* value stored in -16(s0) on entry, and the s0 on return.
*/
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- mv t6, s0
-#endif
SAVE_RET_ABI_STATE
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- mv a0, t6
-#endif
+ mv a0, sp
call ftrace_return_to_handler
mv a2, a0
RESTORE_RET_ABI_STATE
diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c
index c976a21cd4bd..194f166b2cc4 100644
--- a/arch/riscv/kernel/probes/uprobes.c
+++ b/arch/riscv/kernel/probes/uprobes.c
@@ -67,6 +67,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
struct uprobe_task *utask = current->utask;
WARN_ON_ONCE(current->thread.bad_cause != UPROBE_TRAP_NR);
+ current->thread.bad_cause = utask->autask.saved_cause;
instruction_pointer_set(regs, utask->vaddr + auprobe->insn_size);
@@ -102,6 +103,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
struct uprobe_task *utask = current->utask;
+ current->thread.bad_cause = utask->autask.saved_cause;
/*
* Task has received a fatal signal, so reset back to probbed
* address.
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index e2a060066730..e32d737e039f 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -24,6 +24,7 @@
#include <asm/switch_to.h>
#include <asm/thread_info.h>
#include <asm/cpuidle.h>
+#include <asm/vector.h>
register unsigned long gp_in_global __asm__("gp");
@@ -146,12 +147,29 @@ void flush_thread(void)
fstate_off(current, task_pt_regs(current));
memset(&current->thread.fstate, 0, sizeof(current->thread.fstate));
#endif
+#ifdef CONFIG_RISCV_ISA_V
+ /* Reset vector state */
+ riscv_v_vstate_ctrl_init(current);
+ riscv_v_vstate_off(task_pt_regs(current));
+ kfree(current->thread.vstate.datap);
+ memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+#endif
+}
+
+void arch_release_task_struct(struct task_struct *tsk)
+{
+ /* Free the vector context of datap. */
+ if (has_vector())
+ kfree(tsk->thread.vstate.datap);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
fstate_save(src, task_pt_regs(src));
*dst = *src;
+ /* clear entire V context, including datap for a new task */
+ memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+
return 0;
}
@@ -176,6 +194,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.s[1] = (unsigned long)args->fn_arg;
} else {
*childregs = *(current_pt_regs());
+ /* Turn off status.VS */
+ riscv_v_vstate_off(childregs);
if (usp) /* User fork */
childregs->sp = usp;
if (clone_flags & CLONE_SETTLS)
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 23c48b14a0e7..1d572cf3140f 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -7,6 +7,7 @@
* Copied from arch/tile/kernel/ptrace.c
*/
+#include <asm/vector.h>
#include <asm/ptrace.h>
#include <asm/syscall.h>
#include <asm/thread_info.h>
@@ -24,6 +25,9 @@ enum riscv_regset {
#ifdef CONFIG_FPU
REGSET_F,
#endif
+#ifdef CONFIG_RISCV_ISA_V
+ REGSET_V,
+#endif
};
static int riscv_gpr_get(struct task_struct *target,
@@ -80,6 +84,61 @@ static int riscv_fpr_set(struct task_struct *target,
}
#endif
+#ifdef CONFIG_RISCV_ISA_V
+static int riscv_vr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ struct __riscv_v_ext_state *vstate = &target->thread.vstate;
+
+ if (!riscv_v_vstate_query(task_pt_regs(target)))
+ return -EINVAL;
+
+ /*
+ * Ensure the vector registers have been saved to the memory before
+ * copying them to membuf.
+ */
+ if (target == current)
+ riscv_v_vstate_save(current, task_pt_regs(current));
+
+ /* Copy vector header from vstate. */
+ membuf_write(&to, vstate, offsetof(struct __riscv_v_ext_state, datap));
+ membuf_zero(&to, sizeof(vstate->datap));
+
+ /* Copy all the vector registers from vstate. */
+ return membuf_write(&to, vstate->datap, riscv_v_vsize);
+}
+
+static int riscv_vr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret, size;
+ struct __riscv_v_ext_state *vstate = &target->thread.vstate;
+
+ if (!riscv_v_vstate_query(task_pt_regs(target)))
+ return -EINVAL;
+
+ /* Copy rest of the vstate except datap */
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate, 0,
+ offsetof(struct __riscv_v_ext_state, datap));
+ if (unlikely(ret))
+ return ret;
+
+ /* Skip copy datap. */
+ size = sizeof(vstate->datap);
+ count -= size;
+ ubuf += size;
+
+ /* Copy all the vector registers. */
+ pos = 0;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate->datap,
+ 0, riscv_v_vsize);
+ return ret;
+}
+#endif
+
static const struct user_regset riscv_user_regset[] = {
[REGSET_X] = {
.core_note_type = NT_PRSTATUS,
@@ -99,6 +158,17 @@ static const struct user_regset riscv_user_regset[] = {
.set = riscv_fpr_set,
},
#endif
+#ifdef CONFIG_RISCV_ISA_V
+ [REGSET_V] = {
+ .core_note_type = NT_RISCV_VECTOR,
+ .align = 16,
+ .n = ((32 * RISCV_MAX_VLENB) +
+ sizeof(struct __riscv_v_ext_state)) / sizeof(__u32),
+ .size = sizeof(__u32),
+ .regset_get = riscv_vr_get,
+ .set = riscv_vr_set,
+ },
+#endif
};
static const struct user_regset_view riscv_user_native_view = {
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 36b026057503..971fe776e2f8 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -8,6 +8,7 @@
* Nick Kossifidis <mick@ics.forth.gr>
*/
+#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/mm.h>
@@ -21,6 +22,7 @@
#include <linux/efi.h>
#include <linux/crash_dump.h>
+#include <asm/acpi.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
@@ -262,6 +264,8 @@ static void __init parse_dtb(void)
#endif
}
+extern void __init init_rt_signal_env(void);
+
void __init setup_arch(char **cmdline_p)
{
parse_dtb();
@@ -270,11 +274,16 @@ void __init setup_arch(char **cmdline_p)
*cmdline_p = boot_command_line;
early_ioremap_setup();
+ sbi_init();
jump_label_init();
parse_early_param();
efi_init();
paging_init();
+
+ /* Parse the ACPI tables for possible boot-time configuration */
+ acpi_boot_table_init();
+
#if IS_ENABLED(CONFIG_BUILTIN_DTB)
unflatten_and_copy_device_tree();
#else
@@ -283,7 +292,6 @@ void __init setup_arch(char **cmdline_p)
misc_mem_init();
init_resources();
- sbi_init();
#ifdef CONFIG_KASAN
kasan_init();
@@ -293,8 +301,12 @@ void __init setup_arch(char **cmdline_p)
setup_smp();
#endif
+ if (!acpi_disabled)
+ acpi_init_rintc_map();
+
riscv_init_cbo_blocksizes();
riscv_fill_hwcap();
+ init_rt_signal_env();
apply_boot_alternatives();
if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) &&
riscv_isa_extension_available(NULL, ZICBOM))
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 9aff9d720590..180d951d3624 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -19,10 +19,14 @@
#include <asm/signal.h>
#include <asm/signal32.h>
#include <asm/switch_to.h>
+#include <asm/vector.h>
#include <asm/csr.h>
#include <asm/cacheflush.h>
+unsigned long signal_minsigstksz __ro_after_init;
+
extern u32 __user_rt_sigreturn[2];
+static size_t riscv_v_sc_size __ro_after_init;
#define DEBUG_SIG 0
@@ -40,26 +44,13 @@ static long restore_fp_state(struct pt_regs *regs,
{
long err;
struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
- size_t i;
err = __copy_from_user(&current->thread.fstate, state, sizeof(*state));
if (unlikely(err))
return err;
fstate_restore(current, regs);
-
- /* We support no other extension state at this time. */
- for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) {
- u32 value;
-
- err = __get_user(value, &sc_fpregs->q.reserved[i]);
- if (unlikely(err))
- break;
- if (value != 0)
- return -EINVAL;
- }
-
- return err;
+ return 0;
}
static long save_fp_state(struct pt_regs *regs,
@@ -67,52 +58,186 @@ static long save_fp_state(struct pt_regs *regs,
{
long err;
struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
- size_t i;
fstate_save(current, regs);
err = __copy_to_user(state, &current->thread.fstate, sizeof(*state));
+ return err;
+}
+#else
+#define save_fp_state(task, regs) (0)
+#define restore_fp_state(task, regs) (0)
+#endif
+
+#ifdef CONFIG_RISCV_ISA_V
+
+static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
+{
+ struct __riscv_ctx_hdr __user *hdr;
+ struct __sc_riscv_v_state __user *state;
+ void __user *datap;
+ long err;
+
+ hdr = *sc_vec;
+ /* Place state to the user's signal context space after the hdr */
+ state = (struct __sc_riscv_v_state __user *)(hdr + 1);
+ /* Point datap right after the end of __sc_riscv_v_state */
+ datap = state + 1;
+
+ /* datap is designed to be 16 byte aligned for better performance */
+ WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16)));
+
+ riscv_v_vstate_save(current, regs);
+ /* Copy everything of vstate but datap. */
+ err = __copy_to_user(&state->v_state, &current->thread.vstate,
+ offsetof(struct __riscv_v_ext_state, datap));
+ /* Copy the pointer datap itself. */
+ err |= __put_user(datap, &state->v_state.datap);
+ /* Copy the whole vector content to user space datap. */
+ err |= __copy_to_user(datap, current->thread.vstate.datap, riscv_v_vsize);
+ /* Copy magic to the user space after saving all vector conetext */
+ err |= __put_user(RISCV_V_MAGIC, &hdr->magic);
+ err |= __put_user(riscv_v_sc_size, &hdr->size);
if (unlikely(err))
return err;
- /* We support no other extension state at this time. */
- for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) {
- err = __put_user(0, &sc_fpregs->q.reserved[i]);
- if (unlikely(err))
- break;
- }
+ /* Only progress the sv_vec if everything has done successfully */
+ *sc_vec += riscv_v_sc_size;
+ return 0;
+}
+
+/*
+ * Restore Vector extension context from the user's signal frame. This function
+ * assumes a valid extension header. So magic and size checking must be done by
+ * the caller.
+ */
+static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
+{
+ long err;
+ struct __sc_riscv_v_state __user *state = sc_vec;
+ void __user *datap;
+
+ /* Copy everything of __sc_riscv_v_state except datap. */
+ err = __copy_from_user(&current->thread.vstate, &state->v_state,
+ offsetof(struct __riscv_v_ext_state, datap));
+ if (unlikely(err))
+ return err;
+
+ /* Copy the pointer datap itself. */
+ err = __get_user(datap, &state->v_state.datap);
+ if (unlikely(err))
+ return err;
+ /*
+ * Copy the whole vector content from user space datap. Use
+ * copy_from_user to prevent information leak.
+ */
+ err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
+ if (unlikely(err))
+ return err;
+
+ riscv_v_vstate_restore(current, regs);
return err;
}
#else
-#define save_fp_state(task, regs) (0)
-#define restore_fp_state(task, regs) (0)
+#define save_v_state(task, regs) (0)
+#define __restore_v_state(task, regs) (0)
#endif
static long restore_sigcontext(struct pt_regs *regs,
struct sigcontext __user *sc)
{
+ void __user *sc_ext_ptr = &sc->sc_extdesc.hdr;
+ __u32 rsvd;
long err;
/* sc_regs is structured the same as the start of pt_regs */
err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs));
+ if (unlikely(err))
+ return err;
+
/* Restore the floating-point state. */
- if (has_fpu())
- err |= restore_fp_state(regs, &sc->sc_fpregs);
+ if (has_fpu()) {
+ err = restore_fp_state(regs, &sc->sc_fpregs);
+ if (unlikely(err))
+ return err;
+ }
+
+ /* Check the reserved word before extensions parsing */
+ err = __get_user(rsvd, &sc->sc_extdesc.reserved);
+ if (unlikely(err))
+ return err;
+ if (unlikely(rsvd))
+ return -EINVAL;
+
+ while (!err) {
+ __u32 magic, size;
+ struct __riscv_ctx_hdr __user *head = sc_ext_ptr;
+
+ err |= __get_user(magic, &head->magic);
+ err |= __get_user(size, &head->size);
+ if (unlikely(err))
+ return err;
+
+ sc_ext_ptr += sizeof(*head);
+ switch (magic) {
+ case END_MAGIC:
+ if (size != END_HDR_SIZE)
+ return -EINVAL;
+
+ return 0;
+ case RISCV_V_MAGIC:
+ if (!has_vector() || !riscv_v_vstate_query(regs) ||
+ size != riscv_v_sc_size)
+ return -EINVAL;
+
+ err = __restore_v_state(regs, sc_ext_ptr);
+ break;
+ default:
+ return -EINVAL;
+ }
+ sc_ext_ptr = (void __user *)head + size;
+ }
return err;
}
+static size_t get_rt_frame_size(bool cal_all)
+{
+ struct rt_sigframe __user *frame;
+ size_t frame_size;
+ size_t total_context_size = 0;
+
+ frame_size = sizeof(*frame);
+
+ if (has_vector()) {
+ if (cal_all || riscv_v_vstate_query(task_pt_regs(current)))
+ total_context_size += riscv_v_sc_size;
+ }
+ /*
+ * Preserved a __riscv_ctx_hdr for END signal context header if an
+ * extension uses __riscv_extra_ext_header
+ */
+ if (total_context_size)
+ total_context_size += sizeof(struct __riscv_ctx_hdr);
+
+ frame_size += total_context_size;
+
+ frame_size = round_up(frame_size, 16);
+ return frame_size;
+}
+
SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
struct task_struct *task;
sigset_t set;
+ size_t frame_size = get_rt_frame_size(false);
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
frame = (struct rt_sigframe __user *)regs->sp;
- if (!access_ok(frame, sizeof(*frame)))
+ if (!access_ok(frame, frame_size))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
@@ -146,12 +271,23 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
struct pt_regs *regs)
{
struct sigcontext __user *sc = &frame->uc.uc_mcontext;
+ struct __riscv_ctx_hdr __user *sc_ext_ptr = &sc->sc_extdesc.hdr;
long err;
+
/* sc_regs is structured the same as the start of pt_regs */
err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs));
/* Save the floating-point state. */
if (has_fpu())
err |= save_fp_state(regs, &sc->sc_fpregs);
+ /* Save the vector state. */
+ if (has_vector() && riscv_v_vstate_query(regs))
+ err |= save_v_state(regs, (void __user **)&sc_ext_ptr);
+ /* Write zero to fp-reserved space and check it on restore_sigcontext */
+ err |= __put_user(0, &sc->sc_extdesc.reserved);
+ /* And put END __riscv_ctx_hdr at the end. */
+ err |= __put_user(END_MAGIC, &sc_ext_ptr->magic);
+ err |= __put_user(END_HDR_SIZE, &sc_ext_ptr->size);
+
return err;
}
@@ -175,6 +311,13 @@ static inline void __user *get_sigframe(struct ksignal *ksig,
/* Align the stack frame. */
sp &= ~0xfUL;
+ /*
+ * Fail if the size of the altstack is not large enough for the
+ * sigframe construction.
+ */
+ if (current->sas_ss_size && sp < current->sas_ss_sp)
+ return (void __user __force *)-1UL;
+
return (void __user *)sp;
}
@@ -184,9 +327,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
struct rt_sigframe __user *frame;
long err = 0;
unsigned long __maybe_unused addr;
+ size_t frame_size = get_rt_frame_size(false);
- frame = get_sigframe(ksig, regs, sizeof(*frame));
- if (!access_ok(frame, sizeof(*frame)))
+ frame = get_sigframe(ksig, regs, frame_size);
+ if (!access_ok(frame, frame_size))
return -EFAULT;
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
@@ -319,3 +463,23 @@ void arch_do_signal_or_restart(struct pt_regs *regs)
*/
restore_saved_sigmask();
}
+
+void init_rt_signal_env(void);
+void __init init_rt_signal_env(void)
+{
+ riscv_v_sc_size = sizeof(struct __riscv_ctx_hdr) +
+ sizeof(struct __sc_riscv_v_state) + riscv_v_vsize;
+ /*
+ * Determine the stack space required for guaranteed signal delivery.
+ * The signal_minsigstksz will be populated into the AT_MINSIGSTKSZ entry
+ * in the auxiliary array at process startup.
+ */
+ signal_minsigstksz = get_rt_frame_size(true);
+}
+
+#ifdef CONFIG_DYNAMIC_SIGFRAME
+bool sigaltstack_size_valid(size_t ss_size)
+{
+ return ss_size > get_rt_frame_size(false);
+}
+#endif /* CONFIG_DYNAMIC_SIGFRAME */
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 23e533766a49..85bbce0f758c 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -58,7 +58,6 @@ int riscv_hartid_to_cpuid(unsigned long hartid)
if (cpuid_to_hartid_map(i) == hartid)
return i;
- pr_err("Couldn't find cpu id for hartid [%lu]\n", hartid);
return -ENOENT;
}
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 445a4efee267..f4d6acb38dd0 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -8,6 +8,7 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/acpi.h>
#include <linux/arch_topology.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -31,6 +32,8 @@
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/smp.h>
+#include <uapi/asm/hwcap.h>
+#include <asm/vector.h>
#include "head.h"
@@ -70,7 +73,73 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
}
-void __init setup_smp(void)
+#ifdef CONFIG_ACPI
+static unsigned int cpu_count = 1;
+
+static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const unsigned long end)
+{
+ unsigned long hart;
+ static bool found_boot_cpu;
+ struct acpi_madt_rintc *processor = (struct acpi_madt_rintc *)header;
+
+ /*
+ * Each RINTC structure in MADT will have a flag. If ACPI_MADT_ENABLED
+ * bit in the flag is not enabled, it means OS should not try to enable
+ * the cpu to which RINTC belongs.
+ */
+ if (!(processor->flags & ACPI_MADT_ENABLED))
+ return 0;
+
+ if (BAD_MADT_ENTRY(processor, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(&header->common);
+
+ hart = processor->hart_id;
+ if (hart == INVALID_HARTID) {
+ pr_warn("Invalid hartid\n");
+ return 0;
+ }
+
+ if (hart == cpuid_to_hartid_map(0)) {
+ BUG_ON(found_boot_cpu);
+ found_boot_cpu = true;
+ early_map_cpu_to_node(0, acpi_numa_get_nid(cpu_count));
+ return 0;
+ }
+
+ if (cpu_count >= NR_CPUS) {
+ pr_warn("NR_CPUS is too small for the number of ACPI tables.\n");
+ return 0;
+ }
+
+ cpuid_to_hartid_map(cpu_count) = hart;
+ early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count));
+ cpu_count++;
+
+ return 0;
+}
+
+static void __init acpi_parse_and_init_cpus(void)
+{
+ int cpuid;
+
+ cpu_set_ops(0);
+
+ acpi_table_parse_madt(ACPI_MADT_TYPE_RINTC, acpi_parse_rintc, 0);
+
+ for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) {
+ if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) {
+ cpu_set_ops(cpuid);
+ set_cpu_possible(cpuid, true);
+ }
+ }
+}
+#else
+#define acpi_parse_and_init_cpus(...) do { } while (0)
+#endif
+
+static void __init of_parse_and_init_cpus(void)
{
struct device_node *dn;
unsigned long hart;
@@ -81,7 +150,7 @@ void __init setup_smp(void)
cpu_set_ops(0);
for_each_of_cpu_node(dn) {
- rc = riscv_of_processor_hartid(dn, &hart);
+ rc = riscv_early_of_processor_hartid(dn, &hart);
if (rc < 0)
continue;
@@ -116,6 +185,14 @@ void __init setup_smp(void)
}
}
+void __init setup_smp(void)
+{
+ if (acpi_disabled)
+ of_parse_and_init_cpus();
+ else
+ acpi_parse_and_init_cpus();
+}
+
static int start_secondary_cpu(int cpu, struct task_struct *tidle)
{
if (cpu_ops[cpu]->cpu_start)
@@ -161,14 +238,20 @@ asmlinkage __visible void smp_callin(void)
mmgrab(mm);
current->active_mm = mm;
- riscv_ipi_enable();
-
store_cpu_topology(curr_cpuid);
notify_cpu_starting(curr_cpuid);
+
+ riscv_ipi_enable();
+
numa_add_cpu(curr_cpuid);
set_cpu_online(curr_cpuid, 1);
probe_vendor_features(curr_cpuid);
+ if (has_vector()) {
+ if (riscv_v_setup_vsize())
+ elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
+ }
+
/*
* Remote TLB flushes are ignored while the CPU is offline, so emit
* a local TLB flush right now just in case.
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 5db29683ebee..26ef5526bfb4 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -10,6 +10,7 @@
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
#include <asm/sbi.h>
+#include <asm/vector.h>
#include <asm/switch_to.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -121,6 +122,49 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair,
pair->value = id;
}
+static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
+ const struct cpumask *cpus)
+{
+ int cpu;
+ u64 missing = 0;
+
+ pair->value = 0;
+ if (has_fpu())
+ pair->value |= RISCV_HWPROBE_IMA_FD;
+
+ if (riscv_isa_extension_available(NULL, c))
+ pair->value |= RISCV_HWPROBE_IMA_C;
+
+ if (has_vector())
+ pair->value |= RISCV_HWPROBE_IMA_V;
+
+ /*
+ * Loop through and record extensions that 1) anyone has, and 2) anyone
+ * doesn't have.
+ */
+ for_each_cpu(cpu, cpus) {
+ struct riscv_isainfo *isainfo = &hart_isa[cpu];
+
+ if (riscv_isa_extension_available(isainfo->isa, ZBA))
+ pair->value |= RISCV_HWPROBE_EXT_ZBA;
+ else
+ missing |= RISCV_HWPROBE_EXT_ZBA;
+
+ if (riscv_isa_extension_available(isainfo->isa, ZBB))
+ pair->value |= RISCV_HWPROBE_EXT_ZBB;
+ else
+ missing |= RISCV_HWPROBE_EXT_ZBB;
+
+ if (riscv_isa_extension_available(isainfo->isa, ZBS))
+ pair->value |= RISCV_HWPROBE_EXT_ZBS;
+ else
+ missing |= RISCV_HWPROBE_EXT_ZBS;
+ }
+
+ /* Now turn off reporting features if any CPU is missing it. */
+ pair->value &= ~missing;
+}
+
static u64 hwprobe_misaligned(const struct cpumask *cpus)
{
int cpu;
@@ -164,13 +208,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
break;
case RISCV_HWPROBE_KEY_IMA_EXT_0:
- pair->value = 0;
- if (has_fpu())
- pair->value |= RISCV_HWPROBE_IMA_FD;
-
- if (riscv_isa_extension_available(NULL, c))
- pair->value |= RISCV_HWPROBE_IMA_C;
-
+ hwprobe_isa_ext0(pair, cpus);
break;
case RISCV_HWPROBE_KEY_CPUPERF_0:
diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c
index babaf3b48ba8..23641e82a9df 100644
--- a/arch/riscv/kernel/time.c
+++ b/arch/riscv/kernel/time.c
@@ -4,6 +4,7 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/acpi.h>
#include <linux/of_clk.h>
#include <linux/clockchips.h>
#include <linux/clocksource.h>
@@ -18,17 +19,29 @@ EXPORT_SYMBOL_GPL(riscv_timebase);
void __init time_init(void)
{
struct device_node *cpu;
+ struct acpi_table_rhct *rhct;
+ acpi_status status;
u32 prop;
- cpu = of_find_node_by_path("/cpus");
- if (!cpu || of_property_read_u32(cpu, "timebase-frequency", &prop))
- panic(KERN_WARNING "RISC-V system with no 'timebase-frequency' in DTS\n");
- of_node_put(cpu);
- riscv_timebase = prop;
+ if (acpi_disabled) {
+ cpu = of_find_node_by_path("/cpus");
+ if (!cpu || of_property_read_u32(cpu, "timebase-frequency", &prop))
+ panic("RISC-V system with no 'timebase-frequency' in DTS\n");
+
+ of_node_put(cpu);
+ riscv_timebase = prop;
+ of_clk_init(NULL);
+ } else {
+ status = acpi_get_table(ACPI_SIG_RHCT, 0, (struct acpi_table_header **)&rhct);
+ if (ACPI_FAILURE(status))
+ panic("RISC-V ACPI system with no RHCT table\n");
+
+ riscv_timebase = rhct->time_base_freq;
+ acpi_put_table((struct acpi_table_header *)rhct);
+ }
lpj_fine = riscv_timebase / HZ;
- of_clk_init(NULL);
timer_probe();
tick_setup_hrtimer_broadcast();
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 8c258b78c925..f910dfccbf5d 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -26,6 +26,8 @@
#include <asm/ptrace.h>
#include <asm/syscall.h>
#include <asm/thread_info.h>
+#include <asm/vector.h>
+#include <asm/irq_stack.h>
int show_unhandled_signals = 1;
@@ -145,8 +147,35 @@ DO_ERROR_INFO(do_trap_insn_misaligned,
SIGBUS, BUS_ADRALN, "instruction address misaligned");
DO_ERROR_INFO(do_trap_insn_fault,
SIGSEGV, SEGV_ACCERR, "instruction access fault");
-DO_ERROR_INFO(do_trap_insn_illegal,
- SIGILL, ILL_ILLOPC, "illegal instruction");
+
+asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *regs)
+{
+ bool handled;
+
+ if (user_mode(regs)) {
+ irqentry_enter_from_user_mode(regs);
+
+ local_irq_enable();
+
+ handled = riscv_v_first_use_handler(regs);
+
+ local_irq_disable();
+
+ if (!handled)
+ do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc,
+ "Oops - illegal instruction");
+
+ irqentry_exit_to_user_mode(regs);
+ } else {
+ irqentry_state_t state = irqentry_nmi_enter(regs);
+
+ do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc,
+ "Oops - illegal instruction");
+
+ irqentry_nmi_exit(regs, state);
+ }
+}
+
DO_ERROR_INFO(do_trap_load_fault,
SIGSEGV, SEGV_ACCERR, "load access fault");
#ifndef CONFIG_RISCV_M_MODE
@@ -273,6 +302,8 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
regs->epc += 4;
regs->orig_a0 = regs->a0;
+ riscv_v_vstate_discard(regs);
+
syscall = syscall_enter_from_user_mode(regs, syscall);
if (syscall < NR_syscalls)
@@ -305,16 +336,46 @@ asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs)
}
#endif
-asmlinkage __visible noinstr void do_irq(struct pt_regs *regs)
+static void noinstr handle_riscv_irq(struct pt_regs *regs)
{
struct pt_regs *old_regs;
- irqentry_state_t state = irqentry_enter(regs);
irq_enter_rcu();
old_regs = set_irq_regs(regs);
handle_arch_irq(regs);
set_irq_regs(old_regs);
irq_exit_rcu();
+}
+
+asmlinkage void noinstr do_irq(struct pt_regs *regs)
+{
+ irqentry_state_t state = irqentry_enter(regs);
+#ifdef CONFIG_IRQ_STACKS
+ if (on_thread_stack()) {
+ ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id())
+ + IRQ_STACK_SIZE/sizeof(ulong);
+ __asm__ __volatile(
+ "addi sp, sp, -"RISCV_SZPTR "\n"
+ REG_S" ra, (sp) \n"
+ "addi sp, sp, -"RISCV_SZPTR "\n"
+ REG_S" s0, (sp) \n"
+ "addi s0, sp, 2*"RISCV_SZPTR "\n"
+ "move sp, %[sp] \n"
+ "move a0, %[regs] \n"
+ "call handle_riscv_irq \n"
+ "addi sp, s0, -2*"RISCV_SZPTR"\n"
+ REG_L" s0, (sp) \n"
+ "addi sp, sp, "RISCV_SZPTR "\n"
+ REG_L" ra, (sp) \n"
+ "addi sp, sp, "RISCV_SZPTR "\n"
+ :
+ : [sp] "r" (sp), [regs] "r" (regs)
+ : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6",
+ "memory");
+ } else
+#endif
+ handle_riscv_irq(regs);
irqentry_exit(regs, state);
}
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index 9a68e7eaae4d..2cf76218a5bd 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -15,6 +15,7 @@
#include <asm/vdso.h>
#include <linux/time_namespace.h>
#include <vdso/datapage.h>
+#include <vdso/vsyscall.h>
enum vvar_pages {
VVAR_DATA_PAGE_OFFSET,
diff --git a/arch/riscv/kernel/vdso/rt_sigreturn.S b/arch/riscv/kernel/vdso/rt_sigreturn.S
index 0573705eac76..10438c7c626a 100644
--- a/arch/riscv/kernel/vdso/rt_sigreturn.S
+++ b/arch/riscv/kernel/vdso/rt_sigreturn.S
@@ -11,6 +11,6 @@ ENTRY(__vdso_rt_sigreturn)
.cfi_startproc
.cfi_signal_frame
li a7, __NR_rt_sigreturn
- scall
+ ecall
.cfi_endproc
ENDPROC(__vdso_rt_sigreturn)
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
new file mode 100644
index 000000000000..8d92fb6c522c
--- /dev/null
+++ b/arch/riscv/kernel/vector.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 SiFive
+ * Author: Andy Chiu <andy.chiu@sifive.com>
+ */
+#include <linux/export.h>
+#include <linux/sched/signal.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/prctl.h>
+
+#include <asm/thread_info.h>
+#include <asm/processor.h>
+#include <asm/insn.h>
+#include <asm/vector.h>
+#include <asm/csr.h>
+#include <asm/elf.h>
+#include <asm/ptrace.h>
+#include <asm/bug.h>
+
+static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE);
+
+unsigned long riscv_v_vsize __read_mostly;
+EXPORT_SYMBOL_GPL(riscv_v_vsize);
+
+int riscv_v_setup_vsize(void)
+{
+ unsigned long this_vsize;
+
+ /* There are 32 vector registers with vlenb length. */
+ riscv_v_enable();
+ this_vsize = csr_read(CSR_VLENB) * 32;
+ riscv_v_disable();
+
+ if (!riscv_v_vsize) {
+ riscv_v_vsize = this_vsize;
+ return 0;
+ }
+
+ if (riscv_v_vsize != this_vsize) {
+ WARN(1, "RISCV_ISA_V only supports one vlenb on SMP systems");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static bool insn_is_vector(u32 insn_buf)
+{
+ u32 opcode = insn_buf & __INSN_OPCODE_MASK;
+ u32 width, csr;
+
+ /*
+ * All V-related instructions, including CSR operations are 4-Byte. So,
+ * do not handle if the instruction length is not 4-Byte.
+ */
+ if (unlikely(GET_INSN_LENGTH(insn_buf) != 4))
+ return false;
+
+ switch (opcode) {
+ case RVV_OPCODE_VECTOR:
+ return true;
+ case RVV_OPCODE_VL:
+ case RVV_OPCODE_VS:
+ width = RVV_EXRACT_VL_VS_WIDTH(insn_buf);
+ if (width == RVV_VL_VS_WIDTH_8 || width == RVV_VL_VS_WIDTH_16 ||
+ width == RVV_VL_VS_WIDTH_32 || width == RVV_VL_VS_WIDTH_64)
+ return true;
+
+ break;
+ case RVG_OPCODE_SYSTEM:
+ csr = RVG_EXTRACT_SYSTEM_CSR(insn_buf);
+ if ((csr >= CSR_VSTART && csr <= CSR_VCSR) ||
+ (csr >= CSR_VL && csr <= CSR_VLENB))
+ return true;
+ }
+
+ return false;
+}
+
+static int riscv_v_thread_zalloc(void)
+{
+ void *datap;
+
+ datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
+ if (!datap)
+ return -ENOMEM;
+
+ current->thread.vstate.datap = datap;
+ memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_ext_state,
+ datap));
+ return 0;
+}
+
+#define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)
+#define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2)
+#define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK)
+#define VSTATE_CTRL_GET_INHERIT(x) (!!((x) & PR_RISCV_V_VSTATE_CTRL_INHERIT))
+static inline int riscv_v_ctrl_get_cur(struct task_struct *tsk)
+{
+ return VSTATE_CTRL_GET_CUR(tsk->thread.vstate_ctrl);
+}
+
+static inline int riscv_v_ctrl_get_next(struct task_struct *tsk)
+{
+ return VSTATE_CTRL_GET_NEXT(tsk->thread.vstate_ctrl);
+}
+
+static inline bool riscv_v_ctrl_test_inherit(struct task_struct *tsk)
+{
+ return VSTATE_CTRL_GET_INHERIT(tsk->thread.vstate_ctrl);
+}
+
+static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt,
+ bool inherit)
+{
+ unsigned long ctrl;
+
+ ctrl = cur & PR_RISCV_V_VSTATE_CTRL_CUR_MASK;
+ ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt);
+ if (inherit)
+ ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+ tsk->thread.vstate_ctrl = ctrl;
+}
+
+bool riscv_v_vstate_ctrl_user_allowed(void)
+{
+ return riscv_v_ctrl_get_cur(current) == PR_RISCV_V_VSTATE_CTRL_ON;
+}
+EXPORT_SYMBOL_GPL(riscv_v_vstate_ctrl_user_allowed);
+
+bool riscv_v_first_use_handler(struct pt_regs *regs)
+{
+ u32 __user *epc = (u32 __user *)regs->epc;
+ u32 insn = (u32)regs->badaddr;
+
+ /* Do not handle if V is not supported, or disabled */
+ if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
+ return false;
+
+ /* If V has been enabled then it is not the first-use trap */
+ if (riscv_v_vstate_query(regs))
+ return false;
+
+ /* Get the instruction */
+ if (!insn) {
+ if (__get_user(insn, epc))
+ return false;
+ }
+
+ /* Filter out non-V instructions */
+ if (!insn_is_vector(insn))
+ return false;
+
+ /* Sanity check. datap should be null by the time of the first-use trap */
+ WARN_ON(current->thread.vstate.datap);
+
+ /*
+ * Now we sure that this is a V instruction. And it executes in the
+ * context where VS has been off. So, try to allocate the user's V
+ * context and resume execution.
+ */
+ if (riscv_v_thread_zalloc()) {
+ force_sig(SIGBUS);
+ return true;
+ }
+ riscv_v_vstate_on(regs);
+ riscv_v_vstate_restore(current, regs);
+ return true;
+}
+
+void riscv_v_vstate_ctrl_init(struct task_struct *tsk)
+{
+ bool inherit;
+ int cur, next;
+
+ if (!has_vector())
+ return;
+
+ next = riscv_v_ctrl_get_next(tsk);
+ if (!next) {
+ if (READ_ONCE(riscv_v_implicit_uacc))
+ cur = PR_RISCV_V_VSTATE_CTRL_ON;
+ else
+ cur = PR_RISCV_V_VSTATE_CTRL_OFF;
+ } else {
+ cur = next;
+ }
+ /* Clear next mask if inherit-bit is not set */
+ inherit = riscv_v_ctrl_test_inherit(tsk);
+ if (!inherit)
+ next = PR_RISCV_V_VSTATE_CTRL_DEFAULT;
+
+ riscv_v_ctrl_set(tsk, cur, next, inherit);
+}
+
+long riscv_v_vstate_ctrl_get_current(void)
+{
+ if (!has_vector())
+ return -EINVAL;
+
+ return current->thread.vstate_ctrl & PR_RISCV_V_VSTATE_CTRL_MASK;
+}
+
+long riscv_v_vstate_ctrl_set_current(unsigned long arg)
+{
+ bool inherit;
+ int cur, next;
+
+ if (!has_vector())
+ return -EINVAL;
+
+ if (arg & ~PR_RISCV_V_VSTATE_CTRL_MASK)
+ return -EINVAL;
+
+ cur = VSTATE_CTRL_GET_CUR(arg);
+ switch (cur) {
+ case PR_RISCV_V_VSTATE_CTRL_OFF:
+ /* Do not allow user to turn off V if current is not off */
+ if (riscv_v_ctrl_get_cur(current) != PR_RISCV_V_VSTATE_CTRL_OFF)
+ return -EPERM;
+
+ break;
+ case PR_RISCV_V_VSTATE_CTRL_ON:
+ break;
+ case PR_RISCV_V_VSTATE_CTRL_DEFAULT:
+ cur = riscv_v_ctrl_get_cur(current);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ next = VSTATE_CTRL_GET_NEXT(arg);
+ inherit = VSTATE_CTRL_GET_INHERIT(arg);
+ switch (next) {
+ case PR_RISCV_V_VSTATE_CTRL_DEFAULT:
+ case PR_RISCV_V_VSTATE_CTRL_OFF:
+ case PR_RISCV_V_VSTATE_CTRL_ON:
+ riscv_v_ctrl_set(current, cur, next, inherit);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+#ifdef CONFIG_SYSCTL
+
+static struct ctl_table riscv_v_default_vstate_table[] = {
+ {
+ .procname = "riscv_v_default_allow",
+ .data = &riscv_v_implicit_uacc,
+ .maxlen = sizeof(riscv_v_implicit_uacc),
+ .mode = 0644,
+ .proc_handler = proc_dobool,
+ },
+ { }
+};
+
+static int __init riscv_v_sysctl_init(void)
+{
+ if (has_vector())
+ if (!register_sysctl("abi", riscv_v_default_vstate_table))
+ return -EINVAL;
+ return 0;
+}
+
+#else /* ! CONFIG_SYSCTL */
+static int __init riscv_v_sysctl_init(void) { return 0; }
+#endif /* ! CONFIG_SYSCTL */
+
+static int riscv_v_init(void)
+{
+ return riscv_v_sysctl_init();
+}
+core_initcall(riscv_v_init);
diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S
index eab9edc3b631..50767647fbc6 100644
--- a/arch/riscv/kernel/vmlinux-xip.lds.S
+++ b/arch/riscv/kernel/vmlinux-xip.lds.S
@@ -98,12 +98,6 @@ SECTIONS
__soc_builtin_dtb_table_end = .;
}
- . = ALIGN(8);
- .alternative : {
- __alt_start = .;
- *(.alternative)
- __alt_end = .;
- }
__init_end = .;
. = ALIGN(16);
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index e5f9f4677bbf..492dd4b8f3d6 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -85,11 +85,11 @@ SECTIONS
INIT_DATA_SECTION(16)
.init.pi : {
- *(.init.pi*)
+ KEEP(*(.init.pi*))
}
.init.bss : {
- *(.init.bss) /* from the EFI stub */
+ KEEP(*(.init.bss*)) /* from the EFI stub */
}
.exit.data :
{
@@ -112,7 +112,7 @@ SECTIONS
. = ALIGN(8);
.alternative : {
__alt_start = .;
- *(.alternative)
+ KEEP(*(.alternative))
__alt_end = .;
}
__init_end = .;
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 28891e583259..dfc237d7875b 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -21,6 +21,10 @@ config KVM
tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
depends on RISCV_SBI && MMU
select HAVE_KVM_EVENTFD
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQFD
+ select HAVE_KVM_IRQ_ROUTING
+ select HAVE_KVM_MSI
select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_GENERIC_HARDWARE_ENABLING
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 8031b8912a0d..fee0671e2dc1 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -17,6 +17,7 @@ kvm-y += mmu.o
kvm-y += vcpu.o
kvm-y += vcpu_exit.o
kvm-y += vcpu_fp.o
+kvm-y += vcpu_vector.o
kvm-y += vcpu_insn.o
kvm-y += vcpu_switch.o
kvm-y += vcpu_sbi.o
@@ -27,3 +28,6 @@ kvm-y += vcpu_sbi_hsm.o
kvm-y += vcpu_timer.o
kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
kvm-y += aia.o
+kvm-y += aia_device.o
+kvm-y += aia_aplic.o
+kvm-y += aia_imsic.o
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index 4f1286fc7f17..585a3b42c52c 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -8,11 +8,49 @@
*/
#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/kvm_host.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
#include <asm/hwcap.h>
+#include <asm/kvm_aia_imsic.h>
+struct aia_hgei_control {
+ raw_spinlock_t lock;
+ unsigned long free_bitmap;
+ struct kvm_vcpu *owners[BITS_PER_LONG];
+};
+static DEFINE_PER_CPU(struct aia_hgei_control, aia_hgei);
+static int hgei_parent_irq;
+
+unsigned int kvm_riscv_aia_nr_hgei;
+unsigned int kvm_riscv_aia_max_ids;
DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available);
+static int aia_find_hgei(struct kvm_vcpu *owner)
+{
+ int i, hgei;
+ unsigned long flags;
+ struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ hgei = -1;
+ for (i = 1; i <= kvm_riscv_aia_nr_hgei; i++) {
+ if (hgctrl->owners[i] == owner) {
+ hgei = i;
+ break;
+ }
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ put_cpu_ptr(&aia_hgei);
+ return hgei;
+}
+
static void aia_set_hvictl(bool ext_irq_pending)
{
unsigned long hvictl;
@@ -56,6 +94,7 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu)
bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
+ int hgei;
unsigned long seip;
if (!kvm_riscv_aia_available())
@@ -74,6 +113,10 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
if (!kvm_riscv_aia_initialized(vcpu->kvm) || !seip)
return false;
+ hgei = aia_find_hgei(vcpu);
+ if (hgei > 0)
+ return !!(csr_read(CSR_HGEIP) & BIT(hgei));
+
return false;
}
@@ -323,8 +366,6 @@ static int aia_rmw_iprio(struct kvm_vcpu *vcpu, unsigned int isel,
return KVM_INSN_CONTINUE_NEXT_SEPC;
}
-#define IMSIC_FIRST 0x70
-#define IMSIC_LAST 0xff
int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
unsigned long *val, unsigned long new_val,
unsigned long wr_mask)
@@ -348,6 +389,143 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
return KVM_INSN_EXIT_TO_USER_SPACE;
}
+int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
+ void __iomem **hgei_va, phys_addr_t *hgei_pa)
+{
+ int ret = -ENOENT;
+ unsigned long flags;
+ struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+
+ if (!kvm_riscv_aia_available() || !hgctrl)
+ return -ENODEV;
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ if (hgctrl->free_bitmap) {
+ ret = __ffs(hgctrl->free_bitmap);
+ hgctrl->free_bitmap &= ~BIT(ret);
+ hgctrl->owners[ret] = owner;
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ /* TODO: To be updated later by AIA IMSIC HW guest file support */
+ if (hgei_va)
+ *hgei_va = NULL;
+ if (hgei_pa)
+ *hgei_pa = 0;
+
+ return ret;
+}
+
+void kvm_riscv_aia_free_hgei(int cpu, int hgei)
+{
+ unsigned long flags;
+ struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+
+ if (!kvm_riscv_aia_available() || !hgctrl)
+ return;
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ if (hgei > 0 && hgei <= kvm_riscv_aia_nr_hgei) {
+ if (!(hgctrl->free_bitmap & BIT(hgei))) {
+ hgctrl->free_bitmap |= BIT(hgei);
+ hgctrl->owners[hgei] = NULL;
+ }
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+}
+
+void kvm_riscv_aia_wakeon_hgei(struct kvm_vcpu *owner, bool enable)
+{
+ int hgei;
+
+ if (!kvm_riscv_aia_available())
+ return;
+
+ hgei = aia_find_hgei(owner);
+ if (hgei > 0) {
+ if (enable)
+ csr_set(CSR_HGEIE, BIT(hgei));
+ else
+ csr_clear(CSR_HGEIE, BIT(hgei));
+ }
+}
+
+static irqreturn_t hgei_interrupt(int irq, void *dev_id)
+{
+ int i;
+ unsigned long hgei_mask, flags;
+ struct aia_hgei_control *hgctrl = get_cpu_ptr(&aia_hgei);
+
+ hgei_mask = csr_read(CSR_HGEIP) & csr_read(CSR_HGEIE);
+ csr_clear(CSR_HGEIE, hgei_mask);
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ for_each_set_bit(i, &hgei_mask, BITS_PER_LONG) {
+ if (hgctrl->owners[i])
+ kvm_vcpu_kick(hgctrl->owners[i]);
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ put_cpu_ptr(&aia_hgei);
+ return IRQ_HANDLED;
+}
+
+static int aia_hgei_init(void)
+{
+ int cpu, rc;
+ struct irq_domain *domain;
+ struct aia_hgei_control *hgctrl;
+
+ /* Initialize per-CPU guest external interrupt line management */
+ for_each_possible_cpu(cpu) {
+ hgctrl = per_cpu_ptr(&aia_hgei, cpu);
+ raw_spin_lock_init(&hgctrl->lock);
+ if (kvm_riscv_aia_nr_hgei) {
+ hgctrl->free_bitmap =
+ BIT(kvm_riscv_aia_nr_hgei + 1) - 1;
+ hgctrl->free_bitmap &= ~BIT(0);
+ } else
+ hgctrl->free_bitmap = 0;
+ }
+
+ /* Find INTC irq domain */
+ domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
+ DOMAIN_BUS_ANY);
+ if (!domain) {
+ kvm_err("unable to find INTC domain\n");
+ return -ENOENT;
+ }
+
+ /* Map per-CPU SGEI interrupt from INTC domain */
+ hgei_parent_irq = irq_create_mapping(domain, IRQ_S_GEXT);
+ if (!hgei_parent_irq) {
+ kvm_err("unable to map SGEI IRQ\n");
+ return -ENOMEM;
+ }
+
+ /* Request per-CPU SGEI interrupt */
+ rc = request_percpu_irq(hgei_parent_irq, hgei_interrupt,
+ "riscv-kvm", &aia_hgei);
+ if (rc) {
+ kvm_err("failed to request SGEI IRQ\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+static void aia_hgei_exit(void)
+{
+ /* Free per-CPU SGEI interrupt */
+ free_percpu_irq(hgei_parent_irq, &aia_hgei);
+}
+
void kvm_riscv_aia_enable(void)
{
if (!kvm_riscv_aia_available())
@@ -362,21 +540,105 @@ void kvm_riscv_aia_enable(void)
csr_write(CSR_HVIPRIO1H, 0x0);
csr_write(CSR_HVIPRIO2H, 0x0);
#endif
+
+ /* Enable per-CPU SGEI interrupt */
+ enable_percpu_irq(hgei_parent_irq,
+ irq_get_trigger_type(hgei_parent_irq));
+ csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
}
void kvm_riscv_aia_disable(void)
{
+ int i;
+ unsigned long flags;
+ struct kvm_vcpu *vcpu;
+ struct aia_hgei_control *hgctrl;
+
if (!kvm_riscv_aia_available())
return;
+ hgctrl = get_cpu_ptr(&aia_hgei);
+
+ /* Disable per-CPU SGEI interrupt */
+ csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
+ disable_percpu_irq(hgei_parent_irq);
aia_set_hvictl(false);
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+
+ for (i = 0; i <= kvm_riscv_aia_nr_hgei; i++) {
+ vcpu = hgctrl->owners[i];
+ if (!vcpu)
+ continue;
+
+ /*
+ * We release hgctrl->lock before notifying IMSIC
+ * so that we don't have lock ordering issues.
+ */
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ /* Notify IMSIC */
+ kvm_riscv_vcpu_aia_imsic_release(vcpu);
+
+ /*
+ * Wakeup VCPU if it was blocked so that it can
+ * run on other HARTs
+ */
+ if (csr_read(CSR_HGEIE) & BIT(i)) {
+ csr_clear(CSR_HGEIE, BIT(i));
+ kvm_vcpu_kick(vcpu);
+ }
+
+ raw_spin_lock_irqsave(&hgctrl->lock, flags);
+ }
+
+ raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
+
+ put_cpu_ptr(&aia_hgei);
}
int kvm_riscv_aia_init(void)
{
+ int rc;
+
if (!riscv_isa_extension_available(NULL, SxAIA))
return -ENODEV;
+ /* Figure-out number of bits in HGEIE */
+ csr_write(CSR_HGEIE, -1UL);
+ kvm_riscv_aia_nr_hgei = fls_long(csr_read(CSR_HGEIE));
+ csr_write(CSR_HGEIE, 0);
+ if (kvm_riscv_aia_nr_hgei)
+ kvm_riscv_aia_nr_hgei--;
+
+ /*
+ * Number of usable HGEI lines should be minimum of per-HART
+ * IMSIC guest files and number of bits in HGEIE
+ *
+ * TODO: To be updated later by AIA IMSIC HW guest file support
+ */
+ kvm_riscv_aia_nr_hgei = 0;
+
+ /*
+ * Find number of guest MSI IDs
+ *
+ * TODO: To be updated later by AIA IMSIC HW guest file support
+ */
+ kvm_riscv_aia_max_ids = IMSIC_MAX_ID;
+
+ /* Initialize guest external interrupt line management */
+ rc = aia_hgei_init();
+ if (rc)
+ return rc;
+
+ /* Register device operations */
+ rc = kvm_register_device_ops(&kvm_riscv_aia_device_ops,
+ KVM_DEV_TYPE_RISCV_AIA);
+ if (rc) {
+ aia_hgei_exit();
+ return rc;
+ }
+
/* Enable KVM AIA support */
static_branch_enable(&kvm_riscv_aia_available);
@@ -385,4 +647,12 @@ int kvm_riscv_aia_init(void)
void kvm_riscv_aia_exit(void)
{
+ if (!kvm_riscv_aia_available())
+ return;
+
+ /* Unregister device operations */
+ kvm_unregister_device_ops(KVM_DEV_TYPE_RISCV_AIA);
+
+ /* Cleanup the HGEI state */
+ aia_hgei_exit();
}
diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
new file mode 100644
index 000000000000..39e72aa016a4
--- /dev/null
+++ b/arch/riscv/kvm/aia_aplic.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ * Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/math.h>
+#include <linux/spinlock.h>
+#include <linux/swab.h>
+#include <kvm/iodev.h>
+#include <asm/kvm_aia_aplic.h>
+
+struct aplic_irq {
+ raw_spinlock_t lock;
+ u32 sourcecfg;
+ u32 state;
+#define APLIC_IRQ_STATE_PENDING BIT(0)
+#define APLIC_IRQ_STATE_ENABLED BIT(1)
+#define APLIC_IRQ_STATE_ENPEND (APLIC_IRQ_STATE_PENDING | \
+ APLIC_IRQ_STATE_ENABLED)
+#define APLIC_IRQ_STATE_INPUT BIT(8)
+ u32 target;
+};
+
+struct aplic {
+ struct kvm_io_device iodev;
+
+ u32 domaincfg;
+ u32 genmsi;
+
+ u32 nr_irqs;
+ u32 nr_words;
+ struct aplic_irq *irqs;
+};
+
+static u32 aplic_read_sourcecfg(struct aplic *aplic, u32 irq)
+{
+ u32 ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return 0;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = irqd->sourcecfg;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_sourcecfg(struct aplic *aplic, u32 irq, u32 val)
+{
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ if (val & APLIC_SOURCECFG_D)
+ val = 0;
+ else
+ val &= APLIC_SOURCECFG_SM_MASK;
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ irqd->sourcecfg = val;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static u32 aplic_read_target(struct aplic *aplic, u32 irq)
+{
+ u32 ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return 0;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = irqd->target;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_target(struct aplic *aplic, u32 irq, u32 val)
+{
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ val &= APLIC_TARGET_EIID_MASK |
+ (APLIC_TARGET_HART_IDX_MASK << APLIC_TARGET_HART_IDX_SHIFT) |
+ (APLIC_TARGET_GUEST_IDX_MASK << APLIC_TARGET_GUEST_IDX_SHIFT);
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ irqd->target = val;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_pending(struct aplic *aplic, u32 irq)
+{
+ bool ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return false;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = (irqd->state & APLIC_IRQ_STATE_PENDING) ? true : false;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
+{
+ unsigned long flags, sm;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+
+ sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
+ if (!pending &&
+ ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
+ (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
+ goto skip_write_pending;
+
+ if (pending)
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ else
+ irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+
+skip_write_pending:
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_enabled(struct aplic *aplic, u32 irq)
+{
+ bool ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return false;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = (irqd->state & APLIC_IRQ_STATE_ENABLED) ? true : false;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
+{
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ if (enabled)
+ irqd->state |= APLIC_IRQ_STATE_ENABLED;
+ else
+ irqd->state &= ~APLIC_IRQ_STATE_ENABLED;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+}
+
+static bool aplic_read_input(struct aplic *aplic, u32 irq)
+{
+ bool ret;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+
+ if (!irq || aplic->nr_irqs <= irq)
+ return false;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+ ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ return ret;
+}
+
+static void aplic_inject_msi(struct kvm *kvm, u32 irq, u32 target)
+{
+ u32 hart_idx, guest_idx, eiid;
+
+ hart_idx = target >> APLIC_TARGET_HART_IDX_SHIFT;
+ hart_idx &= APLIC_TARGET_HART_IDX_MASK;
+ guest_idx = target >> APLIC_TARGET_GUEST_IDX_SHIFT;
+ guest_idx &= APLIC_TARGET_GUEST_IDX_MASK;
+ eiid = target & APLIC_TARGET_EIID_MASK;
+ kvm_riscv_aia_inject_msi_by_id(kvm, hart_idx, guest_idx, eiid);
+}
+
+static void aplic_update_irq_range(struct kvm *kvm, u32 first, u32 last)
+{
+ bool inject;
+ u32 irq, target;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if (!(aplic->domaincfg & APLIC_DOMAINCFG_IE))
+ return;
+
+ for (irq = first; irq <= last; irq++) {
+ if (!irq || aplic->nr_irqs <= irq)
+ continue;
+ irqd = &aplic->irqs[irq];
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+
+ inject = false;
+ target = irqd->target;
+ if ((irqd->state & APLIC_IRQ_STATE_ENPEND) ==
+ APLIC_IRQ_STATE_ENPEND) {
+ irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+ inject = true;
+ }
+
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ if (inject)
+ aplic_inject_msi(kvm, irq, target);
+ }
+}
+
+int kvm_riscv_aia_aplic_inject(struct kvm *kvm, u32 source, bool level)
+{
+ u32 target;
+ bool inject = false, ie;
+ unsigned long flags;
+ struct aplic_irq *irqd;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if (!aplic || !source || (aplic->nr_irqs <= source))
+ return -ENODEV;
+ irqd = &aplic->irqs[source];
+ ie = (aplic->domaincfg & APLIC_DOMAINCFG_IE) ? true : false;
+
+ raw_spin_lock_irqsave(&irqd->lock, flags);
+
+ if (irqd->sourcecfg & APLIC_SOURCECFG_D)
+ goto skip_unlock;
+
+ switch (irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK) {
+ case APLIC_SOURCECFG_SM_EDGE_RISE:
+ if (level && !(irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ case APLIC_SOURCECFG_SM_EDGE_FALL:
+ if (!level && (irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ case APLIC_SOURCECFG_SM_LEVEL_HIGH:
+ if (level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ case APLIC_SOURCECFG_SM_LEVEL_LOW:
+ if (!level && !(irqd->state & APLIC_IRQ_STATE_PENDING))
+ irqd->state |= APLIC_IRQ_STATE_PENDING;
+ break;
+ }
+
+ if (level)
+ irqd->state |= APLIC_IRQ_STATE_INPUT;
+ else
+ irqd->state &= ~APLIC_IRQ_STATE_INPUT;
+
+ target = irqd->target;
+ if (ie && ((irqd->state & APLIC_IRQ_STATE_ENPEND) ==
+ APLIC_IRQ_STATE_ENPEND)) {
+ irqd->state &= ~APLIC_IRQ_STATE_PENDING;
+ inject = true;
+ }
+
+skip_unlock:
+ raw_spin_unlock_irqrestore(&irqd->lock, flags);
+
+ if (inject)
+ aplic_inject_msi(kvm, source, target);
+
+ return 0;
+}
+
+static u32 aplic_read_input_word(struct aplic *aplic, u32 word)
+{
+ u32 i, ret = 0;
+
+ for (i = 0; i < 32; i++)
+ ret |= aplic_read_input(aplic, word * 32 + i) ? BIT(i) : 0;
+
+ return ret;
+}
+
+static u32 aplic_read_pending_word(struct aplic *aplic, u32 word)
+{
+ u32 i, ret = 0;
+
+ for (i = 0; i < 32; i++)
+ ret |= aplic_read_pending(aplic, word * 32 + i) ? BIT(i) : 0;
+
+ return ret;
+}
+
+static void aplic_write_pending_word(struct aplic *aplic, u32 word,
+ u32 val, bool pending)
+{
+ u32 i;
+
+ for (i = 0; i < 32; i++) {
+ if (val & BIT(i))
+ aplic_write_pending(aplic, word * 32 + i, pending);
+ }
+}
+
+static u32 aplic_read_enabled_word(struct aplic *aplic, u32 word)
+{
+ u32 i, ret = 0;
+
+ for (i = 0; i < 32; i++)
+ ret |= aplic_read_enabled(aplic, word * 32 + i) ? BIT(i) : 0;
+
+ return ret;
+}
+
+static void aplic_write_enabled_word(struct aplic *aplic, u32 word,
+ u32 val, bool enabled)
+{
+ u32 i;
+
+ for (i = 0; i < 32; i++) {
+ if (val & BIT(i))
+ aplic_write_enabled(aplic, word * 32 + i, enabled);
+ }
+}
+
+static int aplic_mmio_read_offset(struct kvm *kvm, gpa_t off, u32 *val32)
+{
+ u32 i;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if ((off & 0x3) != 0)
+ return -EOPNOTSUPP;
+
+ if (off == APLIC_DOMAINCFG) {
+ *val32 = APLIC_DOMAINCFG_RDONLY |
+ aplic->domaincfg | APLIC_DOMAINCFG_DM;
+ } else if ((off >= APLIC_SOURCECFG_BASE) &&
+ (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
+ *val32 = aplic_read_sourcecfg(aplic, i);
+ } else if ((off >= APLIC_SETIP_BASE) &&
+ (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIP_BASE) >> 2;
+ *val32 = aplic_read_pending_word(aplic, i);
+ } else if (off == APLIC_SETIPNUM) {
+ *val32 = 0;
+ } else if ((off >= APLIC_CLRIP_BASE) &&
+ (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_CLRIP_BASE) >> 2;
+ *val32 = aplic_read_input_word(aplic, i);
+ } else if (off == APLIC_CLRIPNUM) {
+ *val32 = 0;
+ } else if ((off >= APLIC_SETIE_BASE) &&
+ (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIE_BASE) >> 2;
+ *val32 = aplic_read_enabled_word(aplic, i);
+ } else if (off == APLIC_SETIENUM) {
+ *val32 = 0;
+ } else if ((off >= APLIC_CLRIE_BASE) &&
+ (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
+ *val32 = 0;
+ } else if (off == APLIC_CLRIENUM) {
+ *val32 = 0;
+ } else if (off == APLIC_SETIPNUM_LE) {
+ *val32 = 0;
+ } else if (off == APLIC_SETIPNUM_BE) {
+ *val32 = 0;
+ } else if (off == APLIC_GENMSI) {
+ *val32 = aplic->genmsi;
+ } else if ((off >= APLIC_TARGET_BASE) &&
+ (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
+ *val32 = aplic_read_target(aplic, i);
+ } else
+ return -ENODEV;
+
+ return 0;
+}
+
+static int aplic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, void *val)
+{
+ if (len != 4)
+ return -EOPNOTSUPP;
+
+ return aplic_mmio_read_offset(vcpu->kvm,
+ addr - vcpu->kvm->arch.aia.aplic_addr,
+ val);
+}
+
+static int aplic_mmio_write_offset(struct kvm *kvm, gpa_t off, u32 val32)
+{
+ u32 i;
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if ((off & 0x3) != 0)
+ return -EOPNOTSUPP;
+
+ if (off == APLIC_DOMAINCFG) {
+ /* Only IE bit writeable */
+ aplic->domaincfg = val32 & APLIC_DOMAINCFG_IE;
+ } else if ((off >= APLIC_SOURCECFG_BASE) &&
+ (off < (APLIC_SOURCECFG_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_SOURCECFG_BASE) >> 2) + 1;
+ aplic_write_sourcecfg(aplic, i, val32);
+ } else if ((off >= APLIC_SETIP_BASE) &&
+ (off < (APLIC_SETIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIP_BASE) >> 2;
+ aplic_write_pending_word(aplic, i, val32, true);
+ } else if (off == APLIC_SETIPNUM) {
+ aplic_write_pending(aplic, val32, true);
+ } else if ((off >= APLIC_CLRIP_BASE) &&
+ (off < (APLIC_CLRIP_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_CLRIP_BASE) >> 2;
+ aplic_write_pending_word(aplic, i, val32, false);
+ } else if (off == APLIC_CLRIPNUM) {
+ aplic_write_pending(aplic, val32, false);
+ } else if ((off >= APLIC_SETIE_BASE) &&
+ (off < (APLIC_SETIE_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_SETIE_BASE) >> 2;
+ aplic_write_enabled_word(aplic, i, val32, true);
+ } else if (off == APLIC_SETIENUM) {
+ aplic_write_enabled(aplic, val32, true);
+ } else if ((off >= APLIC_CLRIE_BASE) &&
+ (off < (APLIC_CLRIE_BASE + aplic->nr_words * 4))) {
+ i = (off - APLIC_CLRIE_BASE) >> 2;
+ aplic_write_enabled_word(aplic, i, val32, false);
+ } else if (off == APLIC_CLRIENUM) {
+ aplic_write_enabled(aplic, val32, false);
+ } else if (off == APLIC_SETIPNUM_LE) {
+ aplic_write_pending(aplic, val32, true);
+ } else if (off == APLIC_SETIPNUM_BE) {
+ aplic_write_pending(aplic, __swab32(val32), true);
+ } else if (off == APLIC_GENMSI) {
+ aplic->genmsi = val32 & ~(APLIC_TARGET_GUEST_IDX_MASK <<
+ APLIC_TARGET_GUEST_IDX_SHIFT);
+ kvm_riscv_aia_inject_msi_by_id(kvm,
+ val32 >> APLIC_TARGET_HART_IDX_SHIFT, 0,
+ val32 & APLIC_TARGET_EIID_MASK);
+ } else if ((off >= APLIC_TARGET_BASE) &&
+ (off < (APLIC_TARGET_BASE + (aplic->nr_irqs - 1) * 4))) {
+ i = ((off - APLIC_TARGET_BASE) >> 2) + 1;
+ aplic_write_target(aplic, i, val32);
+ } else
+ return -ENODEV;
+
+ aplic_update_irq_range(kvm, 1, aplic->nr_irqs - 1);
+
+ return 0;
+}
+
+static int aplic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, const void *val)
+{
+ if (len != 4)
+ return -EOPNOTSUPP;
+
+ return aplic_mmio_write_offset(vcpu->kvm,
+ addr - vcpu->kvm->arch.aia.aplic_addr,
+ *((const u32 *)val));
+}
+
+static struct kvm_io_device_ops aplic_iodoev_ops = {
+ .read = aplic_mmio_read,
+ .write = aplic_mmio_write,
+};
+
+int kvm_riscv_aia_aplic_set_attr(struct kvm *kvm, unsigned long type, u32 v)
+{
+ int rc;
+
+ if (!kvm->arch.aia.aplic_state)
+ return -ENODEV;
+
+ rc = aplic_mmio_write_offset(kvm, type, v);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+int kvm_riscv_aia_aplic_get_attr(struct kvm *kvm, unsigned long type, u32 *v)
+{
+ int rc;
+
+ if (!kvm->arch.aia.aplic_state)
+ return -ENODEV;
+
+ rc = aplic_mmio_read_offset(kvm, type, v);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+int kvm_riscv_aia_aplic_has_attr(struct kvm *kvm, unsigned long type)
+{
+ int rc;
+ u32 val;
+
+ if (!kvm->arch.aia.aplic_state)
+ return -ENODEV;
+
+ rc = aplic_mmio_read_offset(kvm, type, &val);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+int kvm_riscv_aia_aplic_init(struct kvm *kvm)
+{
+ int i, ret = 0;
+ struct aplic *aplic;
+
+ /* Do nothing if we have zero sources */
+ if (!kvm->arch.aia.nr_sources)
+ return 0;
+
+ /* Allocate APLIC global state */
+ aplic = kzalloc(sizeof(*aplic), GFP_KERNEL);
+ if (!aplic)
+ return -ENOMEM;
+ kvm->arch.aia.aplic_state = aplic;
+
+ /* Setup APLIC IRQs */
+ aplic->nr_irqs = kvm->arch.aia.nr_sources + 1;
+ aplic->nr_words = DIV_ROUND_UP(aplic->nr_irqs, 32);
+ aplic->irqs = kcalloc(aplic->nr_irqs,
+ sizeof(*aplic->irqs), GFP_KERNEL);
+ if (!aplic->irqs) {
+ ret = -ENOMEM;
+ goto fail_free_aplic;
+ }
+ for (i = 0; i < aplic->nr_irqs; i++)
+ raw_spin_lock_init(&aplic->irqs[i].lock);
+
+ /* Setup IO device */
+ kvm_iodevice_init(&aplic->iodev, &aplic_iodoev_ops);
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+ kvm->arch.aia.aplic_addr,
+ KVM_DEV_RISCV_APLIC_SIZE,
+ &aplic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+ if (ret)
+ goto fail_free_aplic_irqs;
+
+ /* Setup default IRQ routing */
+ ret = kvm_riscv_setup_default_irq_routing(kvm, aplic->nr_irqs);
+ if (ret)
+ goto fail_unreg_iodev;
+
+ return 0;
+
+fail_unreg_iodev:
+ mutex_lock(&kvm->slots_lock);
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &aplic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+fail_free_aplic_irqs:
+ kfree(aplic->irqs);
+fail_free_aplic:
+ kvm->arch.aia.aplic_state = NULL;
+ kfree(aplic);
+ return ret;
+}
+
+void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm)
+{
+ struct aplic *aplic = kvm->arch.aia.aplic_state;
+
+ if (!aplic)
+ return;
+
+ mutex_lock(&kvm->slots_lock);
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &aplic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+
+ kfree(aplic->irqs);
+
+ kvm->arch.aia.aplic_state = NULL;
+ kfree(aplic);
+}
diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
new file mode 100644
index 000000000000..0eb689351b7d
--- /dev/null
+++ b/arch/riscv/kvm/aia_device.c
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ * Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/bits.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <asm/kvm_aia_imsic.h>
+
+static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
+{
+ struct kvm_vcpu *tmp_vcpu;
+
+ for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+ tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+ mutex_unlock(&tmp_vcpu->mutex);
+ }
+}
+
+static void unlock_all_vcpus(struct kvm *kvm)
+{
+ unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
+}
+
+static bool lock_all_vcpus(struct kvm *kvm)
+{
+ struct kvm_vcpu *tmp_vcpu;
+ unsigned long c;
+
+ kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
+ if (!mutex_trylock(&tmp_vcpu->mutex)) {
+ unlock_vcpus(kvm, c - 1);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int aia_create(struct kvm_device *dev, u32 type)
+{
+ int ret;
+ unsigned long i;
+ struct kvm *kvm = dev->kvm;
+ struct kvm_vcpu *vcpu;
+
+ if (irqchip_in_kernel(kvm))
+ return -EEXIST;
+
+ ret = -EBUSY;
+ if (!lock_all_vcpus(kvm))
+ return ret;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.ran_atleast_once)
+ goto out_unlock;
+ }
+ ret = 0;
+
+ kvm->arch.aia.in_kernel = true;
+
+out_unlock:
+ unlock_all_vcpus(kvm);
+ return ret;
+}
+
+static void aia_destroy(struct kvm_device *dev)
+{
+ kfree(dev);
+}
+
+static int aia_config(struct kvm *kvm, unsigned long type,
+ u32 *nr, bool write)
+{
+ struct kvm_aia *aia = &kvm->arch.aia;
+
+ /* Writes can only be done before irqchip is initialized */
+ if (write && kvm_riscv_aia_initialized(kvm))
+ return -EBUSY;
+
+ switch (type) {
+ case KVM_DEV_RISCV_AIA_CONFIG_MODE:
+ if (write) {
+ switch (*nr) {
+ case KVM_DEV_RISCV_AIA_MODE_EMUL:
+ break;
+ case KVM_DEV_RISCV_AIA_MODE_HWACCEL:
+ case KVM_DEV_RISCV_AIA_MODE_AUTO:
+ /*
+ * HW Acceleration and Auto modes only
+ * supported on host with non-zero guest
+ * external interrupts (i.e. non-zero
+ * VS-level IMSIC pages).
+ */
+ if (!kvm_riscv_aia_nr_hgei)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ aia->mode = *nr;
+ } else
+ *nr = aia->mode;
+ break;
+ case KVM_DEV_RISCV_AIA_CONFIG_IDS:
+ if (write) {
+ if ((*nr < KVM_DEV_RISCV_AIA_IDS_MIN) ||
+ (*nr >= KVM_DEV_RISCV_AIA_IDS_MAX) ||
+ ((*nr & KVM_DEV_RISCV_AIA_IDS_MIN) !=
+ KVM_DEV_RISCV_AIA_IDS_MIN) ||
+ (kvm_riscv_aia_max_ids <= *nr))
+ return -EINVAL;
+ aia->nr_ids = *nr;
+ } else
+ *nr = aia->nr_ids;
+ break;
+ case KVM_DEV_RISCV_AIA_CONFIG_SRCS:
+ if (write) {
+ if ((*nr >= KVM_DEV_RISCV_AIA_SRCS_MAX) ||
+ (*nr >= kvm_riscv_aia_max_ids))
+ return -EINVAL;
+ aia->nr_sources = *nr;
+ } else
+ *nr = aia->nr_sources;
+ break;
+ case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS:
+ if (write) {
+ if (*nr >= KVM_DEV_RISCV_AIA_GROUP_BITS_MAX)
+ return -EINVAL;
+ aia->nr_group_bits = *nr;
+ } else
+ *nr = aia->nr_group_bits;
+ break;
+ case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT:
+ if (write) {
+ if ((*nr < KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN) ||
+ (*nr >= KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX))
+ return -EINVAL;
+ aia->nr_group_shift = *nr;
+ } else
+ *nr = aia->nr_group_shift;
+ break;
+ case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS:
+ if (write) {
+ if (*nr >= KVM_DEV_RISCV_AIA_HART_BITS_MAX)
+ return -EINVAL;
+ aia->nr_hart_bits = *nr;
+ } else
+ *nr = aia->nr_hart_bits;
+ break;
+ case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS:
+ if (write) {
+ if (*nr >= KVM_DEV_RISCV_AIA_GUEST_BITS_MAX)
+ return -EINVAL;
+ aia->nr_guest_bits = *nr;
+ } else
+ *nr = aia->nr_guest_bits;
+ break;
+ default:
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int aia_aplic_addr(struct kvm *kvm, u64 *addr, bool write)
+{
+ struct kvm_aia *aia = &kvm->arch.aia;
+
+ if (write) {
+ /* Writes can only be done before irqchip is initialized */
+ if (kvm_riscv_aia_initialized(kvm))
+ return -EBUSY;
+
+ if (*addr & (KVM_DEV_RISCV_APLIC_ALIGN - 1))
+ return -EINVAL;
+
+ aia->aplic_addr = *addr;
+ } else
+ *addr = aia->aplic_addr;
+
+ return 0;
+}
+
+static int aia_imsic_addr(struct kvm *kvm, u64 *addr,
+ unsigned long vcpu_idx, bool write)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_aia *vcpu_aia;
+
+ vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+ if (!vcpu)
+ return -EINVAL;
+ vcpu_aia = &vcpu->arch.aia_context;
+
+ if (write) {
+ /* Writes can only be done before irqchip is initialized */
+ if (kvm_riscv_aia_initialized(kvm))
+ return -EBUSY;
+
+ if (*addr & (KVM_DEV_RISCV_IMSIC_ALIGN - 1))
+ return -EINVAL;
+ }
+
+ mutex_lock(&vcpu->mutex);
+ if (write)
+ vcpu_aia->imsic_addr = *addr;
+ else
+ *addr = vcpu_aia->imsic_addr;
+ mutex_unlock(&vcpu->mutex);
+
+ return 0;
+}
+
+static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
+{
+ u32 h, l;
+ gpa_t mask = 0;
+
+ h = aia->nr_hart_bits + aia->nr_guest_bits +
+ IMSIC_MMIO_PAGE_SHIFT - 1;
+ mask = GENMASK_ULL(h, 0);
+
+ if (aia->nr_group_bits) {
+ h = aia->nr_group_bits + aia->nr_group_shift - 1;
+ l = aia->nr_group_shift;
+ mask |= GENMASK_ULL(h, l);
+ }
+
+ return (addr & ~mask) >> IMSIC_MMIO_PAGE_SHIFT;
+}
+
+static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
+{
+ u32 hart, group = 0;
+
+ hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
+ GENMASK_ULL(aia->nr_hart_bits - 1, 0);
+ if (aia->nr_group_bits)
+ group = (addr >> aia->nr_group_shift) &
+ GENMASK_ULL(aia->nr_group_bits - 1, 0);
+
+ return (group << aia->nr_hart_bits) | hart;
+}
+
+static int aia_init(struct kvm *kvm)
+{
+ int ret, i;
+ unsigned long idx;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_aia *vaia;
+ struct kvm_aia *aia = &kvm->arch.aia;
+ gpa_t base_ppn = KVM_RISCV_AIA_UNDEF_ADDR;
+
+ /* Irqchip can be initialized only once */
+ if (kvm_riscv_aia_initialized(kvm))
+ return -EBUSY;
+
+ /* We might be in the middle of creating a VCPU? */
+ if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
+ return -EBUSY;
+
+ /* Number of sources should be less than or equals number of IDs */
+ if (aia->nr_ids < aia->nr_sources)
+ return -EINVAL;
+
+ /* APLIC base is required for non-zero number of sources */
+ if (aia->nr_sources && aia->aplic_addr == KVM_RISCV_AIA_UNDEF_ADDR)
+ return -EINVAL;
+
+ /* Initialize APLIC */
+ ret = kvm_riscv_aia_aplic_init(kvm);
+ if (ret)
+ return ret;
+
+ /* Iterate over each VCPU */
+ kvm_for_each_vcpu(idx, vcpu, kvm) {
+ vaia = &vcpu->arch.aia_context;
+
+ /* IMSIC base is required */
+ if (vaia->imsic_addr == KVM_RISCV_AIA_UNDEF_ADDR) {
+ ret = -EINVAL;
+ goto fail_cleanup_imsics;
+ }
+
+ /* All IMSICs should have matching base PPN */
+ if (base_ppn == KVM_RISCV_AIA_UNDEF_ADDR)
+ base_ppn = aia_imsic_ppn(aia, vaia->imsic_addr);
+ if (base_ppn != aia_imsic_ppn(aia, vaia->imsic_addr)) {
+ ret = -EINVAL;
+ goto fail_cleanup_imsics;
+ }
+
+ /* Update HART index of the IMSIC based on IMSIC base */
+ vaia->hart_index = aia_imsic_hart_index(aia,
+ vaia->imsic_addr);
+
+ /* Initialize IMSIC for this VCPU */
+ ret = kvm_riscv_vcpu_aia_imsic_init(vcpu);
+ if (ret)
+ goto fail_cleanup_imsics;
+ }
+
+ /* Set the initialized flag */
+ kvm->arch.aia.initialized = true;
+
+ return 0;
+
+fail_cleanup_imsics:
+ for (i = idx - 1; i >= 0; i--) {
+ vcpu = kvm_get_vcpu(kvm, i);
+ if (!vcpu)
+ continue;
+ kvm_riscv_vcpu_aia_imsic_cleanup(vcpu);
+ }
+ kvm_riscv_aia_aplic_cleanup(kvm);
+ return ret;
+}
+
+static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ u32 nr;
+ u64 addr;
+ int nr_vcpus, r = -ENXIO;
+ unsigned long v, type = (unsigned long)attr->attr;
+ void __user *uaddr = (void __user *)(long)attr->addr;
+
+ switch (attr->group) {
+ case KVM_DEV_RISCV_AIA_GRP_CONFIG:
+ if (copy_from_user(&nr, uaddr, sizeof(nr)))
+ return -EFAULT;
+
+ mutex_lock(&dev->kvm->lock);
+ r = aia_config(dev->kvm, type, &nr, true);
+ mutex_unlock(&dev->kvm->lock);
+
+ break;
+
+ case KVM_DEV_RISCV_AIA_GRP_ADDR:
+ if (copy_from_user(&addr, uaddr, sizeof(addr)))
+ return -EFAULT;
+
+ nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
+ mutex_lock(&dev->kvm->lock);
+ if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC)
+ r = aia_aplic_addr(dev->kvm, &addr, true);
+ else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
+ r = aia_imsic_addr(dev->kvm, &addr,
+ type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), true);
+ mutex_unlock(&dev->kvm->lock);
+
+ break;
+
+ case KVM_DEV_RISCV_AIA_GRP_CTRL:
+ switch (type) {
+ case KVM_DEV_RISCV_AIA_CTRL_INIT:
+ mutex_lock(&dev->kvm->lock);
+ r = aia_init(dev->kvm);
+ mutex_unlock(&dev->kvm->lock);
+ break;
+ }
+
+ break;
+ case KVM_DEV_RISCV_AIA_GRP_APLIC:
+ if (copy_from_user(&nr, uaddr, sizeof(nr)))
+ return -EFAULT;
+
+ mutex_lock(&dev->kvm->lock);
+ r = kvm_riscv_aia_aplic_set_attr(dev->kvm, type, nr);
+ mutex_unlock(&dev->kvm->lock);
+
+ break;
+ case KVM_DEV_RISCV_AIA_GRP_IMSIC:
+ if (copy_from_user(&v, uaddr, sizeof(v)))
+ return -EFAULT;
+
+ mutex_lock(&dev->kvm->lock);
+ r = kvm_riscv_aia_imsic_rw_attr(dev->kvm, type, true, &v);
+ mutex_unlock(&dev->kvm->lock);
+
+ break;
+ }
+
+ return r;
+}
+
+static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ u32 nr;
+ u64 addr;
+ int nr_vcpus, r = -ENXIO;
+ void __user *uaddr = (void __user *)(long)attr->addr;
+ unsigned long v, type = (unsigned long)attr->attr;
+
+ switch (attr->group) {
+ case KVM_DEV_RISCV_AIA_GRP_CONFIG:
+ if (copy_from_user(&nr, uaddr, sizeof(nr)))
+ return -EFAULT;
+
+ mutex_lock(&dev->kvm->lock);
+ r = aia_config(dev->kvm, type, &nr, false);
+ mutex_unlock(&dev->kvm->lock);
+ if (r)
+ return r;
+
+ if (copy_to_user(uaddr, &nr, sizeof(nr)))
+ return -EFAULT;
+
+ break;
+ case KVM_DEV_RISCV_AIA_GRP_ADDR:
+ if (copy_from_user(&addr, uaddr, sizeof(addr)))
+ return -EFAULT;
+
+ nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
+ mutex_lock(&dev->kvm->lock);
+ if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC)
+ r = aia_aplic_addr(dev->kvm, &addr, false);
+ else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
+ r = aia_imsic_addr(dev->kvm, &addr,
+ type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), false);
+ mutex_unlock(&dev->kvm->lock);
+ if (r)
+ return r;
+
+ if (copy_to_user(uaddr, &addr, sizeof(addr)))
+ return -EFAULT;
+
+ break;
+ case KVM_DEV_RISCV_AIA_GRP_APLIC:
+ if (copy_from_user(&nr, uaddr, sizeof(nr)))
+ return -EFAULT;
+
+ mutex_lock(&dev->kvm->lock);
+ r = kvm_riscv_aia_aplic_get_attr(dev->kvm, type, &nr);
+ mutex_unlock(&dev->kvm->lock);
+ if (r)
+ return r;
+
+ if (copy_to_user(uaddr, &nr, sizeof(nr)))
+ return -EFAULT;
+
+ break;
+ case KVM_DEV_RISCV_AIA_GRP_IMSIC:
+ if (copy_from_user(&v, uaddr, sizeof(v)))
+ return -EFAULT;
+
+ mutex_lock(&dev->kvm->lock);
+ r = kvm_riscv_aia_imsic_rw_attr(dev->kvm, type, false, &v);
+ mutex_unlock(&dev->kvm->lock);
+ if (r)
+ return r;
+
+ if (copy_to_user(uaddr, &v, sizeof(v)))
+ return -EFAULT;
+
+ break;
+ }
+
+ return r;
+}
+
+static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int nr_vcpus;
+
+ switch (attr->group) {
+ case KVM_DEV_RISCV_AIA_GRP_CONFIG:
+ switch (attr->attr) {
+ case KVM_DEV_RISCV_AIA_CONFIG_MODE:
+ case KVM_DEV_RISCV_AIA_CONFIG_IDS:
+ case KVM_DEV_RISCV_AIA_CONFIG_SRCS:
+ case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS:
+ case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT:
+ case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS:
+ case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS:
+ return 0;
+ }
+ break;
+ case KVM_DEV_RISCV_AIA_GRP_ADDR:
+ nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
+ if (attr->attr == KVM_DEV_RISCV_AIA_ADDR_APLIC)
+ return 0;
+ else if (attr->attr < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
+ return 0;
+ break;
+ case KVM_DEV_RISCV_AIA_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_RISCV_AIA_CTRL_INIT:
+ return 0;
+ }
+ break;
+ case KVM_DEV_RISCV_AIA_GRP_APLIC:
+ return kvm_riscv_aia_aplic_has_attr(dev->kvm, attr->attr);
+ case KVM_DEV_RISCV_AIA_GRP_IMSIC:
+ return kvm_riscv_aia_imsic_has_attr(dev->kvm, attr->attr);
+ }
+
+ return -ENXIO;
+}
+
+struct kvm_device_ops kvm_riscv_aia_device_ops = {
+ .name = "kvm-riscv-aia",
+ .create = aia_create,
+ .destroy = aia_destroy,
+ .set_attr = aia_set_attr,
+ .get_attr = aia_get_attr,
+ .has_attr = aia_has_attr,
+};
+
+int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu)
+{
+ /* Proceed only if AIA was initialized successfully */
+ if (!kvm_riscv_aia_initialized(vcpu->kvm))
+ return 1;
+
+ /* Update the IMSIC HW state before entering guest mode */
+ return kvm_riscv_vcpu_aia_imsic_update(vcpu);
+}
+
+void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
+ struct kvm_vcpu_aia_csr *reset_csr =
+ &vcpu->arch.aia_context.guest_reset_csr;
+
+ if (!kvm_riscv_aia_available())
+ return;
+ memcpy(csr, reset_csr, sizeof(*csr));
+
+ /* Proceed only if AIA was initialized successfully */
+ if (!kvm_riscv_aia_initialized(vcpu->kvm))
+ return;
+
+ /* Reset the IMSIC context */
+ kvm_riscv_vcpu_aia_imsic_reset(vcpu);
+}
+
+int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context;
+
+ if (!kvm_riscv_aia_available())
+ return 0;
+
+ /*
+ * We don't do any memory allocations over here because these
+ * will be done after AIA device is initialized by the user-space.
+ *
+ * Refer, aia_init() implementation for more details.
+ */
+
+ /* Initialize default values in AIA vcpu context */
+ vaia->imsic_addr = KVM_RISCV_AIA_UNDEF_ADDR;
+ vaia->hart_index = vcpu->vcpu_idx;
+
+ return 0;
+}
+
+void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
+{
+ /* Proceed only if AIA was initialized successfully */
+ if (!kvm_riscv_aia_initialized(vcpu->kvm))
+ return;
+
+ /* Cleanup IMSIC context */
+ kvm_riscv_vcpu_aia_imsic_cleanup(vcpu);
+}
+
+int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index,
+ u32 guest_index, u32 iid)
+{
+ unsigned long idx;
+ struct kvm_vcpu *vcpu;
+
+ /* Proceed only if AIA was initialized successfully */
+ if (!kvm_riscv_aia_initialized(kvm))
+ return -EBUSY;
+
+ /* Inject MSI to matching VCPU */
+ kvm_for_each_vcpu(idx, vcpu, kvm) {
+ if (vcpu->arch.aia_context.hart_index == hart_index)
+ return kvm_riscv_vcpu_aia_imsic_inject(vcpu,
+ guest_index,
+ 0, iid);
+ }
+
+ return 0;
+}
+
+int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+ gpa_t tppn, ippn;
+ unsigned long idx;
+ struct kvm_vcpu *vcpu;
+ u32 g, toff, iid = msi->data;
+ struct kvm_aia *aia = &kvm->arch.aia;
+ gpa_t target = (((gpa_t)msi->address_hi) << 32) | msi->address_lo;
+
+ /* Proceed only if AIA was initialized successfully */
+ if (!kvm_riscv_aia_initialized(kvm))
+ return -EBUSY;
+
+ /* Convert target address to target PPN */
+ tppn = target >> IMSIC_MMIO_PAGE_SHIFT;
+
+ /* Extract and clear Guest ID from target PPN */
+ g = tppn & (BIT(aia->nr_guest_bits) - 1);
+ tppn &= ~((gpa_t)(BIT(aia->nr_guest_bits) - 1));
+
+ /* Inject MSI to matching VCPU */
+ kvm_for_each_vcpu(idx, vcpu, kvm) {
+ ippn = vcpu->arch.aia_context.imsic_addr >>
+ IMSIC_MMIO_PAGE_SHIFT;
+ if (ippn == tppn) {
+ toff = target & (IMSIC_MMIO_PAGE_SZ - 1);
+ return kvm_riscv_vcpu_aia_imsic_inject(vcpu, g,
+ toff, iid);
+ }
+ }
+
+ return 0;
+}
+
+int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level)
+{
+ /* Proceed only if AIA was initialized successfully */
+ if (!kvm_riscv_aia_initialized(kvm))
+ return -EBUSY;
+
+ /* Inject interrupt level change in APLIC */
+ return kvm_riscv_aia_aplic_inject(kvm, irq, level);
+}
+
+void kvm_riscv_aia_init_vm(struct kvm *kvm)
+{
+ struct kvm_aia *aia = &kvm->arch.aia;
+
+ if (!kvm_riscv_aia_available())
+ return;
+
+ /*
+ * We don't do any memory allocations over here because these
+ * will be done after AIA device is initialized by the user-space.
+ *
+ * Refer, aia_init() implementation for more details.
+ */
+
+ /* Initialize default values in AIA global context */
+ aia->mode = (kvm_riscv_aia_nr_hgei) ?
+ KVM_DEV_RISCV_AIA_MODE_AUTO : KVM_DEV_RISCV_AIA_MODE_EMUL;
+ aia->nr_ids = kvm_riscv_aia_max_ids - 1;
+ aia->nr_sources = 0;
+ aia->nr_group_bits = 0;
+ aia->nr_group_shift = KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN;
+ aia->nr_hart_bits = 0;
+ aia->nr_guest_bits = 0;
+ aia->aplic_addr = KVM_RISCV_AIA_UNDEF_ADDR;
+}
+
+void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
+{
+ /* Proceed only if AIA was initialized successfully */
+ if (!kvm_riscv_aia_initialized(kvm))
+ return;
+
+ /* Cleanup APLIC context */
+ kvm_riscv_aia_aplic_cleanup(kvm);
+}
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
new file mode 100644
index 000000000000..6cf23b8adb71
--- /dev/null
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -0,0 +1,1084 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ * Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/atomic.h>
+#include <linux/bitmap.h>
+#include <linux/kvm_host.h>
+#include <linux/math.h>
+#include <linux/spinlock.h>
+#include <linux/swab.h>
+#include <kvm/iodev.h>
+#include <asm/csr.h>
+#include <asm/kvm_aia_imsic.h>
+
+#define IMSIC_MAX_EIX (IMSIC_MAX_ID / BITS_PER_TYPE(u64))
+
+struct imsic_mrif_eix {
+ unsigned long eip[BITS_PER_TYPE(u64) / BITS_PER_LONG];
+ unsigned long eie[BITS_PER_TYPE(u64) / BITS_PER_LONG];
+};
+
+struct imsic_mrif {
+ struct imsic_mrif_eix eix[IMSIC_MAX_EIX];
+ unsigned long eithreshold;
+ unsigned long eidelivery;
+};
+
+struct imsic {
+ struct kvm_io_device iodev;
+
+ u32 nr_msis;
+ u32 nr_eix;
+ u32 nr_hw_eix;
+
+ /*
+ * At any point in time, the register state is in
+ * one of the following places:
+ *
+ * 1) Hardware: IMSIC VS-file (vsfile_cpu >= 0)
+ * 2) Software: IMSIC SW-file (vsfile_cpu < 0)
+ */
+
+ /* IMSIC VS-file */
+ rwlock_t vsfile_lock;
+ int vsfile_cpu;
+ int vsfile_hgei;
+ void __iomem *vsfile_va;
+ phys_addr_t vsfile_pa;
+
+ /* IMSIC SW-file */
+ struct imsic_mrif *swfile;
+ phys_addr_t swfile_pa;
+};
+
+#define imsic_vs_csr_read(__c) \
+({ \
+ unsigned long __r; \
+ csr_write(CSR_VSISELECT, __c); \
+ __r = csr_read(CSR_VSIREG); \
+ __r; \
+})
+
+#define imsic_read_switchcase(__ireg) \
+ case __ireg: \
+ return imsic_vs_csr_read(__ireg);
+#define imsic_read_switchcase_2(__ireg) \
+ imsic_read_switchcase(__ireg + 0) \
+ imsic_read_switchcase(__ireg + 1)
+#define imsic_read_switchcase_4(__ireg) \
+ imsic_read_switchcase_2(__ireg + 0) \
+ imsic_read_switchcase_2(__ireg + 2)
+#define imsic_read_switchcase_8(__ireg) \
+ imsic_read_switchcase_4(__ireg + 0) \
+ imsic_read_switchcase_4(__ireg + 4)
+#define imsic_read_switchcase_16(__ireg) \
+ imsic_read_switchcase_8(__ireg + 0) \
+ imsic_read_switchcase_8(__ireg + 8)
+#define imsic_read_switchcase_32(__ireg) \
+ imsic_read_switchcase_16(__ireg + 0) \
+ imsic_read_switchcase_16(__ireg + 16)
+#define imsic_read_switchcase_64(__ireg) \
+ imsic_read_switchcase_32(__ireg + 0) \
+ imsic_read_switchcase_32(__ireg + 32)
+
+static unsigned long imsic_eix_read(int ireg)
+{
+ switch (ireg) {
+ imsic_read_switchcase_64(IMSIC_EIP0)
+ imsic_read_switchcase_64(IMSIC_EIE0)
+ }
+
+ return 0;
+}
+
+#define imsic_vs_csr_swap(__c, __v) \
+({ \
+ unsigned long __r; \
+ csr_write(CSR_VSISELECT, __c); \
+ __r = csr_swap(CSR_VSIREG, __v); \
+ __r; \
+})
+
+#define imsic_swap_switchcase(__ireg, __v) \
+ case __ireg: \
+ return imsic_vs_csr_swap(__ireg, __v);
+#define imsic_swap_switchcase_2(__ireg, __v) \
+ imsic_swap_switchcase(__ireg + 0, __v) \
+ imsic_swap_switchcase(__ireg + 1, __v)
+#define imsic_swap_switchcase_4(__ireg, __v) \
+ imsic_swap_switchcase_2(__ireg + 0, __v) \
+ imsic_swap_switchcase_2(__ireg + 2, __v)
+#define imsic_swap_switchcase_8(__ireg, __v) \
+ imsic_swap_switchcase_4(__ireg + 0, __v) \
+ imsic_swap_switchcase_4(__ireg + 4, __v)
+#define imsic_swap_switchcase_16(__ireg, __v) \
+ imsic_swap_switchcase_8(__ireg + 0, __v) \
+ imsic_swap_switchcase_8(__ireg + 8, __v)
+#define imsic_swap_switchcase_32(__ireg, __v) \
+ imsic_swap_switchcase_16(__ireg + 0, __v) \
+ imsic_swap_switchcase_16(__ireg + 16, __v)
+#define imsic_swap_switchcase_64(__ireg, __v) \
+ imsic_swap_switchcase_32(__ireg + 0, __v) \
+ imsic_swap_switchcase_32(__ireg + 32, __v)
+
+static unsigned long imsic_eix_swap(int ireg, unsigned long val)
+{
+ switch (ireg) {
+ imsic_swap_switchcase_64(IMSIC_EIP0, val)
+ imsic_swap_switchcase_64(IMSIC_EIE0, val)
+ }
+
+ return 0;
+}
+
+#define imsic_vs_csr_write(__c, __v) \
+do { \
+ csr_write(CSR_VSISELECT, __c); \
+ csr_write(CSR_VSIREG, __v); \
+} while (0)
+
+#define imsic_write_switchcase(__ireg, __v) \
+ case __ireg: \
+ imsic_vs_csr_write(__ireg, __v); \
+ break;
+#define imsic_write_switchcase_2(__ireg, __v) \
+ imsic_write_switchcase(__ireg + 0, __v) \
+ imsic_write_switchcase(__ireg + 1, __v)
+#define imsic_write_switchcase_4(__ireg, __v) \
+ imsic_write_switchcase_2(__ireg + 0, __v) \
+ imsic_write_switchcase_2(__ireg + 2, __v)
+#define imsic_write_switchcase_8(__ireg, __v) \
+ imsic_write_switchcase_4(__ireg + 0, __v) \
+ imsic_write_switchcase_4(__ireg + 4, __v)
+#define imsic_write_switchcase_16(__ireg, __v) \
+ imsic_write_switchcase_8(__ireg + 0, __v) \
+ imsic_write_switchcase_8(__ireg + 8, __v)
+#define imsic_write_switchcase_32(__ireg, __v) \
+ imsic_write_switchcase_16(__ireg + 0, __v) \
+ imsic_write_switchcase_16(__ireg + 16, __v)
+#define imsic_write_switchcase_64(__ireg, __v) \
+ imsic_write_switchcase_32(__ireg + 0, __v) \
+ imsic_write_switchcase_32(__ireg + 32, __v)
+
+static void imsic_eix_write(int ireg, unsigned long val)
+{
+ switch (ireg) {
+ imsic_write_switchcase_64(IMSIC_EIP0, val)
+ imsic_write_switchcase_64(IMSIC_EIE0, val)
+ }
+}
+
+#define imsic_vs_csr_set(__c, __v) \
+do { \
+ csr_write(CSR_VSISELECT, __c); \
+ csr_set(CSR_VSIREG, __v); \
+} while (0)
+
+#define imsic_set_switchcase(__ireg, __v) \
+ case __ireg: \
+ imsic_vs_csr_set(__ireg, __v); \
+ break;
+#define imsic_set_switchcase_2(__ireg, __v) \
+ imsic_set_switchcase(__ireg + 0, __v) \
+ imsic_set_switchcase(__ireg + 1, __v)
+#define imsic_set_switchcase_4(__ireg, __v) \
+ imsic_set_switchcase_2(__ireg + 0, __v) \
+ imsic_set_switchcase_2(__ireg + 2, __v)
+#define imsic_set_switchcase_8(__ireg, __v) \
+ imsic_set_switchcase_4(__ireg + 0, __v) \
+ imsic_set_switchcase_4(__ireg + 4, __v)
+#define imsic_set_switchcase_16(__ireg, __v) \
+ imsic_set_switchcase_8(__ireg + 0, __v) \
+ imsic_set_switchcase_8(__ireg + 8, __v)
+#define imsic_set_switchcase_32(__ireg, __v) \
+ imsic_set_switchcase_16(__ireg + 0, __v) \
+ imsic_set_switchcase_16(__ireg + 16, __v)
+#define imsic_set_switchcase_64(__ireg, __v) \
+ imsic_set_switchcase_32(__ireg + 0, __v) \
+ imsic_set_switchcase_32(__ireg + 32, __v)
+
+static void imsic_eix_set(int ireg, unsigned long val)
+{
+ switch (ireg) {
+ imsic_set_switchcase_64(IMSIC_EIP0, val)
+ imsic_set_switchcase_64(IMSIC_EIE0, val)
+ }
+}
+
+static unsigned long imsic_mrif_atomic_rmw(struct imsic_mrif *mrif,
+ unsigned long *ptr,
+ unsigned long new_val,
+ unsigned long wr_mask)
+{
+ unsigned long old_val = 0, tmp = 0;
+
+ __asm__ __volatile__ (
+ "0: lr.w.aq %1, %0\n"
+ " and %2, %1, %3\n"
+ " or %2, %2, %4\n"
+ " sc.w.rl %2, %2, %0\n"
+ " bnez %2, 0b"
+ : "+A" (*ptr), "+r" (old_val), "+r" (tmp)
+ : "r" (~wr_mask), "r" (new_val & wr_mask)
+ : "memory");
+
+ return old_val;
+}
+
+static unsigned long imsic_mrif_atomic_or(struct imsic_mrif *mrif,
+ unsigned long *ptr,
+ unsigned long val)
+{
+ return atomic_long_fetch_or(val, (atomic_long_t *)ptr);
+}
+
+#define imsic_mrif_atomic_write(__mrif, __ptr, __new_val) \
+ imsic_mrif_atomic_rmw(__mrif, __ptr, __new_val, -1UL)
+#define imsic_mrif_atomic_read(__mrif, __ptr) \
+ imsic_mrif_atomic_or(__mrif, __ptr, 0)
+
+static u32 imsic_mrif_topei(struct imsic_mrif *mrif, u32 nr_eix, u32 nr_msis)
+{
+ struct imsic_mrif_eix *eix;
+ u32 i, imin, imax, ei, max_msi;
+ unsigned long eipend[BITS_PER_TYPE(u64) / BITS_PER_LONG];
+ unsigned long eithreshold = imsic_mrif_atomic_read(mrif,
+ &mrif->eithreshold);
+
+ max_msi = (eithreshold && (eithreshold <= nr_msis)) ?
+ eithreshold : nr_msis;
+ for (ei = 0; ei < nr_eix; ei++) {
+ eix = &mrif->eix[ei];
+ eipend[0] = imsic_mrif_atomic_read(mrif, &eix->eie[0]) &
+ imsic_mrif_atomic_read(mrif, &eix->eip[0]);
+#ifdef CONFIG_32BIT
+ eipend[1] = imsic_mrif_atomic_read(mrif, &eix->eie[1]) &
+ imsic_mrif_atomic_read(mrif, &eix->eip[1]);
+ if (!eipend[0] && !eipend[1])
+#else
+ if (!eipend[0])
+#endif
+ continue;
+
+ imin = ei * BITS_PER_TYPE(u64);
+ imax = ((imin + BITS_PER_TYPE(u64)) < max_msi) ?
+ imin + BITS_PER_TYPE(u64) : max_msi;
+ for (i = (!imin) ? 1 : imin; i < imax; i++) {
+ if (test_bit(i - imin, eipend))
+ return (i << TOPEI_ID_SHIFT) | i;
+ }
+ }
+
+ return 0;
+}
+
+static int imsic_mrif_isel_check(u32 nr_eix, unsigned long isel)
+{
+ u32 num = 0;
+
+ switch (isel) {
+ case IMSIC_EIDELIVERY:
+ case IMSIC_EITHRESHOLD:
+ break;
+ case IMSIC_EIP0 ... IMSIC_EIP63:
+ num = isel - IMSIC_EIP0;
+ break;
+ case IMSIC_EIE0 ... IMSIC_EIE63:
+ num = isel - IMSIC_EIE0;
+ break;
+ default:
+ return -ENOENT;
+ }
+#ifndef CONFIG_32BIT
+ if (num & 0x1)
+ return -EINVAL;
+#endif
+ if ((num / 2) >= nr_eix)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int imsic_mrif_rmw(struct imsic_mrif *mrif, u32 nr_eix,
+ unsigned long isel, unsigned long *val,
+ unsigned long new_val, unsigned long wr_mask)
+{
+ bool pend;
+ struct imsic_mrif_eix *eix;
+ unsigned long *ei, num, old_val = 0;
+
+ switch (isel) {
+ case IMSIC_EIDELIVERY:
+ old_val = imsic_mrif_atomic_rmw(mrif, &mrif->eidelivery,
+ new_val, wr_mask & 0x1);
+ break;
+ case IMSIC_EITHRESHOLD:
+ old_val = imsic_mrif_atomic_rmw(mrif, &mrif->eithreshold,
+ new_val, wr_mask & (IMSIC_MAX_ID - 1));
+ break;
+ case IMSIC_EIP0 ... IMSIC_EIP63:
+ case IMSIC_EIE0 ... IMSIC_EIE63:
+ if (isel >= IMSIC_EIP0 && isel <= IMSIC_EIP63) {
+ pend = true;
+ num = isel - IMSIC_EIP0;
+ } else {
+ pend = false;
+ num = isel - IMSIC_EIE0;
+ }
+
+ if ((num / 2) >= nr_eix)
+ return -EINVAL;
+ eix = &mrif->eix[num / 2];
+
+#ifndef CONFIG_32BIT
+ if (num & 0x1)
+ return -EINVAL;
+ ei = (pend) ? &eix->eip[0] : &eix->eie[0];
+#else
+ ei = (pend) ? &eix->eip[num & 0x1] : &eix->eie[num & 0x1];
+#endif
+
+ /* Bit0 of EIP0 or EIE0 is read-only */
+ if (!num)
+ wr_mask &= ~BIT(0);
+
+ old_val = imsic_mrif_atomic_rmw(mrif, ei, new_val, wr_mask);
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ if (val)
+ *val = old_val;
+
+ return 0;
+}
+
+struct imsic_vsfile_read_data {
+ int hgei;
+ u32 nr_eix;
+ bool clear;
+ struct imsic_mrif *mrif;
+};
+
+static void imsic_vsfile_local_read(void *data)
+{
+ u32 i;
+ struct imsic_mrif_eix *eix;
+ struct imsic_vsfile_read_data *idata = data;
+ struct imsic_mrif *mrif = idata->mrif;
+ unsigned long new_hstatus, old_hstatus, old_vsiselect;
+
+ old_vsiselect = csr_read(CSR_VSISELECT);
+ old_hstatus = csr_read(CSR_HSTATUS);
+ new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
+ new_hstatus |= ((unsigned long)idata->hgei) << HSTATUS_VGEIN_SHIFT;
+ csr_write(CSR_HSTATUS, new_hstatus);
+
+ /*
+ * We don't use imsic_mrif_atomic_xyz() functions to store
+ * values in MRIF because imsic_vsfile_read() is always called
+ * with pointer to temporary MRIF on stack.
+ */
+
+ if (idata->clear) {
+ mrif->eidelivery = imsic_vs_csr_swap(IMSIC_EIDELIVERY, 0);
+ mrif->eithreshold = imsic_vs_csr_swap(IMSIC_EITHRESHOLD, 0);
+ for (i = 0; i < idata->nr_eix; i++) {
+ eix = &mrif->eix[i];
+ eix->eip[0] = imsic_eix_swap(IMSIC_EIP0 + i * 2, 0);
+ eix->eie[0] = imsic_eix_swap(IMSIC_EIE0 + i * 2, 0);
+#ifdef CONFIG_32BIT
+ eix->eip[1] = imsic_eix_swap(IMSIC_EIP0 + i * 2 + 1, 0);
+ eix->eie[1] = imsic_eix_swap(IMSIC_EIE0 + i * 2 + 1, 0);
+#endif
+ }
+ } else {
+ mrif->eidelivery = imsic_vs_csr_read(IMSIC_EIDELIVERY);
+ mrif->eithreshold = imsic_vs_csr_read(IMSIC_EITHRESHOLD);
+ for (i = 0; i < idata->nr_eix; i++) {
+ eix = &mrif->eix[i];
+ eix->eip[0] = imsic_eix_read(IMSIC_EIP0 + i * 2);
+ eix->eie[0] = imsic_eix_read(IMSIC_EIE0 + i * 2);
+#ifdef CONFIG_32BIT
+ eix->eip[1] = imsic_eix_read(IMSIC_EIP0 + i * 2 + 1);
+ eix->eie[1] = imsic_eix_read(IMSIC_EIE0 + i * 2 + 1);
+#endif
+ }
+ }
+
+ csr_write(CSR_HSTATUS, old_hstatus);
+ csr_write(CSR_VSISELECT, old_vsiselect);
+}
+
+static void imsic_vsfile_read(int vsfile_hgei, int vsfile_cpu, u32 nr_eix,
+ bool clear, struct imsic_mrif *mrif)
+{
+ struct imsic_vsfile_read_data idata;
+
+ /* We can only read clear if we have a IMSIC VS-file */
+ if (vsfile_cpu < 0 || vsfile_hgei <= 0)
+ return;
+
+ /* We can only read clear on local CPU */
+ idata.hgei = vsfile_hgei;
+ idata.nr_eix = nr_eix;
+ idata.clear = clear;
+ idata.mrif = mrif;
+ on_each_cpu_mask(cpumask_of(vsfile_cpu),
+ imsic_vsfile_local_read, &idata, 1);
+}
+
+struct imsic_vsfile_rw_data {
+ int hgei;
+ int isel;
+ bool write;
+ unsigned long val;
+};
+
+static void imsic_vsfile_local_rw(void *data)
+{
+ struct imsic_vsfile_rw_data *idata = data;
+ unsigned long new_hstatus, old_hstatus, old_vsiselect;
+
+ old_vsiselect = csr_read(CSR_VSISELECT);
+ old_hstatus = csr_read(CSR_HSTATUS);
+ new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
+ new_hstatus |= ((unsigned long)idata->hgei) << HSTATUS_VGEIN_SHIFT;
+ csr_write(CSR_HSTATUS, new_hstatus);
+
+ switch (idata->isel) {
+ case IMSIC_EIDELIVERY:
+ if (idata->write)
+ imsic_vs_csr_write(IMSIC_EIDELIVERY, idata->val);
+ else
+ idata->val = imsic_vs_csr_read(IMSIC_EIDELIVERY);
+ break;
+ case IMSIC_EITHRESHOLD:
+ if (idata->write)
+ imsic_vs_csr_write(IMSIC_EITHRESHOLD, idata->val);
+ else
+ idata->val = imsic_vs_csr_read(IMSIC_EITHRESHOLD);
+ break;
+ case IMSIC_EIP0 ... IMSIC_EIP63:
+ case IMSIC_EIE0 ... IMSIC_EIE63:
+#ifndef CONFIG_32BIT
+ if (idata->isel & 0x1)
+ break;
+#endif
+ if (idata->write)
+ imsic_eix_write(idata->isel, idata->val);
+ else
+ idata->val = imsic_eix_read(idata->isel);
+ break;
+ default:
+ break;
+ }
+
+ csr_write(CSR_HSTATUS, old_hstatus);
+ csr_write(CSR_VSISELECT, old_vsiselect);
+}
+
+static int imsic_vsfile_rw(int vsfile_hgei, int vsfile_cpu, u32 nr_eix,
+ unsigned long isel, bool write,
+ unsigned long *val)
+{
+ int rc;
+ struct imsic_vsfile_rw_data rdata;
+
+ /* We can only access register if we have a IMSIC VS-file */
+ if (vsfile_cpu < 0 || vsfile_hgei <= 0)
+ return -EINVAL;
+
+ /* Check IMSIC register iselect */
+ rc = imsic_mrif_isel_check(nr_eix, isel);
+ if (rc)
+ return rc;
+
+ /* We can only access register on local CPU */
+ rdata.hgei = vsfile_hgei;
+ rdata.isel = isel;
+ rdata.write = write;
+ rdata.val = (write) ? *val : 0;
+ on_each_cpu_mask(cpumask_of(vsfile_cpu),
+ imsic_vsfile_local_rw, &rdata, 1);
+
+ if (!write)
+ *val = rdata.val;
+
+ return 0;
+}
+
+static void imsic_vsfile_local_clear(int vsfile_hgei, u32 nr_eix)
+{
+ u32 i;
+ unsigned long new_hstatus, old_hstatus, old_vsiselect;
+
+ /* We can only zero-out if we have a IMSIC VS-file */
+ if (vsfile_hgei <= 0)
+ return;
+
+ old_vsiselect = csr_read(CSR_VSISELECT);
+ old_hstatus = csr_read(CSR_HSTATUS);
+ new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
+ new_hstatus |= ((unsigned long)vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
+ csr_write(CSR_HSTATUS, new_hstatus);
+
+ imsic_vs_csr_write(IMSIC_EIDELIVERY, 0);
+ imsic_vs_csr_write(IMSIC_EITHRESHOLD, 0);
+ for (i = 0; i < nr_eix; i++) {
+ imsic_eix_write(IMSIC_EIP0 + i * 2, 0);
+ imsic_eix_write(IMSIC_EIE0 + i * 2, 0);
+#ifdef CONFIG_32BIT
+ imsic_eix_write(IMSIC_EIP0 + i * 2 + 1, 0);
+ imsic_eix_write(IMSIC_EIE0 + i * 2 + 1, 0);
+#endif
+ }
+
+ csr_write(CSR_HSTATUS, old_hstatus);
+ csr_write(CSR_VSISELECT, old_vsiselect);
+}
+
+static void imsic_vsfile_local_update(int vsfile_hgei, u32 nr_eix,
+ struct imsic_mrif *mrif)
+{
+ u32 i;
+ struct imsic_mrif_eix *eix;
+ unsigned long new_hstatus, old_hstatus, old_vsiselect;
+
+ /* We can only update if we have a HW IMSIC context */
+ if (vsfile_hgei <= 0)
+ return;
+
+ /*
+ * We don't use imsic_mrif_atomic_xyz() functions to read values
+ * from MRIF in this function because it is always called with
+ * pointer to temporary MRIF on stack.
+ */
+
+ old_vsiselect = csr_read(CSR_VSISELECT);
+ old_hstatus = csr_read(CSR_HSTATUS);
+ new_hstatus = old_hstatus & ~HSTATUS_VGEIN;
+ new_hstatus |= ((unsigned long)vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
+ csr_write(CSR_HSTATUS, new_hstatus);
+
+ for (i = 0; i < nr_eix; i++) {
+ eix = &mrif->eix[i];
+ imsic_eix_set(IMSIC_EIP0 + i * 2, eix->eip[0]);
+ imsic_eix_set(IMSIC_EIE0 + i * 2, eix->eie[0]);
+#ifdef CONFIG_32BIT
+ imsic_eix_set(IMSIC_EIP0 + i * 2 + 1, eix->eip[1]);
+ imsic_eix_set(IMSIC_EIE0 + i * 2 + 1, eix->eie[1]);
+#endif
+ }
+ imsic_vs_csr_write(IMSIC_EITHRESHOLD, mrif->eithreshold);
+ imsic_vs_csr_write(IMSIC_EIDELIVERY, mrif->eidelivery);
+
+ csr_write(CSR_HSTATUS, old_hstatus);
+ csr_write(CSR_VSISELECT, old_vsiselect);
+}
+
+static void imsic_vsfile_cleanup(struct imsic *imsic)
+{
+ int old_vsfile_hgei, old_vsfile_cpu;
+ unsigned long flags;
+
+ /*
+ * We don't use imsic_mrif_atomic_xyz() functions to clear the
+ * SW-file in this function because it is always called when the
+ * VCPU is being destroyed.
+ */
+
+ write_lock_irqsave(&imsic->vsfile_lock, flags);
+ old_vsfile_hgei = imsic->vsfile_hgei;
+ old_vsfile_cpu = imsic->vsfile_cpu;
+ imsic->vsfile_cpu = imsic->vsfile_hgei = -1;
+ imsic->vsfile_va = NULL;
+ imsic->vsfile_pa = 0;
+ write_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ memset(imsic->swfile, 0, sizeof(*imsic->swfile));
+
+ if (old_vsfile_cpu >= 0)
+ kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
+}
+
+static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+ struct imsic_mrif *mrif = imsic->swfile;
+
+ if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) &&
+ imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis))
+ kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
+ else
+ kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
+}
+
+static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear,
+ struct imsic_mrif *mrif)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ /*
+ * We don't use imsic_mrif_atomic_xyz() functions to read and
+ * write SW-file and MRIF in this function because it is always
+ * called when VCPU is not using SW-file and the MRIF points to
+ * a temporary MRIF on stack.
+ */
+
+ memcpy(mrif, imsic->swfile, sizeof(*mrif));
+ if (clear) {
+ memset(imsic->swfile, 0, sizeof(*imsic->swfile));
+ kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
+ }
+}
+
+static void imsic_swfile_update(struct kvm_vcpu *vcpu,
+ struct imsic_mrif *mrif)
+{
+ u32 i;
+ struct imsic_mrif_eix *seix, *eix;
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+ struct imsic_mrif *smrif = imsic->swfile;
+
+ imsic_mrif_atomic_write(smrif, &smrif->eidelivery, mrif->eidelivery);
+ imsic_mrif_atomic_write(smrif, &smrif->eithreshold, mrif->eithreshold);
+ for (i = 0; i < imsic->nr_eix; i++) {
+ seix = &smrif->eix[i];
+ eix = &mrif->eix[i];
+ imsic_mrif_atomic_or(smrif, &seix->eip[0], eix->eip[0]);
+ imsic_mrif_atomic_or(smrif, &seix->eie[0], eix->eie[0]);
+#ifdef CONFIG_32BIT
+ imsic_mrif_atomic_or(smrif, &seix->eip[1], eix->eip[1]);
+ imsic_mrif_atomic_or(smrif, &seix->eie[1], eix->eie[1]);
+#endif
+ }
+
+ imsic_swfile_extirq_update(vcpu);
+}
+
+void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags;
+ struct imsic_mrif tmrif;
+ int old_vsfile_hgei, old_vsfile_cpu;
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ /* Read and clear IMSIC VS-file details */
+ write_lock_irqsave(&imsic->vsfile_lock, flags);
+ old_vsfile_hgei = imsic->vsfile_hgei;
+ old_vsfile_cpu = imsic->vsfile_cpu;
+ imsic->vsfile_cpu = imsic->vsfile_hgei = -1;
+ imsic->vsfile_va = NULL;
+ imsic->vsfile_pa = 0;
+ write_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ /* Do nothing, if no IMSIC VS-file to release */
+ if (old_vsfile_cpu < 0)
+ return;
+
+ /*
+ * At this point, all interrupt producers are still using
+ * the old IMSIC VS-file so we first re-direct all interrupt
+ * producers.
+ */
+
+ /* Purge the G-stage mapping */
+ kvm_riscv_gstage_iounmap(vcpu->kvm,
+ vcpu->arch.aia_context.imsic_addr,
+ IMSIC_MMIO_PAGE_SZ);
+
+ /* TODO: Purge the IOMMU mapping ??? */
+
+ /*
+ * At this point, all interrupt producers have been re-directed
+ * to somewhere else so we move register state from the old IMSIC
+ * VS-file to the IMSIC SW-file.
+ */
+
+ /* Read and clear register state from old IMSIC VS-file */
+ memset(&tmrif, 0, sizeof(tmrif));
+ imsic_vsfile_read(old_vsfile_hgei, old_vsfile_cpu, imsic->nr_hw_eix,
+ true, &tmrif);
+
+ /* Update register state in IMSIC SW-file */
+ imsic_swfile_update(vcpu, &tmrif);
+
+ /* Free-up old IMSIC VS-file */
+ kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
+}
+
+int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
+{
+ unsigned long flags;
+ phys_addr_t new_vsfile_pa;
+ struct imsic_mrif tmrif;
+ void __iomem *new_vsfile_va;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context;
+ struct imsic *imsic = vaia->imsic_state;
+ int ret = 0, new_vsfile_hgei = -1, old_vsfile_hgei, old_vsfile_cpu;
+
+ /* Do nothing for emulation mode */
+ if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_EMUL)
+ return 1;
+
+ /* Read old IMSIC VS-file details */
+ read_lock_irqsave(&imsic->vsfile_lock, flags);
+ old_vsfile_hgei = imsic->vsfile_hgei;
+ old_vsfile_cpu = imsic->vsfile_cpu;
+ read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ /* Do nothing if we are continuing on same CPU */
+ if (old_vsfile_cpu == vcpu->cpu)
+ return 1;
+
+ /* Allocate new IMSIC VS-file */
+ ret = kvm_riscv_aia_alloc_hgei(vcpu->cpu, vcpu,
+ &new_vsfile_va, &new_vsfile_pa);
+ if (ret <= 0) {
+ /* For HW acceleration mode, we can't continue */
+ if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_HWACCEL) {
+ run->fail_entry.hardware_entry_failure_reason =
+ CSR_HSTATUS;
+ run->fail_entry.cpu = vcpu->cpu;
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ return 0;
+ }
+
+ /* Release old IMSIC VS-file */
+ if (old_vsfile_cpu >= 0)
+ kvm_riscv_vcpu_aia_imsic_release(vcpu);
+
+ /* For automatic mode, we continue */
+ goto done;
+ }
+ new_vsfile_hgei = ret;
+
+ /*
+ * At this point, all interrupt producers are still using
+ * to the old IMSIC VS-file so we first move all interrupt
+ * producers to the new IMSIC VS-file.
+ */
+
+ /* Zero-out new IMSIC VS-file */
+ imsic_vsfile_local_clear(new_vsfile_hgei, imsic->nr_hw_eix);
+
+ /* Update G-stage mapping for the new IMSIC VS-file */
+ ret = kvm_riscv_gstage_ioremap(kvm, vcpu->arch.aia_context.imsic_addr,
+ new_vsfile_pa, IMSIC_MMIO_PAGE_SZ,
+ true, true);
+ if (ret)
+ goto fail_free_vsfile_hgei;
+
+ /* TODO: Update the IOMMU mapping ??? */
+
+ /* Update new IMSIC VS-file details in IMSIC context */
+ write_lock_irqsave(&imsic->vsfile_lock, flags);
+ imsic->vsfile_hgei = new_vsfile_hgei;
+ imsic->vsfile_cpu = vcpu->cpu;
+ imsic->vsfile_va = new_vsfile_va;
+ imsic->vsfile_pa = new_vsfile_pa;
+ write_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ /*
+ * At this point, all interrupt producers have been moved
+ * to the new IMSIC VS-file so we move register state from
+ * the old IMSIC VS/SW-file to the new IMSIC VS-file.
+ */
+
+ memset(&tmrif, 0, sizeof(tmrif));
+ if (old_vsfile_cpu >= 0) {
+ /* Read and clear register state from old IMSIC VS-file */
+ imsic_vsfile_read(old_vsfile_hgei, old_vsfile_cpu,
+ imsic->nr_hw_eix, true, &tmrif);
+
+ /* Free-up old IMSIC VS-file */
+ kvm_riscv_aia_free_hgei(old_vsfile_cpu, old_vsfile_hgei);
+ } else {
+ /* Read and clear register state from IMSIC SW-file */
+ imsic_swfile_read(vcpu, true, &tmrif);
+ }
+
+ /* Restore register state in the new IMSIC VS-file */
+ imsic_vsfile_local_update(new_vsfile_hgei, imsic->nr_hw_eix, &tmrif);
+
+done:
+ /* Set VCPU HSTATUS.VGEIN to new IMSIC VS-file */
+ vcpu->arch.guest_context.hstatus &= ~HSTATUS_VGEIN;
+ if (new_vsfile_hgei > 0)
+ vcpu->arch.guest_context.hstatus |=
+ ((unsigned long)new_vsfile_hgei) << HSTATUS_VGEIN_SHIFT;
+
+ /* Continue run-loop */
+ return 1;
+
+fail_free_vsfile_hgei:
+ kvm_riscv_aia_free_hgei(vcpu->cpu, new_vsfile_hgei);
+ return ret;
+}
+
+int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel,
+ unsigned long *val, unsigned long new_val,
+ unsigned long wr_mask)
+{
+ u32 topei;
+ struct imsic_mrif_eix *eix;
+ int r, rc = KVM_INSN_CONTINUE_NEXT_SEPC;
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ if (isel == KVM_RISCV_AIA_IMSIC_TOPEI) {
+ /* Read pending and enabled interrupt with highest priority */
+ topei = imsic_mrif_topei(imsic->swfile, imsic->nr_eix,
+ imsic->nr_msis);
+ if (val)
+ *val = topei;
+
+ /* Writes ignore value and clear top pending interrupt */
+ if (topei && wr_mask) {
+ topei >>= TOPEI_ID_SHIFT;
+ if (topei) {
+ eix = &imsic->swfile->eix[topei /
+ BITS_PER_TYPE(u64)];
+ clear_bit(topei & (BITS_PER_TYPE(u64) - 1),
+ eix->eip);
+ }
+ }
+ } else {
+ r = imsic_mrif_rmw(imsic->swfile, imsic->nr_eix, isel,
+ val, new_val, wr_mask);
+ /* Forward unknown IMSIC register to user-space */
+ if (r)
+ rc = (r == -ENOENT) ? 0 : KVM_INSN_ILLEGAL_TRAP;
+ }
+
+ if (wr_mask)
+ imsic_swfile_extirq_update(vcpu);
+
+ return rc;
+}
+
+int kvm_riscv_aia_imsic_rw_attr(struct kvm *kvm, unsigned long type,
+ bool write, unsigned long *val)
+{
+ u32 isel, vcpu_id;
+ unsigned long flags;
+ struct imsic *imsic;
+ struct kvm_vcpu *vcpu;
+ int rc, vsfile_hgei, vsfile_cpu;
+
+ if (!kvm_riscv_aia_initialized(kvm))
+ return -ENODEV;
+
+ vcpu_id = KVM_DEV_RISCV_AIA_IMSIC_GET_VCPU(type);
+ vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+ if (!vcpu)
+ return -ENODEV;
+
+ isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
+ imsic = vcpu->arch.aia_context.imsic_state;
+
+ read_lock_irqsave(&imsic->vsfile_lock, flags);
+
+ rc = 0;
+ vsfile_hgei = imsic->vsfile_hgei;
+ vsfile_cpu = imsic->vsfile_cpu;
+ if (vsfile_cpu < 0) {
+ if (write) {
+ rc = imsic_mrif_rmw(imsic->swfile, imsic->nr_eix,
+ isel, NULL, *val, -1UL);
+ imsic_swfile_extirq_update(vcpu);
+ } else
+ rc = imsic_mrif_rmw(imsic->swfile, imsic->nr_eix,
+ isel, val, 0, 0);
+ }
+
+ read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ if (!rc && vsfile_cpu >= 0)
+ rc = imsic_vsfile_rw(vsfile_hgei, vsfile_cpu, imsic->nr_eix,
+ isel, write, val);
+
+ return rc;
+}
+
+int kvm_riscv_aia_imsic_has_attr(struct kvm *kvm, unsigned long type)
+{
+ u32 isel, vcpu_id;
+ struct imsic *imsic;
+ struct kvm_vcpu *vcpu;
+
+ if (!kvm_riscv_aia_initialized(kvm))
+ return -ENODEV;
+
+ vcpu_id = KVM_DEV_RISCV_AIA_IMSIC_GET_VCPU(type);
+ vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+ if (!vcpu)
+ return -ENODEV;
+
+ isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
+ imsic = vcpu->arch.aia_context.imsic_state;
+ return imsic_mrif_isel_check(imsic->nr_eix, isel);
+}
+
+void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu)
+{
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ if (!imsic)
+ return;
+
+ kvm_riscv_vcpu_aia_imsic_release(vcpu);
+
+ memset(imsic->swfile, 0, sizeof(*imsic->swfile));
+}
+
+int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
+ u32 guest_index, u32 offset, u32 iid)
+{
+ unsigned long flags;
+ struct imsic_mrif_eix *eix;
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ /* We only emulate one IMSIC MMIO page for each Guest VCPU */
+ if (!imsic || !iid || guest_index ||
+ (offset != IMSIC_MMIO_SETIPNUM_LE &&
+ offset != IMSIC_MMIO_SETIPNUM_BE))
+ return -ENODEV;
+
+ iid = (offset == IMSIC_MMIO_SETIPNUM_BE) ? __swab32(iid) : iid;
+ if (imsic->nr_msis <= iid)
+ return -EINVAL;
+
+ read_lock_irqsave(&imsic->vsfile_lock, flags);
+
+ if (imsic->vsfile_cpu >= 0) {
+ writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE);
+ kvm_vcpu_kick(vcpu);
+ } else {
+ eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)];
+ set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip);
+ imsic_swfile_extirq_update(vcpu);
+ }
+
+ read_unlock_irqrestore(&imsic->vsfile_lock, flags);
+
+ return 0;
+}
+
+static int imsic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, void *val)
+{
+ if (len != 4 || (addr & 0x3) != 0)
+ return -EOPNOTSUPP;
+
+ *((u32 *)val) = 0;
+
+ return 0;
+}
+
+static int imsic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, const void *val)
+{
+ struct kvm_msi msi = { 0 };
+
+ if (len != 4 || (addr & 0x3) != 0)
+ return -EOPNOTSUPP;
+
+ msi.address_hi = addr >> 32;
+ msi.address_lo = (u32)addr;
+ msi.data = *((const u32 *)val);
+ kvm_riscv_aia_inject_msi(vcpu->kvm, &msi);
+
+ return 0;
+};
+
+static struct kvm_io_device_ops imsic_iodoev_ops = {
+ .read = imsic_mmio_read,
+ .write = imsic_mmio_write,
+};
+
+int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
+{
+ int ret = 0;
+ struct imsic *imsic;
+ struct page *swfile_page;
+ struct kvm *kvm = vcpu->kvm;
+
+ /* Fail if we have zero IDs */
+ if (!kvm->arch.aia.nr_ids)
+ return -EINVAL;
+
+ /* Allocate IMSIC context */
+ imsic = kzalloc(sizeof(*imsic), GFP_KERNEL);
+ if (!imsic)
+ return -ENOMEM;
+ vcpu->arch.aia_context.imsic_state = imsic;
+
+ /* Setup IMSIC context */
+ imsic->nr_msis = kvm->arch.aia.nr_ids + 1;
+ rwlock_init(&imsic->vsfile_lock);
+ imsic->nr_eix = BITS_TO_U64(imsic->nr_msis);
+ imsic->nr_hw_eix = BITS_TO_U64(kvm_riscv_aia_max_ids);
+ imsic->vsfile_hgei = imsic->vsfile_cpu = -1;
+
+ /* Setup IMSIC SW-file */
+ swfile_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(sizeof(*imsic->swfile)));
+ if (!swfile_page) {
+ ret = -ENOMEM;
+ goto fail_free_imsic;
+ }
+ imsic->swfile = page_to_virt(swfile_page);
+ imsic->swfile_pa = page_to_phys(swfile_page);
+
+ /* Setup IO device */
+ kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops);
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+ vcpu->arch.aia_context.imsic_addr,
+ KVM_DEV_RISCV_IMSIC_SIZE,
+ &imsic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+ if (ret)
+ goto fail_free_swfile;
+
+ return 0;
+
+fail_free_swfile:
+ free_pages((unsigned long)imsic->swfile,
+ get_order(sizeof(*imsic->swfile)));
+fail_free_imsic:
+ vcpu->arch.aia_context.imsic_state = NULL;
+ kfree(imsic);
+ return ret;
+}
+
+void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct imsic *imsic = vcpu->arch.aia_context.imsic_state;
+
+ if (!imsic)
+ return;
+
+ imsic_vsfile_cleanup(imsic);
+
+ mutex_lock(&kvm->slots_lock);
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &imsic->iodev);
+ mutex_unlock(&kvm->slots_lock);
+
+ free_pages((unsigned long)imsic->swfile,
+ get_order(sizeof(*imsic->swfile)));
+
+ vcpu->arch.aia_context.imsic_state = NULL;
+ kfree(imsic);
+}
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index a7112d583637..48ae0d4b3932 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -116,7 +116,8 @@ static int __init riscv_kvm_init(void)
kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());
if (kvm_riscv_aia_available())
- kvm_info("AIA available\n");
+ kvm_info("AIA available with %d guest external interrupts\n",
+ kvm_riscv_aia_nr_hgei);
rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
if (rc) {
diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c
index 0e5479600695..44bc324aeeb0 100644
--- a/arch/riscv/kvm/tlb.c
+++ b/arch/riscv/kvm/tlb.c
@@ -296,7 +296,7 @@ static void make_xfence_request(struct kvm *kvm,
unsigned int actual_req = req;
DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
- bitmap_clear(vcpu_mask, 0, KVM_MAX_VCPUS);
+ bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
kvm_for_each_vcpu(i, vcpu, kvm) {
if (hbase != -1UL) {
if (vcpu->vcpu_id < hbase)
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 8bd9f2a8a0b9..d12ef99901fc 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -22,6 +22,8 @@
#include <asm/cacheflush.h>
#include <asm/hwcap.h>
#include <asm/sbi.h>
+#include <asm/vector.h>
+#include <asm/kvm_vcpu_vector.h>
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
KVM_GENERIC_VCPU_STATS(),
@@ -57,10 +59,12 @@ static const unsigned long kvm_isa_ext_arr[] = {
[KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
[KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
+ [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
KVM_ISA_EXT_ARR(SSAIA),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVINVAL),
+ KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(ZBB),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
@@ -85,6 +89,8 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
switch (ext) {
case KVM_RISCV_ISA_EXT_H:
return false;
+ case KVM_RISCV_ISA_EXT_V:
+ return riscv_v_vstate_ctrl_user_allowed();
default:
break;
}
@@ -102,6 +108,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_SSAIA:
case KVM_RISCV_ISA_EXT_SSTC:
case KVM_RISCV_ISA_EXT_SVINVAL:
+ case KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_RISCV_ISA_EXT_ZBB:
return false;
@@ -138,6 +145,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_riscv_vcpu_fp_reset(vcpu);
+ kvm_riscv_vcpu_vector_reset(vcpu);
+
kvm_riscv_vcpu_timer_reset(vcpu);
kvm_riscv_vcpu_aia_reset(vcpu);
@@ -198,6 +207,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
cntx->hstatus |= HSTATUS_SPVP;
cntx->hstatus |= HSTATUS_SPV;
+ if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx))
+ return -ENOMEM;
+
/* By default, make CY, TM, and IR counters accessible in VU mode */
reset_csr->scounteren = 0x7;
@@ -241,6 +253,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
/* Free unused pages pre-allocated for G-stage page table mappings */
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+
+ /* Free vector context space for host and guest kernel */
+ kvm_riscv_vcpu_free_vector_context(vcpu);
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -250,10 +265,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
+ kvm_riscv_aia_wakeon_hgei(vcpu, true);
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
+ kvm_riscv_aia_wakeon_hgei(vcpu, false);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
@@ -679,6 +696,9 @@ static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
case KVM_REG_RISCV_SBI_EXT:
return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
+ case KVM_REG_RISCV_VECTOR:
+ return kvm_riscv_vcpu_set_reg_vector(vcpu, reg,
+ KVM_REG_RISCV_VECTOR);
default:
break;
}
@@ -708,6 +728,9 @@ static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
case KVM_REG_RISCV_SBI_EXT:
return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
+ case KVM_REG_RISCV_VECTOR:
+ return kvm_riscv_vcpu_get_reg_vector(vcpu, reg,
+ KVM_REG_RISCV_VECTOR);
default:
break;
}
@@ -1002,6 +1025,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
vcpu->arch.isa);
+ kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context);
+ kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context,
+ vcpu->arch.isa);
kvm_riscv_vcpu_aia_load(vcpu, cpu);
@@ -1021,6 +1047,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);
kvm_riscv_vcpu_timer_save(vcpu);
+ kvm_riscv_vcpu_guest_vector_save(&vcpu->arch.guest_context,
+ vcpu->arch.isa);
+ kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context);
csr->vsstatus = csr_read(CSR_VSSTATUS);
csr->vsie = csr_read(CSR_VSIE);
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 4ea101a73d8b..2415722c01b8 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -183,6 +183,8 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
run->exit_reason = KVM_EXIT_UNKNOWN;
switch (trap->scause) {
case EXC_INST_ILLEGAL:
+ case EXC_LOAD_MISALIGNED:
+ case EXC_STORE_MISALIGNED:
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) {
kvm_riscv_vcpu_trap_redirect(vcpu, trap);
ret = 1;
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index e52fde504433..7b46e04fb667 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -20,9 +20,7 @@ static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
};
#endif
-#ifdef CONFIG_RISCV_PMU_SBI
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu;
-#else
+#ifndef CONFIG_RISCV_PMU_SBI
static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = {
.extid_start = -1UL,
.extid_end = -1UL,
@@ -31,49 +29,49 @@ static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = {
#endif
struct kvm_riscv_sbi_extension_entry {
- enum KVM_RISCV_SBI_EXT_ID dis_idx;
+ enum KVM_RISCV_SBI_EXT_ID ext_idx;
const struct kvm_vcpu_sbi_extension *ext_ptr;
};
static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
{
- .dis_idx = KVM_RISCV_SBI_EXT_V01,
+ .ext_idx = KVM_RISCV_SBI_EXT_V01,
.ext_ptr = &vcpu_sbi_ext_v01,
},
{
- .dis_idx = KVM_RISCV_SBI_EXT_MAX, /* Can't be disabled */
+ .ext_idx = KVM_RISCV_SBI_EXT_MAX, /* Can't be disabled */
.ext_ptr = &vcpu_sbi_ext_base,
},
{
- .dis_idx = KVM_RISCV_SBI_EXT_TIME,
+ .ext_idx = KVM_RISCV_SBI_EXT_TIME,
.ext_ptr = &vcpu_sbi_ext_time,
},
{
- .dis_idx = KVM_RISCV_SBI_EXT_IPI,
+ .ext_idx = KVM_RISCV_SBI_EXT_IPI,
.ext_ptr = &vcpu_sbi_ext_ipi,
},
{
- .dis_idx = KVM_RISCV_SBI_EXT_RFENCE,
+ .ext_idx = KVM_RISCV_SBI_EXT_RFENCE,
.ext_ptr = &vcpu_sbi_ext_rfence,
},
{
- .dis_idx = KVM_RISCV_SBI_EXT_SRST,
+ .ext_idx = KVM_RISCV_SBI_EXT_SRST,
.ext_ptr = &vcpu_sbi_ext_srst,
},
{
- .dis_idx = KVM_RISCV_SBI_EXT_HSM,
+ .ext_idx = KVM_RISCV_SBI_EXT_HSM,
.ext_ptr = &vcpu_sbi_ext_hsm,
},
{
- .dis_idx = KVM_RISCV_SBI_EXT_PMU,
+ .ext_idx = KVM_RISCV_SBI_EXT_PMU,
.ext_ptr = &vcpu_sbi_ext_pmu,
},
{
- .dis_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL,
+ .ext_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL,
.ext_ptr = &vcpu_sbi_ext_experimental,
},
{
- .dis_idx = KVM_RISCV_SBI_EXT_VENDOR,
+ .ext_idx = KVM_RISCV_SBI_EXT_VENDOR,
.ext_ptr = &vcpu_sbi_ext_vendor,
},
};
@@ -147,7 +145,7 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu,
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
- if (sbi_ext[i].dis_idx == reg_num) {
+ if (sbi_ext[i].ext_idx == reg_num) {
sext = &sbi_ext[i];
break;
}
@@ -155,7 +153,15 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu,
if (!sext)
return -ENOENT;
- scontext->extension_disabled[sext->dis_idx] = !reg_val;
+ /*
+ * We can't set the extension status to available here, since it may
+ * have a probe() function which needs to confirm availability first,
+ * but it may be too early to call that here. We can set the status to
+ * unavailable, though.
+ */
+ if (!reg_val)
+ scontext->ext_status[sext->ext_idx] =
+ KVM_RISCV_SBI_EXT_UNAVAILABLE;
return 0;
}
@@ -172,7 +178,7 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu,
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
- if (sbi_ext[i].dis_idx == reg_num) {
+ if (sbi_ext[i].ext_idx == reg_num) {
sext = &sbi_ext[i];
break;
}
@@ -180,7 +186,15 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu,
if (!sext)
return -ENOENT;
- *reg_val = !scontext->extension_disabled[sext->dis_idx];
+ /*
+ * If the extension status is still uninitialized, then we should probe
+ * to determine if it's available, but it may be too early to do that
+ * here. The best we can do is report that the extension has not been
+ * disabled, i.e. we return 1 when the extension is available and also
+ * when it only may be available.
+ */
+ *reg_val = scontext->ext_status[sext->ext_idx] !=
+ KVM_RISCV_SBI_EXT_UNAVAILABLE;
return 0;
}
@@ -307,18 +321,32 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
struct kvm_vcpu *vcpu, unsigned long extid)
{
- int i;
- const struct kvm_riscv_sbi_extension_entry *sext;
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
+ const struct kvm_riscv_sbi_extension_entry *entry;
+ const struct kvm_vcpu_sbi_extension *ext;
+ int i;
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
- sext = &sbi_ext[i];
- if (sext->ext_ptr->extid_start <= extid &&
- sext->ext_ptr->extid_end >= extid) {
- if (sext->dis_idx < KVM_RISCV_SBI_EXT_MAX &&
- scontext->extension_disabled[sext->dis_idx])
+ entry = &sbi_ext[i];
+ ext = entry->ext_ptr;
+
+ if (ext->extid_start <= extid && ext->extid_end >= extid) {
+ if (entry->ext_idx >= KVM_RISCV_SBI_EXT_MAX ||
+ scontext->ext_status[entry->ext_idx] ==
+ KVM_RISCV_SBI_EXT_AVAILABLE)
+ return ext;
+ if (scontext->ext_status[entry->ext_idx] ==
+ KVM_RISCV_SBI_EXT_UNAVAILABLE)
return NULL;
- return sbi_ext[i].ext_ptr;
+ if (ext->probe && !ext->probe(vcpu)) {
+ scontext->ext_status[entry->ext_idx] =
+ KVM_RISCV_SBI_EXT_UNAVAILABLE;
+ return NULL;
+ }
+
+ scontext->ext_status[entry->ext_idx] =
+ KVM_RISCV_SBI_EXT_AVAILABLE;
+ return ext;
}
}
diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c
new file mode 100644
index 000000000000..edd2eecbddc2
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_vector.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 SiFive
+ *
+ * Authors:
+ * Vincent Chen <vincent.chen@sifive.com>
+ * Greentime Hu <greentime.hu@sifive.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <asm/hwcap.h>
+#include <asm/kvm_vcpu_vector.h>
+#include <asm/vector.h>
+
+#ifdef CONFIG_RISCV_ISA_V
+void kvm_riscv_vcpu_vector_reset(struct kvm_vcpu *vcpu)
+{
+ unsigned long *isa = vcpu->arch.isa;
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+ cntx->sstatus &= ~SR_VS;
+ if (riscv_isa_extension_available(isa, v)) {
+ cntx->sstatus |= SR_VS_INITIAL;
+ WARN_ON(!cntx->vector.datap);
+ memset(cntx->vector.datap, 0, riscv_v_vsize);
+ } else {
+ cntx->sstatus |= SR_VS_OFF;
+ }
+}
+
+static void kvm_riscv_vcpu_vector_clean(struct kvm_cpu_context *cntx)
+{
+ cntx->sstatus &= ~SR_VS;
+ cntx->sstatus |= SR_VS_CLEAN;
+}
+
+void kvm_riscv_vcpu_guest_vector_save(struct kvm_cpu_context *cntx,
+ unsigned long *isa)
+{
+ if ((cntx->sstatus & SR_VS) == SR_VS_DIRTY) {
+ if (riscv_isa_extension_available(isa, v))
+ __kvm_riscv_vector_save(cntx);
+ kvm_riscv_vcpu_vector_clean(cntx);
+ }
+}
+
+void kvm_riscv_vcpu_guest_vector_restore(struct kvm_cpu_context *cntx,
+ unsigned long *isa)
+{
+ if ((cntx->sstatus & SR_VS) != SR_VS_OFF) {
+ if (riscv_isa_extension_available(isa, v))
+ __kvm_riscv_vector_restore(cntx);
+ kvm_riscv_vcpu_vector_clean(cntx);
+ }
+}
+
+void kvm_riscv_vcpu_host_vector_save(struct kvm_cpu_context *cntx)
+{
+ /* No need to check host sstatus as it can be modified outside */
+ if (riscv_isa_extension_available(NULL, v))
+ __kvm_riscv_vector_save(cntx);
+}
+
+void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx)
+{
+ if (riscv_isa_extension_available(NULL, v))
+ __kvm_riscv_vector_restore(cntx);
+}
+
+int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu,
+ struct kvm_cpu_context *cntx)
+{
+ cntx->vector.datap = kmalloc(riscv_v_vsize, GFP_KERNEL);
+ if (!cntx->vector.datap)
+ return -ENOMEM;
+
+ vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
+ if (!vcpu->arch.host_context.vector.datap)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu)
+{
+ kfree(vcpu->arch.guest_reset_context.vector.datap);
+ kfree(vcpu->arch.host_context.vector.datap);
+}
+#endif
+
+static void *kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ size_t reg_size)
+{
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ void *reg_val;
+ size_t vlenb = riscv_v_vsize / 32;
+
+ if (reg_num < KVM_REG_RISCV_VECTOR_REG(0)) {
+ if (reg_size != sizeof(unsigned long))
+ return NULL;
+ switch (reg_num) {
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
+ reg_val = &cntx->vector.vstart;
+ break;
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
+ reg_val = &cntx->vector.vl;
+ break;
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
+ reg_val = &cntx->vector.vtype;
+ break;
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
+ reg_val = &cntx->vector.vcsr;
+ break;
+ case KVM_REG_RISCV_VECTOR_CSR_REG(datap):
+ default:
+ return NULL;
+ }
+ } else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) {
+ if (reg_size != vlenb)
+ return NULL;
+ reg_val = cntx->vector.datap
+ + (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
+ } else {
+ return NULL;
+ }
+
+ return reg_val;
+}
+
+int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype)
+{
+ unsigned long *isa = vcpu->arch.isa;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ rtype);
+ void *reg_val = NULL;
+ size_t reg_size = KVM_REG_SIZE(reg->id);
+
+ if (rtype == KVM_REG_RISCV_VECTOR &&
+ riscv_isa_extension_available(isa, v)) {
+ reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
+ }
+
+ if (!reg_val)
+ return -EINVAL;
+
+ if (copy_to_user(uaddr, reg_val, reg_size))
+ return -EFAULT;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ unsigned long rtype)
+{
+ unsigned long *isa = vcpu->arch.isa;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ rtype);
+ void *reg_val = NULL;
+ size_t reg_size = KVM_REG_SIZE(reg->id);
+
+ if (rtype == KVM_REG_RISCV_VECTOR &&
+ riscv_isa_extension_available(isa, v)) {
+ reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
+ }
+
+ if (!reg_val)
+ return -EINVAL;
+
+ if (copy_from_user(reg_val, uaddr, reg_size))
+ return -EFAULT;
+
+ return 0;
+}
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 6ef15f78e80f..7e2b50c692c1 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -55,11 +55,129 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_riscv_aia_destroy_vm(kvm);
}
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irql,
+ bool line_status)
+{
+ if (!irqchip_in_kernel(kvm))
+ return -ENXIO;
+
+ return kvm_riscv_aia_inject_irq(kvm, irql->irq, irql->level);
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id,
+ int level, bool line_status)
+{
+ struct kvm_msi msi;
+
+ if (!level)
+ return -1;
+
+ msi.address_lo = e->msi.address_lo;
+ msi.address_hi = e->msi.address_hi;
+ msi.data = e->msi.data;
+ msi.flags = e->msi.flags;
+ msi.devid = e->msi.devid;
+
+ return kvm_riscv_aia_inject_msi(kvm, &msi);
+}
+
+static int kvm_riscv_set_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id,
+ int level, bool line_status)
+{
+ return kvm_riscv_aia_inject_irq(kvm, e->irqchip.pin, level);
+}
+
+int kvm_riscv_setup_default_irq_routing(struct kvm *kvm, u32 lines)
+{
+ struct kvm_irq_routing_entry *ents;
+ int i, rc;
+
+ ents = kcalloc(lines, sizeof(*ents), GFP_KERNEL);
+ if (!ents)
+ return -ENOMEM;
+
+ for (i = 0; i < lines; i++) {
+ ents[i].gsi = i;
+ ents[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+ ents[i].u.irqchip.irqchip = 0;
+ ents[i].u.irqchip.pin = i;
+ }
+ rc = kvm_set_irq_routing(kvm, ents, lines, 0);
+ kfree(ents);
+
+ return rc;
+}
+
+bool kvm_arch_can_set_irq_routing(struct kvm *kvm)
+{
+ return irqchip_in_kernel(kvm);
+}
+
+int kvm_set_routing_entry(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+{
+ int r = -EINVAL;
+
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ e->set = kvm_riscv_set_irq;
+ e->irqchip.irqchip = ue->u.irqchip.irqchip;
+ e->irqchip.pin = ue->u.irqchip.pin;
+ if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) ||
+ (e->irqchip.irqchip >= KVM_NR_IRQCHIPS))
+ goto out;
+ break;
+ case KVM_IRQ_ROUTING_MSI:
+ e->set = kvm_set_msi;
+ e->msi.address_lo = ue->u.msi.address_lo;
+ e->msi.address_hi = ue->u.msi.address_hi;
+ e->msi.data = ue->u.msi.data;
+ e->msi.flags = ue->flags;
+ e->msi.devid = ue->u.msi.devid;
+ break;
+ default:
+ goto out;
+ }
+ r = 0;
+out:
+ return r;
+}
+
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
+{
+ if (!level)
+ return -EWOULDBLOCK;
+
+ switch (e->type) {
+ case KVM_IRQ_ROUTING_MSI:
+ return kvm_set_msi(e, kvm, irq_source_id, level, line_status);
+
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ return kvm_riscv_set_irq(e, kvm, irq_source_id,
+ level, line_status);
+ }
+
+ return -EWOULDBLOCK;
+}
+
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+ return irqchip_in_kernel(kvm);
+}
+
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
switch (ext) {
+ case KVM_CAP_IRQCHIP:
+ r = kvm_riscv_aia_available();
+ break;
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index b85e9e82f082..9c454f90fd3d 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -13,8 +13,7 @@ endif
KCOV_INSTRUMENT_init.o := n
obj-y += init.o
-obj-y += extable.o
-obj-$(CONFIG_MMU) += fault.o pageattr.o
+obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o
obj-y += cacheflush.o
obj-y += context.o
obj-y += pgtable.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index fca532ddf3ec..fbc59b3f69f2 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -104,9 +104,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size);
unsigned int riscv_cboz_block_size;
EXPORT_SYMBOL_GPL(riscv_cboz_block_size);
-static void cbo_get_block_size(struct device_node *node,
- const char *name, u32 *block_size,
- unsigned long *first_hartid)
+static void __init cbo_get_block_size(struct device_node *node,
+ const char *name, u32 *block_size,
+ unsigned long *first_hartid)
{
unsigned long hartid;
u32 val;
@@ -126,7 +126,7 @@ static void cbo_get_block_size(struct device_node *node,
}
}
-void riscv_init_cbo_blocksizes(void)
+void __init riscv_init_cbo_blocksizes(void)
{
unsigned long cbom_hartid, cboz_hartid;
u32 cbom_block_size = 0, cboz_block_size = 0;
diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
index d919efab6eba..d51a75864e53 100644
--- a/arch/riscv/mm/dma-noncoherent.c
+++ b/arch/riscv/mm/dma-noncoherent.c
@@ -10,7 +10,7 @@
#include <linux/mm.h>
#include <asm/cacheflush.h>
-static bool noncoherent_supported;
+static bool noncoherent_supported __ro_after_init;
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
enum dma_data_direction dir)
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 8685f85a7474..6ea2cce4cc17 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -84,13 +84,13 @@ static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_f
BUG();
}
-static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+static inline void
+bad_area_nosemaphore(struct pt_regs *regs, int code, unsigned long addr)
{
/*
* Something tried to access memory that isn't in our memory map.
* Fix it, but check if it's kernel or user first.
*/
- mmap_read_unlock(mm);
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
do_trap(regs, SIGSEGV, code, addr);
@@ -100,6 +100,15 @@ static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code
no_context(regs, addr);
}
+static inline void
+bad_area(struct pt_regs *regs, struct mm_struct *mm, int code,
+ unsigned long addr)
+{
+ mmap_read_unlock(mm);
+
+ bad_area_nosemaphore(regs, code, addr);
+}
+
static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
{
pgd_t *pgd, *pgd_k;
@@ -238,24 +247,12 @@ void handle_page_fault(struct pt_regs *regs)
* only copy the information from the master page table,
* nothing more.
*/
- if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) {
+ if ((!IS_ENABLED(CONFIG_MMU) || !IS_ENABLED(CONFIG_64BIT)) &&
+ unlikely(addr >= VMALLOC_START && addr < VMALLOC_END)) {
vmalloc_fault(regs, code, addr);
return;
}
-#ifdef CONFIG_64BIT
- /*
- * Modules in 64bit kernels lie in their own virtual region which is not
- * in the vmalloc region, but dealing with page faults in this region
- * or the vmalloc region amounts to doing the same thing: checking that
- * the mapping exists in init_mm.pgd and updating user page table, so
- * just use vmalloc_fault.
- */
- if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) {
- vmalloc_fault(regs, code, addr);
- return;
- }
-#endif
/* Enable interrupts if they were enabled in the parent context. */
if (!regs_irqs_disabled(regs))
local_irq_enable();
@@ -286,24 +283,41 @@ void handle_page_fault(struct pt_regs *regs)
flags |= FAULT_FLAG_WRITE;
else if (cause == EXC_INST_PAGE_FAULT)
flags |= FAULT_FLAG_INSTRUCTION;
-retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, addr);
- if (unlikely(!vma)) {
- tsk->thread.bad_cause = cause;
- bad_area(regs, mm, code, addr);
- return;
+#ifdef CONFIG_PER_VMA_LOCK
+ if (!(flags & FAULT_FLAG_USER))
+ goto lock_mmap;
+
+ vma = lock_vma_under_rcu(mm, addr);
+ if (!vma)
+ goto lock_mmap;
+
+ if (unlikely(access_error(cause, vma))) {
+ vma_end_read(vma);
+ goto lock_mmap;
}
- if (likely(vma->vm_start <= addr))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
- tsk->thread.bad_cause = cause;
- bad_area(regs, mm, code, addr);
+
+ fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
+ vma_end_read(vma);
+
+ if (!(fault & VM_FAULT_RETRY)) {
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto done;
+ }
+ count_vm_vma_lock_event(VMA_LOCK_RETRY);
+
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ no_context(regs, addr);
return;
}
- if (unlikely(expand_stack(vma, addr))) {
+lock_mmap:
+#endif /* CONFIG_PER_VMA_LOCK */
+
+retry:
+ vma = lock_mm_and_find_vma(mm, addr, regs);
+ if (unlikely(!vma)) {
tsk->thread.bad_cause = cause;
- bad_area(regs, mm, code, addr);
+ bad_area_nosemaphore(regs, code, addr);
return;
}
@@ -311,7 +325,6 @@ retry:
* Ok, we have a good vm_area for this memory access, so
* we can handle it.
*/
-good_area:
code = SEGV_ACCERR;
if (unlikely(access_error(cause, vma))) {
@@ -355,6 +368,9 @@ good_area:
mmap_read_unlock(mm);
+#ifdef CONFIG_PER_VMA_LOCK
+done:
+#endif
if (unlikely(fault & VM_FAULT_ERROR)) {
tsk->thread.bad_cause = cause;
mm_fault_error(regs, addr, fault);
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index a163a3e0f0d4..96225a8533ad 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -3,6 +3,30 @@
#include <linux/err.h>
#ifdef CONFIG_RISCV_ISA_SVNAPOT
+pte_t huge_ptep_get(pte_t *ptep)
+{
+ unsigned long pte_num;
+ int i;
+ pte_t orig_pte = ptep_get(ptep);
+
+ if (!pte_present(orig_pte) || !pte_napot(orig_pte))
+ return orig_pte;
+
+ pte_num = napot_pte_num(napot_cont_order(orig_pte));
+
+ for (i = 0; i < pte_num; i++, ptep++) {
+ pte_t pte = ptep_get(ptep);
+
+ if (pte_dirty(pte))
+ orig_pte = pte_mkdirty(orig_pte);
+
+ if (pte_young(pte))
+ orig_pte = pte_mkyoung(orig_pte);
+ }
+
+ return orig_pte;
+}
+
pte_t *huge_pte_alloc(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long addr,
@@ -43,13 +67,17 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
for_each_napot_order(order) {
if (napot_cont_size(order) == sz) {
- pte = pte_alloc_map(mm, pmd, addr & napot_cont_mask(order));
+ pte = pte_alloc_huge(mm, pmd, addr & napot_cont_mask(order));
break;
}
}
out:
- WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte));
+ if (pte) {
+ pte_t pteval = ptep_get_lockless(pte);
+
+ WARN_ON_ONCE(pte_present(pteval) && !pte_huge(pteval));
+ }
return pte;
}
@@ -90,7 +118,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
for_each_napot_order(order) {
if (napot_cont_size(order) == sz) {
- pte = pte_offset_kernel(pmd, addr & napot_cont_mask(order));
+ pte = pte_offset_huge(pmd, addr & napot_cont_mask(order));
break;
}
}
@@ -218,6 +246,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
{
pte_t pte = ptep_get(ptep);
unsigned long order;
+ pte_t orig_pte;
int i, pte_num;
if (!pte_napot(pte)) {
@@ -228,9 +257,12 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
order = napot_cont_order(pte);
pte_num = napot_pte_num(order);
ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
+ orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);
+
+ orig_pte = pte_wrprotect(orig_pte);
for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
- ptep_set_wrprotect(mm, addr, ptep);
+ set_pte_at(mm, addr, ptep, orig_pte);
}
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 747e5b1ef02d..9ce504737d18 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -23,6 +23,7 @@
#ifdef CONFIG_RELOCATABLE
#include <linux/elf.h>
#endif
+#include <linux/kfence.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
@@ -266,7 +267,6 @@ static void __init setup_bootmem(void)
dma_contiguous_reserve(dma32_phys_limit);
if (IS_ENABLED(CONFIG_64BIT))
hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
- memblock_allow_resize();
}
#ifdef CONFIG_MMU
@@ -293,7 +293,7 @@ static const pgprot_t protection_map[16] = {
[VM_EXEC] = PAGE_EXEC,
[VM_EXEC | VM_READ] = PAGE_READ_EXEC,
[VM_EXEC | VM_WRITE] = PAGE_COPY_EXEC,
- [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_READ_EXEC,
+ [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_EXEC,
[VM_SHARED] = PAGE_NONE,
[VM_SHARED | VM_READ] = PAGE_READ,
[VM_SHARED | VM_WRITE] = PAGE_SHARED,
@@ -356,7 +356,7 @@ static phys_addr_t __init alloc_pte_late(uintptr_t va)
unsigned long vaddr;
vaddr = __get_free_page(GFP_KERNEL);
- BUG_ON(!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)));
+ BUG_ON(!vaddr || !pgtable_pte_page_ctor(virt_to_page((void *)vaddr)));
return __pa(vaddr);
}
@@ -439,7 +439,7 @@ static phys_addr_t __init alloc_pmd_late(uintptr_t va)
unsigned long vaddr;
vaddr = __get_free_page(GFP_KERNEL);
- BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page(vaddr)));
+ BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page((void *)vaddr)));
return __pa(vaddr);
}
@@ -659,18 +659,19 @@ void __init create_pgd_mapping(pgd_t *pgdp,
create_pgd_next_mapping(nextp, va, pa, sz, prot);
}
-static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
+static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va,
+ phys_addr_t size)
{
- if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
+ if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
return PGDIR_SIZE;
- if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE)
+ if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
return P4D_SIZE;
- if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE)
+ if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
return PUD_SIZE;
- if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE)
+ if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
return PMD_SIZE;
return PAGE_SIZE;
@@ -922,9 +923,9 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va,
uintptr_t dtb_pa)
{
+#ifndef CONFIG_BUILTIN_DTB
uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
-#ifndef CONFIG_BUILTIN_DTB
/* Make sure the fdt fixmap address is always aligned on PMD size */
BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE));
@@ -1167,14 +1168,16 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
}
static void __init create_linear_mapping_range(phys_addr_t start,
- phys_addr_t end)
+ phys_addr_t end,
+ uintptr_t fixed_map_size)
{
phys_addr_t pa;
uintptr_t va, map_size;
for (pa = start; pa < end; pa += map_size) {
va = (uintptr_t)__va(pa);
- map_size = best_map_size(pa, end - pa);
+ map_size = fixed_map_size ? fixed_map_size :
+ best_map_size(pa, va, end - pa);
create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
pgprot_from_va(va));
@@ -1184,6 +1187,7 @@ static void __init create_linear_mapping_range(phys_addr_t start,
static void __init create_linear_mapping_page_table(void)
{
phys_addr_t start, end;
+ phys_addr_t kfence_pool __maybe_unused;
u64 i;
#ifdef CONFIG_STRICT_KERNEL_RWX
@@ -1197,6 +1201,19 @@ static void __init create_linear_mapping_page_table(void)
memblock_mark_nomap(krodata_start, krodata_size);
#endif
+#ifdef CONFIG_KFENCE
+ /*
+ * kfence pool must be backed by PAGE_SIZE mappings, so allocate it
+ * before we setup the linear mapping so that we avoid using hugepages
+ * for this region.
+ */
+ kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+ BUG_ON(!kfence_pool);
+
+ memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
+ __kfence_pool = __va(kfence_pool);
+#endif
+
/* Map all memory banks in the linear mapping */
for_each_mem_range(i, &start, &end) {
if (start >= end)
@@ -1207,17 +1224,25 @@ static void __init create_linear_mapping_page_table(void)
if (end >= __pa(PAGE_OFFSET) + memory_limit)
end = __pa(PAGE_OFFSET) + memory_limit;
- create_linear_mapping_range(start, end);
+ create_linear_mapping_range(start, end, 0);
}
#ifdef CONFIG_STRICT_KERNEL_RWX
- create_linear_mapping_range(ktext_start, ktext_start + ktext_size);
+ create_linear_mapping_range(ktext_start, ktext_start + ktext_size, 0);
create_linear_mapping_range(krodata_start,
- krodata_start + krodata_size);
+ krodata_start + krodata_size, 0);
memblock_clear_nomap(ktext_start, ktext_size);
memblock_clear_nomap(krodata_start, krodata_size);
#endif
+
+#ifdef CONFIG_KFENCE
+ create_linear_mapping_range(kfence_pool,
+ kfence_pool + KFENCE_POOL_SIZE,
+ PAGE_SIZE);
+
+ memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
+#endif
}
static void __init setup_vm_final(void)
@@ -1321,7 +1346,7 @@ static void __init reserve_crashkernel(void)
*/
crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
search_start,
- min(search_end, (unsigned long) SZ_4G));
+ min(search_end, (unsigned long)(SZ_4G - 1)));
if (crash_base == 0) {
/* Try again without restricting region to 32bit addressible memory */
crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
@@ -1344,6 +1369,9 @@ void __init paging_init(void)
{
setup_bootmem();
setup_vm_final();
+
+ /* Depend on that Linear Mapping is ready */
+ memblock_allow_resize();
}
void __init misc_mem_init(void)
@@ -1363,3 +1391,61 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
return vmemmap_populate_basepages(start, end, node, NULL);
}
#endif
+
+#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+/*
+ * Pre-allocates page-table pages for a specific area in the kernel
+ * page-table. Only the level which needs to be synchronized between
+ * all page-tables is allocated because the synchronization can be
+ * expensive.
+ */
+static void __init preallocate_pgd_pages_range(unsigned long start, unsigned long end,
+ const char *area)
+{
+ unsigned long addr;
+ const char *lvl;
+
+ for (addr = start; addr < end && addr >= start; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ lvl = "p4d";
+ p4d = p4d_alloc(&init_mm, pgd, addr);
+ if (!p4d)
+ goto failed;
+
+ if (pgtable_l5_enabled)
+ continue;
+
+ lvl = "pud";
+ pud = pud_alloc(&init_mm, p4d, addr);
+ if (!pud)
+ goto failed;
+
+ if (pgtable_l4_enabled)
+ continue;
+
+ lvl = "pmd";
+ pmd = pmd_alloc(&init_mm, pud, addr);
+ if (!pmd)
+ goto failed;
+ }
+ return;
+
+failed:
+ /*
+ * The pages have to be there now or they will be missing in
+ * process page-tables later.
+ */
+ panic("Failed to pre-allocate %s pages for %s area\n", lvl, area);
+}
+
+void __init pgtable_cache_init(void)
+{
+ preallocate_pgd_pages_range(VMALLOC_START, VMALLOC_END, "vmalloc");
+ if (IS_ENABLED(CONFIG_MODULES))
+ preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "bpf/modules");
+}
+#endif
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index bf9802a63061..2717f5490428 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -69,7 +69,7 @@ struct rv_jit_context {
struct bpf_prog *prog;
u16 *insns; /* RV insns */
int ninsns;
- int body_len;
+ int prologue_len;
int epilogue_offset;
int *offset; /* BPF to RV */
int nexentries;
@@ -216,8 +216,8 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx)
int from, to;
off++; /* BPF branch is from PC+1, RV is from PC */
- from = (insn > 0) ? ctx->offset[insn - 1] : 0;
- to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
+ from = (insn > 0) ? ctx->offset[insn - 1] : ctx->prologue_len;
+ to = (insn + off > 0) ? ctx->offset[insn + off - 1] : ctx->prologue_len;
return ninsns_rvoff(to - from);
}
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 737baf8715da..7a26a3e1c73c 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -44,7 +44,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
unsigned int prog_size = 0, extable_size = 0;
bool tmp_blinded = false, extra_pass = false;
struct bpf_prog *tmp, *orig_prog = prog;
- int pass = 0, prev_ninsns = 0, prologue_len, i;
+ int pass = 0, prev_ninsns = 0, i;
struct rv_jit_data *jit_data;
struct rv_jit_context *ctx;
@@ -83,6 +83,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = orig_prog;
goto out_offset;
}
+
+ if (build_body(ctx, extra_pass, NULL)) {
+ prog = orig_prog;
+ goto out_offset;
+ }
+
for (i = 0; i < prog->len; i++) {
prev_ninsns += 32;
ctx->offset[i] = prev_ninsns;
@@ -91,12 +97,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
for (i = 0; i < NR_JIT_ITERATIONS; i++) {
pass++;
ctx->ninsns = 0;
+
+ bpf_jit_build_prologue(ctx);
+ ctx->prologue_len = ctx->ninsns;
+
if (build_body(ctx, extra_pass, ctx->offset)) {
prog = orig_prog;
goto out_offset;
}
- ctx->body_len = ctx->ninsns;
- bpf_jit_build_prologue(ctx);
+
ctx->epilogue_offset = ctx->ninsns;
bpf_jit_build_epilogue(ctx);
@@ -162,10 +171,8 @@ skip_init_ctx:
if (!prog->is_func || extra_pass) {
bpf_jit_binary_lock_ro(jit_data->header);
- prologue_len = ctx->epilogue_offset - ctx->body_len;
for (i = 0; i < prog->len; i++)
- ctx->offset[i] = ninsns_rvoff(prologue_len +
- ctx->offset[i]);
+ ctx->offset[i] = ninsns_rvoff(ctx->offset[i]);
bpf_prog_fill_jited_linfo(prog, ctx->offset);
out_offset:
kfree(ctx->offset);
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index 5730797a6b40..dc20e166983e 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -31,10 +31,15 @@ $(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
-CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
CFLAGS_string.o := -D__DISABLE_EXPORTS
CFLAGS_ctype.o := -D__DISABLE_EXPORTS
+# When profile-guided optimization is enabled, llvm emits two different
+# overlapping text sections, which is not supported by kexec. Remove profile
+# optimization flags.
+KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
+
# When linking purgatory.ro with -r unresolved symbols are not checked,
# also link a purgatory.chk binary without -r to check for unresolved symbols.
PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib